From 7d6e1a876c088941a8fdd9e1ffef62f1cca086fc Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:30:46 -0700 Subject: [PATCH 01/12] feat(web): show context-window usage in Ask Sourcebot Resolve each model's context window from the models.dev catalog (already fetched by the setup wizard) and bake it into chat message metadata. The Details card now renders a usage gauge from the latest step's input tokens. Models with no catalog entry (openai-compatible/self-hosted) fall back to the existing raw token count. --- .../web/src/app/api/(server)/ee/chat/route.ts | 7 ++ packages/web/src/ee/features/chat/agent.ts | 4 + .../components/chatThread/detailsCard.tsx | 43 ++++++- .../chat/modelContextWindow.server.ts | 109 ++++++++++++++++++ .../features/chat/modelContextWindow.test.ts | 98 ++++++++++++++++ packages/web/src/features/chat/types.ts | 5 + 6 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 packages/web/src/features/chat/modelContextWindow.server.ts create mode 100644 packages/web/src/features/chat/modelContextWindow.test.ts diff --git a/packages/web/src/app/api/(server)/ee/chat/route.ts b/packages/web/src/app/api/(server)/ee/chat/route.ts index 0f20ee8e3..7d16acc96 100644 --- a/packages/web/src/app/api/(server)/ee/chat/route.ts +++ b/packages/web/src/app/api/(server)/ee/chat/route.ts @@ -6,6 +6,7 @@ import { additionalChatRequestParamsSchema } from "@/features/chat/types"; import { getLanguageModelKey } from "@/features/chat/utils"; import { checkAskEntitlement, getConfiguredLanguageModels, isOwnerOfChat, updateChatMessages } from "@/features/chat/utils.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; +import { resolveContextWindow } from "@/features/chat/modelContextWindow.server"; import { apiHandler } from "@/lib/apiHandler"; import { ErrorCode } from "@/lib/errorCodes"; import { captureEvent } from "@/lib/posthog"; @@ -89,6 +90,11 @@ 
export const POST = apiHandler(async (req: NextRequest) => { const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig); + // Total context window for the selected model, used as the + // denominator for the UI's context-usage gauge. Undefined when + // unknown (e.g. openai-compatible / self-hosted models). + const contextWindow = await resolveContextWindow(languageModelConfig); + // No-op for non-Anthropic providers / when caching is disabled, so // it never perturbs other providers' requests. const promptCacheStrategy = getPromptCacheStrategy( @@ -139,6 +145,7 @@ export const POST = apiHandler(async (req: NextRequest) => { disabledMcpServerIds, model, modelName: languageModelConfig.displayName ?? languageModelConfig.model, + contextWindow, promptCacheStrategy, modelProviderOptions: providerOptions, modelTemperature: temperature, diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts index d2f3a4761..f376c7c6d 100644 --- a/packages/web/src/ee/features/chat/agent.ts +++ b/packages/web/src/ee/features/chat/agent.ts @@ -54,6 +54,8 @@ interface CreateMessageStreamResponseProps { disabledMcpServerIds?: string[]; model: AISDKLanguageModelV3; modelName: string; + // Total context window of the model in tokens, or undefined when unknown. + contextWindow?: number; promptCacheStrategy: PromptCacheStrategy; onFinish: UIMessageStreamOnFinishCallback; onError: (error: unknown) => string; @@ -73,6 +75,7 @@ export const createMessageStream = async ({ disabledMcpServerIds, model, modelName, + contextWindow, promptCacheStrategy, modelProviderOptions, modelTemperature, @@ -279,6 +282,7 @@ export const createMessageStream = async ({ // phases so earlier phases' steps are preserved in order. stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? 
[]), ...stepTokenUsage], modelName, + contextWindow, traceId, } }); diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index e95af69d4..a2161de70 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -9,7 +9,7 @@ import useCaptureEvent from '@/hooks/useCaptureEvent'; import { cn, getShortenedNumberDisplayString } from '@/lib/utils'; import isEqual from "fast-deep-equal/react"; import { useStickToBottom } from 'use-stick-to-bottom'; -import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react'; +import { Brain, ChevronDown, ChevronRight, Clock, Gauge, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react'; import { memo, ReactNode, useCallback, useEffect, useMemo, useState } from 'react'; import { usePrevious } from '@uidotdev/usehooks'; import { SBChatMessageMetadata, SBChatMessagePart, StepTokenUsageEntry } from '@/features/chat/types'; @@ -86,6 +86,20 @@ const DetailsCardComponent = ({ ? Math.round((cacheReadTokens / inputTokens) * 100) : 0; + // Context-window usage gauge. "In use" is the input the model saw on its + // most recent step — i.e. the full accumulated prompt occupying the window + // right now — not the cumulative totalInputTokens (a billing sum). The + // gauge is shown only when the model's window is known (resolved from the + // models.dev catalog); unknown windows degrade to the raw token count. + const stepTokenUsage = metadata?.stepTokenUsage; + const currentContextTokens = stepTokenUsage && stepTokenUsage.length > 0 + ? 
stepTokenUsage[stepTokenUsage.length - 1].inputTokens + : undefined; + const contextWindow = metadata?.contextWindow; + const contextUsagePercent = currentContextTokens !== undefined && contextWindow !== undefined && contextWindow > 0 + ? Math.min(100, Math.round((currentContextTokens / contextWindow) * 100)) + : undefined; + const handleExpandedChanged = useCallback((next: boolean) => { captureEvent('wa_chat_details_card_toggled', { chatId, isExpanded: next }); onExpandedChanged(next); @@ -193,6 +207,33 @@ const DetailsCardComponent = ({ )} )} + {contextUsagePercent !== undefined && currentContextTokens !== undefined && contextWindow !== undefined && ( + + +
+ +
+
= 90, + "bg-yellow-500": contextUsagePercent >= 75 && contextUsagePercent < 90, + "bg-foreground": contextUsagePercent < 75, + })} + style={{ width: `${contextUsagePercent}%` }} + /> +
+ + {getShortenedNumberDisplayString(currentContextTokens, 0)} / {getShortenedNumberDisplayString(contextWindow, 0)} ({contextUsagePercent}%) + +
+ + +
+ The most recent step's prompt used {currentContextTokens.toLocaleString()} of the model's {contextWindow.toLocaleString()}-token context window ({contextUsagePercent}%). +
+
+ + )} {metadata?.totalResponseTimeMs && (
diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts new file mode 100644 index 000000000..d15336e4c --- /dev/null +++ b/packages/web/src/features/chat/modelContextWindow.server.ts @@ -0,0 +1,109 @@ +import 'server-only'; + +import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; +import { createLogger } from '@sourcebot/shared'; + +const logger = createLogger('model-context-window'); + +// The same public, unauthenticated catalog the setup wizard already consumes +// (see packages/setupWizard/src/models.ts). Each model entry exposes a +// `limit.context` field holding the total context window in tokens. +const MODELS_DEV_API_URL = 'https://models.dev/api.json'; +const FETCH_TIMEOUT_MS = 8000; +// Re-fetch the (~2.4 MB) catalog at most once per this interval per server +// process. New models trickle in daily; a stale window for a few hours is fine. +const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; + +// Sourcebot provider id -> models.dev top-level catalog key. Only providers +// whose Sourcebot id differs from the models.dev id need an entry; everything +// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai, +// openrouter, google-vertex, google-vertex-anthropic) matches 1:1. 
+const PROVIDER_ID_OVERRIDES: Record = { + 'google-generative-ai': 'google', +}; + +type ModelsDevModel = { + id: string; + limit?: { + context?: number; + output?: number; + }; +}; + +type ModelsDevProvider = { + id: string; + models?: Record; +}; + +export type ModelsDevCatalog = Record; + +let catalogPromise: Promise | null = null; +let catalogFetchedAt = 0; + +const fetchCatalog = async (): Promise => { + try { + const response = await fetch(MODELS_DEV_API_URL, { + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }); + if (!response.ok) { + logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`); + return null; + } + return await response.json() as ModelsDevCatalog; + } catch (error) { + logger.warn(`Failed to fetch models.dev catalog: ${error}`); + return null; + } +}; + +const loadCatalog = async (): Promise => { + const now = Date.now(); + if (!catalogPromise || now - catalogFetchedAt > CATALOG_TTL_MS) { + catalogFetchedAt = now; + catalogPromise = fetchCatalog().then((catalog) => { + // Don't memoize failures — let the next caller retry instead of + // being stuck with a null catalog until the TTL expires. + if (!catalog) { + catalogPromise = null; + } + return catalog; + }); + } + return catalogPromise; +}; + +/** + * Pure lookup of a model's context window in a models.dev catalog. Separated + * from the network fetch so it can be unit-tested directly. + * + * Returns the total context window (input + output share it) in tokens, or + * `undefined` when the model isn't catalogued or has no usable window. + */ +export const lookupContextWindow = ( + catalog: ModelsDevCatalog | null, + config: Pick, +): number | undefined => { + if (!catalog) { + return undefined; + } + const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? 
config.provider; + const context = catalog[providerId]?.models?.[config.model]?.limit?.context; + // `limit` is schema-optional, and models.dev reports a 0 context window for + // non-text models (image/audio/etc.). Treat both as "unknown" so the UI + // gracefully omits the gauge rather than rendering a bogus denominator. + return typeof context === 'number' && context > 0 ? context : undefined; +}; + +/** + * Resolves the context window (in tokens) for a configured language model from + * the models.dev catalog. Returns `undefined` when unknown — e.g. arbitrary + * openai-compatible / self-hosted ids, provider/model ids that don't match the + * catalog's keys (bedrock ARNs, vertex `@`-suffixed ids, azure deployments), or + * when models.dev is unreachable. Never throws into the request path. + */ +export const resolveContextWindow = async ( + config: Pick, +): Promise => { + const catalog = await loadCatalog(); + return lookupContextWindow(catalog, config); +}; diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts new file mode 100644 index 000000000..e8adc3517 --- /dev/null +++ b/packages/web/src/features/chat/modelContextWindow.test.ts @@ -0,0 +1,98 @@ +import { afterEach, describe, expect, test, vi } from 'vitest'; +import type { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; + +vi.mock('server-only', () => ({ default: vi.fn() })); + +vi.mock('@sourcebot/shared', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})); + +import { lookupContextWindow, resolveContextWindow, type ModelsDevCatalog } from './modelContextWindow.server'; + +const catalog: ModelsDevCatalog = { + anthropic: { + id: 'anthropic', + models: { + 'claude-sonnet-4-5': { id: 'claude-sonnet-4-5', limit: { context: 200000, output: 64000 } }, + }, + }, + // models.dev keys Gemini under 'google', whereas Sourcebot's provider id is + // 
'google-generative-ai' — exercises PROVIDER_ID_OVERRIDES. + google: { + id: 'google', + models: { + 'gemini-2.5-pro': { id: 'gemini-2.5-pro', limit: { context: 1048576, output: 65536 } }, + }, + }, + openai: { + id: 'openai', + models: { + 'gpt-4.1': { id: 'gpt-4.1', limit: { context: 1047576 } }, + // Non-text model: models.dev reports a 0 context window. + 'gpt-image-1': { id: 'gpt-image-1', limit: { context: 0, output: 0 } }, + // Catalogued model with no `limit` object at all. + 'no-limit-model': { id: 'no-limit-model' }, + }, + }, +}; + +const model = (provider: string, modelId: string) => + ({ provider, model: modelId }) as Pick; + +describe('lookupContextWindow', () => { + test('returns the context window for a direct provider/model hit', () => { + expect(lookupContextWindow(catalog, model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); + expect(lookupContextWindow(catalog, model('openai', 'gpt-4.1'))).toBe(1047576); + }); + + test('maps google-generative-ai to the catalog\'s google key', () => { + expect(lookupContextWindow(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576); + }); + + test('returns undefined for an uncatalogued provider', () => { + expect(lookupContextWindow(catalog, model('mistral', 'mistral-large-latest'))).toBeUndefined(); + }); + + test('returns undefined for an uncatalogued model id (e.g. 
openai-compatible / self-hosted)', () => { + expect(lookupContextWindow(catalog, model('openai-compatible', 'my-local-model'))).toBeUndefined(); + expect(lookupContextWindow(catalog, model('anthropic', 'claude-unknown'))).toBeUndefined(); + }); + + test('treats a 0 context window (non-text models) as unknown', () => { + expect(lookupContextWindow(catalog, model('openai', 'gpt-image-1'))).toBeUndefined(); + }); + + test('treats a missing limit object as unknown', () => { + expect(lookupContextWindow(catalog, model('openai', 'no-limit-model'))).toBeUndefined(); + }); + + test('returns undefined when the catalog is null (fetch failed / unreachable)', () => { + expect(lookupContextWindow(null, model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + }); +}); + +describe('resolveContextWindow', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + test('fetches the catalog once and resolves windows (incl. provider mapping)', async () => { + const fetchMock = vi.fn(async () => ({ + ok: true, + json: async () => catalog, + }) as unknown as Response); + vi.stubGlobal('fetch', fetchMock); + + expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); + // Subsequent lookups reuse the cached catalog rather than refetching. 
+ expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576); + expect(await resolveContextWindow(model('openai-compatible', 'my-local-model'))).toBeUndefined(); + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 38a737a09..ca01d9d22 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -59,6 +59,11 @@ export const sbChatMessageMetadataSchema = z.object({ totalCacheReadTokens: z.number().optional(), totalCacheWriteTokens: z.number().optional(), totalResponseTimeMs: z.number().optional(), + // Total context window of the model used for this turn, in tokens, resolved + // from the models.dev catalog. Undefined when the window is unknown (e.g. + // openai-compatible / self-hosted ids, uncatalogued models). Baked into the + // message so the gauge denominator stays stable across catalog changes. + contextWindow: z.number().optional(), feedback: z.array(z.object({ type: z.enum(['like', 'dislike']), timestamp: z.string(), // ISO date string From af5de37c067d0a707888ee79e83cb6b28f1657d4 Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 12:26:55 -0700 Subject: [PATCH 02/12] fix(web): resolve contextWindow in the programmatic askCodebase path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit createMessageStream's MCP/programmatic caller omitted contextWindow, so chats created via ask_codebase rendered the Details card without a usage gauge even for catalogued models — unlike the same chat created from the web API. Resolve it from the already-available languageModelConfig so the gauge is deterministic per model, not per entry point. 
--- packages/web/src/ee/features/mcp/askCodebase.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 4b7cfb7b0..e2b234037 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -2,6 +2,7 @@ import { sew } from "@/middleware/sew"; import { getConfiguredLanguageModels, updateChatMessages, checkAskEntitlement } from "@/features/chat/utils.server"; import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; +import { resolveContextWindow } from "@/features/chat/modelContextWindow.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; import { ErrorCode } from "@/lib/errorCodes"; @@ -84,6 +85,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise Date: Thu, 25 Jun 2026 14:56:19 -0700 Subject: [PATCH 03/12] feat(web): render context-window usage as a colored ring gauge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the horizontal bar with a circular ring showing the percentage inside and the used / total token counts beside it. The arc and percentage are colored by usage — green below 70%, yellow from 70%, red from 90%. 
--- .../components/chatThread/detailsCard.tsx | 91 +++++++++++++++---- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index a2161de70..57f8352fd 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -9,7 +9,7 @@ import useCaptureEvent from '@/hooks/useCaptureEvent'; import { cn, getShortenedNumberDisplayString } from '@/lib/utils'; import isEqual from "fast-deep-equal/react"; import { useStickToBottom } from 'use-stick-to-bottom'; -import { Brain, ChevronDown, ChevronRight, Clock, Gauge, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react'; +import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react'; import { memo, ReactNode, useCallback, useEffect, useMemo, useState } from 'react'; import { usePrevious } from '@uidotdev/usehooks'; import { SBChatMessageMetadata, SBChatMessagePart, StepTokenUsageEntry } from '@/features/chat/types'; @@ -210,21 +210,12 @@ const DetailsCardComponent = ({ {contextUsagePercent !== undefined && currentContextTokens !== undefined && contextWindow !== undefined && ( -
- -
-
= 90, - "bg-yellow-500": contextUsagePercent >= 75 && contextUsagePercent < 90, - "bg-foreground": contextUsagePercent < 75, - })} - style={{ width: `${contextUsagePercent}%` }} - /> -
- - {getShortenedNumberDisplayString(currentContextTokens, 0)} / {getShortenedNumberDisplayString(contextWindow, 0)} ({contextUsagePercent}%) - +
+
@@ -408,6 +399,74 @@ const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry, ); } + +// Usage thresholds for the context-window gauge. Below `YELLOW` the window has +// plenty of headroom (green); past `RED` it's nearly full (red). +const CONTEXT_USAGE_YELLOW_PERCENT = 70; +const CONTEXT_USAGE_RED_PERCENT = 90; + +const getContextUsageColorClass = (percent: number): string => { + if (percent >= CONTEXT_USAGE_RED_PERCENT) { + return "text-red-500"; + } + if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) { + return "text-yellow-500"; + } + return "text-green-500"; +}; + +// A circular ring showing how much of the model's context window the most +// recent step occupies, with the percentage inside the ring and the +// " / " token counts beside it. The progress arc and percentage +// share a single usage-based color (green/yellow/red) over a neutral track. +const ContextWindowGauge = ({ used, total, percent }: { used: number, total: number, percent: number }) => { + const size = 34; + const strokeWidth = 4; + const radius = (size - strokeWidth) / 2; + const circumference = 2 * Math.PI * radius; + const dashOffset = circumference * (1 - Math.min(100, percent) / 100); + const colorClass = getContextUsageColorClass(percent); + + return ( +
+
+ + {/* Neutral track. */} + + {/* Progress arc. */} + + + + {percent}% + +
+ + {getShortenedNumberDisplayString(used, 0).toUpperCase()} + / {getShortenedNumberDisplayString(total, 0).toUpperCase()} + +
+ ); +} + type GuardedToolType = | 'tool-read_file' | 'tool-grep' From c72acdab343a5dc048b6b0b21e693416f55c33b5 Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 15:02:12 -0700 Subject: [PATCH 04/12] feat(web): make the context-window gauge a compact inline indicator Shrink the ring and move the percentage beside it, reading "% of " instead of a number-in-ring. The arc and percentage stay colored by usage (green/yellow/red). --- .../components/chatThread/detailsCard.tsx | 78 +++++++++---------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index 57f8352fd..cd37ffaf6 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -212,7 +212,6 @@ const DetailsCardComponent = ({
@@ -415,54 +414,47 @@ const getContextUsageColorClass = (percent: number): string => { return "text-green-500"; }; -// A circular ring showing how much of the model's context window the most -// recent step occupies, with the percentage inside the ring and the -// " / " token counts beside it. The progress arc and percentage -// share a single usage-based color (green/yellow/red) over a neutral track. -const ContextWindowGauge = ({ used, total, percent }: { used: number, total: number, percent: number }) => { - const size = 34; - const strokeWidth = 4; +// A compact context-window indicator: a small ring whose arc tracks usage, +// followed by the usage percentage and the model's total window size +// ("% of "). The ring and percentage share a single +// usage-based color (green/yellow/red) over a neutral track. +const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => { + const size = 18; + const strokeWidth = 2.5; const radius = (size - strokeWidth) / 2; const circumference = 2 * Math.PI * radius; const dashOffset = circumference * (1 - Math.min(100, percent) / 100); const colorClass = getContextUsageColorClass(percent); return ( -
-
- - {/* Neutral track. */} - - {/* Progress arc. */} - - - - {percent}% - -
- - {getShortenedNumberDisplayString(used, 0).toUpperCase()} - / {getShortenedNumberDisplayString(total, 0).toUpperCase()} - +
+ + {/* Neutral track. */} + + {/* Progress arc. */} + + + {percent}% + of {getShortenedNumberDisplayString(total, 0).toUpperCase()}
); } From 1b5c6c5cb5b55f6b8d5a2908c29f8e169958daf6 Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 15:09:35 -0700 Subject: [PATCH 05/12] feat(web): shrink context gauge ring, gray track, desaturated green Reduce the ring to 14px, switch the track to a solid palette gray (the theme tokens lack an alpha channel, so /opacity on them was ignored and the track rendered at full brightness), and use a desaturated sage green for the in-range percentage. --- .../chat/components/chatThread/detailsCard.tsx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index cd37ffaf6..d879598bf 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -411,7 +411,8 @@ const getContextUsageColorClass = (percent: number): string => { if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) { return "text-yellow-500"; } - return "text-green-500"; + // Desaturated sage green rather than the neon green-500. + return "text-[#6cb38f]"; }; // A compact context-window indicator: a small ring whose arc tracks usage, @@ -419,8 +420,8 @@ const getContextUsageColorClass = (percent: number): string => { // ("% of "). The ring and percentage share a single // usage-based color (green/yellow/red) over a neutral track. const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => { - const size = 18; - const strokeWidth = 2.5; + const size = 14; + const strokeWidth = 2; const radius = (size - strokeWidth) / 2; const circumference = 2 * Math.PI * radius; const dashOffset = circumference * (1 - Math.min(100, percent) / 100); @@ -429,7 +430,9 @@ const ContextWindowGauge = ({ total, percent }: { total: number, percent: number return (
- {/* Neutral track. */} + {/* Neutral gray track. (Theme tokens here are defined without an + alpha channel, so an /opacity modifier on them is silently + ignored — use a solid palette gray instead.) */} {/* Progress arc. */} Date: Thu, 25 Jun 2026 19:59:09 -0700 Subject: [PATCH 06/12] chore(web): trim verbose comments on the context-window code --- packages/web/src/app/api/(server)/ee/chat/route.ts | 2 +- packages/web/src/ee/features/chat/agent.ts | 1 - .../features/chat/components/chatThread/detailsCard.tsx | 8 +------- packages/web/src/features/chat/types.ts | 4 ---- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/packages/web/src/app/api/(server)/ee/chat/route.ts b/packages/web/src/app/api/(server)/ee/chat/route.ts index 7d16acc96..cbb11f06e 100644 --- a/packages/web/src/app/api/(server)/ee/chat/route.ts +++ b/packages/web/src/app/api/(server)/ee/chat/route.ts @@ -92,7 +92,7 @@ export const POST = apiHandler(async (req: NextRequest) => { // Total context window for the selected model, used as the // denominator for the UI's context-usage gauge. Undefined when - // unknown (e.g. openai-compatible / self-hosted models). + // unknown (e.g. self-hosted models). const contextWindow = await resolveContextWindow(languageModelConfig); // No-op for non-Anthropic providers / when caching is disabled, so diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts index f376c7c6d..8f5daa749 100644 --- a/packages/web/src/ee/features/chat/agent.ts +++ b/packages/web/src/ee/features/chat/agent.ts @@ -54,7 +54,6 @@ interface CreateMessageStreamResponseProps { disabledMcpServerIds?: string[]; model: AISDKLanguageModelV3; modelName: string; - // Total context window of the model in tokens, or undefined when unknown. 
contextWindow?: number; promptCacheStrategy: PromptCacheStrategy; onFinish: UIMessageStreamOnFinishCallback; diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index d879598bf..3715f8632 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -88,9 +88,7 @@ const DetailsCardComponent = ({ // Context-window usage gauge. "In use" is the input the model saw on its // most recent step — i.e. the full accumulated prompt occupying the window - // right now — not the cumulative totalInputTokens (a billing sum). The - // gauge is shown only when the model's window is known (resolved from the - // models.dev catalog); unknown windows degrade to the raw token count. + // right now — not the cumulative totalInputTokens. const stepTokenUsage = metadata?.stepTokenUsage; const currentContextTokens = stepTokenUsage && stepTokenUsage.length > 0 ? stepTokenUsage[stepTokenUsage.length - 1].inputTokens @@ -415,10 +413,6 @@ const getContextUsageColorClass = (percent: number): string => { return "text-[#6cb38f]"; }; -// A compact context-window indicator: a small ring whose arc tracks usage, -// followed by the usage percentage and the model's total window size -// ("% of "). The ring and percentage share a single -// usage-based color (green/yellow/red) over a neutral track. 
const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => { const size = 14; const strokeWidth = 2; diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index ca01d9d22..dc0758462 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -59,10 +59,6 @@ export const sbChatMessageMetadataSchema = z.object({ totalCacheReadTokens: z.number().optional(), totalCacheWriteTokens: z.number().optional(), totalResponseTimeMs: z.number().optional(), - // Total context window of the model used for this turn, in tokens, resolved - // from the models.dev catalog. Undefined when the window is unknown (e.g. - // openai-compatible / self-hosted ids, uncatalogued models). Baked into the - // message so the gauge denominator stays stable across catalog changes. contextWindow: z.number().optional(), feedback: z.array(z.object({ type: z.enum(['like', 'dislike']), From 8f361e7d2fcc139fc2051dde94cf198b9788d7b3 Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:07:29 -0700 Subject: [PATCH 07/12] docs: add changelog entry for the context-window usage gauge --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 689718d36..671b6aaa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) +- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. 
[#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) From 2a9e9e35a7536d2195bfe949a2c94df8cb4c6745 Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:25:20 -0700 Subject: [PATCH 08/12] chore(web): trim comments and lower context-gauge color thresholds Also drop the gauge's yellow/red thresholds from 70/90 to 50/80 so the ring shifts color earlier as the context fills. --- .../chat/components/chatThread/detailsCard.tsx | 11 +++-------- .../src/features/chat/modelContextWindow.server.ts | 11 ++--------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index 3715f8632..7b6c7867f 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -397,10 +397,8 @@ const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry, } -// Usage thresholds for the context-window gauge. Below `YELLOW` the window has -// plenty of headroom (green); past `RED` it's nearly full (red). -const CONTEXT_USAGE_YELLOW_PERCENT = 70; -const CONTEXT_USAGE_RED_PERCENT = 90; +const CONTEXT_USAGE_YELLOW_PERCENT = 50; +const CONTEXT_USAGE_RED_PERCENT = 80; const getContextUsageColorClass = (percent: number): string => { if (percent >= CONTEXT_USAGE_RED_PERCENT) { @@ -409,7 +407,6 @@ const getContextUsageColorClass = (percent: number): string => { if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) { return "text-yellow-500"; } - // Desaturated sage green rather than the neon green-500. 
return "text-[#6cb38f]"; }; @@ -424,9 +421,7 @@ const ContextWindowGauge = ({ total, percent }: { total: number, percent: number return (
- {/* Neutral gray track. (Theme tokens here are defined without an - alpha channel, so an /opacity modifier on them is silently - ignored — use a solid palette gray instead.) */} + {/* Neutral gray track. */} => { * Pure lookup of a model's context window in a models.dev catalog. Separated * from the network fetch so it can be unit-tested directly. * - * Returns the total context window (input + output share it) in tokens, or - * `undefined` when the model isn't catalogued or has no usable window. + * Returns the total context window in tokens, or `undefined` when the model + * isn't catalogued or has no usable window. */ export const lookupContextWindow = ( catalog: ModelsDevCatalog | null, @@ -94,13 +94,6 @@ export const lookupContextWindow = ( return typeof context === 'number' && context > 0 ? context : undefined; }; -/** - * Resolves the context window (in tokens) for a configured language model from - * the models.dev catalog. Returns `undefined` when unknown — e.g. arbitrary - * openai-compatible / self-hosted ids, provider/model ids that don't match the - * catalog's keys (bedrock ARNs, vertex `@`-suffixed ids, azure deployments), or - * when models.dev is unreachable. Never throws into the request path. - */ export const resolveContextWindow = async ( config: Pick, ): Promise => { From 15af04c90cb2f96f4b9cb494389a45b2b252c1fd Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:30:28 -0700 Subject: [PATCH 09/12] docs: use placeholder PR number in context-window changelog entry Real PR number to be filled in once the PR is opened. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671b6aaa1..f9cf81007 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) -- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) +- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#XXXX](https://github.com/sourcebot-dev/sourcebot/pull/XXXX) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) From e4b54e449e4237c0cbb6b4a070da6b2f0fced43c Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:41:18 -0700 Subject: [PATCH 10/12] docs: fill in PR number for context-window changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9cf81007..671b6aaa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. 
[#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) -- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#XXXX](https://github.com/sourcebot-dev/sourcebot/pull/XXXX) +- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) From 32089ba363d8ff97b0aac387ac6834cadd5656fa Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:59:29 -0700 Subject: [PATCH 11/12] fix(web): serve last-known-good models.dev catalog on fetch failure Previously a failed fetch reset the cache to null, so every chat send re-attempted the fetch and blocked up to the 8s timeout during a models.dev outage; a failed TTL refresh also discarded the previously-cached catalog. Switch loadCatalog to stale-while-revalidate: after the first successful load the request path never blocks (it serves the last-known-good catalog and refreshes in the background), failed refreshes keep the cached value, and a 60s negative-cache window bounds retries during an outage. 
--- .../chat/modelContextWindow.server.ts | 42 +++++++++--- .../features/chat/modelContextWindow.test.ts | 64 +++++++++++++++++++ 2 files changed, 97 insertions(+), 9 deletions(-) diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts index 62b2345c9..0e70dc04f 100644 --- a/packages/web/src/features/chat/modelContextWindow.server.ts +++ b/packages/web/src/features/chat/modelContextWindow.server.ts @@ -13,6 +13,9 @@ const FETCH_TIMEOUT_MS = 8000; // Re-fetch the (~2.4 MB) catalog at most once per this interval per server // process. New models trickle in daily; a stale window for a few hours is fine. const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; +// After a failed fetch, don't reattempt for this long. Without it, an outage in +// models.dev would make every chat send pay the fetch timeout on the request path. +const NEGATIVE_CACHE_MS = 60 * 1000; // Sourcebot provider id -> models.dev top-level catalog key. Only providers // whose Sourcebot id differs from the models.dev id need an entry; everything @@ -37,8 +40,14 @@ type ModelsDevProvider = { export type ModelsDevCatalog = Record<string, ModelsDevProvider>; -let catalogPromise: Promise<ModelsDevCatalog | null> | null = null; +// Last successfully-fetched catalog. Served while fresh, and kept as a fallback +// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL), +// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and +// `inFlightFetch` dedupes concurrent fetches.
+let cachedCatalog: ModelsDevCatalog | null = null; let catalogFetchedAt = 0; +let lastFailedAt = 0; +let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null; const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => { try { @@ -58,18 +67,33 @@ const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => { const loadCatalog = async (): Promise<ModelsDevCatalog | null> => { const now = Date.now(); - if (!catalogPromise || now - catalogFetchedAt > CATALOG_TTL_MS) { - catalogFetchedAt = now; - catalogPromise = fetchCatalog().then((catalog) => { - // Don't memoize failures — let the next caller retry instead of - // being stuck with a null catalog until the TTL expires. - if (!catalog) { - catalogPromise = null; + const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS; + const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS; + + // Kick off a (deduped) refresh when the cache is stale/empty and we're not + // within the post-failure backoff window. On success it replaces the cache; + // on failure it only records the failure time, leaving the last-known-good + // catalog intact. + if (!isFresh && !isBackingOff && !inFlightFetch) { + inFlightFetch = fetchCatalog().then((catalog) => { + if (catalog) { + cachedCatalog = catalog; + catalogFetchedAt = Date.now(); + } else { + lastFailedAt = Date.now(); } + inFlightFetch = null; return catalog; }); } - return catalogPromise; + + // Once a catalog has loaded once, never block the request path on the + // network: serve the last-known-good value (even if stale) and let any + // refresh settle in the background. Only the very first load awaits. + if (cachedCatalog !== null) { + return cachedCatalog; + } + return inFlightFetch ??
null; }; /** diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts index e8adc3517..9476820ae 100644 --- a/packages/web/src/features/chat/modelContextWindow.test.ts +++ b/packages/web/src/features/chat/modelContextWindow.test.ts @@ -96,3 +96,67 @@ describe('resolveContextWindow', () => { expect(fetchMock).toHaveBeenCalledTimes(1); }); }); + +describe('resolveContextWindow resilience', () => { + afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + vi.resetModules(); + }); + + // Re-import the module so each scenario starts with fresh internal cache state. + const importFresh = async () => { + vi.resetModules(); + return await import('./modelContextWindow.server'); + }; + + test('negative-caches failures instead of refetching on every call', async () => { + const fetchMock = vi.fn(async () => ({ + ok: false, + status: 503, + statusText: 'Service Unavailable', + }) as unknown as Response); + vi.stubGlobal('fetch', fetchMock); + + const mod = await importFresh(); + + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + expect(await mod.resolveContextWindow(model('openai', 'gpt-4.1'))).toBeUndefined(); + + // Only the first attempt hit the network; the rest were short-circuited + // by the negative-cache window, so chat sends don't repeatedly block. + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + test('preserves the last-known-good catalog when a refresh fails', async () => { + let nowMs = 1_700_000_000_000; + vi.spyOn(Date, 'now').mockImplementation(() => nowMs); + + let shouldFail = false; + const fetchMock = vi.fn(async () => (shouldFail + ? 
{ ok: false, status: 503, statusText: 'Service Unavailable' } + : { ok: true, json: async () => catalog }) as unknown as Response); + vi.stubGlobal('fetch', fetchMock); + + const mod = await importFresh(); + + // First load populates the cache. + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); + expect(fetchMock).toHaveBeenCalledTimes(1); + + // Advance past the TTL and make every refresh fail. + nowMs += 7 * 60 * 60 * 1000; + shouldFail = true; + + // Stale-while-revalidate: serves the cached value and refreshes in the + // background (which fails). + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); + // Let the background refresh settle. + await new Promise((resolve) => setTimeout(resolve, 0)); + expect(fetchMock).toHaveBeenCalledTimes(2); + + // The failed refresh must not have discarded the good catalog. + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); + }); +}); From 28f61eb6c38345ca769a5d0b45b28283562737af Mon Sep 17 00:00:00 2001 From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com> Date: Fri, 26 Jun 2026 09:39:08 -0700 Subject: [PATCH 12/12] docs: mark context-window gauge changelog entry as [EE] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671b6aaa1..e6d3a2c05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) -- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. 
[#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) +- [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)