Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
- [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)

### Fixed
- Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)
Expand Down
7 changes: 7 additions & 0 deletions packages/web/src/app/api/(server)/ee/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { additionalChatRequestParamsSchema } from "@/features/chat/types";
import { getLanguageModelKey } from "@/features/chat/utils";
import { checkAskEntitlement, getConfiguredLanguageModels, isOwnerOfChat, updateChatMessages } from "@/features/chat/utils.server";
import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
import { apiHandler } from "@/lib/apiHandler";
import { ErrorCode } from "@/lib/errorCodes";
import { captureEvent } from "@/lib/posthog";
Expand Down Expand Up @@ -89,6 +90,11 @@ export const POST = apiHandler(async (req: NextRequest) => {

const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);

// Total context window for the selected model, used as the
// denominator for the UI's context-usage gauge. Undefined when
// unknown (e.g. self-hosted models).
const contextWindow = await resolveContextWindow(languageModelConfig);

// No-op for non-Anthropic providers / when caching is disabled, so
// it never perturbs other providers' requests.
const promptCacheStrategy = getPromptCacheStrategy(
Expand Down Expand Up @@ -139,6 +145,7 @@ export const POST = apiHandler(async (req: NextRequest) => {
disabledMcpServerIds,
model,
modelName: languageModelConfig.displayName ?? languageModelConfig.model,
contextWindow,
promptCacheStrategy,
modelProviderOptions: providerOptions,
modelTemperature: temperature,
Expand Down
3 changes: 3 additions & 0 deletions packages/web/src/ee/features/chat/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ interface CreateMessageStreamResponseProps {
disabledMcpServerIds?: string[];
model: AISDKLanguageModelV3;
modelName: string;
contextWindow?: number;
promptCacheStrategy: PromptCacheStrategy;
onFinish: UIMessageStreamOnFinishCallback<SBChatMessage>;
onError: (error: unknown) => string;
Expand All @@ -73,6 +74,7 @@ export const createMessageStream = async ({
disabledMcpServerIds,
model,
modelName,
contextWindow,
promptCacheStrategy,
modelProviderOptions,
modelTemperature,
Expand Down Expand Up @@ -279,6 +281,7 @@ export const createMessageStream = async ({
// phases so earlier phases' steps are preserved in order.
stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage],
modelName,
contextWindow,
traceId,
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@ const DetailsCardComponent = ({
? Math.round((cacheReadTokens / inputTokens) * 100)
: 0;

// Context-window usage gauge. "In use" is the input the model saw on its
// most recent step — i.e. the full accumulated prompt occupying the window
// right now — not the cumulative totalInputTokens.
const stepTokenUsage = metadata?.stepTokenUsage;
const currentContextTokens = stepTokenUsage && stepTokenUsage.length > 0
? stepTokenUsage[stepTokenUsage.length - 1].inputTokens
: undefined;
const contextWindow = metadata?.contextWindow;
const contextUsagePercent = currentContextTokens !== undefined && contextWindow !== undefined && contextWindow > 0
? Math.min(100, Math.round((currentContextTokens / contextWindow) * 100))
: undefined;

const handleExpandedChanged = useCallback((next: boolean) => {
captureEvent('wa_chat_details_card_toggled', { chatId, isExpanded: next });
onExpandedChanged(next);
Expand Down Expand Up @@ -193,6 +205,23 @@ const DetailsCardComponent = ({
)}
</div>
)}
{contextUsagePercent !== undefined && currentContextTokens !== undefined && contextWindow !== undefined && (
<Tooltip>
<TooltipTrigger asChild>
<div className="cursor-help">
<ContextWindowGauge
total={contextWindow}
percent={contextUsagePercent}
/>
</div>
</TooltipTrigger>
<TooltipContent side="bottom">
<div className="max-w-xs text-xs">
The most recent step&apos;s prompt used {currentContextTokens.toLocaleString()} of the model&apos;s {contextWindow.toLocaleString()}-token context window ({contextUsagePercent}%).
</div>
</TooltipContent>
</Tooltip>
)}
{metadata?.totalResponseTimeMs && (
<div className="flex items-center text-xs">
<Clock className="w-3 h-3 mr-1 flex-shrink-0" />
Expand Down Expand Up @@ -367,6 +396,61 @@ const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry,
);
}


// Usage thresholds, in percent of the context window, at which the gauge
// switches color. Both are inclusive lower bounds (compared with `>=`).
const CONTEXT_USAGE_YELLOW_PERCENT = 50;
const CONTEXT_USAGE_RED_PERCENT = 80;

/**
 * Picks the text-color CSS class for a context-usage percentage.
 *
 * @param percent - Context-window usage as a percentage.
 * @returns "text-red-500" at or above the red threshold, "text-yellow-500" at
 * or above the yellow threshold, and "text-[#6cb38f]" otherwise.
 */
const getContextUsageColorClass = (percent: number): string => {
// Thresholds are tested from highest to lowest so the most severe color wins.
if (percent >= CONTEXT_USAGE_RED_PERCENT) {
return "text-red-500";
}
if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) {
return "text-yellow-500";
}
return "text-[#6cb38f]";

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to confirm, does this look good in light mode?

};

/**
 * Compact ring gauge showing how much of a model's context window is in use.
 *
 * Renders a small SVG ring (a neutral track plus a colored progress arc),
 * followed by the usage percentage and the shortened total window size.
 *
 * @param total - Total context window size in tokens; shown as "of <TOTAL>".
 * @param percent - Usage percentage. The arc is capped at a full ring; the
 * text label shows the value as passed in.
 */
const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => {
    // Ring geometry.
    const size = 14;
    const strokeWidth = 2;
    const center = size / 2;
    const radius = (size - strokeWidth) / 2;
    const circumference = 2 * Math.PI * radius;

    // The dash offset is the part of the ring left undrawn.
    const filledPercent = Math.min(100, percent);
    const dashOffset = circumference * (1 - filledPercent / 100);

    const colorClass = getContextUsageColorClass(percent);
    const totalLabel = getShortenedNumberDisplayString(total, 0).toUpperCase();

    // Attributes shared by the track and the progress arc.
    const ringProps = {
        cx: center,
        cy: center,
        r: radius,
        fill: "none",
        stroke: "currentColor",
        strokeWidth,
    };

    return (
        <div className="flex items-center gap-1.5 text-xs whitespace-nowrap">
            <svg width={size} height={size} className="-rotate-90 flex-shrink-0">
                {/* Neutral gray track. */}
                <circle {...ringProps} className="text-zinc-500" />
                {/* Progress arc. */}
                <circle
                    {...ringProps}
                    strokeLinecap="round"
                    strokeDasharray={circumference}
                    strokeDashoffset={dashOffset}
                    className={cn("transition-all duration-300", colorClass)}
                />
            </svg>
            <span className={cn("font-semibold", colorClass)}>{percent}%</span>
            <span className="text-muted-foreground">of {totalLabel}</span>
        </div>
    );
}

type GuardedToolType =
| 'tool-read_file'
| 'tool-grep'
Expand Down
3 changes: 3 additions & 0 deletions packages/web/src/ee/features/mcp/askCodebase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { sew } from "@/middleware/sew";
import { getConfiguredLanguageModels, updateChatMessages, checkAskEntitlement } from "@/features/chat/utils.server";
import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server";
import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types";
import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils";
import { ErrorCode } from "@/lib/errorCodes";
Expand Down Expand Up @@ -84,6 +85,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul

const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);
const modelName = languageModelConfig.displayName ?? languageModelConfig.model;
const contextWindow = await resolveContextWindow(languageModelConfig);

// No-op for non-Anthropic providers / when caching is disabled.
const promptCacheStrategy = getPromptCacheStrategy(
Expand Down Expand Up @@ -182,6 +184,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
prisma,
model,
modelName,
contextWindow,
promptCacheStrategy,
modelProviderOptions: providerOptions,
modelTemperature: temperature,
Expand Down
126 changes: 126 additions & 0 deletions packages/web/src/features/chat/modelContextWindow.server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import 'server-only';

import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
import { createLogger } from '@sourcebot/shared';

// Module-scoped logger; used to warn when the catalog fetch fails.
const logger = createLogger('model-context-window');

// The same public, unauthenticated catalog the setup wizard already consumes
// (see packages/setupWizard/src/models.ts). Each model entry exposes a
// `limit.context` field holding the total context window in tokens.
const MODELS_DEV_API_URL = 'https://models.dev/api.json';
// Abort the catalog request if it has not completed within 8 seconds.
const FETCH_TIMEOUT_MS = 8000;
// Cache lifetime (6 hours): the (~2.4 MB) catalog is re-fetched at most once
// per interval per server process. New models trickle in daily; a stale window
// for a few hours is fine.
const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
// Backoff after a failed fetch (60 seconds): no new attempt is made during this
// window. Without it, an outage in models.dev would make every chat send pay
// the fetch timeout on the request path.
const NEGATIVE_CACHE_MS = 60 * 1000;

// Sourcebot provider id -> models.dev top-level catalog key. Only providers
// whose Sourcebot id differs from the models.dev id need an entry; everything
// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai,
// openrouter, google-vertex, google-vertex-anthropic) matches 1:1.
const PROVIDER_ID_OVERRIDES: Record<string, string> = {
'google-generative-ai': 'google',
};

// Subset of a models.dev model entry that this module relies on.
type ModelsDevModel = {
id: string;
// Optional in the schema; lookups treat a missing `limit` as "unknown".
limit?: {
// Total context window, in tokens.
context?: number;
// Not read by this module. Presumably the maximum output tokens — confirm
// against the models.dev schema before relying on it.
output?: number;
};
};

// Subset of a models.dev provider entry: its id and its models, which lookups
// index by the configured model name.
type ModelsDevProvider = {
id: string;
models?: Record<string, ModelsDevModel>;
};

// The whole catalog: provider entries keyed by models.dev provider id.
export type ModelsDevCatalog = Record<string, ModelsDevProvider>;

// Per-process cache state for the models.dev catalog.
//
// Last successfully-fetched catalog. Served while fresh, and kept as a fallback
// when a later refresh fails.
let cachedCatalog: ModelsDevCatalog | null = null;
// Timestamp (ms since epoch) of the last successful fetch; compared against
// the TTL. 0 means "never fetched".
let catalogFetchedAt = 0;
// Timestamp (ms since epoch) of the most recent fetch failure; drives the
// negative-cache backoff. 0 means "no failure recorded".
let lastFailedAt = 0;
// The refresh currently in progress, if any; dedupes concurrent fetches.
let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null;

/**
 * Downloads and parses the models.dev catalog.
 *
 * Never throws: every failure (non-2xx status, network error, timeout,
 * malformed JSON, unexpected payload shape) is logged as a warning and
 * reported as `null`, so callers can treat `null` as "fetch failed".
 *
 * @returns The parsed catalog, or `null` when it could not be retrieved.
 */
const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
    try {
        const response = await fetch(MODELS_DEV_API_URL, {
            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        if (!response.ok) {
            logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`);
            // Discard the unread body so the underlying connection is released
            // promptly instead of lingering until garbage collection.
            await response.body?.cancel();
            return null;
        }

        // Validate the top-level shape before trusting the payload. Otherwise a
        // non-object response would be cached as a "fresh" catalog for the full
        // TTL while every lookup silently returned undefined.
        const payload: unknown = await response.json();
        if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
            logger.warn('Failed to fetch models.dev catalog: unexpected response shape');
            return null;
        }
        return payload as ModelsDevCatalog;
    } catch (error) {
        logger.warn(`Failed to fetch models.dev catalog: ${error}`);
        return null;
    }
};

/**
 * Returns the models.dev catalog, refreshing the per-process cache as needed.
 *
 * A refresh starts only when the cache is empty or older than the TTL, the
 * post-failure backoff has elapsed, and no other refresh is already running.
 * Once a catalog has been loaded, callers always get the cached copy
 * immediately (even if stale) while the refresh settles in the background.
 * Only a call made before the first successful load waits on the network.
 *
 * @returns The catalog, or `null` when none is cached and a fetch either
 * failed or is currently suppressed by the backoff window.
 */
const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
    const startedAt = Date.now();
    const needsRefresh = cachedCatalog === null || startedAt - catalogFetchedAt > CATALOG_TTL_MS;
    const backoffElapsed = startedAt - lastFailedAt >= NEGATIVE_CACHE_MS;

    if (needsRefresh && backoffElapsed && inFlightFetch === null) {
        // A successful refresh replaces the cache. A failed one only records
        // the failure time, leaving the last-known-good catalog untouched.
        const refresh = async (): Promise<ModelsDevCatalog | null> => {
            const fetched = await fetchCatalog();
            if (fetched) {
                cachedCatalog = fetched;
                catalogFetchedAt = Date.now();
            } else {
                lastFailedAt = Date.now();
            }
            inFlightFetch = null;
            return fetched;
        };
        inFlightFetch = refresh();
    }

    // Prefer the cached catalog so the request path never blocks on the
    // network; fall back to awaiting the in-flight fetch on the first load.
    return cachedCatalog ?? inFlightFetch ?? null;
};
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Finds a model's context window in an already-loaded models.dev catalog.
 * Performs no I/O, which keeps it directly unit-testable.
 *
 * @param catalog - The catalog to search, or `null` if none is available.
 * @param config - The provider and model name to look up.
 * @returns The total context window in tokens, or `undefined` when there is no
 * catalog, the model isn't listed, or the listed window isn't a positive number.
 */
export const lookupContextWindow = (
    catalog: ModelsDevCatalog | null,
    config: Pick<LanguageModel, 'provider' | 'model'>,
): number | undefined => {
    if (!catalog) {
        return undefined;
    }

    const { provider, model } = config;
    // Translate the Sourcebot provider id to its models.dev key where they differ.
    const catalogKey = PROVIDER_ID_OVERRIDES[provider] ?? provider;
    const windowTokens = catalog[catalogKey]?.models?.[model]?.limit?.context;

    // `limit` is schema-optional, and models.dev reports a 0 context window for
    // non-text models (image/audio/etc.). Both count as "unknown" so the UI
    // omits the gauge instead of rendering a bogus denominator.
    const isUsable = typeof windowTokens === 'number' && windowTokens > 0;
    return isUsable ? windowTokens : undefined;
};

/**
 * Resolves the total context window (in tokens) for a configured model by
 * loading the models.dev catalog and looking the model up in it.
 *
 * @param config - The provider and model name to resolve.
 * @returns The context window in tokens, or `undefined` when the catalog is
 * unavailable or has no usable entry for the model.
 */
export const resolveContextWindow = async (
    config: Pick<LanguageModel, 'provider' | 'model'>,
): Promise<number | undefined> => lookupContextWindow(await loadCatalog(), config);
Loading
Loading