Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
- [EE] Added mermaid diagram rendering to Ask Sourcebot answers, with pan/zoom, copy/export, in-thread deep links, and an interleaved right-panel view. [#1369](https://github.com/sourcebot-dev/sourcebot/pull/1369)
- [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
- Added automatic resolution of language model input modalities and supported document types from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372)

### Fixed
- Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)
Expand Down
4 changes: 4 additions & 0 deletions packages/web/src/ee/features/mcp/askCodebase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types";
import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils";
import { resolveModelCapabilities } from "@/features/chat/modelCapabilities.server";
import { ErrorCode } from "@/lib/errorCodes";
import { ServiceError, ServiceErrorException } from "@/lib/serviceError";
import { withOptionalAuth } from "@/middleware/withAuth";
Expand Down Expand Up @@ -86,6 +87,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);
const modelName = languageModelConfig.displayName ?? languageModelConfig.model;
const contextWindow = await resolveContextWindow(languageModelConfig);
const { inputModalities, supportedDocumentTypes } = await resolveModelCapabilities(languageModelConfig);

// No-op for non-Anthropic providers / when caching is disabled.
const promptCacheStrategy = getPromptCacheStrategy(
Expand Down Expand Up @@ -246,6 +248,8 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
provider: languageModelConfig.provider,
model: languageModelConfig.model,
displayName: languageModelConfig.displayName,
inputModalities,
supportedDocumentTypes,
},
} satisfies AskCodebaseResult;
})
Expand Down
137 changes: 137 additions & 0 deletions packages/web/src/features/chat/modelCapabilities.server.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { afterEach, describe, expect, test, vi } from 'vitest';
import type { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';

// Replace `server-only` with a stub default export; the module under test
// imports it for its side effect only.
vi.mock('server-only', () => ({ default: vi.fn() }));

// Swap the shared logger factory for one that returns mock functions, so any
// logging performed by the imported modules is a silent no-op.
vi.mock('@sourcebot/shared', () => ({
createLogger: () => ({
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
}),
}));

import { lookupModelCapabilities, resolveModelCapabilities } from './modelCapabilities.server';
import type { ModelsDevCatalog } from './modelsDevCatalog.server';

// In-memory stand-in for the models.dev catalog. It is passed directly to
// `lookupModelCapabilities` and is also the payload returned by the mocked
// `fetch` in the `resolveModelCapabilities` suite.
const catalog: ModelsDevCatalog = {
anthropic: {
id: 'anthropic',
models: {
// Text + image + a document (pdf) container format.
'claude-sonnet-4-5': {
id: 'claude-sonnet-4-5',
modalities: { input: ['text', 'image', 'pdf'], output: ['text'] },
},
},
},
// models.dev keys Gemini under 'google', whereas Sourcebot's provider id is
// 'google-generative-ai' — exercises the provider id override.
google: {
id: 'google',
models: {
'gemini-2.5-pro': {
id: 'gemini-2.5-pro',
modalities: { input: ['text', 'image', 'audio', 'video', 'pdf'], output: ['text'] },
},
},
},
openai: {
id: 'openai',
models: {
// Catalogued model that omits `text` from its input list.
'image-only': { id: 'image-only', modalities: { input: ['image'], output: ['text'] } },
// Catalogued model with no `modalities` object at all.
'no-modalities-model': { id: 'no-modalities-model' },
},
},
};

/**
 * Test helper: builds the minimal provider/model pair that the capability
 * lookups accept, without requiring the rest of the `LanguageModel` config.
 */
const model = (provider: string, modelId: string) => {
    const pair = { provider, model: modelId };
    return pair as Pick<LanguageModel, 'provider' | 'model'>;
};

// Exercises the pure catalog lookup against the in-memory `catalog` fixture:
// direct hits, the provider-id override, and each text-only fallback path.
describe('lookupModelCapabilities', () => {
test('splits modalities and document types for a direct provider/model hit', () => {
expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-sonnet-4-5'))).toEqual({
inputModalities: ['text', 'image'],
supportedDocumentTypes: ['pdf'],
});
});

test('maps google-generative-ai to the catalog\'s google key', () => {
expect(lookupModelCapabilities(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
inputModalities: ['text', 'image', 'audio', 'video'],
supportedDocumentTypes: ['pdf'],
});
});

test('always includes text even when the catalog omits it', () => {
expect(lookupModelCapabilities(catalog, model('openai', 'image-only'))).toEqual({
inputModalities: ['text', 'image'],
supportedDocumentTypes: [],
});
});

test('falls back to text-only for a catalogued model with no modalities', () => {
expect(lookupModelCapabilities(catalog, model('openai', 'no-modalities-model'))).toEqual({
inputModalities: ['text'],
supportedDocumentTypes: [],
});
});

// Covers both a provider key absent from the fixture and an unknown model id
// under a provider that is present.
test('falls back to text-only for an uncatalogued model (e.g. openai-compatible / self-hosted)', () => {
expect(lookupModelCapabilities(catalog, model('openai-compatible', 'my-local-model'))).toEqual({
inputModalities: ['text'],
supportedDocumentTypes: [],
});
expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-unknown'))).toEqual({
inputModalities: ['text'],
supportedDocumentTypes: [],
});
});

test('falls back to text-only when the catalog is null (fetch failed / unreachable)', () => {
expect(lookupModelCapabilities(null, model('anthropic', 'claude-sonnet-4-5'))).toEqual({
inputModalities: ['text'],
supportedDocumentTypes: [],
});
});
});

// Exercises the async resolver end to end with a stubbed global `fetch` that
// serves the `catalog` fixture.
describe('resolveModelCapabilities', () => {
// Remove the stubbed global `fetch` after each test so it cannot leak.
afterEach(() => {
vi.unstubAllGlobals();
});

test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => {
// Minimal `Response` stand-in: only `ok` and `json()` are provided.
const fetchMock = vi.fn(async () => ({
ok: true,
json: async () => catalog,
}) as unknown as Response);
vi.stubGlobal('fetch', fetchMock);

// The request path never blocks on the fetch: the first lookup kicks off
// the background fetch and falls back to text-only while it's in flight.
expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
inputModalities: ['text'],
supportedDocumentTypes: [],
});

// Once the background fetch settles, lookups resolve from the cached catalog.
await vi.waitFor(async () => {
expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
inputModalities: ['text', 'image'],
supportedDocumentTypes: ['pdf'],
});
});

// Subsequent lookups reuse the cached catalog rather than refetching.
expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
inputModalities: ['text', 'image', 'audio', 'video'],
supportedDocumentTypes: ['pdf'],
});

// All of the lookups above must have shared a single network request.
expect(fetchMock).toHaveBeenCalledTimes(1);
});
});
64 changes: 64 additions & 0 deletions packages/web/src/features/chat/modelCapabilities.server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import 'server-only';

import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
import { DocumentType, InputModality } from './types';
import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server';

// models.dev folds every accepted input — perceptual channels (text, image,
// audio, video) AND container formats (pdf) — into a single `modalities.input`
// list. Sourcebot keeps those two concepts apart: `inputModalities` are the
// raw channels a model encodes, while `supportedDocumentTypes` are rich
// compound formats providers decompose server-side. We partition the catalog's
// input list into those two buckets here.
// Every value Sourcebot treats as a raw input channel.
const INPUT_MODALITY_VALUES = [
    'text',
    'image',
    'audio',
    'video',
] as const satisfies readonly InputModality[];
// Every value Sourcebot treats as a compound document format.
const DOCUMENT_TYPE_VALUES = [
    'pdf',
] as const satisfies readonly DocumentType[];

/** Type guard: true when `value` is one of `INPUT_MODALITY_VALUES`. */
const isInputModality = (value: string): value is InputModality => {
    const knownModalities: readonly string[] = INPUT_MODALITY_VALUES;
    return knownModalities.some((candidate) => candidate === value);
};

/** Type guard: true when `value` is one of `DOCUMENT_TYPE_VALUES`. */
const isDocumentType = (value: string): value is DocumentType => {
    const knownDocumentTypes: readonly string[] = DOCUMENT_TYPE_VALUES;
    return knownDocumentTypes.some((candidate) => candidate === value);
};

/** Input capabilities resolved for a single language model. */
export type ModelCapabilities = {
    /** Raw input channels the model accepts. */
    inputModalities: Array<InputModality>;
    /** Compound document formats the model accepts. */
    supportedDocumentTypes: Array<DocumentType>;
};

/**
 * Pure lookup of a model's input capabilities in a models.dev catalog.
 * Separated from the network fetch so it can be unit-tested directly.
 *
 * Resolution is automatic from the catalog — capabilities are NOT hand-declared
 * in config.json. When a model isn't catalogued (e.g. a self-hosted /
 * openai-compatible endpoint we can't introspect), we fall back to text-only
 * with no document support: the model stays fully usable for normal chat, and
 * richer attachments stay gated off until we can positively confirm support.
 *
 * @param catalog - The models.dev catalog, or `null` when none is available.
 * @param config - The provider and model id to look up.
 * @returns The model's input modalities (always including `'text'`) and its
 * supported document types. Both lists are de-duplicated and keep the
 * catalog's order. Unrecognised catalog values are dropped.
 */
export const lookupModelCapabilities = (
    catalog: ModelsDevCatalog | null,
    config: Pick<LanguageModel, 'provider' | 'model'>,
): ModelCapabilities => {
    const providerId = resolveProviderId(config.provider);
    const inputs: unknown = catalog?.[providerId]?.models?.[config.model]?.modalities?.input;

    // Catalog data comes from models.dev and may not match its declared type at
    // runtime, so verify the shape instead of trusting it. Anything that isn't
    // a non-empty array gets the text-only fallback rather than throwing.
    if (!Array.isArray(inputs) || inputs.length === 0) {
        return { inputModalities: ['text'], supportedDocumentTypes: [] };
    }

    const declaredInputs = inputs.filter(
        (value): value is string => typeof value === 'string',
    );

    // A Set drops repeated catalog entries while preserving first-seen order.
    const inputModalities = [...new Set(declaredInputs.filter(isInputModality))];
    const supportedDocumentTypes = [...new Set(declaredInputs.filter(isDocumentType))];

    // Every model accepts text, even if the catalog omits it from the list.
    if (!inputModalities.includes('text')) {
        inputModalities.unshift('text');
    }

    return { inputModalities, supportedDocumentTypes };
};

/**
 * Resolves a configured model's input capabilities by loading the catalog via
 * `loadCatalog` and delegating to `lookupModelCapabilities`.
 *
 * @param config - The provider and model id to resolve.
 * @returns The capabilities reported by `lookupModelCapabilities` for whatever
 * catalog value `loadCatalog` yields.
 */
export const resolveModelCapabilities = async (
    config: Pick<LanguageModel, 'provider' | 'model'>,
): Promise<ModelCapabilities> => lookupModelCapabilities(await loadCatalog(), config);
99 changes: 5 additions & 94 deletions packages/web/src/features/chat/modelContextWindow.server.ts
Original file line number Diff line number Diff line change
@@ -1,100 +1,11 @@
import 'server-only';

import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
import { createLogger } from '@sourcebot/shared';
import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server';

const logger = createLogger('model-context-window');

// The same public, unauthenticated catalog the setup wizard already consumes
// (see packages/setupWizard/src/models.ts). Each model entry exposes a
// `limit.context` field holding the total context window in tokens.
const MODELS_DEV_API_URL = 'https://models.dev/api.json';
const FETCH_TIMEOUT_MS = 8000;
// Re-fetch the (~2.4 MB) catalog at most once per this interval per server
// process. New models trickle in daily; a stale window for a few hours is fine.
const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
// After a failed fetch, don't reattempt for this long. Without it, an outage in
// models.dev would make every chat send pay the fetch timeout on the request path.
const NEGATIVE_CACHE_MS = 60 * 1000;

// Sourcebot provider id -> models.dev top-level catalog key. Only providers
// whose Sourcebot id differs from the models.dev id need an entry; everything
// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai,
// openrouter, google-vertex, google-vertex-anthropic) matches 1:1.
const PROVIDER_ID_OVERRIDES: Record<string, string> = {
'google-generative-ai': 'google',
};

// A single model entry in the models.dev catalog, limited to the fields
// declared here.
type ModelsDevModel = {
id: string;
// Token limits; the object and each of its fields are optional.
limit?: {
// Total context window in tokens.
context?: number;
// NOTE(review): presumably the maximum output tokens — confirm against models.dev.
output?: number;
};
};

// A provider entry in the catalog; `models` is keyed by model id.
type ModelsDevProvider = {
id: string;
models?: Record<string, ModelsDevModel>;
};

// The whole catalog, keyed by models.dev provider id.
export type ModelsDevCatalog = Record<string, ModelsDevProvider>;

// Last successfully-fetched catalog. Served while fresh, and kept as a fallback
// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL),
// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and
// `inFlightFetch` dedupes concurrent fetches.
let cachedCatalog: ModelsDevCatalog | null = null;
let catalogFetchedAt = 0;
let lastFailedAt = 0;
let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null;

/**
 * Downloads the models.dev catalog, aborting after FETCH_TIMEOUT_MS.
 *
 * Never rejects: a non-OK response or any thrown error is logged as a warning
 * and reported to the caller as `null`.
 *
 * @returns The parsed catalog, or `null` when the fetch failed.
 */
const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
    try {
        const res = await fetch(MODELS_DEV_API_URL, {
            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        if (res.ok) {
            const parsed = await res.json() as ModelsDevCatalog;
            return parsed;
        }
        logger.warn(`Failed to fetch models.dev catalog: ${res.status} ${res.statusText}`);
    } catch (err) {
        logger.warn(`Failed to fetch models.dev catalog: ${err}`);
    }
    return null;
};

/**
 * Returns the models.dev catalog from the in-process cache, starting a
 * deduplicated refresh when the cache is empty or older than CATALOG_TTL_MS
 * and no fetch failure was recorded within the last NEGATIVE_CACHE_MS.
 *
 * @returns The last successfully fetched catalog when one exists (possibly
 * stale). Otherwise the result of the in-flight fetch, or `null` when no fetch
 * is running (e.g. during the post-failure backoff window).
 */
const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
const now = Date.now();
const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS;
const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS;

// Kick off a (deduped) refresh when the cache is stale/empty and we're not
// within the post-failure backoff window. On success it replaces the cache;
// on failure it only records the failure time, leaving the last-known-good
// catalog intact.
if (!isFresh && !isBackingOff && !inFlightFetch) {
inFlightFetch = fetchCatalog().then((catalog) => {
if (catalog) {
cachedCatalog = catalog;
catalogFetchedAt = Date.now();
} else {
lastFailedAt = Date.now();
}
// Clear the dedupe slot so a later call can start a new fetch.
inFlightFetch = null;
return catalog;
});
}

// Once a catalog has loaded once, never block the request path on the
// network: serve the last-known-good value (even if stale) and let any
// refresh settle in the background. Only the very first load awaits.
if (cachedCatalog !== null) {
return cachedCatalog;
}
return inFlightFetch ?? null;
};
// Re-exported so existing consumers/tests can keep importing the catalog type
// from here.
export type { ModelsDevCatalog } from './modelsDevCatalog.server';

/**
* Pure lookup of a model's context window in a models.dev catalog. Separated
Expand All @@ -110,7 +21,7 @@ export const lookupContextWindow = (
if (!catalog) {
return undefined;
}
const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? config.provider;
const providerId = resolveProviderId(config.provider);
const context = catalog[providerId]?.models?.[config.model]?.limit?.context;
// `limit` is schema-optional, and models.dev reports a 0 context window for
// non-text models (image/audio/etc.). Treat both as "unknown" so the UI
Expand Down
13 changes: 11 additions & 2 deletions packages/web/src/features/chat/modelContextWindow.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,19 @@ describe('resolveContextWindow', () => {
vi.unstubAllGlobals();
});

test('fetches the catalog once and resolves windows (incl. provider mapping)', async () => {
test('fetches the catalog once in the background and resolves windows (incl. provider mapping)', async () => {
const fetchMock = vi.fn(async () => ({
ok: true,
json: async () => catalog,
}) as unknown as Response);
vi.stubGlobal('fetch', fetchMock);

// The request path never blocks on the fetch: the first lookup kicks off
// the background fetch and falls back to "unknown" while it's in flight.
expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();

// Once the background fetch settles, lookups resolve from the cached catalog.
await new Promise((resolve) => setTimeout(resolve, 0));
expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
// Subsequent lookups reuse the cached catalog rather than refetching.
expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576);
Expand Down Expand Up @@ -141,7 +147,10 @@ describe('resolveContextWindow resilience', () => {

const mod = await importFresh();

// First load populates the cache.
// First load kicks off the background fetch (returning the "unknown"
// fallback until it settles), which then populates the cache.
expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
await new Promise((resolve) => setTimeout(resolve, 0));
expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
expect(fetchMock).toHaveBeenCalledTimes(1);

Expand Down
Loading
Loading