sourcebot-dev · whoisthey · Jun 27, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
 - [EE] Added mermaid diagram rendering to Ask Sourcebot answers, with pan/zoom, copy/export, in-thread deep links, and an interleaved right-panel view. [#1369](https://github.com/sourcebot-dev/sourcebot/pull/1369)
 - [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
+- Added automatic resolution of each language model's input modalities and supported document types from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372)
 
 ### Fixed
 - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)

diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts
@@ -5,6 +5,7 @@ import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
 import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
 import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types";
 import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils";
+import { resolveModelCapabilities } from "@/features/chat/modelCapabilities.server";
 import { ErrorCode } from "@/lib/errorCodes";
 import { ServiceError, ServiceErrorException } from "@/lib/serviceError";
 import { withOptionalAuth } from "@/middleware/withAuth";
@@ -86,6 +87,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
             const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);
             const modelName = languageModelConfig.displayName ?? languageModelConfig.model;
             const contextWindow = await resolveContextWindow(languageModelConfig);
+            const { inputModalities, supportedDocumentTypes } = await resolveModelCapabilities(languageModelConfig);
 
             // No-op for non-Anthropic providers / when caching is disabled.
             const promptCacheStrategy = getPromptCacheStrategy(
@@ -246,6 +248,8 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
                     provider: languageModelConfig.provider,
                     model: languageModelConfig.model,
                     displayName: languageModelConfig.displayName,
+                    inputModalities,
+                    supportedDocumentTypes,
                 },
             } satisfies AskCodebaseResult;
         })

diff --git a/packages/web/src/features/chat/modelCapabilities.server.test.ts b/packages/web/src/features/chat/modelCapabilities.server.test.ts
@@ -0,0 +1,137 @@
+import { afterEach, describe, expect, test, vi } from 'vitest';
+import type { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
+
+vi.mock('server-only', () => ({ default: vi.fn() }));
+
+vi.mock('@sourcebot/shared', () => ({
+    createLogger: () => ({
+        info: vi.fn(),
+        warn: vi.fn(),
+        error: vi.fn(),
+        debug: vi.fn(),
+    }),
+}));
+
+import { lookupModelCapabilities, resolveModelCapabilities } from './modelCapabilities.server';
+import type { ModelsDevCatalog } from './modelsDevCatalog.server';
+
+const catalog: ModelsDevCatalog = {
+    anthropic: {
+        id: 'anthropic',
+        models: {
+            // Text + image + a document (pdf) container format.
+            'claude-sonnet-4-5': {
+                id: 'claude-sonnet-4-5',
+                modalities: { input: ['text', 'image', 'pdf'], output: ['text'] },
+            },
+        },
+    },
+    // models.dev keys Gemini under 'google', whereas Sourcebot's provider id is
+    // 'google-generative-ai' — exercises the provider id override.
+    google: {
+        id: 'google',
+        models: {
+            'gemini-2.5-pro': {
+                id: 'gemini-2.5-pro',
+                modalities: { input: ['text', 'image', 'audio', 'video', 'pdf'], output: ['text'] },
+            },
+        },
+    },
+    openai: {
+        id: 'openai',
+        models: {
+            // Catalogued model that omits `text` from its input list.
+            'image-only': { id: 'image-only', modalities: { input: ['image'], output: ['text'] } },
+            // Catalogued model with no `modalities` object at all.
+            'no-modalities-model': { id: 'no-modalities-model' },
+        },
+    },
+};
+
+const model = (provider: string, modelId: string) => // Test helper: builds the minimal { provider, model } config the lookups take.
+    ({ provider, model: modelId }) as Pick<LanguageModel, 'provider' | 'model'>;
+
+describe('lookupModelCapabilities', () => { // Pure lookups against the in-memory `catalog` fixture; fetch is never stubbed here.
+    test('splits modalities and document types for a direct provider/model hit', () => {
+        expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-sonnet-4-5'))).toEqual({
+            inputModalities: ['text', 'image'],
+            supportedDocumentTypes: ['pdf'], // 'pdf' is reported as a document type, not as an input modality.
+        });
+    });
+
+    test('maps google-generative-ai to the catalog\'s google key', () => {
+        expect(lookupModelCapabilities(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
+            inputModalities: ['text', 'image', 'audio', 'video'],
+            supportedDocumentTypes: ['pdf'],
+        });
+    });
+
+    test('always includes text even when the catalog omits it', () => {
+        expect(lookupModelCapabilities(catalog, model('openai', 'image-only'))).toEqual({
+            inputModalities: ['text', 'image'], // 'text' comes first although the fixture lists only 'image'.
+            supportedDocumentTypes: [],
+        });
+    });
+
+    test('falls back to text-only for a catalogued model with no modalities', () => {
+        expect(lookupModelCapabilities(catalog, model('openai', 'no-modalities-model'))).toEqual({
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
+        });
+    });
+
+    test('falls back to text-only for an uncatalogued model (e.g. openai-compatible / self-hosted)', () => {
+        expect(lookupModelCapabilities(catalog, model('openai-compatible', 'my-local-model'))).toEqual({ // Provider absent from the fixture.
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
+        });
+        expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-unknown'))).toEqual({ // Known provider, unknown model id.
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
+        });
+    });
+
+    test('falls back to text-only when the catalog is null (fetch failed / unreachable)', () => {
+        expect(lookupModelCapabilities(null, model('anthropic', 'claude-sonnet-4-5'))).toEqual({
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
+        });
+    });
+});
+
+describe('resolveModelCapabilities', () => {
+    afterEach(() => {
+        vi.unstubAllGlobals(); // Undo the global `fetch` stub installed by the test below.
+    });
+
+    test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => {
+        const fetchMock = vi.fn(async () => ({ // Minimal Response stand-in: only `ok` and `json()` are provided.
+            ok: true,
+            json: async () => catalog,
+        }) as unknown as Response);
+        vi.stubGlobal('fetch', fetchMock);
+
+        // The first lookup is expected to start the background fetch and return the
+        // text-only fallback. NOTE(review): assumes the catalog cache starts empty.
+        expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
+        });
+
+        // Retry until the background fetch has settled and lookups resolve from the cached catalog.
+        await vi.waitFor(async () => {
+            expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
+                inputModalities: ['text', 'image'],
+                supportedDocumentTypes: ['pdf'],
+            });
+        });
+
+        // Subsequent lookups reuse the cached catalog rather than refetching.
+        expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
+            inputModalities: ['text', 'image', 'audio', 'video'],
+            supportedDocumentTypes: ['pdf'],
+        });
+
+        expect(fetchMock).toHaveBeenCalledTimes(1); // Every lookup above was served by a single fetch.
+    });
+});
diff --git a/packages/web/src/features/chat/modelCapabilities.server.ts b/packages/web/src/features/chat/modelCapabilities.server.ts
@@ -0,0 +1,64 @@
+import 'server-only';
+
+import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
+import { DocumentType, InputModality } from './types';
+import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server';
+
+// models.dev folds every accepted input — perceptual channels (text, image,
+// audio, video) AND container formats (pdf) — into a single `modalities.input`
+// list. Sourcebot keeps those two concepts apart: `inputModalities` are the
+// raw channels a model encodes, while `supportedDocumentTypes` are rich
+// compound formats providers decompose server-side. We partition the catalog's
+// input list into those two buckets here.
+const INPUT_MODALITY_VALUES = ['text', 'image', 'audio', 'video'] as const satisfies readonly InputModality[];
+const DOCUMENT_TYPE_VALUES = ['pdf'] as const satisfies readonly DocumentType[];
+
+const isInputModality = (value: string): value is InputModality =>
+    INPUT_MODALITY_VALUES.some((modality) => modality === value);
+
+const isDocumentType = (value: string): value is DocumentType =>
+    DOCUMENT_TYPE_VALUES.some((documentType) => documentType === value);
+
+export type ModelCapabilities = {
+    inputModalities: InputModality[]; // Raw input channels the model accepts (text, image, audio, video).
+    supportedDocumentTypes: DocumentType[]; // Container formats the model accepts (currently only pdf).
+};
+
+/**
+ * Pure lookup of a model's input capabilities in a models.dev catalog.
+ * Separated from the network fetch so it can be unit-tested directly.
+ *
+ * Capabilities are NOT hand-declared in config.json. A null catalog, an
+ * uncatalogued model (e.g. a self-hosted / openai-compatible endpoint) or a
+ * missing, empty or non-array `modalities.input` falls back to text-only with
+ * no document support, so richer attachments stay gated off until confirmed.
+ */
+export const lookupModelCapabilities = (
+    catalog: ModelsDevCatalog | null,
+    config: Pick<LanguageModel, 'provider' | 'model'>,
+): ModelCapabilities => {
+    const providerId = resolveProviderId(config.provider);
+    const inputs = catalog?.[providerId]?.models?.[config.model]?.modalities?.input;
+
+    // The catalog comes from models.dev at runtime, so don't trust `input` to be an array.
+    if (!Array.isArray(inputs) || inputs.length === 0) {
+        return { inputModalities: ['text'], supportedDocumentTypes: [] };
+    }
+
+    const inputModalities = inputs.filter(isInputModality);
+    const supportedDocumentTypes = inputs.filter(isDocumentType);
+
+    // Every model accepts text, even if the catalog omits it from the list.
+    if (!inputModalities.includes('text')) {
+        inputModalities.unshift('text');
+    }
+
+    return { inputModalities, supportedDocumentTypes };
+};
+
+export const resolveModelCapabilities = async (
+    config: Pick<LanguageModel, 'provider' | 'model'>,
+): Promise<ModelCapabilities> => {
+    // Delegate to the pure lookup with whatever catalog loadCatalog() yields.
+    return lookupModelCapabilities(await loadCatalog(), config);
+};
diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts
@@ -1,100 +1,11 @@
 import 'server-only';
 
 import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
-import { createLogger } from '@sourcebot/shared';
+import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server';
 
-const logger = createLogger('model-context-window');
-
-// The same public, unauthenticated catalog the setup wizard already consumes
-// (see packages/setupWizard/src/models.ts). Each model entry exposes a
-// `limit.context` field holding the total context window in tokens.
-const MODELS_DEV_API_URL = 'https://models.dev/api.json';
-const FETCH_TIMEOUT_MS = 8000;
-// Re-fetch the (~2.4 MB) catalog at most once per this interval per server
-// process. New models trickle in daily; a stale window for a few hours is fine.
-const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
-// After a failed fetch, don't reattempt for this long. Without it, an outage in
-// models.dev would make every chat send pay the fetch timeout on the request path.
-const NEGATIVE_CACHE_MS = 60 * 1000;
-
-// Sourcebot provider id -> models.dev top-level catalog key. Only providers
-// whose Sourcebot id differs from the models.dev id need an entry; everything
-// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai,
-// openrouter, google-vertex, google-vertex-anthropic) matches 1:1.
-const PROVIDER_ID_OVERRIDES: Record<string, string> = {
-    'google-generative-ai': 'google',
-};
-
-type ModelsDevModel = {
-    id: string;
-    limit?: {
-        context?: number;
-        output?: number;
-    };
-};
-
-type ModelsDevProvider = {
-    id: string;
-    models?: Record<string, ModelsDevModel>;
-};
-
-export type ModelsDevCatalog = Record<string, ModelsDevProvider>;
-
-// Last successfully-fetched catalog. Served while fresh, and kept as a fallback
-// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL),
-// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and
-// `inFlightFetch` dedupes concurrent fetches.
-let cachedCatalog: ModelsDevCatalog | null = null;
-let catalogFetchedAt = 0;
-let lastFailedAt = 0;
-let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null;
-
-const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
-    try {
-        const response = await fetch(MODELS_DEV_API_URL, {
-            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
-        });
-        if (!response.ok) {
-            logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`);
-            return null;
-        }
-        return await response.json() as ModelsDevCatalog;
-    } catch (error) {
-        logger.warn(`Failed to fetch models.dev catalog: ${error}`);
-        return null;
-    }
-};
-
-const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
-    const now = Date.now();
-    const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS;
-    const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS;
-
-    // Kick off a (deduped) refresh when the cache is stale/empty and we're not
-    // within the post-failure backoff window. On success it replaces the cache;
-    // on failure it only records the failure time, leaving the last-known-good
-    // catalog intact.
-    if (!isFresh && !isBackingOff && !inFlightFetch) {
-        inFlightFetch = fetchCatalog().then((catalog) => {
-            if (catalog) {
-                cachedCatalog = catalog;
-                catalogFetchedAt = Date.now();
-            } else {
-                lastFailedAt = Date.now();
-            }
-            inFlightFetch = null;
-            return catalog;
-        });
-    }
-
-    // Once a catalog has loaded once, never block the request path on the
-    // network: serve the last-known-good value (even if stale) and let any
-    // refresh settle in the background. Only the very first load awaits.
-    if (cachedCatalog !== null) {
-        return cachedCatalog;
-    }
-    return inFlightFetch ?? null;
-};
+// Re-exported so existing consumers/tests can keep importing the catalog type
+// from here.
+export type { ModelsDevCatalog } from './modelsDevCatalog.server';
 
 /**
  * Pure lookup of a model's context window in a models.dev catalog. Separated
@@ -110,7 +21,7 @@ export const lookupContextWindow = (
     if (!catalog) {
         return undefined;
     }
-    const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? config.provider;
+    const providerId = resolveProviderId(config.provider);
     const context = catalog[providerId]?.models?.[config.model]?.limit?.context;
     // `limit` is schema-optional, and models.dev reports a 0 context window for
     // non-text models (image/audio/etc.). Treat both as "unknown" so the UI

diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts
@@ -81,13 +81,19 @@ describe('resolveContextWindow', () => {
         vi.unstubAllGlobals();
     });
 
-    test('fetches the catalog once and resolves windows (incl. provider mapping)', async () => {
+    test('fetches the catalog once in the background and resolves windows (incl. provider mapping)', async () => {
         const fetchMock = vi.fn(async () => ({
             ok: true,
             json: async () => catalog,
         }) as unknown as Response);
         vi.stubGlobal('fetch', fetchMock);
 
+        // The request path never blocks on the fetch: the first lookup kicks off
+        // the background fetch and falls back to "unknown" while it's in flight.
+        expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
+
+        // Once the background fetch settles, lookups resolve from the cached catalog.
+        await new Promise((resolve) => setTimeout(resolve, 0));
         expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
         // Subsequent lookups reuse the cached catalog rather than refetching.
         expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576);
@@ -141,7 +147,10 @@ describe('resolveContextWindow resilience', () => {
 
         const mod = await importFresh();
 
-        // First load populates the cache.
+        // First load kicks off the background fetch (returning the "unknown"
+        // fallback until it settles), which then populates the cache.
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
+        await new Promise((resolve) => setTimeout(resolve, 0));
         expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
         expect(fetchMock).toHaveBeenCalledTimes(1);