diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b1bce463..b0725af08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) - [EE] Added mermaid diagram rendering to Ask Sourcebot answers, with pan/zoom, copy/export, in-thread deep links, and an interleaved right-panel view. [#1369](https://github.com/sourcebot-dev/sourcebot/pull/1369) - [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) +- Added automatic resolution of each language model's input modalities and supported document types from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. 
[#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index e2b234037..35337d29f 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -5,6 +5,7 @@ import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { resolveContextWindow } from "@/features/chat/modelContextWindow.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; +import { resolveModelCapabilities } from "@/features/chat/modelCapabilities.server"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -86,6 +87,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise ({ default: vi.fn() })); + +vi.mock('@sourcebot/shared', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})); + +import { lookupModelCapabilities, resolveModelCapabilities } from './modelCapabilities.server'; +import type { ModelsDevCatalog } from './modelsDevCatalog.server'; + +const catalog: ModelsDevCatalog = { + anthropic: { + id: 'anthropic', + models: { + // Text + image + a document (pdf) container format. + 'claude-sonnet-4-5': { + id: 'claude-sonnet-4-5', + modalities: { input: ['text', 'image', 'pdf'], output: ['text'] }, + }, + }, + }, + // models.dev keys Gemini under 'google', whereas Sourcebot's provider id is + // 'google-generative-ai' — exercises the provider id override. 
+ google: { + id: 'google', + models: { + 'gemini-2.5-pro': { + id: 'gemini-2.5-pro', + modalities: { input: ['text', 'image', 'audio', 'video', 'pdf'], output: ['text'] }, + }, + }, + }, + openai: { + id: 'openai', + models: { + // Catalogued model that omits `text` from its input list. + 'image-only': { id: 'image-only', modalities: { input: ['image'], output: ['text'] } }, + // Catalogued model with no `modalities` object at all. + 'no-modalities-model': { id: 'no-modalities-model' }, + }, + }, +}; + +const model = (provider: string, modelId: string) => + ({ provider, model: modelId }) as Pick; + +describe('lookupModelCapabilities', () => { + test('splits modalities and document types for a direct provider/model hit', () => { + expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], + }); + }); + + test('maps google-generative-ai to the catalog\'s google key', () => { + expect(lookupModelCapabilities(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({ + inputModalities: ['text', 'image', 'audio', 'video'], + supportedDocumentTypes: ['pdf'], + }); + }); + + test('always includes text even when the catalog omits it', () => { + expect(lookupModelCapabilities(catalog, model('openai', 'image-only'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only for a catalogued model with no modalities', () => { + expect(lookupModelCapabilities(catalog, model('openai', 'no-modalities-model'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only for an uncatalogued model (e.g. 
openai-compatible / self-hosted)', () => { + expect(lookupModelCapabilities(catalog, model('openai-compatible', 'my-local-model'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-unknown'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only when the catalog is null (fetch failed / unreachable)', () => { + expect(lookupModelCapabilities(null, model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); +}); + +describe('resolveModelCapabilities', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => { + const fetchMock = vi.fn(async () => ({ + ok: true, + json: async () => catalog, + }) as unknown as Response); + vi.stubGlobal('fetch', fetchMock); + + // The request path never blocks on the fetch: the first lookup kicks off + // the background fetch and falls back to text-only while it's in flight. + expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + + // Once the background fetch settles, lookups resolve from the cached catalog. + await vi.waitFor(async () => { + expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], + }); + }); + + // Subsequent lookups reuse the cached catalog rather than refetching. 
// models.dev folds every accepted input — perceptual channels (text, image,
// audio, video) AND container formats (pdf) — into a single `modalities.input`
// list. Sourcebot keeps those two concepts apart: `inputModalities` are the
// raw channels a model encodes, while `supportedDocumentTypes` are rich
// compound formats providers decompose server-side. We partition the catalog's
// input list into those two buckets here.
const INPUT_MODALITY_VALUES = ['text', 'image', 'audio', 'video'] as const satisfies readonly InputModality[];
const DOCUMENT_TYPE_VALUES = ['pdf'] as const satisfies readonly DocumentType[];

// The guards accept `unknown` because the values they inspect come straight
// out of an unvalidated JSON payload; anything that is not one of the known
// literals (including non-strings) is simply rejected.
const isInputModality = (value: unknown): value is InputModality =>
    (INPUT_MODALITY_VALUES as readonly unknown[]).includes(value);

const isDocumentType = (value: unknown): value is DocumentType =>
    (DOCUMENT_TYPE_VALUES as readonly unknown[]).includes(value);

export type ModelCapabilities = {
    inputModalities: InputModality[];
    supportedDocumentTypes: DocumentType[];
};

// Builds a fresh fallback object on every call so that a caller mutating the
// returned arrays can never affect a later lookup.
const textOnlyCapabilities = (): ModelCapabilities => ({
    inputModalities: ['text'],
    supportedDocumentTypes: [],
});

/**
 * Pure lookup of a model's input capabilities in a models.dev catalog.
 * Separated from the network fetch so it can be unit-tested directly.
 *
 * Resolution is automatic from the catalog — capabilities are NOT hand-declared
 * in config.json. When a model isn't catalogued (e.g. a self-hosted /
 * openai-compatible endpoint we can't introspect), we fall back to text-only
 * with no document support: the model stays fully usable for normal chat, and
 * richer attachments stay gated off until we can positively confirm support.
 *
 * The same text-only fallback is used when the catalog is null, when the model
 * entry has no (or an empty) `modalities.input`, and when `modalities.input`
 * is not an array at all — the catalog is parsed JSON that is never validated,
 * so a malformed entry must degrade gracefully rather than throw.
 *
 * @param catalog The models.dev catalog, or null when none is available.
 * @param config  The provider/model pair to look up. The provider id is mapped
 *                to its models.dev key via `resolveProviderId`.
 * @returns The recognised input modalities (always including 'text', without
 *          duplicates, in catalog order) and the recognised document types.
 */
export const lookupModelCapabilities = (
    catalog: ModelsDevCatalog | null,
    config: Pick<LanguageModel, 'provider' | 'model'>,
): ModelCapabilities => {
    const providerId = resolveProviderId(config.provider);
    // Typed as `unknown` on purpose: the declared catalog type is only a cast
    // over `response.json()`, so the runtime shape has to be checked here.
    const inputs: unknown = catalog?.[providerId]?.models?.[config.model]?.modalities?.input;

    if (!Array.isArray(inputs) || inputs.length === 0) {
        return textOnlyCapabilities();
    }

    // A Set drops repeated entries while preserving first-seen order.
    const inputModalities = [...new Set(inputs.filter(isInputModality))];
    const supportedDocumentTypes = [...new Set(inputs.filter(isDocumentType))];

    // Every model accepts text, even if the catalog omits it from the list.
    if (!inputModalities.includes('text')) {
        inputModalities.unshift('text');
    }

    return { inputModalities, supportedDocumentTypes };
};

/**
 * Resolves a model's input capabilities against whatever catalog
 * `loadCatalog` currently returns, delegating the actual lookup (and its
 * text-only fallback) to `lookupModelCapabilities`.
 *
 * @param config The provider/model pair to resolve.
 * @returns The resolved capabilities for that model.
 */
export const resolveModelCapabilities = async (
    config: Pick<LanguageModel, 'provider' | 'model'>,
): Promise<ModelCapabilities> => {
    const catalog = await loadCatalog();
    return lookupModelCapabilities(catalog, config);
};
-// (see packages/setupWizard/src/models.ts). Each model entry exposes a -// `limit.context` field holding the total context window in tokens. -const MODELS_DEV_API_URL = 'https://models.dev/api.json'; -const FETCH_TIMEOUT_MS = 8000; -// Re-fetch the (~2.4 MB) catalog at most once per this interval per server -// process. New models trickle in daily; a stale window for a few hours is fine. -const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; -// After a failed fetch, don't reattempt for this long. Without it, an outage in -// models.dev would make every chat send pay the fetch timeout on the request path. -const NEGATIVE_CACHE_MS = 60 * 1000; - -// Sourcebot provider id -> models.dev top-level catalog key. Only providers -// whose Sourcebot id differs from the models.dev id need an entry; everything -// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai, -// openrouter, google-vertex, google-vertex-anthropic) matches 1:1. -const PROVIDER_ID_OVERRIDES: Record = { - 'google-generative-ai': 'google', -}; - -type ModelsDevModel = { - id: string; - limit?: { - context?: number; - output?: number; - }; -}; - -type ModelsDevProvider = { - id: string; - models?: Record; -}; - -export type ModelsDevCatalog = Record; - -// Last successfully-fetched catalog. Served while fresh, and kept as a fallback -// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL), -// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and -// `inFlightFetch` dedupes concurrent fetches. 
-let cachedCatalog: ModelsDevCatalog | null = null; -let catalogFetchedAt = 0; -let lastFailedAt = 0; -let inFlightFetch: Promise | null = null; - -const fetchCatalog = async (): Promise => { - try { - const response = await fetch(MODELS_DEV_API_URL, { - signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), - }); - if (!response.ok) { - logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`); - return null; - } - return await response.json() as ModelsDevCatalog; - } catch (error) { - logger.warn(`Failed to fetch models.dev catalog: ${error}`); - return null; - } -}; - -const loadCatalog = async (): Promise => { - const now = Date.now(); - const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS; - const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS; - - // Kick off a (deduped) refresh when the cache is stale/empty and we're not - // within the post-failure backoff window. On success it replaces the cache; - // on failure it only records the failure time, leaving the last-known-good - // catalog intact. - if (!isFresh && !isBackingOff && !inFlightFetch) { - inFlightFetch = fetchCatalog().then((catalog) => { - if (catalog) { - cachedCatalog = catalog; - catalogFetchedAt = Date.now(); - } else { - lastFailedAt = Date.now(); - } - inFlightFetch = null; - return catalog; - }); - } - - // Once a catalog has loaded once, never block the request path on the - // network: serve the last-known-good value (even if stale) and let any - // refresh settle in the background. Only the very first load awaits. - if (cachedCatalog !== null) { - return cachedCatalog; - } - return inFlightFetch ?? null; -}; +// Re-exported so existing consumers/tests can keep importing the catalog type +// from here. +export type { ModelsDevCatalog } from './modelsDevCatalog.server'; /** * Pure lookup of a model's context window in a models.dev catalog. 
Separated @@ -110,7 +21,7 @@ export const lookupContextWindow = ( if (!catalog) { return undefined; } - const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? config.provider; + const providerId = resolveProviderId(config.provider); const context = catalog[providerId]?.models?.[config.model]?.limit?.context; // `limit` is schema-optional, and models.dev reports a 0 context window for // non-text models (image/audio/etc.). Treat both as "unknown" so the UI diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts index 9476820ae..818251a3f 100644 --- a/packages/web/src/features/chat/modelContextWindow.test.ts +++ b/packages/web/src/features/chat/modelContextWindow.test.ts @@ -81,13 +81,19 @@ describe('resolveContextWindow', () => { vi.unstubAllGlobals(); }); - test('fetches the catalog once and resolves windows (incl. provider mapping)', async () => { + test('fetches the catalog once in the background and resolves windows (incl. provider mapping)', async () => { const fetchMock = vi.fn(async () => ({ ok: true, json: async () => catalog, }) as unknown as Response); vi.stubGlobal('fetch', fetchMock); + // The request path never blocks on the fetch: the first lookup kicks off + // the background fetch and falls back to "unknown" while it's in flight. + expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + + // Once the background fetch settles, lookups resolve from the cached catalog. + await new Promise((resolve) => setTimeout(resolve, 0)); expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); // Subsequent lookups reuse the cached catalog rather than refetching. expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576); @@ -141,7 +147,10 @@ describe('resolveContextWindow resilience', () => { const mod = await importFresh(); - // First load populates the cache. 
+ // First load kicks off the background fetch (returning the "unknown" + // fallback until it settles), which then populates the cache. + expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + await new Promise((resolve) => setTimeout(resolve, 0)); expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); expect(fetchMock).toHaveBeenCalledTimes(1); diff --git a/packages/web/src/features/chat/modelsDevCatalog.server.ts b/packages/web/src/features/chat/modelsDevCatalog.server.ts new file mode 100644 index 000000000..f2344b6f7 --- /dev/null +++ b/packages/web/src/features/chat/modelsDevCatalog.server.ts @@ -0,0 +1,117 @@ +import 'server-only'; + +import { createLogger } from '@sourcebot/shared'; + +const logger = createLogger('models-dev-catalog'); + +// The same public, unauthenticated catalog the setup wizard already consumes +// (see packages/setupWizard/src/models.ts). Each model entry exposes a +// `limit.context` field (total context window in tokens) and a `modalities` +// field describing the inputs/outputs the model supports natively. +const MODELS_DEV_API_URL = 'https://models.dev/api.json'; +const FETCH_TIMEOUT_MS = 8000; +// Re-fetch the (~2.4 MB) catalog at most once per this interval per server +// process. New models trickle in daily; a stale window for a few hours is fine. +const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; +// After a failed fetch, don't reattempt for this long. Since the request path +// never blocks on the fetch (see loadCatalog), this throttles background +// refresh attempts to once per interval during a models.dev outage instead of +// kicking one off on (nearly) every request. +const NEGATIVE_CACHE_MS = 60 * 1000; + +// Sourcebot provider id -> models.dev top-level catalog key. 
// Sourcebot provider id -> models.dev top-level catalog key. Only providers
// whose Sourcebot id differs from the models.dev id need an entry; everything
// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai,
// openrouter, google-vertex, google-vertex-anthropic) matches 1:1.
//
// Held in a Map rather than an object literal so a lookup can only ever hit an
// explicit entry: indexing a plain object with an id such as 'constructor' or
// 'toString' would return an inherited Object.prototype member instead of
// falling through to the id itself.
const PROVIDER_ID_OVERRIDES: ReadonlyMap<string, string> = new Map([
    ['google-generative-ai', 'google'],
]);

/**
 * Maps a Sourcebot provider id to the key models.dev uses for that provider.
 *
 * @param provider The Sourcebot provider id.
 * @returns The overridden catalog key when one is registered, otherwise the
 *          provider id unchanged.
 */
export const resolveProviderId = (provider: string): string =>
    PROVIDER_ID_OVERRIDES.get(provider) ?? provider;

type ModelsDevModel = {
    id: string;
    limit?: {
        context?: number;
        output?: number;
    };
    modalities?: {
        // e.g. ["text", "image", "pdf", "audio", "video"]
        input?: string[];
        output?: string[];
    };
};

type ModelsDevProvider = {
    id: string;
    models?: Record<string, ModelsDevModel>;
};

export type ModelsDevCatalog = Record<string, ModelsDevProvider>;

// Last successfully-fetched catalog. Served while fresh, and kept as a fallback
// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL),
// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and
// `inFlightFetch` dedupes concurrent fetches.
let cachedCatalog: ModelsDevCatalog | null = null;
let catalogFetchedAt = 0;
let lastFailedAt = 0;
let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null;

// Minimal structural check for the top level of the catalog: a non-null,
// non-array object keyed by provider id. Deeper fields are optional in the
// declared types and are read defensively (optional chaining) by consumers.
const isCatalogShaped = (value: unknown): value is ModelsDevCatalog =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

/**
 * Fetches the models.dev catalog once, bounded by FETCH_TIMEOUT_MS.
 *
 * Never rejects: a non-OK response, a payload that is not a plain object, or
 * any thrown error (network failure, timeout, invalid JSON) is logged as a
 * warning and reported as null, which callers treat as a failed fetch.
 *
 * @returns The parsed catalog, or null when the fetch failed.
 */
const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
    try {
        const response = await fetch(MODELS_DEV_API_URL, {
            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        if (!response.ok) {
            logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`);
            return null;
        }
        const payload: unknown = await response.json();
        // Without this check any truthy JSON value (a string, an array, ...)
        // would be cached as "the catalog" for the whole TTL.
        if (!isCatalogShaped(payload)) {
            logger.warn('Failed to fetch models.dev catalog: unexpected response shape');
            return null;
        }
        return payload;
    } catch (error) {
        logger.warn(`Failed to fetch models.dev catalog: ${error}`);
        return null;
    }
};

/**
 * Returns the cached models.dev catalog, refreshing it in the background when
 * stale. The request path NEVER blocks on the network: the last-known-good
 * catalog is returned immediately (even if stale), or null before the first
 * successful fetch lands, and any refresh settles in the background.
 *
 * Consequences of never awaiting:
 *  - For the brief window after a cold start (before the first fetch resolves),
 *    capability resolution falls back to text-only; it self-heals on the next
 *    request once the background fetch populates the cache.
 *  - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the
 *    request path instead of repeatedly paying the fetch timeout.
 *
 * @returns The currently cached catalog, or null when none has been fetched yet.
 */
export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
    const now = Date.now();
    const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS;
    const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS;

    // Kick off a (deduped) refresh when the cache is stale/empty and we're not
    // within the post-failure backoff window. On success it replaces the cache;
    // on failure it only records the failure time, leaving the last-known-good
    // catalog intact. The promise is intentionally not awaited here so the
    // request path never waits on models.dev.
    if (!isFresh && !isBackingOff && !inFlightFetch) {
        inFlightFetch = fetchCatalog().then((catalog) => {
            if (catalog) {
                cachedCatalog = catalog;
                catalogFetchedAt = Date.now();
            } else {
                lastFailedAt = Date.now();
            }
            inFlightFetch = null;
            return catalog;
        });
    }

    // Serve whatever we currently have cached (possibly null on a cold start)
    // and let any in-flight refresh settle in the background.
    return cachedCatalog;
};
diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index ffc3483a4..90c83c859 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -3,15 +3,19 @@ import 'server-only'; import { getAnonymousId } from '@/lib/anonymousId'; import { Chat, Prisma, PrismaClient, User } from '@sourcebot/db'; import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; -import { env, loadConfig } from '@sourcebot/shared'; +import { createLogger, env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; +import { resolveModelCapabilities } from './modelCapabilities.server'; +import { loadCatalog } from './modelsDevCatalog.server'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; import { StatusCodes } from 'http-status-codes'; +const logger = createLogger('chat-utils'); + /** * Returns a FORBIDDEN ServiceError when the deployment lacks the `ask` * entitlement, or null when Ask is available. 
/**
 * Describes every configured language model as a `LanguageModelInfo`: the
 * model's provider, id and display name, plus the input modalities and
 * document types resolved for it by `resolveModelCapabilities`. All models are
 * resolved concurrently.
 */
export const getConfiguredLanguageModelsInfo = async () => {
    const configuredModels = await getConfiguredLanguageModels();

    const describeModel = async (configured: LanguageModel): Promise<LanguageModelInfo> => {
        const capabilities = await resolveModelCapabilities(configured);
        return {
            provider: configured.provider,
            model: configured.model,
            displayName: configured.displayName,
            inputModalities: capabilities.inputModalities,
            supportedDocumentTypes: capabilities.supportedDocumentTypes,
        };
    };

    return Promise.all(configuredModels.map((configured) => describeModel(configured)));
};

/**
 * Eagerly warms the models.dev capability catalog at server startup so the
 * first request after a cold start resolves real model capabilities instead of
 * the text-only fallback.
 *
 * Does nothing when no language models are configured, which avoids a
 * gratuitous outbound call for deployments not using Ask. The work is
 * fire-and-forget: this function returns immediately, the catalog load is not
 * awaited, and any unexpected error is logged rather than surfaced.
 */
export const warmModelCapabilitiesCatalog = (): void => {
    const warmIfConfigured = async (): Promise<void> => {
        const configuredModels = await getConfiguredLanguageModels();
        const hasConfiguredModels = configuredModels.length !== 0;
        if (!hasConfiguredModels) {
            return;
        }
        logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`);
        // Not awaited on purpose: only the side effect of starting the load matters.
        void loadCatalog();
    };

    void warmIfConfigured().catch((error) => {
        logger.error(`Failed to warm models.dev capability catalog: ${error}`);
    });
};
/**
 * Builds the key used to tell language models apart, joining the provider,
 * model id and display name with dashes. Only those three identifying fields
 * are required, so both the full `LanguageModel` config and the client-safe
 * `LanguageModelInfo` can be keyed with it.
 *
 * A model without a display name keeps the literal text "undefined" in the
 * last segment, because the value is interpolated as-is.
 */
export const getLanguageModelKey = (model: Pick<LanguageModelInfo, 'provider' | 'model' | 'displayName'>) => {
    const { provider, model: modelId, displayName } = model;
    return `${provider}-${modelId}-${displayName}`;
}