From 8540cdba92d8581c2eabd6507021663360726ca3 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Thu, 25 Jun 2026 20:58:07 -0700 Subject: [PATCH 01/10] feat(web): add language model inputModalities capability plumbing Add an optional `inputModalities` declaration to language model config and expose a resolved capability set to the client. - Schema: add optional `inputModalities` (`text` | `image` | `pdf`) to every provider definition in `schemas/v3/languageModel.json` and regenerate the schema types/snippets. - Add a fail-closed `resolveModelInputModalities` resolver that defaults to text-only when a model does not declare its input modalities. - Expose the resolved `inputModalities` on the client-safe `LanguageModelInfo` (populated via `getConfiguredLanguageModelsInfo` and the MCP ask path). This is groundwork for chat file attachments. It adds no attachment UI and no live provider capability probing yet. Co-authored-by: Cursor --- docs/snippets/schemas/v3/index.schema.mdx | 264 ++++++++++++++++++ .../schemas/v3/languageModel.schema.mdx | 264 ++++++++++++++++++ packages/schemas/src/v3/index.schema.ts | 264 ++++++++++++++++++ packages/schemas/src/v3/index.type.ts | 48 ++++ .../schemas/src/v3/languageModel.schema.ts | 264 ++++++++++++++++++ packages/schemas/src/v3/languageModel.type.ts | 48 ++++ .../web/src/ee/features/mcp/askCodebase.ts | 2 + .../src/features/chat/modelCapabilities.ts | 13 + packages/web/src/features/chat/types.ts | 4 + .../web/src/features/chat/utils.server.ts | 2 + schemas/v3/languageModel.json | 134 ++++++++- 11 files changed, 1306 insertions(+), 1 deletion(-) create mode 100644 packages/web/src/features/chat/modelCapabilities.ts diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 864359251..e0b00c540 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1860,6 +1860,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": 
"array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1998,6 +2009,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2133,6 +2155,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2240,6 +2273,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2361,6 +2405,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2484,6 +2539,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2623,6 +2689,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2730,6 +2807,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2863,6 +2951,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3027,6 +3126,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3135,6 +3245,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3246,6 +3367,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3426,6 +3558,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3564,6 +3707,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3699,6 +3853,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3806,6 +3971,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -3927,6 +4103,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4050,6 +4237,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4189,6 +4387,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4296,6 +4505,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4429,6 +4649,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4593,6 +4824,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." 
+ }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4701,6 +4943,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4812,6 +5065,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 90aee08af..7c7874207 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -174,6 +174,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -312,6 +323,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -447,6 +469,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -554,6 +587,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -675,6 +719,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -798,6 +853,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -937,6 +1003,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -1044,6 +1121,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1177,6 +1265,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1341,6 +1440,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1449,6 +1559,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1560,6 +1681,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -1740,6 +1872,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1878,6 +2021,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2013,6 +2167,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2120,6 +2285,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2241,6 +2417,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -2364,6 +2551,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2503,6 +2701,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2610,6 +2819,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2743,6 +2963,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2907,6 +3138,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -3015,6 +3257,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3126,6 +3379,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 8c1d64b52..257c8ae7d 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1859,6 +1859,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1997,6 +2008,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2132,6 +2154,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2239,6 +2272,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2360,6 +2404,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2483,6 +2538,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2622,6 +2688,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2729,6 +2806,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2862,6 +2950,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3026,6 +3125,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3134,6 +3244,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3245,6 +3366,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3425,6 +3557,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3563,6 +3706,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3698,6 +3852,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3805,6 +3970,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3926,6 +4102,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4049,6 +4236,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -4188,6 +4386,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4295,6 +4504,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4428,6 +4648,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4592,6 +4823,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4700,6 +4942,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -4811,6 +5064,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 7fa7f5a17..85dbaac43 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -762,6 +762,10 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional headers to use with the model. @@ -842,6 +846,10 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface AzureLanguageModel { /** @@ -897,6 +905,10 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface DeepSeekLanguageModel { /** @@ -936,6 +948,10 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -983,6 +999,10 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -1030,6 +1050,10 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexLanguageModel { /** @@ -1085,6 +1109,10 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface MistralLanguageModel { /** @@ -1124,6 +1152,10 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAILanguageModel { /** @@ -1171,6 +1203,10 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAICompatibleLanguageModel { /** @@ -1215,6 +1251,10 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional query parameters to include in the request url. @@ -1279,6 +1319,10 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface XaiLanguageModel { /** @@ -1318,6 +1362,10 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GitHubAppConfig { /** diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index ab418ce79..85c2bf8a8 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -173,6 +173,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -311,6 +322,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -446,6 +468,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -553,6 +586,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -674,6 +718,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -797,6 +852,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -936,6 +1002,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1043,6 +1120,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1176,6 +1264,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -1340,6 +1439,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1448,6 +1558,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1559,6 +1680,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1739,6 +1871,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1877,6 +2020,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -2012,6 +2166,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2119,6 +2284,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2240,6 +2416,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2363,6 +2550,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2502,6 +2700,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -2609,6 +2818,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2742,6 +2962,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2906,6 +3137,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3014,6 +3256,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3125,6 +3378,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index 5c3b25668..df4569ee8 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -88,6 +88,10 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional headers to use with the model. @@ -168,6 +172,10 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface AzureLanguageModel { /** @@ -223,6 +231,10 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface DeepSeekLanguageModel { /** @@ -262,6 +274,10 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -309,6 +325,10 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -356,6 +376,10 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexLanguageModel { /** @@ -411,6 +435,10 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface MistralLanguageModel { /** @@ -450,6 +478,10 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAILanguageModel { /** @@ -497,6 +529,10 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAICompatibleLanguageModel { /** @@ -541,6 +577,10 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional query parameters to include in the request url. @@ -605,6 +645,10 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface XaiLanguageModel { /** @@ -644,4 +688,8 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ */ + inputModalities?: ("text" | "image" | "pdf")[]; } diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 4b7cfb7b0..8b2432fb5 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -4,6 +4,7 @@ import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; +import { resolveModelInputModalities } from "@/features/chat/modelCapabilities"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -243,6 +244,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise { + const declared = config.inputModalities; + if (declared && declared.length > 0) { + return declared; + } + return ['text']; +} diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 38a737a09..615fe2b1c 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -208,10 +208,13 @@ type _AssertAllProviders = LanguageModelProvider extends typeof languageModelPro const _assertAllProviders: _AssertAllProviders = true; void _assertAllProviders; +export type InputModality = 'text' | 'image' | 'pdf'; + export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), + inputModalities: z.array(z.enum(['text', 'image', 
'pdf'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), }); /** @@ -221,6 +224,7 @@ export type LanguageModelInfo = { provider: LanguageModelProvider, model: LanguageModel['model'], displayName?: LanguageModel['displayName'], + inputModalities: InputModality[], } // Additional request body data that we send along to the chat API. diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index ffc3483a4..7ec47b677 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -7,6 +7,7 @@ import { env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; +import { resolveModelInputModalities } from './modelCapabilities'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; @@ -131,5 +132,6 @@ export const getConfiguredLanguageModelsInfo = async () => { provider: model.provider, model: model.model, displayName: model.displayName, + inputModalities: resolveModelInputModalities(model), })); }; diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index 3f1d13d52..0fb96217a 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -50,6 +50,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -93,6 +104,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -160,6 +182,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -199,6 +232,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -252,6 +296,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -307,6 +362,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -378,6 +444,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -417,6 +494,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -482,6 +570,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -537,6 +636,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
} }, "required": [ @@ -577,6 +687,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -620,6 +741,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -667,4 +799,4 @@ "$ref": "#/definitions/XaiLanguageModel" } ] -} \ No newline at end of file +} From a473b49cd8de430e00183305a563f34dba39c113 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Thu, 25 Jun 2026 20:58:48 -0700 Subject: [PATCH 02/10] docs: add CHANGELOG entry for language model inputModalities Co-authored-by: Cursor --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 689718d36..5163f833a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) +- Added optional `inputModalities` configuration for language models, exposing model input-modality capabilities (defaults to text-only). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. 
[#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) From 4b57d279bea951a86e806b46e555a30eebe615dc Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:08:53 -0700 Subject: [PATCH 03/10] refactor(schemas): split document types out of inputModalities inputModalities now only enumerates true perceptual channels (text | image | audio | video). Document/container formats like PDF move to a separate fail-closed `supportedDocumentTypes` field, since PDF is not a model modality but a format providers decompose into text/image internally. Co-authored-by: Cursor --- CHANGELOG.md | 2 +- docs/snippets/schemas/v3/index.schema.mdx | 288 ++++++++++++++++-- .../schemas/v3/languageModel.schema.mdx | 288 ++++++++++++++++-- packages/schemas/src/v3/index.schema.ts | 288 ++++++++++++++++-- packages/schemas/src/v3/index.type.ts | 96 ++++-- .../schemas/src/v3/languageModel.schema.ts | 288 ++++++++++++++++-- packages/schemas/src/v3/languageModel.type.ts | 96 ++++-- .../web/src/ee/features/mcp/askCodebase.ts | 3 +- .../src/features/chat/modelCapabilities.ts | 13 +- packages/web/src/features/chat/types.ts | 7 +- .../web/src/features/chat/utils.server.ts | 3 +- schemas/v3/languageModel.json | 144 ++++++++- 12 files changed, 1354 insertions(+), 162 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5163f833a..caa90e9b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) -- Added optional `inputModalities` configuration for language models, exposing model input-modality capabilities (defaults to text-only). 
[#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) +- Added optional `inputModalities` and `supportedDocumentTypes` configuration for language models, exposing model input-modality and document capabilities (defaults to text-only, no documents). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index e0b00c540..5b099d724 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1867,10 +1867,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2016,10 +2026,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2162,10 +2182,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2280,10 +2310,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2412,10 +2452,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2546,10 +2596,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -2696,10 +2756,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2814,10 +2884,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2958,10 +3038,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3133,10 +3223,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3252,10 +3352,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3374,10 +3484,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3565,10 +3685,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -3714,10 +3844,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3860,10 +4000,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3978,10 +4128,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4110,10 +4270,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4244,10 +4414,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4394,10 +4574,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4512,10 +4702,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -4656,10 +4856,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4831,10 +5041,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4950,10 +5170,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -5072,10 +5302,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 7c7874207..7b1e774cf 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -181,10 +181,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -330,10 +340,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -476,10 +496,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -594,10 +624,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -726,10 +766,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -860,10 +910,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1010,10 +1070,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1128,10 +1198,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1272,10 +1352,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1447,10 +1537,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1566,10 +1666,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1688,10 +1798,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -1879,10 +1999,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2028,10 +2158,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2174,10 +2314,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2292,10 +2442,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2424,10 +2584,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2558,10 +2728,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2708,10 +2888,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -2826,10 +3016,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2970,10 +3170,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3145,10 +3355,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3264,10 +3484,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3386,10 +3616,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 257c8ae7d..7d051544c 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1866,10 +1866,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2015,10 +2025,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2161,10 +2181,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2279,10 +2309,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -2411,10 +2451,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2545,10 +2595,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2695,10 +2755,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2813,10 +2883,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2957,10 +3037,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3132,10 +3222,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3251,10 +3351,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3373,10 +3483,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3564,10 +3684,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -3713,10 +3843,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3859,10 +3999,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3977,10 +4127,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4109,10 +4269,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4243,10 +4413,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4393,10 +4573,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4511,10 +4701,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4655,10 +4855,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4830,10 +5040,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -4949,10 +5169,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -5071,10 +5301,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 85dbaac43..14c8c14e2 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -763,9 +763,13 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -847,9 +851,13 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -906,9 +914,13 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -949,9 +961,13 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
+ */ + supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -1000,9 +1016,13 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -1051,9 +1071,13 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -1110,9 +1134,13 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -1153,9 +1181,13 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -1204,9 +1236,13 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -1252,9 +1288,13 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
+ */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. @@ -1320,9 +1360,13 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -1363,9 +1407,13 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GitHubAppConfig { /** diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index 85c2bf8a8..9c9ae7b2d 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -180,10 +180,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -329,10 +339,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -475,10 +495,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -593,10 +623,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -725,10 +765,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -859,10 +909,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1009,10 +1069,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1127,10 +1197,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1271,10 +1351,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1446,10 +1536,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1565,10 +1665,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1687,10 +1797,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1878,10 +1998,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -2027,10 +2157,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2173,10 +2313,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2291,10 +2441,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2423,10 +2583,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2557,10 +2727,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2707,10 +2887,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2825,10 +3015,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2969,10 +3169,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3144,10 +3354,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -3263,10 +3483,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3385,10 +3615,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index df4569ee8..3297689b7 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -89,9 +89,13 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -173,9 +177,13 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -232,9 +240,13 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -275,9 +287,13 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
+ */ + supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -326,9 +342,13 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -377,9 +397,13 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -436,9 +460,13 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -479,9 +507,13 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -530,9 +562,13 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -578,9 +614,13 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
+ */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. @@ -646,9 +686,13 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -689,7 +733,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). 
*/ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 8b2432fb5..7f779ffc8 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -4,7 +4,7 @@ import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; -import { resolveModelInputModalities } from "@/features/chat/modelCapabilities"; +import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from "@/features/chat/modelCapabilities"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -245,6 +245,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise { + const declared = config.supportedDocumentTypes; + if (declared && declared.length > 0) { + return declared; + } + return []; +} diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 615fe2b1c..e1daf0bdb 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -208,13 +208,15 @@ type _AssertAllProviders = LanguageModelProvider extends typeof languageModelPro const _assertAllProviders: _AssertAllProviders = true; void _assertAllProviders; -export type InputModality = 'text' | 'image' | 'pdf'; +export type InputModality = 'text' | 'image' | 'audio' | 'video'; +export type DocumentType = 'pdf'; export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The 
model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), - inputModalities: z.array(z.enum(['text', 'image', 'pdf'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), + inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), + supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("The document/file container formats the model can ingest natively. Defaults to none."), }); /** @@ -225,6 +227,7 @@ export type LanguageModelInfo = { model: LanguageModel['model'], displayName?: LanguageModel['displayName'], inputModalities: InputModality[], + supportedDocumentTypes: DocumentType[], } // Additional request body data that we send along to the chat API. diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index 7ec47b677..0b04226d8 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -7,7 +7,7 @@ import { env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; -import { resolveModelInputModalities } from './modelCapabilities'; +import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from './modelCapabilities'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; @@ -133,5 +133,6 @@ export const getConfiguredLanguageModelsInfo = async () => { model: model.model, displayName: model.displayName, inputModalities: resolveModelInputModalities(model), + supportedDocumentTypes: resolveModelSupportedDocumentTypes(model), })); }; diff --git a/schemas/v3/languageModel.json 
b/schemas/v3/languageModel.json index 0fb96217a..e49707484 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -57,10 +57,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -111,10 +121,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -189,10 +209,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -239,10 +269,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -303,10 +343,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -369,10 +419,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -451,10 +511,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -501,10 +571,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -577,10 +657,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
} }, "required": [ @@ -643,10 +733,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -694,10 +794,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -748,10 +858,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." 
+ }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ From 0baabcba43e86432a1a69846d90a47c83b62499f Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:17:14 -0700 Subject: [PATCH 04/10] docs(schemas): clarify what counts as a document type Tighten the inputModalities / supportedDocumentTypes descriptions to remove the implication that omitting supportedDocumentTypes blocks all non-text attachments. Clarify the taxonomy: single-medium files (images, audio, video) and plain-text files (.txt, .md) are governed by inputModalities; supportedDocumentTypes only gates rich compound container formats like PDF. 
Co-authored-by: Cursor --- docs/snippets/schemas/v3/index.schema.mdx | 96 +++++++++---------- .../schemas/v3/languageModel.schema.mdx | 96 +++++++++---------- packages/schemas/src/v3/index.schema.ts | 96 +++++++++---------- packages/schemas/src/v3/index.type.ts | 48 +++++----- .../schemas/src/v3/languageModel.schema.ts | 96 +++++++++---------- packages/schemas/src/v3/languageModel.type.ts | 48 +++++----- packages/web/src/features/chat/types.ts | 4 +- schemas/v3/languageModel.json | 48 +++++----- 8 files changed, 266 insertions(+), 266 deletions(-) diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 5b099d724..356da2009 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1871,7 +1871,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1880,7 +1880,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2030,7 +2030,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2039,7 +2039,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2186,7 +2186,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2195,7 +2195,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2314,7 +2314,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2323,7 +2323,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2456,7 +2456,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2465,7 +2465,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2600,7 +2600,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2609,7 +2609,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2760,7 +2760,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2769,7 +2769,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2888,7 +2888,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2897,7 +2897,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3042,7 +3042,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3051,7 +3051,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3227,7 +3227,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3236,7 +3236,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3356,7 +3356,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3365,7 +3365,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3488,7 +3488,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3497,7 +3497,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3689,7 +3689,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3698,7 +3698,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3848,7 +3848,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3857,7 +3857,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4004,7 +4004,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4013,7 +4013,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4132,7 +4132,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4141,7 +4141,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4274,7 +4274,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4283,7 +4283,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4418,7 +4418,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4427,7 +4427,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4578,7 +4578,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4587,7 +4587,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4706,7 +4706,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4715,7 +4715,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4860,7 +4860,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4869,7 +4869,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5045,7 +5045,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5054,7 +5054,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5174,7 +5174,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5183,7 +5183,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5306,7 +5306,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5315,7 +5315,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 7b1e774cf..5af4b3d96 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -185,7 +185,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -194,7 +194,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -344,7 +344,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -353,7 +353,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -500,7 +500,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -509,7 +509,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -628,7 +628,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -637,7 +637,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -770,7 +770,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -779,7 +779,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -914,7 +914,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -923,7 +923,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1074,7 +1074,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1083,7 +1083,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1202,7 +1202,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1211,7 +1211,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1356,7 +1356,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1365,7 +1365,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1541,7 +1541,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1550,7 +1550,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1670,7 +1670,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1679,7 +1679,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1802,7 +1802,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1811,7 +1811,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2003,7 +2003,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2012,7 +2012,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2162,7 +2162,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2171,7 +2171,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2318,7 +2318,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2327,7 +2327,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2446,7 +2446,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2455,7 +2455,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2588,7 +2588,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2597,7 +2597,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2732,7 +2732,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2741,7 +2741,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2892,7 +2892,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2901,7 +2901,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3020,7 +3020,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3029,7 +3029,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3174,7 +3174,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3183,7 +3183,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3359,7 +3359,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3368,7 +3368,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3488,7 +3488,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3497,7 +3497,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3620,7 +3620,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3629,7 +3629,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 7d051544c..123fd4a8b 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1870,7 +1870,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1879,7 +1879,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2029,7 +2029,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2038,7 +2038,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2185,7 +2185,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2194,7 +2194,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2313,7 +2313,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2322,7 +2322,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2455,7 +2455,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2464,7 +2464,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2599,7 +2599,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2608,7 +2608,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2759,7 +2759,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
}, "supportedDocumentTypes": { "type": "array", @@ -2768,7 +2768,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2887,7 +2887,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2896,7 +2896,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3041,7 +3041,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3050,7 +3050,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3226,7 +3226,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3235,7 +3235,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3355,7 +3355,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3364,7 +3364,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3487,7 +3487,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3496,7 +3496,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3688,7 +3688,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3697,7 +3697,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3847,7 +3847,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
}, "supportedDocumentTypes": { "type": "array", @@ -3856,7 +3856,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4003,7 +4003,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4012,7 +4012,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4131,7 +4131,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4140,7 +4140,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4273,7 +4273,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4282,7 +4282,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4417,7 +4417,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4426,7 +4426,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4577,7 +4577,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4586,7 +4586,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4705,7 +4705,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4714,7 +4714,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4859,7 +4859,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
}, "supportedDocumentTypes": { "type": "array", @@ -4868,7 +4868,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5044,7 +5044,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5053,7 +5053,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5173,7 +5173,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5182,7 +5182,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5305,7 +5305,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5314,7 +5314,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 14c8c14e2..d6f555e8d 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -763,11 +763,11 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
*/ supportedDocumentTypes?: "pdf"[]; } @@ -851,11 +851,11 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -914,11 +914,11 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -961,11 +961,11 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1016,11 +1016,11 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1071,11 +1071,11 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1134,11 +1134,11 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1181,11 +1181,11 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
*/ supportedDocumentTypes?: "pdf"[]; } @@ -1236,11 +1236,11 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1288,11 +1288,11 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1360,11 +1360,11 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1407,11 +1407,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index 9c9ae7b2d..61cc0adf3 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -184,7 +184,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -193,7 +193,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -343,7 +343,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -352,7 +352,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -499,7 +499,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -508,7 +508,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -627,7 +627,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -636,7 +636,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -769,7 +769,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -778,7 +778,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -913,7 +913,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -922,7 +922,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1073,7 +1073,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1082,7 +1082,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. 
PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1201,7 +1201,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1210,7 +1210,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1355,7 +1355,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1364,7 +1364,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1540,7 +1540,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1549,7 +1549,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1669,7 +1669,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1678,7 +1678,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1801,7 +1801,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1810,7 +1810,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2002,7 +2002,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
}, "supportedDocumentTypes": { "type": "array", @@ -2011,7 +2011,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2161,7 +2161,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2170,7 +2170,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2317,7 +2317,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2326,7 +2326,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2445,7 +2445,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2454,7 +2454,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2587,7 +2587,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2596,7 +2596,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2731,7 +2731,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2740,7 +2740,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2891,7 +2891,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2900,7 +2900,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3019,7 +3019,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
}, "supportedDocumentTypes": { "type": "array", @@ -3028,7 +3028,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3173,7 +3173,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3182,7 +3182,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." 
+ "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3358,7 +3358,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3367,7 +3367,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3487,7 +3487,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3496,7 +3496,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3619,7 +3619,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3628,7 +3628,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index 3297689b7..90a53b423 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -89,11 +89,11 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
*/ supportedDocumentTypes?: "pdf"[]; } @@ -177,11 +177,11 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -240,11 +240,11 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -287,11 +287,11 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -342,11 +342,11 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -397,11 +397,11 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -460,11 +460,11 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -507,11 +507,11 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
*/ supportedDocumentTypes?: "pdf"[]; } @@ -562,11 +562,11 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -614,11 +614,11 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). 
+ * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -686,11 +686,11 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -733,11 +733,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. 
Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index e1daf0bdb..3547c5d0a 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -215,8 +215,8 @@ export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), - inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), - supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("The document/file container formats the model can ingest natively. Defaults to none."), + inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept (images, audio, video, text). Single-medium attachments are gated by these. Defaults to text-only."), + supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("Rich compound document formats (e.g. PDF) the model can ingest natively, distinct from single-medium attachments gated by inputModalities. 
Defaults to none."), }); /** diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index e49707484..a952554b9 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -61,7 +61,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -70,7 +70,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -125,7 +125,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -134,7 +134,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -213,7 +213,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -222,7 +222,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -273,7 +273,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -282,7 +282,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -347,7 +347,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -356,7 +356,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -423,7 +423,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -432,7 +432,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -515,7 +515,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -524,7 +524,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -575,7 +575,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. 
When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -584,7 +584,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -661,7 +661,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -670,7 +670,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -737,7 +737,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -746,7 +746,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. 
When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -798,7 +798,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -807,7 +807,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -862,7 +862,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -871,7 +871,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ From 5e4045b0ef25d95ac740961d0368b5250e820daf Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:25:57 -0700 Subject: [PATCH 05/10] fix(web): widen getLanguageModelKey param to keyable subset LanguageModelInfo now has required inputModalities/supportedDocumentTypes, so a raw LanguageModel config (where those are optional) is no longer assignable to it. getLanguageModelKey only reads provider/model/displayName, so type its parameter as that Pick subset, letting both LanguageModel and LanguageModelInfo be keyed. Fixes the docker build type check. Co-authored-by: Cursor --- packages/web/src/features/chat/utils.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index c7f409ac7..b103ada7c 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -422,9 +422,11 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isTurn } /** - * Generates a unique key given a LanguageModelInfo object. + * Generates a unique key for a language model. Accepts any object carrying the + * identifying fields, so both the full `LanguageModel` config and the + * client-safe `LanguageModelInfo` can be keyed with it. 
*/ -export const getLanguageModelKey = (model: LanguageModelInfo) => { +export const getLanguageModelKey = (model: Pick<LanguageModelInfo, 'provider' | 'model' | 'displayName'>) => { return `${model.provider}-${model.model}-${model.displayName}`; } From 507d7586cb2f10f8ae166629cb36af19872d4e6d Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:45:05 -0700 Subject: [PATCH 06/10] chore(schemas,web): keep schema dist fresh and resolve types from source Two dev-experience fixes for the stale-build-output footgun: - schemas watch now runs `yarn build` (generate + tsc) instead of generate-only, so editing a schema JSON during `yarn dev` refreshes dist (both the .d.ts types and the runtime index.schema.js used by ajv), not just the generated source. - web tsconfig maps @sourcebot/schemas/v3|v2/* to the package source, so type-checking and the IDE read committed source directly instead of stale built .d.ts. Web only imports .type files (erased at compile), so there is no bundling/runtime impact. Co-authored-by: Cursor --- packages/schemas/package.json | 2 +- packages/web/tsconfig.json | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/schemas/package.json b/packages/schemas/package.json index 13fe2cb7a..3719a6da5 100644 --- a/packages/schemas/package.json +++ b/packages/schemas/package.json @@ -5,7 +5,7 @@ "scripts": { "build": "yarn generate && tsc", "generate": "tsx tools/generate.ts", - "watch": "nodemon --watch ../../schemas -e json -x 'yarn generate'", + "watch": "nodemon --watch ../../schemas -e json -x 'yarn build'", "postinstall": "yarn build" }, "devDependencies": { diff --git a/packages/web/tsconfig.json b/packages/web/tsconfig.json index f18162100..3f0e7534b 100644 --- a/packages/web/tsconfig.json +++ b/packages/web/tsconfig.json @@ -27,6 +27,12 @@ ], "@/public/*": [ "./public/*" + ], + "@sourcebot/schemas/v3/*": [ + "../schemas/src/v3/*" + ], + "@sourcebot/schemas/v2/*": [ + "../schemas/src/v2/*" ] }, "target": "ES2017" From de291cc8ab301b28067812186f534bcba5b6f69e Mon 
Sep 17 00:00:00 2001 From: whoisthey Date: Sat, 27 Jun 2026 11:30:15 -0700 Subject: [PATCH 07/10] refactor(web): resolve model capabilities from models.dev, not config.json Re-source language model input-modality / document capabilities from the models.dev catalog instead of hand-declared config.json fields, aligning with the move to de-emphasize on-disk config in favor of automatic resolution (the same catalog already backs context-window resolution). - Revert the inputModalities/supportedDocumentTypes additions to schemas/v3/languageModel.json and all regenerated artifacts; capabilities are no longer declared in config.json. - Extract the shared models.dev catalog plumbing (fetch/TTL/negative-cache/ stale-while-revalidate/provider-id overrides) into modelsDevCatalog.server.ts, now consumed by both context-window and capability resolution. - Add models.dev-backed resolveModelCapabilities (modelCapabilities.server.ts), partitioning the catalog's modalities.input list into Sourcebot's inputModalities (channels) and supportedDocumentTypes (containers); falls back to text-only for uncatalogued / self-hosted models. The client-safe LanguageModelInfo contract is unchanged; only the resolution backend moved. 
Co-authored-by: Cursor --- CHANGELOG.md | 2 +- docs/snippets/schemas/v3/index.schema.mdx | 504 ------------------ .../schemas/v3/languageModel.schema.mdx | 504 ------------------ packages/schemas/src/v3/index.schema.ts | 504 ------------------ packages/schemas/src/v3/index.type.ts | 96 ---- .../schemas/src/v3/languageModel.schema.ts | 504 ------------------ packages/schemas/src/v3/languageModel.type.ts | 96 ---- .../web/src/ee/features/mcp/askCodebase.ts | 7 +- .../chat/modelCapabilities.server.test.ts | 126 +++++ .../features/chat/modelCapabilities.server.ts | 64 +++ .../src/features/chat/modelCapabilities.ts | 24 - .../chat/modelContextWindow.server.ts | 99 +--- .../features/chat/modelsDevCatalog.server.ts | 111 ++++ .../web/src/features/chat/utils.server.ts | 17 +- schemas/v3/languageModel.json | 254 +-------- 15 files changed, 322 insertions(+), 2590 deletions(-) create mode 100644 packages/web/src/features/chat/modelCapabilities.server.test.ts create mode 100644 packages/web/src/features/chat/modelCapabilities.server.ts delete mode 100644 packages/web/src/features/chat/modelCapabilities.ts create mode 100644 packages/web/src/features/chat/modelsDevCatalog.server.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 27408c782..68e63d675 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) - [EE] Added mermaid diagram rendering to Ask Sourcebot answers, with pan/zoom, copy/export, in-thread deep links, and an interleaved right-panel view. [#1369](https://github.com/sourcebot-dev/sourcebot/pull/1369) - [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. 
[#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370) -- Added optional `inputModalities` and `supportedDocumentTypes` configuration for language models, exposing model input-modality and document capabilities (defaults to text-only, no documents). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) +- Added language model input-modality and document capability resolution, automatically resolved from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 356da2009..864359251 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1860,27 +1860,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2019,27 +1998,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2175,27 +2133,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2303,27 +2240,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2445,27 +2361,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2589,27 +2484,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2749,27 +2623,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2877,27 +2730,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3031,27 +2863,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3216,27 +3027,6 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3345,27 +3135,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3477,27 +3246,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3678,27 +3426,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3837,27 +3564,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3993,27 +3699,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4121,27 +3806,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4263,27 +3927,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -4407,27 +4050,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4567,27 +4189,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4695,27 +4296,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -4849,27 +4429,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5034,27 +4593,6 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5163,27 +4701,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -5295,27 +4812,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 5af4b3d96..90aee08af 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -174,27 +174,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -333,27 +312,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -489,27 +447,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -617,27 +554,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -759,27 +675,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -903,27 +798,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -1063,27 +937,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1191,27 +1044,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1345,27 +1177,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -1530,27 +1341,6 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1659,27 +1449,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1791,27 +1560,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -1992,27 +1740,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2151,27 +1878,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2307,27 +2013,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2435,27 +2120,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2577,27 +2241,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2721,27 +2364,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2881,27 +2503,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3009,27 +2610,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3163,27 +2743,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3348,27 +2907,6 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3477,27 +3015,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3609,27 +3126,6 @@ } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 123fd4a8b..8c1d64b52 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1859,27 +1859,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2018,27 +1997,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2174,27 +2132,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2302,27 +2239,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2444,27 +2360,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2588,27 +2483,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2748,27 +2622,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2876,27 +2729,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3030,27 +2862,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3215,27 +3026,6 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3344,27 +3134,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3476,27 +3245,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3677,27 +3425,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3836,27 +3563,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3992,27 +3698,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4120,27 +3805,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -4262,27 +3926,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4406,27 +4049,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4566,27 +4188,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -4694,27 +4295,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4848,27 +4428,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5033,27 +4592,6 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -5162,27 +4700,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5294,27 +4811,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index d6f555e8d..7fa7f5a17 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -762,14 +762,6 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -850,14 +842,6 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -913,14 +897,6 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -960,14 +936,6 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -1015,14 +983,6 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -1070,14 +1030,6 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). 
- */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -1133,14 +1085,6 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -1180,14 +1124,6 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -1235,14 +1171,6 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). 
- */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -1287,14 +1215,6 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. 
@@ -1359,14 +1279,6 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -1406,14 +1318,6 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface GitHubAppConfig { /** diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index 61cc0adf3..ab418ce79 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -173,27 +173,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -332,27 +311,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -488,27 +446,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -616,27 +553,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -758,27 +674,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -902,27 +797,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1062,27 +936,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1190,27 +1043,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1344,27 +1176,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1529,27 +1340,6 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1658,27 +1448,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -1790,27 +1559,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1991,27 +1739,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2150,27 +1877,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2306,27 +2012,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2434,27 +2119,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2576,27 +2240,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -2720,27 +2363,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2880,27 +2502,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." 
- }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3008,27 +2609,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3162,27 +2742,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3347,27 +2906,6 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3476,27 +3014,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -3608,27 +3125,6 @@ const schema = { } }, "additionalProperties": false - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index 90a53b423..5c3b25668 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -88,14 +88,6 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. 
Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -176,14 +168,6 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -239,14 +223,6 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -286,14 +262,6 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). 
- */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -341,14 +309,6 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -396,14 +356,6 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -459,14 +411,6 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). 
- */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -506,14 +450,6 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -561,14 +497,6 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -613,14 +541,6 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). 
- */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. - */ - supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. @@ -685,14 +605,6 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -732,12 +644,4 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; - /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). - */ - inputModalities?: ("text" | "image" | "audio" | "video")[]; - /** - * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. 
- */ - supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 59f8a35ec..35337d29f 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -5,7 +5,7 @@ import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { resolveContextWindow } from "@/features/chat/modelContextWindow.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; -import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from "@/features/chat/modelCapabilities"; +import { resolveModelCapabilities } from "@/features/chat/modelCapabilities.server"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -87,6 +87,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise ({ default: vi.fn() })); + +vi.mock('@sourcebot/shared', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})); + +import { lookupModelCapabilities, resolveModelCapabilities } from './modelCapabilities.server'; +import type { ModelsDevCatalog } from './modelsDevCatalog.server'; + +const catalog: ModelsDevCatalog = { + anthropic: { + id: 'anthropic', + models: { + // Text + image + a document (pdf) container format. + 'claude-sonnet-4-5': { + id: 'claude-sonnet-4-5', + modalities: { input: ['text', 'image', 'pdf'], output: ['text'] }, + }, + }, + }, + // models.dev keys Gemini under 'google', whereas Sourcebot's provider id is + // 'google-generative-ai' — exercises the provider id override. 
+ google: { + id: 'google', + models: { + 'gemini-2.5-pro': { + id: 'gemini-2.5-pro', + modalities: { input: ['text', 'image', 'audio', 'video', 'pdf'], output: ['text'] }, + }, + }, + }, + openai: { + id: 'openai', + models: { + // Catalogued model that omits `text` from its input list. + 'image-only': { id: 'image-only', modalities: { input: ['image'], output: ['text'] } }, + // Catalogued model with no `modalities` object at all. + 'no-modalities-model': { id: 'no-modalities-model' }, + }, + }, +}; + +const model = (provider: string, modelId: string) => + ({ provider, model: modelId }) as Pick; + +describe('lookupModelCapabilities', () => { + test('splits modalities and document types for a direct provider/model hit', () => { + expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], + }); + }); + + test('maps google-generative-ai to the catalog\'s google key', () => { + expect(lookupModelCapabilities(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({ + inputModalities: ['text', 'image', 'audio', 'video'], + supportedDocumentTypes: ['pdf'], + }); + }); + + test('always includes text even when the catalog omits it', () => { + expect(lookupModelCapabilities(catalog, model('openai', 'image-only'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only for a catalogued model with no modalities', () => { + expect(lookupModelCapabilities(catalog, model('openai', 'no-modalities-model'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only for an uncatalogued model (e.g. 
openai-compatible / self-hosted)', () => { + expect(lookupModelCapabilities(catalog, model('openai-compatible', 'my-local-model'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + expect(lookupModelCapabilities(catalog, model('anthropic', 'claude-unknown'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); + + test('falls back to text-only when the catalog is null (fetch failed / unreachable)', () => { + expect(lookupModelCapabilities(null, model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text'], + supportedDocumentTypes: [], + }); + }); +}); + +describe('resolveModelCapabilities', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + test('fetches the catalog once and resolves capabilities (incl. provider mapping)', async () => { + const fetchMock = vi.fn(async () => ({ + ok: true, + json: async () => catalog, + }) as unknown as Response); + vi.stubGlobal('fetch', fetchMock); + + expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], + }); + // Subsequent lookups reuse the cached catalog rather than refetching. 
+ expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({ + inputModalities: ['text', 'image', 'audio', 'video'], + supportedDocumentTypes: ['pdf'], + }); + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/web/src/features/chat/modelCapabilities.server.ts b/packages/web/src/features/chat/modelCapabilities.server.ts new file mode 100644 index 000000000..87d2cb131 --- /dev/null +++ b/packages/web/src/features/chat/modelCapabilities.server.ts @@ -0,0 +1,64 @@ +import 'server-only'; + +import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; +import { DocumentType, InputModality } from './types'; +import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server'; + +// models.dev folds every accepted input — perceptual channels (text, image, +// audio, video) AND container formats (pdf) — into a single `modalities.input` +// list. Sourcebot keeps those two concepts apart: `inputModalities` are the +// raw channels a model encodes, while `supportedDocumentTypes` are rich +// compound formats providers decompose server-side. We partition the catalog's +// input list into those two buckets here. +const INPUT_MODALITY_VALUES = ['text', 'image', 'audio', 'video'] as const satisfies readonly InputModality[]; +const DOCUMENT_TYPE_VALUES = ['pdf'] as const satisfies readonly DocumentType[]; + +const isInputModality = (value: string): value is InputModality => + (INPUT_MODALITY_VALUES as readonly string[]).includes(value); + +const isDocumentType = (value: string): value is DocumentType => + (DOCUMENT_TYPE_VALUES as readonly string[]).includes(value); + +export type ModelCapabilities = { + inputModalities: InputModality[]; + supportedDocumentTypes: DocumentType[]; +}; + +/** + * Pure lookup of a model's input capabilities in a models.dev catalog. + * Separated from the network fetch so it can be unit-tested directly. 
+ * + * Resolution is automatic from the catalog — capabilities are NOT hand-declared + * in config.json. When a model isn't catalogued (e.g. a self-hosted / + * openai-compatible endpoint we can't introspect), we fall back to text-only + * with no document support: the model stays fully usable for normal chat, and + * richer attachments stay gated off until we can positively confirm support. + */ +export const lookupModelCapabilities = ( + catalog: ModelsDevCatalog | null, + config: Pick, +): ModelCapabilities => { + const providerId = resolveProviderId(config.provider); + const inputs = catalog?.[providerId]?.models?.[config.model]?.modalities?.input; + + if (!inputs || inputs.length === 0) { + return { inputModalities: ['text'], supportedDocumentTypes: [] }; + } + + const inputModalities = inputs.filter(isInputModality); + const supportedDocumentTypes = inputs.filter(isDocumentType); + + // Every model accepts text, even if the catalog omits it from the list. + if (!inputModalities.includes('text')) { + inputModalities.unshift('text'); + } + + return { inputModalities, supportedDocumentTypes }; +}; + +export const resolveModelCapabilities = async ( + config: Pick, +): Promise => { + const catalog = await loadCatalog(); + return lookupModelCapabilities(catalog, config); +}; diff --git a/packages/web/src/features/chat/modelCapabilities.ts b/packages/web/src/features/chat/modelCapabilities.ts deleted file mode 100644 index 8b976af59..000000000 --- a/packages/web/src/features/chat/modelCapabilities.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; -import { DocumentType, InputModality } from './types'; - -// Fail-closed: when a model does not declare input modalities, assume text-only. -// NOTE: future work may add live provider capability probing (see -// tryResolveAnthropicThinkingConfig in llm.server.ts for the precedent). 
-export const resolveModelInputModalities = (config: LanguageModel): InputModality[] => { - const declared = config.inputModalities; - if (declared && declared.length > 0) { - return declared; - } - return ['text']; -} - -// Fail-closed: when a model does not declare supported document types, assume none. -// Document types (e.g. PDF) are container formats distinct from raw input -// modalities, since providers decompose them into text/image internally. -export const resolveModelSupportedDocumentTypes = (config: LanguageModel): DocumentType[] => { - const declared = config.supportedDocumentTypes; - if (declared && declared.length > 0) { - return declared; - } - return []; -} diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts index 0e70dc04f..f87bbcf3b 100644 --- a/packages/web/src/features/chat/modelContextWindow.server.ts +++ b/packages/web/src/features/chat/modelContextWindow.server.ts @@ -1,100 +1,11 @@ import 'server-only'; import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; -import { createLogger } from '@sourcebot/shared'; +import { loadCatalog, resolveProviderId, type ModelsDevCatalog } from './modelsDevCatalog.server'; -const logger = createLogger('model-context-window'); - -// The same public, unauthenticated catalog the setup wizard already consumes -// (see packages/setupWizard/src/models.ts). Each model entry exposes a -// `limit.context` field holding the total context window in tokens. -const MODELS_DEV_API_URL = 'https://models.dev/api.json'; -const FETCH_TIMEOUT_MS = 8000; -// Re-fetch the (~2.4 MB) catalog at most once per this interval per server -// process. New models trickle in daily; a stale window for a few hours is fine. -const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; -// After a failed fetch, don't reattempt for this long. Without it, an outage in -// models.dev would make every chat send pay the fetch timeout on the request path. 
-const NEGATIVE_CACHE_MS = 60 * 1000; - -// Sourcebot provider id -> models.dev top-level catalog key. Only providers -// whose Sourcebot id differs from the models.dev id need an entry; everything -// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai, -// openrouter, google-vertex, google-vertex-anthropic) matches 1:1. -const PROVIDER_ID_OVERRIDES: Record = { - 'google-generative-ai': 'google', -}; - -type ModelsDevModel = { - id: string; - limit?: { - context?: number; - output?: number; - }; -}; - -type ModelsDevProvider = { - id: string; - models?: Record; -}; - -export type ModelsDevCatalog = Record; - -// Last successfully-fetched catalog. Served while fresh, and kept as a fallback -// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL), -// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and -// `inFlightFetch` dedupes concurrent fetches. -let cachedCatalog: ModelsDevCatalog | null = null; -let catalogFetchedAt = 0; -let lastFailedAt = 0; -let inFlightFetch: Promise | null = null; - -const fetchCatalog = async (): Promise => { - try { - const response = await fetch(MODELS_DEV_API_URL, { - signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), - }); - if (!response.ok) { - logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`); - return null; - } - return await response.json() as ModelsDevCatalog; - } catch (error) { - logger.warn(`Failed to fetch models.dev catalog: ${error}`); - return null; - } -}; - -const loadCatalog = async (): Promise => { - const now = Date.now(); - const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS; - const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS; - - // Kick off a (deduped) refresh when the cache is stale/empty and we're not - // within the post-failure backoff window. 
On success it replaces the cache; - // on failure it only records the failure time, leaving the last-known-good - // catalog intact. - if (!isFresh && !isBackingOff && !inFlightFetch) { - inFlightFetch = fetchCatalog().then((catalog) => { - if (catalog) { - cachedCatalog = catalog; - catalogFetchedAt = Date.now(); - } else { - lastFailedAt = Date.now(); - } - inFlightFetch = null; - return catalog; - }); - } - - // Once a catalog has loaded once, never block the request path on the - // network: serve the last-known-good value (even if stale) and let any - // refresh settle in the background. Only the very first load awaits. - if (cachedCatalog !== null) { - return cachedCatalog; - } - return inFlightFetch ?? null; -}; +// Re-exported so existing consumers/tests can keep importing the catalog type +// from here. +export type { ModelsDevCatalog } from './modelsDevCatalog.server'; /** * Pure lookup of a model's context window in a models.dev catalog. Separated @@ -110,7 +21,7 @@ export const lookupContextWindow = ( if (!catalog) { return undefined; } - const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? config.provider; + const providerId = resolveProviderId(config.provider); const context = catalog[providerId]?.models?.[config.model]?.limit?.context; // `limit` is schema-optional, and models.dev reports a 0 context window for // non-text models (image/audio/etc.). Treat both as "unknown" so the UI diff --git a/packages/web/src/features/chat/modelsDevCatalog.server.ts b/packages/web/src/features/chat/modelsDevCatalog.server.ts new file mode 100644 index 000000000..8f6b35043 --- /dev/null +++ b/packages/web/src/features/chat/modelsDevCatalog.server.ts @@ -0,0 +1,111 @@ +import 'server-only'; + +import { createLogger } from '@sourcebot/shared'; + +const logger = createLogger('models-dev-catalog'); + +// The same public, unauthenticated catalog the setup wizard already consumes +// (see packages/setupWizard/src/models.ts). 
Each model entry exposes a +// `limit.context` field (total context window in tokens) and a `modalities` +// field describing the inputs/outputs the model supports natively. +const MODELS_DEV_API_URL = 'https://models.dev/api.json'; +const FETCH_TIMEOUT_MS = 8000; +// Re-fetch the (~2.4 MB) catalog at most once per this interval per server +// process. New models trickle in daily; a stale window for a few hours is fine. +const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; +// After a failed fetch, don't reattempt for this long. Without it, an outage in +// models.dev would make every chat send pay the fetch timeout on the request path. +const NEGATIVE_CACHE_MS = 60 * 1000; + +// Sourcebot provider id -> models.dev top-level catalog key. Only providers +// whose Sourcebot id differs from the models.dev id need an entry; everything +// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai, +// openrouter, google-vertex, google-vertex-anthropic) matches 1:1. +const PROVIDER_ID_OVERRIDES: Record = { + 'google-generative-ai': 'google', +}; + +export const resolveProviderId = (provider: string): string => + PROVIDER_ID_OVERRIDES[provider] ?? provider; + +type ModelsDevModel = { + id: string; + limit?: { + context?: number; + output?: number; + }; + modalities?: { + // e.g. ["text", "image", "pdf", "audio", "video"] + input?: string[]; + output?: string[]; + }; +}; + +type ModelsDevProvider = { + id: string; + models?: Record; +}; + +export type ModelsDevCatalog = Record; + +// Last successfully-fetched catalog. Served while fresh, and kept as a fallback +// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL), +// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and +// `inFlightFetch` dedupes concurrent fetches. 
+let cachedCatalog: ModelsDevCatalog | null = null; +let catalogFetchedAt = 0; +let lastFailedAt = 0; +let inFlightFetch: Promise | null = null; + +const fetchCatalog = async (): Promise => { + try { + const response = await fetch(MODELS_DEV_API_URL, { + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }); + if (!response.ok) { + logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`); + return null; + } + return await response.json() as ModelsDevCatalog; + } catch (error) { + logger.warn(`Failed to fetch models.dev catalog: ${error}`); + return null; + } +}; + +/** + * Returns the cached models.dev catalog, refreshing it in the background when + * stale. Only the very first load blocks on the network; thereafter the + * last-known-good catalog is served immediately (even if stale) so the request + * path never waits on models.dev. + */ +export const loadCatalog = async (): Promise => { + const now = Date.now(); + const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS; + const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS; + + // Kick off a (deduped) refresh when the cache is stale/empty and we're not + // within the post-failure backoff window. On success it replaces the cache; + // on failure it only records the failure time, leaving the last-known-good + // catalog intact. + if (!isFresh && !isBackingOff && !inFlightFetch) { + inFlightFetch = fetchCatalog().then((catalog) => { + if (catalog) { + cachedCatalog = catalog; + catalogFetchedAt = Date.now(); + } else { + lastFailedAt = Date.now(); + } + inFlightFetch = null; + return catalog; + }); + } + + // Once a catalog has loaded once, never block the request path on the + // network: serve the last-known-good value (even if stale) and let any + // refresh settle in the background. Only the very first load awaits. + if (cachedCatalog !== null) { + return cachedCatalog; + } + return inFlightFetch ?? 
null; +}; diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index 0b04226d8..a458c41d8 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -7,7 +7,7 @@ import { env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; -import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from './modelCapabilities'; +import { resolveModelCapabilities } from './modelCapabilities.server'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; @@ -128,11 +128,14 @@ export const getConfiguredLanguageModels = async (): Promise => */ export const getConfiguredLanguageModelsInfo = async () => { const models = await getConfiguredLanguageModels(); - return models.map((model): LanguageModelInfo => ({ - provider: model.provider, - model: model.model, - displayName: model.displayName, - inputModalities: resolveModelInputModalities(model), - supportedDocumentTypes: resolveModelSupportedDocumentTypes(model), + return Promise.all(models.map(async (model): Promise => { + const { inputModalities, supportedDocumentTypes } = await resolveModelCapabilities(model); + return { + provider: model.provider, + model: model.model, + displayName: model.displayName, + inputModalities, + supportedDocumentTypes, + }; })); }; diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index a952554b9..3f1d13d52 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -50,27 +50,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels 
the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -114,27 +93,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -202,27 +160,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -262,27 +199,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -336,27 +252,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -412,27 +307,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). 
When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -504,27 +378,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -564,27 +417,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. 
Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -650,27 +482,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. 
A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -726,27 +537,6 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." 
} }, "required": [ @@ -787,27 +577,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -851,27 +620,6 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" - }, - "inputModalities": { - "type": "array", - "items": { - "enum": [ - "text", - "image", - "audio", - "video" - ] - }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. 
When omitted, the model is treated as text-only (fail-closed)." - }, - "supportedDocumentTypes": { - "type": "array", - "items": { - "enum": [ - "pdf" - ] - }, - "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -919,4 +667,4 @@ "$ref": "#/definitions/XaiLanguageModel" } ] -} +} \ No newline at end of file From bf792601ee6a0da559aed28d354812bc18bc8f5c Mon Sep 17 00:00:00 2001 From: whoisthey Date: Sat, 27 Jun 2026 12:57:45 -0700 Subject: [PATCH 08/10] stronger typing for contract --- packages/web/src/features/chat/types.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 50792dcb8..659551d4f 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -209,15 +209,18 @@ type _AssertAllProviders = LanguageModelProvider extends typeof languageModelPro const _assertAllProviders: _AssertAllProviders = true; void _assertAllProviders; -export type InputModality = 'text' | 'image' | 'audio' | 'video'; -export type DocumentType = 'pdf'; +export const inputModalities = ['text', 'image', 'audio', 'video'] as const; +export type InputModality = typeof inputModalities[number]; + +export const documentTypes = ['pdf'] as const; +export type DocumentType = typeof documentTypes[number]; export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), 
model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), - inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept (images, audio, video, text). Single-medium attachments are gated by these. Defaults to text-only."), - supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("Rich compound document formats (e.g. PDF) the model can ingest natively, distinct from single-medium attachments gated by inputModalities. Defaults to none."), + inputModalities: z.array(z.enum(inputModalities)).default(['text']).describe("The input modalities the model can accept (images, audio, video, text). Single-medium attachments are gated by these. Defaults to text-only."), + supportedDocumentTypes: z.array(z.enum(documentTypes)).default([]).describe("Rich compound document formats (e.g. PDF) the model can ingest natively, distinct from single-medium attachments gated by inputModalities. 
Defaults to none."), }); /** From dbcfc8a6036429193194705352194fed248ceb79 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Sat, 27 Jun 2026 13:28:33 -0700 Subject: [PATCH 09/10] remove blocking models.dev catalog request and add cache warm on startup --- .../chat/modelCapabilities.server.test.ts | 17 ++++++++-- .../features/chat/modelsDevCatalog.server.ts | 32 +++++++++++-------- packages/web/src/initialize.ts | 12 +++++++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/packages/web/src/features/chat/modelCapabilities.server.test.ts b/packages/web/src/features/chat/modelCapabilities.server.test.ts index 0a2e9a2ec..4cd4121bf 100644 --- a/packages/web/src/features/chat/modelCapabilities.server.test.ts +++ b/packages/web/src/features/chat/modelCapabilities.server.test.ts @@ -104,17 +104,28 @@ describe('resolveModelCapabilities', () => { vi.unstubAllGlobals(); }); - test('fetches the catalog once and resolves capabilities (incl. provider mapping)', async () => { + test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => { const fetchMock = vi.fn(async () => ({ ok: true, json: async () => catalog, }) as unknown as Response); vi.stubGlobal('fetch', fetchMock); + // The request path never blocks on the fetch: the first lookup kicks off + // the background fetch and falls back to text-only while it's in flight. expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ - inputModalities: ['text', 'image'], - supportedDocumentTypes: ['pdf'], + inputModalities: ['text'], + supportedDocumentTypes: [], }); + + // Once the background fetch settles, lookups resolve from the cached catalog. + await vi.waitFor(async () => { + expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], + }); + }); + // Subsequent lookups reuse the cached catalog rather than refetching. 
expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({ inputModalities: ['text', 'image', 'audio', 'video'], diff --git a/packages/web/src/features/chat/modelsDevCatalog.server.ts b/packages/web/src/features/chat/modelsDevCatalog.server.ts index 8f6b35043..f2344b6f7 100644 --- a/packages/web/src/features/chat/modelsDevCatalog.server.ts +++ b/packages/web/src/features/chat/modelsDevCatalog.server.ts @@ -13,8 +13,10 @@ const FETCH_TIMEOUT_MS = 8000; // Re-fetch the (~2.4 MB) catalog at most once per this interval per server // process. New models trickle in daily; a stale window for a few hours is fine. const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; -// After a failed fetch, don't reattempt for this long. Without it, an outage in -// models.dev would make every chat send pay the fetch timeout on the request path. +// After a failed fetch, don't reattempt for this long. Since the request path +// never blocks on the fetch (see loadCatalog), this throttles background +// refresh attempts to once per interval during a models.dev outage instead of +// kicking one off on (nearly) every request. const NEGATIVE_CACHE_MS = 60 * 1000; // Sourcebot provider id -> models.dev top-level catalog key. Only providers @@ -75,9 +77,16 @@ const fetchCatalog = async (): Promise => { /** * Returns the cached models.dev catalog, refreshing it in the background when - * stale. Only the very first load blocks on the network; thereafter the - * last-known-good catalog is served immediately (even if stale) so the request - * path never waits on models.dev. + * stale. The request path NEVER blocks on the network: the last-known-good + * catalog is returned immediately (even if stale), or null before the first + * successful fetch lands, and any refresh settles in the background. 
+ * + * Consequences of never awaiting: + * - For the brief window after a cold start (before the first fetch resolves), + * capability resolution falls back to text-only; it self-heals on the next + * request once the background fetch populates the cache. + * - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the + * request path instead of repeatedly paying the fetch timeout. */ export const loadCatalog = async (): Promise => { const now = Date.now(); @@ -87,7 +96,8 @@ export const loadCatalog = async (): Promise => { // Kick off a (deduped) refresh when the cache is stale/empty and we're not // within the post-failure backoff window. On success it replaces the cache; // on failure it only records the failure time, leaving the last-known-good - // catalog intact. + // catalog intact. The promise is intentionally not awaited here so the + // request path never waits on models.dev. if (!isFresh && !isBackingOff && !inFlightFetch) { inFlightFetch = fetchCatalog().then((catalog) => { if (catalog) { @@ -101,11 +111,7 @@ export const loadCatalog = async (): Promise => { }); } - // Once a catalog has loaded once, never block the request path on the - // network: serve the last-known-good value (even if stale) and let any - // refresh settle in the background. Only the very first load awaits. - if (cachedCatalog !== null) { - return cachedCatalog; - } - return inFlightFetch ?? null; + // Serve whatever we currently have cached (possibly null on a cold start) + // and let any in-flight refresh settle in the background. 
+ return cachedCatalog; }; diff --git a/packages/web/src/initialize.ts b/packages/web/src/initialize.ts index 0a8eb90f9..a63581ad1 100644 --- a/packages/web/src/initialize.ts +++ b/packages/web/src/initialize.ts @@ -4,6 +4,8 @@ import { startChangelogPollingJob } from '@/features/changelog/pollChangelog'; import { createLogger, env } from "@sourcebot/shared"; import { hasEntitlement } from '@/lib/entitlements'; import { SINGLE_TENANT_ORG_ID } from './lib/constants'; +import { getConfiguredLanguageModels } from '@/features/chat/utils.server'; +import { loadCatalog } from '@/features/chat/modelsDevCatalog.server'; const logger = createLogger('web-initialize'); @@ -73,8 +75,18 @@ const init = async () => { } } +const warmModelCapabilitiesCatalog = async () => { + const configuredModels = await getConfiguredLanguageModels(); + if (configuredModels.length === 0) { + return; + } + logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`); + void loadCatalog(); +}; + (async () => { await init(); startServicePingCronJob(); startChangelogPollingJob(); + await warmModelCapabilitiesCatalog(); })(); From 7ba297bca6208a1f0cf26e5b6b4d19dd5d91886c Mon Sep 17 00:00:00 2001 From: whoisthey Date: Sat, 27 Jun 2026 13:45:34 -0700 Subject: [PATCH 10/10] cleanup warming --- .../features/chat/modelContextWindow.test.ts | 13 ++++++++-- .../web/src/features/chat/utils.server.ts | 26 ++++++++++++++++++- packages/web/src/initialize.ts | 14 ++-------- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts index 9476820ae..818251a3f 100644 --- a/packages/web/src/features/chat/modelContextWindow.test.ts +++ b/packages/web/src/features/chat/modelContextWindow.test.ts @@ -81,13 +81,19 @@ describe('resolveContextWindow', () => { vi.unstubAllGlobals(); }); - test('fetches the catalog once and resolves windows (incl. 
provider mapping)', async () => { + test('fetches the catalog once in the background and resolves windows (incl. provider mapping)', async () => { const fetchMock = vi.fn(async () => ({ ok: true, json: async () => catalog, }) as unknown as Response); vi.stubGlobal('fetch', fetchMock); + // The request path never blocks on the fetch: the first lookup kicks off + // the background fetch and falls back to "unknown" while it's in flight. + expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + + // Once the background fetch settles, lookups resolve from the cached catalog. + await new Promise((resolve) => setTimeout(resolve, 0)); expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); // Subsequent lookups reuse the cached catalog rather than refetching. expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576); @@ -141,7 +147,10 @@ describe('resolveContextWindow resilience', () => { const mod = await importFresh(); - // First load populates the cache. + // First load kicks off the background fetch (returning the "unknown" + // fallback until it settles), which then populates the cache. 
+ expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined(); + await new Promise((resolve) => setTimeout(resolve, 0)); expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000); expect(fetchMock).toHaveBeenCalledTimes(1); diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index a458c41d8..90c83c859 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -3,16 +3,19 @@ import 'server-only'; import { getAnonymousId } from '@/lib/anonymousId'; import { Chat, Prisma, PrismaClient, User } from '@sourcebot/db'; import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; -import { env, loadConfig } from '@sourcebot/shared'; +import { createLogger, env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; import { resolveModelCapabilities } from './modelCapabilities.server'; +import { loadCatalog } from './modelsDevCatalog.server'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; import { StatusCodes } from 'http-status-codes'; +const logger = createLogger('chat-utils'); + /** * Returns a FORBIDDEN ServiceError when the deployment lacks the `ask` * entitlement, or null when Ask is available. Gates the generative chat @@ -139,3 +142,24 @@ export const getConfiguredLanguageModelsInfo = async () => { }; })); }; + +/** + * Eagerly warms the models.dev capability catalog at server startup so the first + * request after a cold start resolves real model capabilities instead of the + * text-only fallback. No-op when no language models are configured (avoids a + * gratuitous outbound call for deployments not using Ask). 
Best-effort and + * non-blocking: loadCatalog kicks off a background fetch and returns immediately, + * and any unexpected error is logged rather than surfaced. + */ +export const warmModelCapabilitiesCatalog = (): void => { + void (async () => { + const configuredModels = await getConfiguredLanguageModels(); + if (configuredModels.length === 0) { + return; + } + logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`); + void loadCatalog(); + })().catch((error) => { + logger.error(`Failed to warm models.dev capability catalog: ${error}`); + }); +}; diff --git a/packages/web/src/initialize.ts b/packages/web/src/initialize.ts index a63581ad1..406116dee 100644 --- a/packages/web/src/initialize.ts +++ b/packages/web/src/initialize.ts @@ -4,8 +4,7 @@ import { startChangelogPollingJob } from '@/features/changelog/pollChangelog'; import { createLogger, env } from "@sourcebot/shared"; import { hasEntitlement } from '@/lib/entitlements'; import { SINGLE_TENANT_ORG_ID } from './lib/constants'; -import { getConfiguredLanguageModels } from '@/features/chat/utils.server'; -import { loadCatalog } from '@/features/chat/modelsDevCatalog.server'; +import { warmModelCapabilitiesCatalog } from '@/features/chat/utils.server'; const logger = createLogger('web-initialize'); @@ -75,18 +74,9 @@ const init = async () => { } } -const warmModelCapabilitiesCatalog = async () => { - const configuredModels = await getConfiguredLanguageModels(); - if (configuredModels.length === 0) { - return; - } - logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`); - void loadCatalog(); -}; - (async () => { await init(); startServicePingCronJob(); startChangelogPollingJob(); - await warmModelCapabilitiesCatalog(); + warmModelCapabilitiesCatalog(); })();