From 1797d9a33d828163801d1738b309d2258de9ed36 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:38:51 +0000 Subject: [PATCH 01/18] Move CacheableMethod to mcp_types.methods and derive CACHEABLE_METHODS from the method registry --- src/mcp-types/mcp_types/methods.py | 27 ++++++++++++++++++++++++++- src/mcp/server/caching.py | 18 ++---------------- tests/server/test_caching.py | 19 ++----------------- tests/types/test_methods.py | 8 ++++++++ 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/mcp-types/mcp_types/methods.py b/src/mcp-types/mcp_types/methods.py index 824dcfdfe..0c7cd04ad 100644 --- a/src/mcp-types/mcp_types/methods.py +++ b/src/mcp-types/mcp_types/methods.py @@ -13,7 +13,7 @@ from collections.abc import Mapping from functools import cache from types import MappingProxyType, UnionType -from typing import Any, Final, TypeVar +from typing import Any, Final, Literal, TypeVar, get_args from pydantic import BaseModel, TypeAdapter @@ -23,9 +23,11 @@ from mcp_types.version import KNOWN_PROTOCOL_VERSIONS __all__ = [ + "CACHEABLE_METHODS", "CLIENT_NOTIFICATIONS", "CLIENT_REQUESTS", "CLIENT_RESULTS", + "CacheableMethod", "MONOLITH_NOTIFICATIONS", "MONOLITH_REQUESTS", "MONOLITH_RESULTS", @@ -404,6 +406,29 @@ """Monolith result model (or two-arm union) per request method.""" +# --- Cacheable methods --- + +CacheableMethod = Literal[ + "prompts/list", + "resources/list", + "resources/read", + "resources/templates/list", + "server/discover", + "tools/list", +] +"""The methods whose results carry `ttlMs`/`cacheScope`. Closed set: the spec +defines caching hints on exactly these six. Hand-written because a Literal +cannot be computed at runtime; tests weld it to `CACHEABLE_METHODS`.""" + +CACHEABLE_METHODS: Final[frozenset[str]] = frozenset( + method + for method, row in MONOLITH_RESULTS.items() + if any(issubclass(arm, types.CacheableResult) for arm in (get_args(row) if isinstance(row, UnionType) else (row,))) +) +"""Runtime mirror of `CacheableMethod`, derived from `MONOLITH_RESULTS`: a +method is cacheable iff its result row has a `CacheableResult` arm.""" + + # --- Parse functions --- # Envelope stubs merged into bodies for surface validation (surface classes are full frames). diff --git a/src/mcp/server/caching.py b/src/mcp/server/caching.py index a8a2a470c..f8dcb558b 100644 --- a/src/mcp/server/caching.py +++ b/src/mcp/server/caching.py @@ -11,27 +11,13 @@ from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Final, Literal, TypeVar, get_args +from typing import Any, Literal, TypeVar import mcp_types as types +from mcp_types.methods import CACHEABLE_METHODS, CacheableMethod __all__ = ["CACHEABLE_METHODS", "CacheHint", "CacheableMethod", "apply_cache_hint", "validate_cache_hints"] -CacheableMethod = Literal[ - "prompts/list", - "resources/list", - "resources/read", - "resources/templates/list", - "server/discover", - "tools/list", -] -"""The methods whose results carry `ttlMs`/`cacheScope`. Closed set: the spec -defines caching hints on exactly these six (tests pin it to which result models -mix in `CacheableResult`).""" - -CACHEABLE_METHODS: Final[frozenset[str]] = frozenset(get_args(CacheableMethod)) -"""Runtime mirror of `CacheableMethod`, for callers the type checker can't see.""" - @dataclass(frozen=True, slots=True) class CacheHint: diff --git a/tests/server/test_caching.py b/tests/server/test_caching.py index 46701d659..a540fe037 100644 --- a/tests/server/test_caching.py +++ b/tests/server/test_caching.py @@ -1,40 +1,25 @@ """`mcp.server.caching`: `CacheHint` validation, per-field fills, and the `cache_hints` constructor map reaching the wire on both server tiers.""" -from types import UnionType -from typing import Any, cast, get_args +from typing import Any, cast import pytest from inline_snapshot import snapshot from mcp_types import ( - CacheableResult, ListResourcesResult, ListToolsResult, PaginatedRequestParams, Resource, Tool, - methods, ) from mcp import Client from mcp.server import CacheHint, MCPServer, Server, ServerRequestContext -from mcp.server.caching import CACHEABLE_METHODS, apply_cache_hint +from mcp.server.caching import apply_cache_hint pytestmark = pytest.mark.anyio -def test_cacheable_methods_match_the_result_models() -> None: - """Spec-mandated set (SEP-2549): `CACHEABLE_METHODS` mirrors exactly the - methods whose monolith result models mix in `CacheableResult` - if the - schema gains or loses a cacheable result, this weld breaks.""" - derived: set[str] = set() - for method, model in methods.MONOLITH_RESULTS.items(): - arms = get_args(model) if isinstance(model, UnionType) else (model,) - if any(isinstance(arm, type) and issubclass(arm, CacheableResult) for arm in arms): - derived.add(method) - assert CACHEABLE_METHODS == derived - - def test_cache_hint_defaults_match_the_conservative_model_defaults() -> None: """SDK-defined: an unconfigured hint fills the same values the result models already default to - immediately stale, not shared - so stamping it is diff --git a/tests/types/test_methods.py b/tests/types/test_methods.py index 79ea067c6..237578e52 100644 --- a/tests/types/test_methods.py +++ b/tests/types/test_methods.py @@ -548,6 +548,14 @@ def test_built_in_maps_are_immutable(): _assign_item(built_in) +def test_cacheable_methods_mirror_the_cacheable_method_literal(): + """Spec-mandated set (SEP-2549): the hand-written `CacheableMethod` Literal and + `CACHEABLE_METHODS` (derived from which `MONOLITH_RESULTS` rows have a + `CacheableResult` arm) name the same methods - if the schema gains or loses a + cacheable result, this weld breaks.""" + assert methods.CACHEABLE_METHODS == frozenset(get_args(methods.CacheableMethod)) + + def test_minimal_request_bodies_parse_through_every_request_row(): for (method, version), surface_type in methods.CLIENT_REQUESTS.items(): parsed = methods.parse_client_request(method, version, REQUEST_PARAMS_FIXTURES[surface_type]) From d2d4a25b3b0d140a9aa610ff3d31f22687076714 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:43:57 +0000 Subject: [PATCH 02/18] Add client response cache primitives: config, keys, store protocol, in-memory store --- src/mcp/client/__init__.py | 10 +- src/mcp/client/caching.py | 188 +++++++++++++++++++++ tests/client/test_caching.py | 308 +++++++++++++++++++++++++++++++++++ 3 files changed, 505 insertions(+), 1 deletion(-) create mode 100644 src/mcp/client/caching.py create mode 100644 tests/client/test_caching.py diff --git a/src/mcp/client/__init__.py b/src/mcp/client/__init__.py index f9f732ad9..f92a01eb4 100644 --- a/src/mcp/client/__init__.py +++ b/src/mcp/client/__init__.py @@ -2,8 +2,16 @@ from mcp.client._input_required import InputRequiredRoundsExceededError from mcp.client._transport import Transport +from mcp.client.caching import CacheConfig from mcp.client.client import Client from mcp.client.context import ClientRequestContext from mcp.client.session import ClientSession -__all__ = ["Client", "ClientRequestContext", "ClientSession", "InputRequiredRoundsExceededError", "Transport"] +__all__ = [ + "CacheConfig", + "Client", + "ClientRequestContext", + "ClientSession", + "InputRequiredRoundsExceededError", + "Transport", +] diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py new file mode 100644 index 000000000..3c55dc2cb --- /dev/null +++ b/src/mcp/client/caching.py @@ -0,0 +1,188 @@ +"""Client-side response caching primitives (SEP-2549, protocol revision 2026-07-28). + +Results for the cacheable methods carry `ttlMs`/`cacheScope` freshness hints; +the client honors them through a response cache configured with `CacheConfig`. +This module defines the configuration, the store contract (`ResponseCacheStore` +keyed by `CacheKey`, holding `CacheEntry` values), and the default in-process +store. Wiring into `Client` lives in `mcp.client.client`. +""" + +from __future__ import annotations + +import time +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any, Final, Literal, Protocol + +__all__ = [ + "MAX_TTL_MS", + "CacheConfig", + "CacheEntry", + "CacheKey", + "CacheMode", + "InMemoryResponseCacheStore", + "ResponseCacheStore", +] + +CacheMode = Literal["use", "refresh", "bypass"] +"""Per-call cache behavior: `"use"` serves fresh entries and stores fetches, +`"refresh"` skips the read but stores the fetch, `"bypass"` touches the cache +not at all.""" + +MAX_TTL_MS: Final[int] = 24 * 60 * 60 * 1000 +"""Upper bound on any entry's time-to-live (24 hours, in milliseconds): a +server-provided or configured `ttlMs` above it is clamped down, bounding how +long a stale entry can be served.""" + + +@dataclass(frozen=True, slots=True) +class CacheKey: + """Identity of one cached response. + + Stores MUST compare keys as the `(method, params_key, partition)` field + tuple - never by flattening the fields into one delimited string, which + lets crafted values collide across field boundaries. + """ + + method: str + """The request method, e.g. `"tools/list"`.""" + + params_key: str = "" + """Result-affecting params discriminator: the uri for `resources/read`, + `""` for the list methods (only cursor-less calls participate in caching).""" + + partition: str = "" + """Coordinator-computed arm identifier; opaque to stores.""" + + +@dataclass(frozen=True, slots=True) +class CacheEntry: + """One cached response with its freshness and sharing metadata.""" + + value: Any + """The cached result. The SDK deep-copies it on write and on serve, so a + store may hold the object as-is.""" + + scope: Literal["public", "private"] + """The server-asserted `cacheScope`: whether the entry may be shared + across authorization contexts (`"public"`) or only reused within the one + that produced it (`"private"`).""" + + expires_at: float | None + """Epoch seconds after which the entry is stale; `None` is never fresh.""" + + +class ResponseCacheStore(Protocol): + """Storage contract for the client response cache. + + Keys MUST be compared as the `(method, params_key, partition)` field tuple - + no delimiter-based flattening (collision hazard). Each `Client` calls its + store from a single event loop; cross-loop sharing and per-operation + atomicity are the implementation's responsibility. Operations may raise; + the SDK degrades per its error discipline (a failing store never fails a + successful fetch). + """ + + async def get(self, key: CacheKey) -> CacheEntry | None: ... + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: ... + + async def delete(self, key: CacheKey) -> None: ... + + async def clear(self) -> None: ... + + +@dataclass(frozen=True, slots=True) +class CacheConfig: + """Configuration for a `Client`'s response cache. + + Raises: + ValueError: If a custom `store` is given without a `partition`, or if + `default_ttl_ms` is negative. + """ + + store: ResponseCacheStore | None = None + """Backing store; `None` means a store-per-client `InMemoryResponseCacheStore`. + A custom store requires an explicit `partition`.""" + + partition: str = "" + """Authorization-context identifier isolating `"private"`-scoped entries + within a shared store. + + Derive it from a verified credential (e.g. a validated token's subject) - + never from request-supplied data, and never from the server URL (server + identity is a separate key axis). The SDK is a library with no + authentication of its own: whoever constructs the `CacheConfig` - the + deployment, not the tenant - is the trust anchor. Multi-tenant gateways + mint one `CacheConfig` per authenticated principal. + """ + + target_id: str | None = None + """Explicit server-identity override, for custom transports and proxies + where the SDK cannot derive an identity from a server URL.""" + + default_ttl_ms: int = 0 + """Time-to-live, in milliseconds, applied to results that carry no `ttlMs` + hint. The default `0` leaves hint-less results uncached.""" + + clock: Callable[[], float] = time.time + """Wall-clock source returning epoch seconds; injectable so expiry tests + need no sleeping.""" + + share_public: bool = False + """Serve entries the server marked `cacheScope: "public"` across every + partition using the store, instead of only within the partition that + fetched them. + + WARNING: enabling this trusts the server's public classification for every + principal sharing the store - a server that stamps `"public"` on + per-tenant data (by bug or by malice) leaks one tenant's response to the + others. It is deliberately constructor-level only, set once by the + operator: the per-call `cache_mode` kwarg can narrow caching but can never + widen sharing. + """ + + def __post_init__(self) -> None: + if self.store is not None and not self.partition: + raise ValueError("a custom store requires an explicit partition") + if self.default_ttl_ms < 0: + raise ValueError(f"default_ttl_ms must be >= 0, got {self.default_ttl_ms}") + + +class InMemoryResponseCacheStore: + """Default in-process `ResponseCacheStore`. + + Method bodies are synchronous (no awaits), so each operation completes + without an event-loop checkpoint and concurrent tasks can never observe a + torn write. Memory is bounded: the methods other than `resources/read` + form a small closed set of keys, and `max_read_entries` caps the + `resources/read` entries (one per uri) - storing a new read key at the cap + evicts the oldest read key, first-in-first-out. `0` disables the cap. + + Raises: + ValueError: If `max_read_entries` is negative. + """ + + def __init__(self, *, max_read_entries: int = 512) -> None: + if max_read_entries < 0: + raise ValueError(f"max_read_entries must be >= 0, got {max_read_entries}") + self._max_read_entries = max_read_entries + self._entries: dict[CacheKey, CacheEntry] = {} + + async def get(self, key: CacheKey) -> CacheEntry | None: + return self._entries.get(key) + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + if self._max_read_entries and key.method == "resources/read" and key not in self._entries: + # dict preserves insertion order and replacement keeps position, so + # the dict itself is the FIFO ledger - no parallel structure to drift. + read_keys = [k for k in self._entries if k.method == "resources/read"] + if len(read_keys) >= self._max_read_entries: + del self._entries[read_keys[0]] + self._entries[key] = entry + + async def delete(self, key: CacheKey) -> None: + self._entries.pop(key, None) + + async def clear(self) -> None: + self._entries.clear() diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py new file mode 100644 index 000000000..6dc976def --- /dev/null +++ b/tests/client/test_caching.py @@ -0,0 +1,308 @@ +"""`mcp.client.caching`: the `CacheConfig` construction guards, the store +contract every `ResponseCacheStore` implementation must satisfy, and the +default in-memory store's bounded `resources/read` FIFO. + +The store-contract tests are parametrized over `STORE_FACTORIES`; a +third-party store implementation can be run against the same contract by +adding its factory to the list (or copying the parametrization). +""" + +import time +from collections.abc import Callable +from typing import Any + +import pytest +from inline_snapshot import snapshot + +from mcp.client.caching import ( + CacheConfig, + CacheEntry, + CacheKey, + InMemoryResponseCacheStore, + ResponseCacheStore, +) + +pytestmark = pytest.mark.anyio + +STORE_FACTORIES: list[Callable[[], ResponseCacheStore]] = [InMemoryResponseCacheStore] + +store_contract = pytest.mark.parametrize("make_store", STORE_FACTORIES, ids=["InMemoryResponseCacheStore"]) + + +def _entry(value: Any = "cached") -> CacheEntry: + """Entries are opaque payloads at the store layer; only the key matters here.""" + return CacheEntry(value=value, scope="private", expires_at=None) + + +def _read_key(uri: str) -> CacheKey: + return CacheKey("resources/read", uri) + + +# --- Store contract --- + + +@store_contract +async def test_a_set_entry_round_trips_through_get(make_store: Callable[[], ResponseCacheStore]) -> None: + """SDK-defined contract: `get` returns an entry equal to the one `set` + stored under the same three-field key.""" + store = make_store() + key = CacheKey("tools/list", "", "partition-1") + entry = CacheEntry(value={"tools": []}, scope="public", expires_at=1700000000.0) + await store.set(key, entry) + assert await store.get(key) == entry + + +@store_contract +async def test_get_misses_for_a_key_never_set(make_store: Callable[[], ResponseCacheStore]) -> None: + """SDK-defined contract: an unknown key is a miss (`None`), not an error.""" + store = make_store() + assert await store.get(CacheKey("tools/list")) is None + + +@store_contract +async def test_keys_differing_in_only_one_field_do_not_collide( + make_store: Callable[[], ResponseCacheStore], +) -> None: + """Spec-mandated: the cache key spans the method, the result-affecting + params, and the authorization context - a store collapsing any one field + would serve a response across method, params, or principal boundaries.""" + store = make_store() + base = CacheKey("resources/read", "file:///a", "partition-1") + keys = [ + base, + CacheKey("resources/list", base.params_key, base.partition), + CacheKey(base.method, "file:///b", base.partition), + CacheKey(base.method, base.params_key, "partition-2"), + ] + for i, key in enumerate(keys): + await store.set(key, _entry(i)) + for i, key in enumerate(keys): + assert await store.get(key) == _entry(i) + + +@store_contract +async def test_swapped_params_key_and_partition_values_are_distinct_keys( + make_store: Callable[[], ResponseCacheStore], +) -> None: + """SDK-defined contract: identical values in different field positions are + different keys - the fields are positional, not a bag of strings.""" + store = make_store() + await store.set(CacheKey("m", "a", "b"), _entry("params=a")) + await store.set(CacheKey("m", "b", "a"), _entry("params=b")) + assert await store.get(CacheKey("m", "a", "b")) == _entry("params=a") + assert await store.get(CacheKey("m", "b", "a")) == _entry("params=b") + + +@store_contract +async def test_keys_with_field_values_that_concatenate_identically_do_not_collide( + make_store: Callable[[], ResponseCacheStore], +) -> None: + """SDK-defined contract: keys MUST be compared as the field tuple, so pairs + whose fields join to the same string under any delimiter (or none) stay + distinct - flattening would let crafted values collide across boundaries.""" + store = make_store() + keys = [ + CacheKey("a", "b.c", "p"), + CacheKey("a.b", "c", "p"), + CacheKey("m", "x", "y:z"), + CacheKey("m", "x:y", "z"), + CacheKey("m", "u/v", ""), + CacheKey("m/u", "v", ""), + CacheKey("ab", "", ""), + CacheKey("a", "b", ""), + CacheKey("", "ab", ""), + ] + for i, key in enumerate(keys): + await store.set(key, _entry(i)) + for i, key in enumerate(keys): + assert await store.get(key) == _entry(i) + + +@store_contract +async def test_set_replaces_the_entry_for_an_existing_key(make_store: Callable[[], ResponseCacheStore]) -> None: + """SDK-defined contract: a second `set` under the same key overwrites; the + store holds at most one entry per key.""" + store = make_store() + key = CacheKey("tools/list") + await store.set(key, _entry("first")) + await store.set(key, _entry("second")) + assert await store.get(key) == _entry("second") + + +@store_contract +async def test_delete_removes_only_the_given_key(make_store: Callable[[], ResponseCacheStore]) -> None: + """SDK-defined contract: `delete` is exact - sibling keys survive.""" + store = make_store() + doomed = CacheKey("tools/list", "", "partition-1") + survivor = CacheKey("tools/list", "", "partition-2") + await store.set(doomed, _entry("doomed")) + await store.set(survivor, _entry("survivor")) + await store.delete(doomed) + assert await store.get(doomed) is None + assert await store.get(survivor) == _entry("survivor") + + +@store_contract +async def test_delete_is_idempotent(make_store: Callable[[], ResponseCacheStore]) -> None: + """SDK-defined contract: deleting an absent key is a no-op, not an error - + the SDK issues unconditional deletes during eviction.""" + store = make_store() + key = CacheKey("prompts/list") + await store.delete(key) + await store.set(key, _entry()) + await store.delete(key) + await store.delete(key) + assert await store.get(key) is None + + +@store_contract +async def test_clear_removes_every_entry_across_methods_and_partitions( + make_store: Callable[[], ResponseCacheStore], +) -> None: + """SDK-defined contract: `clear` empties the store wholesale - every + method, params_key, and partition.""" + store = make_store() + keys = [ + CacheKey("tools/list", "", "partition-1"), + CacheKey("prompts/list", "", "partition-2"), + CacheKey("resources/read", "file:///a", "partition-1"), + ] + for key in keys: + await store.set(key, _entry()) + await store.clear() + for key in keys: + assert await store.get(key) is None + + +# --- CacheConfig guards --- + + +def test_cache_config_defaults_construct_an_unshared_zero_ttl_config() -> None: + """SDK-defined defaults: in-memory store minted per client, empty + partition, no identity override, hint-less results uncached, wall clock, + and public-entry sharing OFF (sharing is an explicit operator opt-in).""" + config = CacheConfig() + assert config.store is None + assert config.partition == "" + assert config.target_id is None + assert config.default_ttl_ms == 0 + assert config.clock is time.time + assert config.share_public is False + + +def test_a_custom_store_without_a_partition_is_rejected_at_construction() -> None: + """SDK-defined guard: a custom store is shareable, so omitting the + authorization-context partition would let private entries cross + principals - rejected at `CacheConfig` construction, not on first use.""" + with pytest.raises(ValueError) as exc: + CacheConfig(store=InMemoryResponseCacheStore()) + assert str(exc.value) == snapshot("a custom store requires an explicit partition") + + +def test_a_custom_store_with_an_explicit_partition_constructs() -> None: + """SDK-defined: the partition guard is satisfied by any non-empty + operator-supplied principal id.""" + store = InMemoryResponseCacheStore() + config = CacheConfig(store=store, partition="token-subject-1") + assert config.store is store + assert config.partition == "token-subject-1" + + +def test_a_negative_default_ttl_is_rejected_at_construction() -> None: + """SDK-defined guard: a negative configured TTL is a programming error, + rejected at construction (negative `ttlMs` from the wire is tolerated as 0 + at the parse seam instead).""" + with pytest.raises(ValueError) as exc: + CacheConfig(default_ttl_ms=-1) + assert str(exc.value) == snapshot("default_ttl_ms must be >= 0, got -1") + + +# --- InMemoryResponseCacheStore read cap --- + + +async def test_a_new_read_key_at_the_cap_evicts_the_oldest_read_key() -> None: + """SDK-defined bound: `resources/read` keys are unbounded in principle (one + per uri), so storing a new one at the cap drops the oldest, FIFO.""" + store = InMemoryResponseCacheStore(max_read_entries=2) + await store.set(_read_key("file:///a"), _entry("a")) + await store.set(_read_key("file:///b"), _entry("b")) + await store.set(_read_key("file:///c"), _entry("c")) + assert await store.get(_read_key("file:///a")) is None + assert await store.get(_read_key("file:///b")) == _entry("b") + assert await store.get(_read_key("file:///c")) == _entry("c") + + +async def test_replacing_a_read_key_at_the_cap_neither_evicts_nor_refreshes_its_age() -> None: + """SDK-defined: replacement is not growth (no double-count, nothing + evicted) and does not renew the key's position - eviction order is + first-insertion order (FIFO), not recency (LRU).""" + store = InMemoryResponseCacheStore(max_read_entries=2) + await store.set(_read_key("file:///a"), _entry("a")) + await store.set(_read_key("file:///b"), _entry("b")) + await store.set(_read_key("file:///a"), _entry("a-replaced")) + assert await store.get(_read_key("file:///a")) == _entry("a-replaced") + assert await store.get(_read_key("file:///b")) == _entry("b") + await store.set(_read_key("file:///c"), _entry("c")) + assert await store.get(_read_key("file:///a")) is None + assert await store.get(_read_key("file:///b")) == _entry("b") + + +async def test_only_read_keys_count_toward_the_cap_and_only_read_keys_are_evicted() -> None: + """SDK-defined: the non-read cacheable methods are a small closed key set - + they neither consume cap slots nor ever get cap-evicted.""" + store = InMemoryResponseCacheStore(max_read_entries=1) + list_keys = [ + CacheKey("tools/list"), + CacheKey("prompts/list"), + CacheKey("resources/list"), + CacheKey("resources/templates/list"), + CacheKey("server/discover"), + ] + for key in list_keys: + await store.set(key, _entry(key.method)) + await store.set(_read_key("file:///a"), _entry("a")) + for key in list_keys: + assert await store.get(key) == _entry(key.method) + await store.set(_read_key("file:///b"), _entry("b")) + assert await store.get(_read_key("file:///a")) is None + assert await store.get(_read_key("file:///b")) == _entry("b") + for key in list_keys: + assert await store.get(key) == _entry(key.method) + + +async def test_a_non_read_set_never_triggers_eviction_even_with_reads_at_the_cap() -> None: + """SDK-defined: only storing a NEW read key can evict - a non-read `set` + while reads sit at the cap leaves them untouched.""" + store = InMemoryResponseCacheStore(max_read_entries=1) + await store.set(_read_key("file:///a"), _entry("a")) + await store.set(CacheKey("tools/list"), _entry("tools")) + assert await store.get(_read_key("file:///a")) == _entry("a") + assert await store.get(CacheKey("tools/list")) == _entry("tools") + + +async def test_a_zero_cap_disables_read_eviction() -> None: + """SDK-defined: `max_read_entries=0` means unbounded read entries.""" + store = InMemoryResponseCacheStore(max_read_entries=0) + uris = [f"file:///{i}" for i in range(5)] + for uri in uris: + await store.set(_read_key(uri), _entry(uri)) + for uri in uris: + assert await store.get(_read_key(uri)) == _entry(uri) + + +async def test_deleting_a_read_key_frees_its_cap_slot() -> None: + """SDK-defined: the cap counts live entries, so a deleted read key's slot + is reusable without evicting anything.""" + store = InMemoryResponseCacheStore(max_read_entries=1) + await store.set(_read_key("file:///a"), _entry("a")) + await store.delete(_read_key("file:///a")) + await store.set(_read_key("file:///b"), _entry("b")) + assert await store.get(_read_key("file:///b")) == _entry("b") + + +def test_a_negative_read_cap_is_rejected_at_construction() -> None: + """SDK-defined guard: a negative cap is meaningless (0 already means + uncapped) and would otherwise evict on every read insert.""" + with pytest.raises(ValueError) as exc: + InMemoryResponseCacheStore(max_read_entries=-1) + assert str(exc.value) == snapshot("max_read_entries must be >= 0, got -1") From 5f8a3925de864e115146373c3bd6252f1602cf5d Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:56:08 +0000 Subject: [PATCH 03/18] Add client response cache coordinator: scope arms, era gating, TTL resolution, eviction --- src/mcp/client/caching.py | 256 ++++++++++++ tests/client/test_caching.py | 727 ++++++++++++++++++++++++++++++++++- 2 files changed, 980 insertions(+), 3 deletions(-) diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py index 3c55dc2cb..2e50cbb42 100644 --- a/src/mcp/client/caching.py +++ b/src/mcp/client/caching.py @@ -9,11 +9,24 @@ from __future__ import annotations +import json +import logging import time from collections.abc import Callable from dataclasses import dataclass from typing import Any, Final, Literal, Protocol +import anyio +from mcp_types import ( + CacheableResult, + PromptListChangedNotification, + ResourceListChangedNotification, + ResourceUpdatedNotification, + ServerNotification, + ToolListChangedNotification, +) +from mcp_types.version import MODERN_PROTOCOL_VERSIONS + __all__ = [ "MAX_TTL_MS", "CacheConfig", @@ -24,6 +37,8 @@ "ResponseCacheStore", ] +logger = logging.getLogger(__name__) + CacheMode = Literal["use", "refresh", "bypass"] """Per-call cache behavior: `"use"` serves fresh entries and stores fetches, `"refresh"` skips the read but stores the fetch, `"bypass"` touches the cache @@ -81,6 +96,13 @@ class ResponseCacheStore(Protocol): atomicity are the implementation's responsibility. Operations may raise; the SDK degrades per its error discipline (a failing store never fails a successful fetch). + + A store that serializes entries (any cross-process store must) is + responsible for round-tripping them: `get` returns the entry as stored, + with `value` still the result model object `set` received - the SDK has + no rehydration hook to rebuild it from serialized data. An entry that + comes back in the wrong shape (e.g. with a plain-dict value) degrades to + a cache miss, never an error. """ async def get(self, key: CacheKey) -> CacheEntry | None: ... @@ -186,3 +208,237 @@ async def delete(self, key: CacheKey) -> None: async def clear(self) -> None: self._entries.clear() + + +_GENERATION_MAP_CAP: Final[int] = 4096 +"""Cap on the coordinator's eviction-race bookkeeping (the generation map). +At the cap, registering a new key drops the oldest one, degrading the dropped +key's race guard to the accepted co-tenant class.""" + + +class ClientResponseCache: + """Coordinator between the `Client` verbs and a `ResponseCacheStore`. + + Owns key construction (the scope arms), the era gate, TTL/scope + resolution, eviction, and the store error discipline. `Client` mints one + per instance; the caching verbs and the notification wrap are the only + callers. + """ + + def __init__( + self, + *, + store: ResponseCacheStore, + partition: str, + arm_id: str, + default_ttl_ms: int, + clock: Callable[[], float], + share_public: bool, + negotiated_version: Callable[[], str | None], + generation_map_cap: int = _GENERATION_MAP_CAP, + ) -> None: + self._store = store + self._default_ttl_ms = default_ttl_ms + self._clock = clock + self._negotiated_version = negotiated_version + # Arms are JSON arrays so crafted arm_id/partition values cannot + # collide across field boundaries. Private entries always carry the + # partition; public entries do too unless the operator opted into + # fleet-wide sharing of server-asserted-public results. + self._private_arm = json.dumps(["private", arm_id, partition]) + self._public_arm = json.dumps(["public", arm_id] if share_public else ["public", arm_id, partition]) + # The generation map is the sole membership structure: a key is + # race-guarded iff registered here. + self._generations: dict[tuple[str, str], int] = {} + self._generation_map_cap = generation_map_cap + # Operation kinds ("get"/"set"/"delete") that warned and have not + # succeeded since; membership suppresses repeat warnings for the kind. + self._warned_store_ops: set[str] = set() + + async def read(self, method: str, params_key: str) -> CacheableResult | None: + """Serve a fresh entry for the key, or `None`. + + Called only under `cache_mode="use"`; returns a deep copy so a served + result never aliases the stored one. + """ + # One boundary around the whole read path: a raising store `get` and + # an entry rehydrated into the wrong shape (which raises only at the + # freshness check or the copy) are the same "get" failure class - + # warned once per burst, re-armed only by a fully successful read. + try: + entry = await self._get_fresh(CacheKey(method, params_key, self._private_arm)) + if entry is None: + # Stale counts as a miss for fall-through too: after a server + # scope flip (private -> public), a stale private leftover + # must not shadow a fresh public entry. + entry = await self._get_fresh(CacheKey(method, params_key, self._public_arm)) + if entry is not None and entry.scope != "public": + # The arm routes, the scope verifies: never serve an entry the + # server scoped "private" out of the shared arm, however it + # got there. + entry = None + copied: CacheableResult | None = None if entry is None else entry.value.model_copy(deep=True) + except Exception: # boundary around user store code: any read-path failure is a miss, never a failed call + self._warn_store_failure("get") + return None + self._warned_store_ops.discard("get") + return copied + + async def _get_fresh(self, key: CacheKey) -> CacheEntry | None: + entry = await self._store.get(key) + if entry is None or entry.expires_at is None or entry.expires_at <= self._clock(): + return None + return entry + + def capture(self, method: str, params_key: str) -> int: + """Register the key for eviction-race detection, before the fetch is + sent; the matching `write` passes the returned generation back.""" + gen_key = (method, params_key) + if gen_key not in self._generations: + if len(self._generations) >= self._generation_map_cap: + # FIFO overflow: drop the oldest key, degrading its race guard + # to the accepted co-tenant class (an eviction racing that + # key's in-flight fetch is no longer detected at write time). + del self._generations[next(iter(self._generations))] + self._generations[gen_key] = 0 + return self._generations[gen_key] + + async def write( + self, + method: str, + params_key: str, + result: CacheableResult, + gen_at_capture: int, + mode: Literal["use", "refresh"], + ) -> None: + """Store a fetched result under the arm its resolved scope selects.""" + gen_key = (method, params_key) + if self._generation_moved(gen_key, gen_at_capture): + return # the key was evicted while the fetch was in flight + ttl_ms, scope = self._resolve(result) + private_key = CacheKey(method, params_key, self._private_arm) + public_key = CacheKey(method, params_key, self._public_arm) + if ttl_ms <= 0: + if mode == "refresh": + # The refetch superseded whatever was cached; purge the warm + # entry so it cannot be served again. Shielded: a cancellation + # delivered between the two deletes would leave the opposite + # arm warm for its full TTL. + with anyio.CancelScope(shield=True): + await self._delete(private_key) + await self._delete(public_key) + return + own, opposite = (public_key, private_key) if scope == "public" else (private_key, public_key) + # Opposite arm first: a failed (or cancelled) delete aborts before the + # set, leaving a miss - never two arms answering for one key. + if not await self._delete(opposite): + return + entry = CacheEntry(value=result.model_copy(deep=True), scope=scope, expires_at=self._clock() + ttl_ms / 1000) + try: + await self._set(own, entry) + finally: + # An eviction can land while an async store's set is committing, + # and the set can commit even when its await is cancelled (the + # request may already be on the wire) - so the re-check runs on + # every exit, and the compensating delete is shielded so the + # pending cancellation cannot abort it and resurrect the evicted + # entry for its full TTL. (A delete after a set that raised is an + # idempotent no-op.) + if self._generation_moved(gen_key, gen_at_capture): + with anyio.CancelScope(shield=True): + await self._delete(own) + + async def evict_method(self, method: str) -> None: + """Evict the method's cursor-less entry (notification- or + cursor-expiry-driven).""" + await self.evict_key(method, "") + + async def evict_key(self, method: str, params_key: str) -> None: + """Evict one key from both arms.""" + gen_key = (method, params_key) + # Bump before deleting so an in-flight fetch that captured earlier + # cannot write the just-evicted entry back. Only registered keys bump + # (arbitrary notification uris must not grow the map); the store + # deletes always run - a persistent store may hold warm entries this + # coordinator never captured. + if gen_key in self._generations: + self._generations[gen_key] += 1 + # Shielded: eviction runs in spawned notification tasks that die with + # the session - a cancellation between the two deletes would leave one + # arm serving the evicted entry until its TTL. + with anyio.CancelScope(shield=True): + await self._delete(CacheKey(method, params_key, self._private_arm)) + await self._delete(CacheKey(method, params_key, self._public_arm)) + + async def evict_for_notification(self, notification: ServerNotification) -> None: + """Map a server notification to the entries it makes stale. + + Wire-path notifications are dispatched from spawned tasks, so eviction + is eventual relative to in-flight responses: the generation bump + closes the write-back race, while a read racing the notification may + briefly serve the pre-eviction entry (accepted, latency-bounded). + """ + match notification: + case ToolListChangedNotification(): + await self.evict_method("tools/list") + case PromptListChangedNotification(): + await self.evict_method("prompts/list") + case ResourceListChangedNotification(): + # Templates enumerate the same changed resource space. + await self.evict_method("resources/list") + await self.evict_method("resources/templates/list") + case ResourceUpdatedNotification(): + await self.evict_key("resources/read", notification.params.uri) + case _: + pass + + def _resolve(self, result: CacheableResult) -> tuple[int, Literal["public", "private"]]: + # Hints count only on modern sessions: a legacy peer can also put + # `ttlMs`/`cacheScope` keys on the wire (the 2025 surfaces validate + # and discard unknown keys, so wire presence still reaches + # `model_fields_set`) - wire presence is not a peer-era signal. + modern = self._negotiated_version() in MODERN_PROTOCOL_VERSIONS + if modern and "ttl_ms" in result.model_fields_set: + # An explicit `ttlMs: 0` stays 0 (never overridden by the + # default), and negatives are unconstructible here - the model + # enforces ge=0 and the parse seam floors negative wire values - + # so only the cap applies. + ttl_ms = result.ttl_ms + else: + ttl_ms = self._default_ttl_ms + scope: Literal["public", "private"] = "public" if modern and result.cache_scope == "public" else "private" + return min(ttl_ms, MAX_TTL_MS), scope + + def _generation_moved(self, gen_key: tuple[str, str], gen_at_capture: int) -> bool: + # A key FIFO-dropped from the map can no longer be checked; the guard + # fails open (the accepted co-tenant race class) rather than + # discarding the fetch. + return self._generations.get(gen_key, gen_at_capture) != gen_at_capture + + async def _set(self, key: CacheKey, entry: CacheEntry) -> bool: + try: + await self._store.set(key, entry) + except Exception: # boundary around user store code: nothing cached, the fetch already succeeded + self._warn_store_failure("set") + return False + self._warned_store_ops.discard("set") + return True + + async def _delete(self, key: CacheKey) -> bool: + try: + await self._store.delete(key) + except Exception: # boundary around user store code: callers decide whether a failed delete aborts + self._warn_store_failure("delete") + return False + self._warned_store_ops.discard("delete") + return True + + def _warn_store_failure(self, kind: Literal["get", "set", "delete"]) -> None: + # One warning per failure burst, tracked per operation kind: armed by + # the kind's first failure, re-armed only when that same kind succeeds. + # A dead store warns once, not once per request - and a store where + # only `set` is broken warns once too, instead of its healthy deletes + # re-arming the warning every write cycle. + if kind not in self._warned_store_ops: + self._warned_store_ops.add(kind) + logger.warning("Response cache store operation failed; continuing without the cache", exc_info=True) diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py index 6dc976def..d48a7ed5e 100644 --- a/tests/client/test_caching.py +++ b/tests/client/test_caching.py @@ -1,23 +1,44 @@ """`mcp.client.caching`: the `CacheConfig` construction guards, the store -contract every `ResponseCacheStore` implementation must satisfy, and the -default in-memory store's bounded `resources/read` FIFO. +contract every `ResponseCacheStore` implementation must satisfy, the default +in-memory store's bounded `resources/read` FIFO, and the `ClientResponseCache` +coordinator (scope arms, era gate, TTL/scope resolution, eviction, store error +discipline). The store-contract tests are parametrized over `STORE_FACTORIES`; a third-party store implementation can be run against the same contract by adding its factory to the list (or copying the parametrization). """ +import json +import logging import time -from collections.abc import Callable +from collections.abc import Awaitable, Callable from typing import Any +import anyio +import anyio.lowlevel import pytest from inline_snapshot import snapshot +from mcp_types import ( + ListPromptsResult, + ListToolsResult, + LoggingMessageNotification, + LoggingMessageNotificationParams, + PromptListChangedNotification, + ReadResourceResult, + ResourceListChangedNotification, + ResourceUpdatedNotification, + ResourceUpdatedNotificationParams, + ServerNotification, + ToolListChangedNotification, +) from mcp.client.caching import ( + MAX_TTL_MS, CacheConfig, CacheEntry, CacheKey, + ClientResponseCache, InMemoryResponseCacheStore, ResponseCacheStore, ) @@ -306,3 +327,703 @@ def test_a_negative_read_cap_is_rejected_at_construction() -> None: with pytest.raises(ValueError) as exc: InMemoryResponseCacheStore(max_read_entries=-1) assert str(exc.value) == snapshot("max_read_entries must be >= 0, got -1") + + +# --- ClientResponseCache coordinator --- + +MODERN_VERSION = "2026-07-28" +LEGACY_VERSION = "2025-11-25" + + +class _ManualClock: + """Injected wall clock: tests advance `now` instead of sleeping.""" + + def __init__(self) -> None: + self.now = 1_000_000.0 + + def __call__(self) -> float: + return self.now + + +def _coordinator( + store: ResponseCacheStore, + *, + partition: str = "", + arm_id: str = "arm", + default_ttl_ms: int = 0, + clock: _ManualClock | None = None, + share_public: bool = False, + version: str | None = MODERN_VERSION, + generation_map_cap: int = 4096, +) -> ClientResponseCache: + return ClientResponseCache( + store=store, + partition=partition, + arm_id=arm_id, + default_ttl_ms=default_ttl_ms, + clock=clock or _ManualClock(), + share_public=share_public, + negotiated_version=lambda: version, + generation_map_cap=generation_map_cap, + ) + + +def _private_arm(arm_id: str = "arm", partition: str = "") -> str: + return json.dumps(["private", arm_id, partition]) + + +def _public_arm(arm_id: str = "arm", partition: str = "") -> str: + return json.dumps(["public", arm_id, partition]) + + +def _wire_result(ttl_ms: int | None = None, cache_scope: str | None = None) -> ListToolsResult: + """A `tools/list` result as parsed off the wire; `None` omits the hint so + it stays out of `model_fields_set`.""" + payload: dict[str, Any] = {"tools": []} + if ttl_ms is not None: + payload["ttlMs"] = ttl_ms + if cache_scope is not None: + payload["cacheScope"] = cache_scope + return ListToolsResult.model_validate(payload) + + +def _read_result(ttl_ms: int) -> ReadResourceResult: + return ReadResourceResult.model_validate({"contents": [], "ttlMs": ttl_ms}) + + +class _ScriptedStore: + """In-memory store that logs `(op, key)` and can await one-shot hooks + around an operation's commit, modelling an async store mid-commit when an + eviction or a cancellation lands.""" + + def __init__(self) -> None: + self.inner = InMemoryResponseCacheStore() + self.ops: list[tuple[str, CacheKey]] = [] + self.before_set_commits: Callable[[], Awaitable[None]] | None = None + self.after_set_commits: Callable[[], Awaitable[None]] | None = None + self.after_delete_commits: Callable[[], Awaitable[None]] | None = None + + async def get(self, key: CacheKey) -> CacheEntry | None: + self.ops.append(("get", key)) + return await self.inner.get(key) + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + self.ops.append(("set", key)) + if self.before_set_commits is not None: + hook, self.before_set_commits = self.before_set_commits, None + await hook() + await self.inner.set(key, entry) + if self.after_set_commits is not None: + hook, self.after_set_commits = self.after_set_commits, None + await hook() + + async def delete(self, key: CacheKey) -> None: + self.ops.append(("delete", key)) + await self.inner.delete(key) + if self.after_delete_commits is not None: + hook, self.after_delete_commits = self.after_delete_commits, None + await hook() + + async def clear(self) -> None: + raise NotImplementedError + + +class _FailingStore: + """In-memory store whose operations raise while their flag is set; the + flags toggle so tests can model recovery.""" + + def __init__(self, *, fail_get: bool = False, fail_set: bool = False, fail_delete: bool = False) -> None: + self.inner = InMemoryResponseCacheStore() + self.fail_get = fail_get + self.fail_set = fail_set + self.fail_delete = fail_delete + + async def get(self, key: CacheKey) -> CacheEntry | None: + if self.fail_get: + raise RuntimeError("store get failed") + return await self.inner.get(key) + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + if self.fail_set: + raise RuntimeError("store set failed") + await self.inner.set(key, entry) + + async def delete(self, key: CacheKey) -> None: + if self.fail_delete: + raise RuntimeError("store delete failed") + await self.inner.delete(key) + + async def clear(self) -> None: + raise NotImplementedError + + +class _RehydratingStore: + """Models a persistent store whose `get` returns what its deserializer + produced - possibly not the shape `set` received.""" + + def __init__(self, rehydrated: Any) -> None: + self.rehydrated = rehydrated + + async def get(self, key: CacheKey) -> CacheEntry | None: + return self.rehydrated + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + raise NotImplementedError + + async def delete(self, key: CacheKey) -> None: + raise NotImplementedError + + async def clear(self) -> None: + raise NotImplementedError + + +# --- Coordinator: era gate --- + + +@pytest.mark.parametrize("version", [LEGACY_VERSION, None], ids=["legacy", "pre-negotiation"]) +async def test_hints_from_a_non_modern_session_are_ignored(version: str | None) -> None: + """SDK-defined era gate: `ttlMs`/`cacheScope` are 2026-07-28 assertions. A + legacy peer can inject the keys onto the wire (the 2025 surfaces validate + and discard unknown keys, so they reach `model_fields_set`), so wire + presence is not trusted: on a non-modern session every result is + hint-absent - with the default `default_ttl_ms=0`, nothing is stored.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store, version=version) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert await cache.read("tools/list", "") is None + assert await store.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + + +async def test_a_legacy_session_with_a_default_ttl_caches_on_the_private_arm_only() -> None: + """SDK-defined era gate: the operator's `default_ttl_ms` still applies on + legacy sessions, but an injected `cacheScope: "public"` cannot promote the + entry, and an injected `ttlMs` does not shorten (or extend) its life.""" + store = InMemoryResponseCacheStore() + clock = _ManualClock() + cache = _coordinator(store, version=LEGACY_VERSION, default_ttl_ms=60_000, clock=clock) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=5, cache_scope="public"), gen, "use") + private_entry = await store.get(CacheKey("tools/list", "", _private_arm())) + assert private_entry is not None + assert private_entry.scope == "private" + assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + clock.now += 1.0 # well past the injected 5ms; the default 60s governs + assert await cache.read("tools/list", "") == _wire_result(ttl_ms=5, cache_scope="public") + + +# --- Coordinator: TTL and scope resolution --- + + +async def test_an_explicit_zero_ttl_is_not_overridden_by_the_default_ttl() -> None: + """Spec-mandated: `ttlMs: 0` means immediately stale. The configured + `default_ttl_ms` fills in only for hint-ABSENT results - an explicit 0 + stores nothing.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store, default_ttl_ms=60_000) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "use") + assert await store.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + + +async def test_a_hint_absent_modern_result_uses_the_default_ttl_privately() -> None: + """SDK-defined: on a modern session a result without `ttlMs` in + `model_fields_set` gets `default_ttl_ms` and scope `"private"`, expiring + exactly when the default says.""" + store = InMemoryResponseCacheStore() + clock = _ManualClock() + cache = _coordinator(store, default_ttl_ms=60_000, clock=clock) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(), gen, "use") + entry = await store.get(CacheKey("tools/list", "", _private_arm())) + assert entry is not None + assert entry.scope == "private" + assert entry.expires_at == clock.now + 60.0 + assert await cache.read("tools/list", "") == _wire_result() + clock.now += 60.0 + assert await cache.read("tools/list", "") is None + + +async def test_a_ttl_above_24_hours_is_clamped_to_the_cap() -> None: + """SDK-defined hardening (SEP-2549 security discussion): a server cannot + pin an entry beyond 24 hours - the stored expiry is clamped to + `MAX_TTL_MS`.""" + store = InMemoryResponseCacheStore() + clock = _ManualClock() + cache = _coordinator(store, clock=clock) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=7 * MAX_TTL_MS), gen, "use") + entry = await store.get(CacheKey("tools/list", "", _private_arm())) + assert entry is not None + assert entry.expires_at == clock.now + MAX_TTL_MS / 1000 + + +async def test_a_public_result_lands_on_the_public_arm_and_clears_the_private_arm() -> None: + """Spec-mandated scope routing plus the SDK's no-stale-pair invariant: + when a key's scope flips, writing the new arm deletes the other so the two + arms never both answer.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await store.get(CacheKey("tools/list", "", _private_arm())) is not None + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + public_entry = await store.get(CacheKey("tools/list", "", _public_arm())) + assert public_entry is not None + assert public_entry.scope == "public" + assert await store.get(CacheKey("tools/list", "", _private_arm())) is None + + +# --- Coordinator: partition arms and the scope guard --- + + +async def test_arm_key_layout_is_pinned_for_shared_store_compatibility() -> None: + """SDK-defined persistence contract: arm strings are the cross-process + store key material, so their layout is pinned - JSON arrays of the scope, + the hashed server identity, and (unless `share_public`) the partition.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store, partition="tenant-a", arm_id="abc123", default_ttl_ms=60_000) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(), gen, "use") + assert await store.get(CacheKey("tools/list", "", snapshot('["private", "abc123", "tenant-a"]'))) is not None + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert await store.get(CacheKey("tools/list", "", snapshot('["public", "abc123", "tenant-a"]'))) is not None + shared = _coordinator(store, partition="tenant-a", arm_id="abc123", share_public=True) + gen = shared.capture("tools/list", "") + await shared.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert await store.get(CacheKey("tools/list", "", snapshot('["public", "abc123"]'))) is not None + + +async def test_public_entries_do_not_cross_partitions_by_default() -> None: + """SDK security default (deviates from the ts SDK): the public arm is + partition-scoped, so a server stamping `cacheScope: "public"` on + per-tenant data (bug or malice) cannot leak one tenant's response to + another through a shared store.""" + store = InMemoryResponseCacheStore() + tenant_a = _coordinator(store, partition="tenant-a") + tenant_b = _coordinator(store, partition="tenant-b") + gen = tenant_a.capture("tools/list", "") + await tenant_a.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert await tenant_a.read("tools/list", "") == _wire_result(ttl_ms=60_000, cache_scope="public") + assert await tenant_b.read("tools/list", "") is None + + +async def test_share_public_serves_public_entries_across_partitions_but_never_private_ones() -> None: + """SDK-defined opt-in: `share_public=True` drops the partition from the + public arm, sharing server-asserted-public entries fleet-wide. Private + entries still never cross partitions.""" + store = InMemoryResponseCacheStore() + tenant_a = _coordinator(store, partition="tenant-a", share_public=True) + tenant_b = _coordinator(store, partition="tenant-b", share_public=True) + gen = tenant_a.capture("tools/list", "") + await tenant_a.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert await tenant_b.read("tools/list", "") == _wire_result(ttl_ms=60_000, cache_scope="public") + private_result = ListPromptsResult.model_validate({"prompts": [], "ttlMs": 60_000}) + gen = tenant_a.capture("prompts/list", "") + await tenant_a.write("prompts/list", "", private_result, gen, "use") + assert await tenant_b.read("prompts/list", "") is None + + +async def test_a_private_scoped_entry_under_the_public_arm_is_not_served() -> None: + """SDK defense in depth: the arm routes, the entry's scope verifies - a + `"private"` entry sitting under the shared arm (a corrupted or pre-seeded + store) is refused, not served across the boundary.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + await store.set( + CacheKey("tools/list", "", _public_arm()), + CacheEntry(value=_wire_result(), scope="private", expires_at=2_000_000.0), + ) + assert await cache.read("tools/list", "") is None + + +async def test_a_stale_private_entry_does_not_shadow_a_fresh_public_one() -> None: + """SDK-defined fall-through: a stale private-arm entry is a miss for + arm-probing purposes, so after a server scope flip (private -> public, + with the public entry seeded by another client sharing the store) the + fresh public entry is served, not shadowed into a spurious miss.""" + store = InMemoryResponseCacheStore() + clock = _ManualClock() + cache = _coordinator(store, clock=clock) + await store.set( + CacheKey("tools/list", "", _private_arm()), + CacheEntry(value=_wire_result(), scope="private", expires_at=clock.now - 1.0), + ) + public_result = _wire_result(ttl_ms=60_000, cache_scope="public") + await store.set( + CacheKey("tools/list", "", _public_arm()), + CacheEntry(value=public_result, scope="public", expires_at=clock.now + 60.0), + ) + assert await cache.read("tools/list", "") == public_result + + +async def test_an_entry_without_an_expiry_is_never_fresh() -> None: + """SDK-defined: `expires_at=None` means never fresh - a store rehydrating + entries without expiry metadata yields misses, not immortal entries.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + await store.set( + CacheKey("tools/list", "", _private_arm()), + CacheEntry(value=_wire_result(), scope="private", expires_at=None), + ) + assert await cache.read("tools/list", "") is None + + +# --- Coordinator: write ordering --- + + +async def test_write_deletes_the_opposite_arm_before_setting_its_own() -> None: + """SDK-defined ordering: the opposite arm is deleted before the own-arm + set, so a cancellation between the two operations leaves a miss - never + two arms answering for one key.""" + store = _ScriptedStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + assert store.ops == [ + ("delete", CacheKey("tools/list", "", _private_arm())), + ("set", CacheKey("tools/list", "", _public_arm())), + ] + + +async def test_an_eviction_landing_during_an_async_set_is_compensated() -> None: + """SDK-defined TOCTOU re-check. Steps: (1) write captures, deletes the + opposite arm, and issues `set`; (2) before the store commits, an eviction + runs fully (bump + deletes, which see nothing); (3) the set commits the + now-stale entry; (4) the post-set generation re-check fires a compensating + delete, so the evicted key does not resurface.""" + store = _ScriptedStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + + async def evict_mid_commit() -> None: + await cache.evict_method("tools/list") + + store.before_set_commits = evict_mid_commit + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + private_key = CacheKey("tools/list", "", _private_arm()) + public_key = CacheKey("tools/list", "", _public_arm()) + assert store.ops == [ + ("delete", public_key), # write: opposite arm first + ("set", private_key), # write: own arm, commit still pending + ("delete", private_key), # eviction (sees nothing - not committed yet) + ("delete", public_key), # eviction + ("delete", private_key), # post-set re-check compensation + ] + assert await store.inner.get(private_key) is None + assert await cache.read("tools/list", "") is None + + +async def test_a_cancellation_landing_as_the_set_commits_still_compensates_an_eviction() -> None: + """SDK-defined: the eviction re-check survives cancellation. Steps: (1) + write deletes the opposite arm and issues `set`; (2) before the store + commits, an eviction runs fully (its deletes see nothing) and the caller's + scope is cancelled; (3) the set commits and the cancellation is delivered + at the store's next checkpoint - a timeout firing while an async store's + set is already on the wire; (4) the shielded compensating delete still + runs, so the evicted entry is not resurrected for its full TTL.""" + store = _ScriptedStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + private_key = CacheKey("tools/list", "", _private_arm()) + public_key = CacheKey("tools/list", "", _public_arm()) + with anyio.CancelScope() as scope: + + async def evict_then_cancel() -> None: + await cache.evict_method("tools/list") + scope.cancel() + + store.before_set_commits = evict_then_cancel + store.after_set_commits = anyio.lowlevel.checkpoint # first checkpoint after the commit + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert scope.cancelled_caught + assert store.ops == [ + ("delete", public_key), # write: opposite arm first + ("set", private_key), # write: own arm, commit still pending + ("delete", private_key), # eviction (sees nothing - not committed yet) + ("delete", public_key), # eviction + ("delete", private_key), # post-set re-check compensation, shielded + ] + assert await store.inner.get(private_key) is None + + +async def test_a_cancellation_during_the_refresh_purge_still_purges_both_arms() -> None: + """SDK-defined: the `mode="refresh"` purge is shielded - a cancellation + delivered between its two arm deletes must not leave the warm + opposite-arm entry that the refetch superseded.""" + store = _ScriptedStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + public_key = CacheKey("tools/list", "", _public_arm()) + assert await store.inner.get(public_key) is not None + with anyio.CancelScope() as scope: + scope.cancel() + # The cancellation would be delivered at the first checkpoint after the + # first (private-arm) delete commits, skipping the warm public arm. + store.after_delete_commits = anyio.lowlevel.checkpoint + await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "refresh") + assert await store.inner.get(public_key) is None + + +async def test_a_cancellation_during_an_eviction_still_evicts_both_arms() -> None: + """SDK-defined: eviction's two arm deletes are shielded - a notification + task cancelled mid-eviction (e.g. session teardown) must not leave one arm + serving the evicted entry until its TTL.""" + store = _ScriptedStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + public_key = CacheKey("tools/list", "", _public_arm()) + with anyio.CancelScope() as scope: + scope.cancel() + # The cancellation would be delivered at the first checkpoint after the + # first (private-arm) delete commits, skipping the warm public arm. + store.after_delete_commits = anyio.lowlevel.checkpoint + await cache.evict_method("tools/list") + assert await store.inner.get(public_key) is None + + +# --- Coordinator: store error discipline --- + + +async def test_a_raising_store_get_is_a_cache_miss() -> None: + """SDK error discipline: a raising store never fails the caller - a + read-path `get` raise is a miss.""" + store = _FailingStore(fail_get=True) + cache = _coordinator(store) + assert await cache.read("tools/list", "") is None + + +@pytest.mark.parametrize( + "rehydrated", + [ + CacheEntry(value={"tools": []}, scope="private", expires_at=2_000_000.0), + {"value": {"tools": []}, "scope": "private", "expires_at": 2_000_000.0}, + ], + ids=["dict-value", "dict-entry"], +) +async def test_an_entry_rehydrated_into_the_wrong_shape_is_a_warned_miss( + rehydrated: Any, caplog: pytest.LogCaptureFixture +) -> None: + """SDK error discipline: a persistent store has no method-to-model mapping + to rehydrate with, so its `get` may return serialized shapes (a dict where + the result model was stored, or a dict for the whole entry); the read + degrades to a warned miss instead of failing the call - and a store that + is persistently misconfigured this way is one warning burst, not one + warning per cached read.""" + cache = _coordinator(_RehydratingStore(rehydrated)) + with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): + assert await cache.read("tools/list", "") is None + assert await cache.read("tools/list", "") is None + assert len(caplog.records) == 1 + + +async def test_a_raising_opposite_arm_delete_aborts_the_write() -> None: + """SDK error discipline: if the opposite-arm delete fails, setting anyway + could leave both arms populated - the write aborts with nothing cached.""" + store = _FailingStore(fail_delete=True) + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.inner.get(CacheKey("tools/list", "", _public_arm())) is None + + +async def test_a_raising_store_set_caches_nothing_and_does_not_raise() -> None: + """SDK error discipline: a `set` raise is logged and swallowed - the fetch + already succeeded, the result just is not cached.""" + store = _FailingStore(fail_set=True) + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await cache.read("tools/list", "") is None + + +async def test_eviction_with_a_raising_delete_still_bumps_the_generation() -> None: + """SDK error discipline (bump-first): even when the store deletes raise, + the eviction's generation bump lands - an in-flight fetch captured before + the eviction cannot write back, while a fetch captured after it can.""" + store = _FailingStore() + cache = _coordinator(store) + stale_gen = cache.capture("tools/list", "") # fetch in flight when the eviction lands + store.fail_delete = True + await cache.evict_method("tools/list") # deletes raise; the bump already happened + store.fail_delete = False + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), stale_gen, "use") + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is None + fresh_gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), fresh_gen, "use") + assert await cache.read("tools/list", "") == _wire_result(ttl_ms=60_000) + + +async def test_store_failures_warn_once_per_burst(caplog: pytest.LogCaptureFixture) -> None: + """SDK-defined logging: consecutive store failures log a single warning; a + successful operation re-arms it so the next burst warns again.""" + store = _FailingStore(fail_get=True) + cache = _coordinator(store) + with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): + await cache.read("tools/list", "") # consecutive failing reads, one burst + await cache.read("tools/list", "") + assert len(caplog.records) == 1 + store.fail_get = False + await cache.read("tools/list", "") # success re-arms the warning + store.fail_get = True + await cache.read("tools/list", "") + assert len(caplog.records) == 2 + assert caplog.messages[0] == snapshot("Response cache store operation failed; continuing without the cache") + + +async def test_a_set_only_store_failure_warns_once_across_write_cycles(caplog: pytest.LogCaptureFixture) -> None: + """SDK-defined logging: the warning burst is tracked per operation kind - + a store where only `set` is broken warns once across write cycles, the + healthy deletes in between never re-arming it; only a `set` succeeding + re-arms the `set` warning.""" + store = _FailingStore(fail_set=True) + cache = _coordinator(store) + with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): + for _ in range(3): # each cycle: opposite-arm delete succeeds, then the set fails + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert len(caplog.records) == 1 + store.fail_set = False + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") # set succeeds, re-arms + store.fail_set = True + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert len(caplog.records) == 2 + + +# --- Coordinator: generation discipline --- + + +async def test_an_eviction_between_capture_and_write_discards_the_write() -> None: + """Spec-aligned race rule: a fetch in flight when its key is evicted must + not write the evicted entry back - the generation captured before the send + no longer matches at write time.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.evict_method("tools/list") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await store.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + + +async def test_recapturing_a_registered_key_returns_its_current_generation() -> None: + """SDK-defined: `capture` re-reads, it does not reset - after an eviction + a new fetch captures the bumped generation and its write lands.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + gen_before = cache.capture("tools/list", "") + await cache.evict_method("tools/list") + gen_after = cache.capture("tools/list", "") + assert gen_after != gen_before + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen_after, "use") + assert await cache.read("tools/list", "") == _wire_result(ttl_ms=60_000) + + +async def test_the_generation_map_drops_the_oldest_key_at_its_cap() -> None: + """SDK-defined bound (cap parametrized small; 4096 in production): + registering a new key at the cap drops the oldest, whose race guard + degrades to the accepted co-tenant class - an eviction racing the dropped + key's in-flight fetch goes undetected and its write lands, while a + still-registered key's write is discarded.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store, generation_map_cap=2) + gen_a = cache.capture("resources/read", "file:///a") + gen_b = cache.capture("resources/read", "file:///b") + cache.capture("resources/read", "file:///c") # at the cap: drops file:///a + await cache.evict_key("resources/read", "file:///a") # unregistered: no bump + await cache.evict_key("resources/read", "file:///b") # registered: bump + await cache.write("resources/read", "file:///a", _read_result(ttl_ms=60_000), gen_a, "use") + await cache.write("resources/read", "file:///b", _read_result(ttl_ms=60_000), gen_b, "use") + assert await cache.read("resources/read", "file:///a") is not None # degraded guard fails open + assert await cache.read("resources/read", "file:///b") is None # guard held + + +# --- Coordinator: eviction --- + + +async def test_a_refresh_resolving_uncacheable_purges_the_warm_entry() -> None: + """SDK-defined: a `cache_mode="refresh"` whose fresh result resolves to an + uncacheable TTL deletes both arms - the refetch superseded the warm entry, + which must not be served again.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await cache.read("tools/list", "") is not None + await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "refresh") + assert await store.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + + +async def test_evict_key_on_an_unregistered_key_still_deletes_both_arms() -> None: + """SDK-defined: a persistent store may hold warm entries from a prior + process that this coordinator never captured - eviction always issues the + store deletes, registered or not.""" + store = InMemoryResponseCacheStore() + await store.set( + CacheKey("resources/read", "file:///warm", _private_arm()), + CacheEntry(value=_read_result(ttl_ms=60_000), scope="private", expires_at=2_000_000.0), + ) + await store.set( + CacheKey("resources/read", "file:///warm", _public_arm()), + CacheEntry(value=_read_result(ttl_ms=60_000), scope="public", expires_at=2_000_000.0), + ) + cache = _coordinator(store) + await cache.evict_key("resources/read", "file:///warm") + assert await store.get(CacheKey("resources/read", "file:///warm", _private_arm())) is None + assert await store.get(CacheKey("resources/read", "file:///warm", _public_arm())) is None + + +@pytest.mark.parametrize( + ("notification", "evicted"), + [ + (ToolListChangedNotification(), {("tools/list", "")}), + (PromptListChangedNotification(), {("prompts/list", "")}), + (ResourceListChangedNotification(), {("resources/list", ""), ("resources/templates/list", "")}), + ( + ResourceUpdatedNotification(params=ResourceUpdatedNotificationParams(uri="file:///a")), + {("resources/read", "file:///a")}, + ), + ( + LoggingMessageNotification(params=LoggingMessageNotificationParams(level="info", data="x")), + set[tuple[str, str]](), + ), + ], + ids=["tools-list-changed", "prompts-list-changed", "resources-list-changed", "resource-updated", "unrelated"], +) +async def test_notifications_evict_exactly_their_mapped_entries( + notification: ServerNotification, evicted: set[tuple[str, str]] +) -> None: + """Spec SHOULD (notifications invalidate) plus negative space: each + list_changed notification evicts its own method's entry and nothing else, + resources/list_changed co-evicts the templates list, resources/updated + evicts only the named uri, and an unrelated notification evicts nothing.""" + store = InMemoryResponseCacheStore() + cache = _coordinator(store) + seeded = [ + ("tools/list", ""), + ("prompts/list", ""), + ("resources/list", ""), + ("resources/templates/list", ""), + ("resources/read", "file:///a"), + ("resources/read", "file:///b"), + ] + for method, params_key in seeded: + # The value's content is irrelevant to eviction; any cacheable model serves. + await store.set( + CacheKey(method, params_key, _private_arm()), + CacheEntry(value=_wire_result(), scope="private", expires_at=2_000_000.0), + ) + await cache.evict_for_notification(notification) + for method, params_key in seeded: + if (method, params_key) in evicted: + assert await cache.read(method, params_key) is None + else: + assert await cache.read(method, params_key) is not None From ba95005652fd025e31242712348edbb7e7544908 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 15:29:14 +0000 Subject: [PATCH 04/18] Wire the response cache into Client: configuration, server identity, notification eviction --- src/mcp/client/__init__.py | 3 +- src/mcp/client/client.py | 95 +++++++- tests/client/test_client_caching.py | 331 ++++++++++++++++++++++++++++ 3 files changed, 427 insertions(+), 2 deletions(-) create mode 100644 tests/client/test_client_caching.py diff --git a/src/mcp/client/__init__.py b/src/mcp/client/__init__.py index f92a01eb4..30df52737 100644 --- a/src/mcp/client/__init__.py +++ b/src/mcp/client/__init__.py @@ -2,13 +2,14 @@ from mcp.client._input_required import InputRequiredRoundsExceededError from mcp.client._transport import Transport -from mcp.client.caching import CacheConfig +from mcp.client.caching import CacheConfig, CacheMode from mcp.client.client import Client from mcp.client.context import ClientRequestContext from mcp.client.session import ClientSession __all__ = [ "CacheConfig", + "CacheMode", "Client", "ClientRequestContext", "ClientSession", diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index d3290f308..44377f452 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -2,12 +2,17 @@ from __future__ import annotations +import hashlib +import logging +import uuid from collections.abc import Awaitable, Callable, Mapping from contextlib import AsyncExitStack from dataclasses import KW_ONLY, dataclass, field from typing import Any, Literal, TypeVar +from urllib.parse import urlsplit, urlunsplit import anyio +import anyio.lowlevel import mcp_types as types from mcp_types import ( CallToolResult, @@ -39,6 +44,7 @@ from mcp.client._memory import InMemoryTransport from mcp.client._probe import negotiate_auto from mcp.client._transport import Transport +from mcp.client.caching import CacheConfig, ClientResponseCache, InMemoryResponseCacheStore from mcp.client.session import ( ClientRequestContext, ClientSession, @@ -56,6 +62,9 @@ from mcp.shared.dispatcher import Dispatcher, ProgressFnT from mcp.shared.exceptions import MCPDeprecationWarning from mcp.shared.jsonrpc_dispatcher import JSONRPCDispatcher +from mcp.shared.session import RequestResponder + +logger = logging.getLogger(__name__) ConnectMode = Literal["legacy", "auto"] | str """``mode=`` value: ``"legacy"`` (initialize handshake), ``"auto"`` (discover, fall back to @@ -115,6 +124,45 @@ def _connected(value: _T | None) -> _T: return value +def _strip_userinfo(url: str) -> str: + """Drop any userinfo from the URL's authority component; byte-exact otherwise. + + Cache identity must never over-normalize (case-folding or query rewriting could + merge distinct servers, e.g. `?tenant=a` vs `?tenant=b`), and credentials must + never enter cache-key material — userinfo removal is the single permitted rewrite. + """ + parts = urlsplit(url) + if "@" not in parts.netloc: + return url + return urlunsplit(parts._replace(netloc=parts.netloc.rpartition("@")[2])) + + +def _evicting_message_handler(cache: ClientResponseCache, user_handler: MessageHandlerFnT | None) -> MessageHandlerFnT: + """Wrap the session message handler with cache eviction on server notifications. + + Eviction runs before delegation, inside its own boundary, so a cache fault can + never suppress delivery. Every item — notification, `RequestResponder`, or + transport `Exception` — then reaches the user's handler; with none supplied, the + wrapper performs the same bare checkpoint `ClientSession` installs by default. + """ + + async def handler( + message: RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception, + ) -> None: + if isinstance(message, types.ServerNotification): + try: + await cache.evict_for_notification(message) + except Exception: # boundary: eviction reaches user store code; a cache fault must not block delivery + logger.exception("Response cache eviction failed; the notification is still delivered") + if user_handler is not None: + await user_handler(message) + else: + # Mirrors ClientSession's default handler (session._default_message_handler). + await anyio.lowlevel.checkpoint() + + return handler + + def _synthesize_discover(protocol_version: str) -> types.DiscoverResult: return types.DiscoverResult( supported_versions=[protocol_version], @@ -221,10 +269,23 @@ async def main(): """SEP-2133 extension support to advertise under `ClientCapabilities.extensions` (identifier -> settings), e.g. `{"io.modelcontextprotocol/ui": {"mimeTypes": [...]}}`.""" + cache: CacheConfig | Literal[False] | None = None + """Client-side response caching for the SEP-2549 cacheable methods (2026-07-28). + + `None` (the default) honors server `ttlMs`/`cacheScope` hints with a per-client + in-memory store; results carrying no hints are not cached. Pass a `CacheConfig` + to customize (shared store, partition, default TTL), or `False` to disable + caching entirely. + + Construction raises `ValueError` for a `CacheConfig` with a custom `store` when + no server identity can be derived (an in-process server or a `Transport` + instance) — set `CacheConfig.target_id` to name the server.""" + _entered: bool = field(init=False, default=False) _session: ClientSession | None = field(init=False, default=None) _exit_stack: AsyncExitStack | None = field(init=False, default=None) _connect: _Connector = field(init=False, repr=False, compare=False) + _response_cache: ClientResponseCache | None = field(init=False, default=None, repr=False, compare=False) def __post_init__(self) -> None: if self.mode not in ("legacy", "auto") and self.mode not in MODERN_PROTOCOL_VERSIONS: @@ -247,16 +308,48 @@ def __post_init__(self) -> None: else: self._connect = _connect_transport(srv) + if self.cache is not False: + config = self.cache if self.cache is not None else CacheConfig() + # Server identity, in resolution order: explicit override, server URL + # (userinfo stripped, byte-exact otherwise), per-Client random. Only the + # hash below leaves this scope — the raw identity may carry credentials + # in its query string and must never be logged or stored. + target_id = config.target_id + if target_id is None and isinstance(self.server, str): + target_id = _strip_userinfo(self.server) + if target_id is None: + if config.store is not None: + raise ValueError( + "a custom cache store requires CacheConfig.target_id when the server is not a URL: " + "in-process servers and Transport instances get a random per-client identity, so " + "their entries in a shared store could never be served to another client" + ) + target_id = uuid.uuid4().hex + self._response_cache = ClientResponseCache( + store=config.store if config.store is not None else InMemoryResponseCacheStore(), + partition=config.partition, + arm_id=hashlib.sha256(target_id.encode()).hexdigest(), + default_ttl_ms=config.default_ttl_ms, + clock=config.clock, + share_public=config.share_public, + # Lazy: the era is unknown until __aenter__'s handshake, and the + # session is unpublished outside the context manager. + negotiated_version=lambda: self._session.protocol_version if self._session is not None else None, + ) + async def _build_session(self, exit_stack: AsyncExitStack) -> ClientSession: """Enter the resolved connector and return an un-entered ClientSession.""" dispatcher = await self._connect(exit_stack, self.mode, self.raise_exceptions) + message_handler = self.message_handler + if self._response_cache is not None: + message_handler = _evicting_message_handler(self._response_cache, self.message_handler) return ClientSession( dispatcher=dispatcher, read_timeout_seconds=self.read_timeout_seconds, sampling_callback=self.sampling_callback, list_roots_callback=self.list_roots_callback, logging_callback=self.logging_callback, - message_handler=self.message_handler, + message_handler=message_handler, client_info=self.client_info, elicitation_callback=self.elicitation_callback, extensions=self.extensions, diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py new file mode 100644 index 000000000..4b03062b8 --- /dev/null +++ b/tests/client/test_client_caching.py @@ -0,0 +1,331 @@ +"""`Client` wiring for the response cache: the `cache=` constructor kwarg, server +identity resolution (explicit `target_id`, URL, per-client random), the custom-store +identity guard, the notification-eviction message-handler wrap, and the lazy +negotiated-version supplier. The coordinator's own behavior is covered in +`test_caching.py`; the cached verbs land separately. +""" + +import time +from types import TracebackType +from typing import Any + +import anyio +import mcp_types as types +import pytest +from inline_snapshot import snapshot +from mcp_types import ( + CallToolResult, + ListToolsResult, + ServerNotification, + TextContent, + ToolListChangedNotification, +) + +from mcp.client import Client +from mcp.client._transport import TransportStreams +from mcp.client.caching import ( + CacheConfig, + CacheEntry, + CacheKey, + ClientResponseCache, + InMemoryResponseCacheStore, +) +from mcp.server import Server, ServerRequestContext +from mcp.shared.session import RequestResponder + +pytestmark = pytest.mark.anyio + +IncomingMessage = RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception + + +def _coordinator(client: Client) -> ClientResponseCache: + cache = client._response_cache + assert cache is not None + return cache + + +def _private_arm(client: Client) -> str: + """The arm string the coordinator stamps into every store key's partition field. + + Server identity is only observable through it pre-verbs; `test_caching.py` pins + the arm layout, so only equality between clients matters here. + """ + return _coordinator(client)._private_arm + + +def _tools_list_key(client: Client) -> CacheKey: + return CacheKey("tools/list", "", _private_arm(client)) + + +class _OpaqueTransport: + """Shape-only `Transport`: identity resolution happens at construction, so the + tests never enter it.""" + + async def __aenter__(self) -> TransportStreams: + raise NotImplementedError + + async def __aexit__( + self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None + ) -> None: + raise NotImplementedError + + +def _list_changed_server() -> Server[Any]: + """In-process server whose `touch` tool emits `notifications/tools/list_changed`. + + The notification-delivery tests connect with `mode="legacy"`: the modern + in-process DirectDispatcher path has no standalone channel and drops unrelated + server notifications before they reach the client, so the legacy in-memory + stream pair is the lightest transport that actually delivers them. + """ + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + return ListToolsResult(tools=[types.Tool(name="touch", input_schema={"type": "object"})]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "touch" + await ctx.session.send_tool_list_changed() + return CallToolResult(content=[TextContent(text="touched")]) + + return Server("notifier", on_list_tools=list_tools, on_call_tool=call_tool) + + +async def _warm_tools_list_entry(client: Client) -> CacheKey: + """Seed a private-arm tools/list entry directly in the client's store; eviction + deletes regardless of freshness, so the entry's payload and expiry are inert.""" + key = _tools_list_key(client) + await _coordinator(client)._store.set(key, CacheEntry(value="warm", scope="private", expires_at=None)) + return key + + +def test_an_explicit_target_id_overrides_both_url_and_in_process_identity() -> None: + """`CacheConfig.target_id` wins over every server shape: a URL client and an + in-process client given the same target_id share one cache identity, distinct + from the URL-derived one. SDK-defined resolution order.""" + by_target_url = Client("https://example.com/mcp", cache=CacheConfig(target_id="svc")) + by_target_inproc = Client(Server("plain"), cache=CacheConfig(target_id="svc")) + by_url = Client("https://example.com/mcp") + + assert _private_arm(by_target_url) == _private_arm(by_target_inproc) + assert _private_arm(by_target_url) != _private_arm(by_url) + + +def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: + """Stripping credentials is the single permitted URL rewrite: userinfo variants + of the same URL resolve to the identity of the bare URL. SDK-defined.""" + bare = Client("https://example.com/mcp") + with_password = Client("https://user:secret@example.com/mcp") + with_token = Client("https://token@example.com/mcp") + + assert _private_arm(bare) == _private_arm(with_password) == _private_arm(with_token) + + +def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: + """URL identity is byte-exact outside userinfo — `?tenant=a` and `?tenant=b` + must never share entries (over-normalization would merge tenants). SDK-defined.""" + tenant_a = Client("https://example.com/mcp?tenant=a") + tenant_b = Client("https://example.com/mcp?tenant=b") + + assert _private_arm(tenant_a) != _private_arm(tenant_b) + + +def test_two_clients_on_one_in_process_server_get_distinct_cache_identities() -> None: + """An in-process server has no URL, so each client gets a random per-client + identity — two clients on the same server never share entries. SDK-defined.""" + server = Server("plain") + + assert _private_arm(Client(server)) != _private_arm(Client(server)) + + +def test_a_transport_object_gets_a_per_client_cache_identity() -> None: + """The `Transport` protocol carries no URL, so a transport-backed client gets + the same random per-client identity as an in-process one. SDK-defined.""" + transport = _OpaqueTransport() + + assert _private_arm(Client(transport)) != _private_arm(Client(transport)) + + +@pytest.mark.parametrize("make_server", [lambda: Server("plain"), _OpaqueTransport], ids=["in-process", "transport"]) +def test_a_custom_store_without_a_url_or_target_id_is_rejected(make_server: Any) -> None: + """A shared store keyed by a random per-client identity would accumulate entries + no other client can ever read, so construction refuses the combination and + points at the fix.""" + with pytest.raises(ValueError) as exc_info: + Client(make_server(), cache=CacheConfig(store=InMemoryResponseCacheStore(), partition="p")) + assert str(exc_info.value) == snapshot( + "a custom cache store requires CacheConfig.target_id when the server is not a URL: in-process servers " + "and Transport instances get a random per-client identity, so their entries in a shared store could " + "never be served to another client" + ) + + +def test_a_custom_store_with_a_url_server_constructs_and_is_used() -> None: + """A URL provides a stable identity, so a custom store needs no `target_id`.""" + store = InMemoryResponseCacheStore() + client = Client("https://example.com/mcp", cache=CacheConfig(store=store, partition="p")) + + assert _coordinator(client)._store is store + + +def test_a_custom_store_with_an_explicit_target_id_constructs_for_any_server() -> None: + """`target_id` is the documented escape hatch: it lifts the custom-store guard + even for an in-process server.""" + store = InMemoryResponseCacheStore() + client = Client(Server("plain"), cache=CacheConfig(store=store, partition="p", target_id="svc")) + + assert _coordinator(client)._store is store + + +async def test_cache_false_disables_the_cache_and_the_handler_wrap() -> None: + """`cache=False` mints no coordinator and installs the user's handler unwrapped — + today's no-cache behavior exactly.""" + + async def handler(message: IncomingMessage) -> None: + raise NotImplementedError + + client = Client(_list_changed_server(), cache=False, message_handler=handler) + assert client._response_cache is None + + async with client: + assert client.session._message_handler is handler + + +def test_the_default_cache_uses_a_per_client_in_memory_store() -> None: + """`cache=None` (the default) is cache-on: each client gets its own coordinator + backed by its own in-memory store, never shared between clients.""" + server = Server("plain") + first = Client(server) + second = Client(server) + + assert isinstance(_coordinator(first)._store, InMemoryResponseCacheStore) + assert _coordinator(first)._store is not _coordinator(second)._store + + +async def test_the_negotiated_version_supplier_tracks_the_session_lifecycle() -> None: + """The era supplier returns None before connect (and again after exit) and the + negotiated version while the session is live — the era gate must never read a + stale or raising source.""" + client = Client(_list_changed_server()) + supplier = _coordinator(client)._negotiated_version + + assert supplier() is None + async with client: + assert supplier() == client.protocol_version + assert supplier() is None + + +async def test_a_list_changed_notification_evicts_without_a_user_handler() -> None: + """With no user handler the wrap is still installed: a tools/list_changed + notification deletes the warm tools/list entry from both arms. Spec SHOULD + (notifications invalidate).""" + + class _EventedStore(InMemoryResponseCacheStore): + """Signals once both arms of an eviction have been deleted.""" + + def __init__(self) -> None: + super().__init__() + self._deletes = 0 + self.both_arms_deleted = anyio.Event() + + async def delete(self, key: CacheKey) -> None: + await super().delete(key) + self._deletes += 1 + if self._deletes == 2: + self.both_arms_deleted.set() + + store = _EventedStore() + client = Client( + _list_changed_server(), mode="legacy", cache=CacheConfig(store=store, partition="p", target_id="svc") + ) + + async with client: + key = await _warm_tools_list_entry(client) + await client.call_tool("touch", {}) + with anyio.fail_after(5): + await store.both_arms_deleted.wait() + assert await store.get(key) is None + + +async def test_a_user_handler_receives_the_notification_the_eviction_consumed() -> None: + """Eviction is a tee, not a filter: the warm entry is gone by the time the + user's handler sees the notification, and nothing else is delivered.""" + received: list[IncomingMessage] = [] + seen = anyio.Event() + + async def collect(message: IncomingMessage) -> None: + received.append(message) + seen.set() + + client = Client(_list_changed_server(), mode="legacy", message_handler=collect) + + async with client: + key = await _warm_tools_list_entry(client) + await client.call_tool("touch", {}) + with anyio.fail_after(5): + await seen.wait() + # The wrap awaits the eviction before delegating, so delivery implies the + # entry is already gone. + assert await _coordinator(client)._store.get(key) is None + + assert received == snapshot([ToolListChangedNotification()]) + + +async def test_non_notification_items_pass_through_to_the_user_handler_untouched() -> None: + """The wrap delegates non-notification items verbatim and leaves the cache + alone. Transport `Exception` items only exist on stream-backed dispatchers, + which the in-process path cannot produce, so the installed handler is invoked + directly; `RequestResponder` items take this same non-notification branch.""" + received: list[IncomingMessage] = [] + + async def collect(message: IncomingMessage) -> None: + received.append(message) + + client = Client(_list_changed_server(), message_handler=collect) + + async with client: + installed = client.session._message_handler + assert installed is not collect # the wrap, not the bare user handler + key = await _warm_tools_list_entry(client) + fault = RuntimeError("stream broke") + await installed(fault) + assert received == [fault] + assert await _coordinator(client)._store.get(key) is not None + + +async def test_a_raising_eviction_does_not_block_notification_delivery(caplog: pytest.LogCaptureFixture) -> None: + """The eviction boundary contains cache faults: a coordinator that raises is + logged and the user's handler still receives the notification.""" + + class _ExplodingCache(ClientResponseCache): + async def evict_for_notification(self, notification: ServerNotification) -> None: + raise RuntimeError("cache bug") + + received: list[IncomingMessage] = [] + seen = anyio.Event() + + async def collect(message: IncomingMessage) -> None: + received.append(message) + seen.set() + + client = Client(_list_changed_server(), mode="legacy", message_handler=collect) + # The wrap reads `_response_cache` when the session is built, so swapping the + # coordinator pre-enter routes eviction through the exploding subclass. + client._response_cache = _ExplodingCache( + store=InMemoryResponseCacheStore(), + partition="", + arm_id="arm", + default_ttl_ms=0, + clock=time.time, + share_public=False, + negotiated_version=lambda: None, + ) + + async with client: + await client.call_tool("touch", {}) + with anyio.fail_after(5): + await seen.wait() + + assert received == snapshot([ToolListChangedNotification()]) + assert "Response cache eviction failed; the notification is still delivered" in [ + record.message for record in caplog.records + ] From 0a19550e21d7f12e63a9b64e5dbabbac2951d9ed Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 15:45:51 +0000 Subject: [PATCH 05/18] Serve cacheable client verbs through the response cache --- src/mcp/client/client.py | 146 ++++++- src/mcp/client/session.py | 10 + tests/client/test_client_caching.py | 637 +++++++++++++++++++++++++++- 3 files changed, 776 insertions(+), 17 deletions(-) diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index 44377f452..2afd2dd53 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -8,13 +8,15 @@ from collections.abc import Awaitable, Callable, Mapping from contextlib import AsyncExitStack from dataclasses import KW_ONLY, dataclass, field -from typing import Any, Literal, TypeVar +from typing import Any, Literal, TypeVar, cast from urllib.parse import urlsplit, urlunsplit import anyio import anyio.lowlevel import mcp_types as types from mcp_types import ( + INVALID_PARAMS, + CacheableResult, CallToolResult, CompleteResult, EmptyResult, @@ -44,7 +46,7 @@ from mcp.client._memory import InMemoryTransport from mcp.client._probe import negotiate_auto from mcp.client._transport import Transport -from mcp.client.caching import CacheConfig, ClientResponseCache, InMemoryResponseCacheStore +from mcp.client.caching import CacheConfig, CacheMode, ClientResponseCache, InMemoryResponseCacheStore from mcp.client.session import ( ClientRequestContext, ClientSession, @@ -60,7 +62,7 @@ from mcp.server.runner import modern_on_request from mcp.shared.direct_dispatcher import create_direct_dispatcher_pair from mcp.shared.dispatcher import Dispatcher, ProgressFnT -from mcp.shared.exceptions import MCPDeprecationWarning +from mcp.shared.exceptions import MCPDeprecationWarning, MCPError from mcp.shared.jsonrpc_dispatcher import JSONRPCDispatcher from mcp.shared.session import RequestResponder @@ -73,6 +75,7 @@ _T = TypeVar("_T") _ResultT = TypeVar("_ResultT") +_CacheableT = TypeVar("_CacheableT", bound=CacheableResult) _Connector = Callable[[AsyncExitStack, ConnectMode, bool], Awaitable["Dispatcher[Any]"]] """Resolved at ``__post_init__`` from the shape of ``server`` alone: enter whatever resources @@ -275,7 +278,9 @@ async def main(): `None` (the default) honors server `ttlMs`/`cacheScope` hints with a per-client in-memory store; results carrying no hints are not cached. Pass a `CacheConfig` to customize (shared store, partition, default TTL), or `False` to disable - caching entirely. + caching entirely. The cacheable verbs (`list_tools`, `list_prompts`, + `list_resources`, `list_resource_templates`, `read_resource`) take a per-call + `cache_mode` to narrow caching for one call; with `cache=False` it is inert. Construction raises `ValueError` for a `CacheConfig` with a custom `store` when no server identity can be derived (an in-process server or a `Transport` @@ -454,23 +459,79 @@ async def set_logging_level(self, level: LoggingLevel, *, meta: RequestParamsMet """Set the logging level on the server.""" return await self.session.set_logging_level(level=level, meta=meta) # pyright: ignore[reportDeprecated] + async def _cached_fetch( + self, + method: str, + *, + cursor: str | None, + cache_mode: CacheMode, + send: Callable[[], Awaitable[_CacheableT]], + absorb: Callable[[_CacheableT], _CacheableT] | None = None, + ) -> _CacheableT: + """Serve one of the four list verbs through the response cache. + + `send` performs the fetch via the session; `absorb` (tools/list only) + re-applies session-side derived state to a served cache hit. + """ + cache = self._response_cache + if cache is None or cache_mode == "bypass": + return await send() # no read, no write, no eviction side-effects + if cursor is not None: + # Continuation pages never read or write the (cursor-less) entry, but an + # expired-cursor rejection signals the listing changed since the entry was + # fetched, so it is evicted (spec SHOULD; over-eviction is harmless). + try: + return await send() + except MCPError as e: + if e.code == INVALID_PARAMS: + await cache.evict_method(method) + raise + if cache_mode == "use" and (hit := await cache.read(method, "")) is not None: + # The store key carries the method, so the entry under it has `send`'s + # result type. The hit is already a private deep copy of the stored + # value, so absorption may mutate it freely. + served = cast(_CacheableT, hit) + return served if absorb is None else absorb(served) + gen = cache.capture(method, "") + result = await send() + await cache.write(method, "", result, gen, cache_mode) + return result + async def list_resources( self, *, cursor: str | None = None, meta: RequestParamsMeta | None = None, + cache_mode: CacheMode = "use", ) -> ListResourcesResult: - """List available resources from the server.""" - return await self.session.list_resources(params=PaginatedRequestParams(cursor=cursor, _meta=meta)) + """List available resources from the server. + + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + """ + return await self._cached_fetch( + "resources/list", + cursor=cursor, + cache_mode=cache_mode, + send=lambda: self.session.list_resources(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), + ) async def list_resource_templates( self, *, cursor: str | None = None, meta: RequestParamsMeta | None = None, + cache_mode: CacheMode = "use", ) -> ListResourceTemplatesResult: - """List available resource templates from the server.""" - return await self.session.list_resource_templates(params=PaginatedRequestParams(cursor=cursor, _meta=meta)) + """List available resource templates from the server. + + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + """ + return await self._cached_fetch( + "resources/templates/list", + cursor=cursor, + cache_mode=cache_mode, + send=lambda: self.session.list_resource_templates(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), + ) async def read_resource( self, @@ -479,6 +540,7 @@ async def read_resource( input_responses: InputResponses | None = None, request_state: str | None = None, meta: RequestParamsMeta | None = None, + cache_mode: CacheMode = "use", ) -> ReadResourceResult: """Read a resource from the server. @@ -493,6 +555,11 @@ async def read_resource( resuming from a persisted `InputRequiredResult`). request_state: Opaque state to seed the first call with. meta: Additional metadata for the request. + cache_mode: Adjusts the response cache's behavior for this call + (see `CacheMode`). Seeded calls (either `input_responses` or + `request_state` set) are resumptions of a multi-round-trip + read and ignore it entirely: no cache read, no write, no + refresh purge. Returns: The resource content. @@ -507,7 +574,31 @@ async def retry(r: InputResponses | None, s: str | None) -> ReadResourceResult | uri, input_responses=r, request_state=s, meta=meta, allow_input_required=True ) - return await self._drive_input_required(await retry(input_responses, request_state), retry) + # Results of requests carrying inputResponses or requestState must never be + # cached (spec MUST), and a seeded call exists to resume a specific exchange - + # serving it from the cache would skip the resumption. + seeded = input_responses is not None or request_state is not None + cache = None if seeded else self._response_cache + if cache is None or cache_mode == "bypass": + return await self._drive_input_required(await retry(input_responses, request_state), retry) + if cache_mode == "use" and (hit := await cache.read("resources/read", uri)) is not None: + # InputRequiredResult is never stored (only terminal first-round results + # are written below), so a hit is always terminal and legitimately skips + # the driver. + return cast(ReadResourceResult, hit) + gen = cache.capture("resources/read", uri) + first = await retry(None, None) + if not isinstance(first, InputRequiredResult): + await cache.write("resources/read", uri, first, gen, cache_mode) + elif cache_mode == "refresh": + # An input_required resolution can never be stored, but the explicit + # refresh still superseded whatever was cached: purge the warm entry + # so it cannot be served again (the same supersession rule as a + # refreshed ttl<=0 result in `ClientResponseCache.write`). + await cache.evict_key("resources/read", uri) + # A terminal result reached through driver rounds is never cached: the rounds + # carried inputResponses (the same spec MUST as the seeded skip above). + return await self._drive_input_required(first, retry) async def subscribe_resource(self, uri: str, *, meta: RequestParamsMeta | None = None) -> EmptyResult: """Subscribe to resource updates.""" @@ -574,9 +665,18 @@ async def list_prompts( *, cursor: str | None = None, meta: RequestParamsMeta | None = None, + cache_mode: CacheMode = "use", ) -> ListPromptsResult: - """List available prompts from the server.""" - return await self.session.list_prompts(params=PaginatedRequestParams(cursor=cursor, _meta=meta)) + """List available prompts from the server. + + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + """ + return await self._cached_fetch( + "prompts/list", + cursor=cursor, + cache_mode=cache_mode, + send=lambda: self.session.list_prompts(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), + ) async def get_prompt( self, @@ -658,9 +758,27 @@ async def complete( """ return await self.session.complete(ref=ref, argument=argument, context_arguments=context_arguments) - async def list_tools(self, *, cursor: str | None = None, meta: RequestParamsMeta | None = None) -> ListToolsResult: - """List available tools from the server.""" - return await self.session.list_tools(params=PaginatedRequestParams(cursor=cursor, _meta=meta)) + async def list_tools( + self, + *, + cursor: str | None = None, + meta: RequestParamsMeta | None = None, + cache_mode: CacheMode = "use", + ) -> ListToolsResult: + """List available tools from the server. + + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + """ + return await self._cached_fetch( + "tools/list", + cursor=cursor, + cache_mode=cache_mode, + send=lambda: self.session.list_tools(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), + # A cache hit skips session.list_tools, so the session re-absorbs the + # served listing to rebuild its derived per-tool state (header maps, + # output schemas) - idempotent on the already-filtered stored value. + absorb=self.session._absorb_tool_listing, # pyright: ignore[reportPrivateUsage] + ) @deprecated("The roots capability is deprecated as of 2026-07-28 (SEP-2577).", category=MCPDeprecationWarning) async def send_roots_list_changed(self) -> None: diff --git a/src/mcp/client/session.py b/src/mcp/client/session.py index 3cebb569e..fd56c3940 100644 --- a/src/mcp/client/session.py +++ b/src/mcp/client/session.py @@ -895,7 +895,17 @@ async def list_tools(self, *, params: types.PaginatedRequestParams | None = None types.ListToolsRequest(params=params), types.ListToolsResult, ) + return self._absorb_tool_listing(result) + def _absorb_tool_listing(self, result: types.ListToolsResult) -> types.ListToolsResult: + """Filter a tool listing per the 2026 x-mcp-header MUST and rebuild the derived + per-tool state (arg→header maps, output schemas) from it. + + Idempotent, so the client response cache can re-absorb a served listing: stored + values are already post-filter, making the re-filter a no-op that rebuilds the + maps and schemas from the served value. `result` is mutated in place (the cache + only ever passes a private deep copy). + """ if self._negotiated_version in MODERN_PROTOCOL_VERSIONS: # 2026-07-28: clients MUST drop tools whose x-mcp-header annotations are invalid. kept: list[types.Tool] = [] diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 4b03062b8..2d62a8511 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -1,25 +1,45 @@ """`Client` wiring for the response cache: the `cache=` constructor kwarg, server identity resolution (explicit `target_id`, URL, per-client random), the custom-store -identity guard, the notification-eviction message-handler wrap, and the lazy -negotiated-version supplier. The coordinator's own behavior is covered in -`test_caching.py`; the cached verbs land separately. +identity guard, the notification-eviction message-handler wrap, the lazy +negotiated-version supplier, and the five cacheable verbs (the `_cached_fetch` +choke point, the `read_resource` sibling, and the tools/list absorption seam). +The coordinator's own behavior is covered in `test_caching.py`. """ +import json import time from types import TracebackType from typing import Any import anyio +import httpx import mcp_types as types import pytest from inline_snapshot import snapshot from mcp_types import ( + INTERNAL_ERROR, + INVALID_PARAMS, CallToolResult, + DiscoverResult, + ElicitRequest, + ElicitRequestFormParams, + ElicitResult, + Implementation, + InputRequiredResult, + ListPromptsResult, + ListResourcesResult, + ListResourceTemplatesResult, ListToolsResult, + ReadResourceResult, + ResourceUpdatedNotification, + ServerCapabilities, ServerNotification, TextContent, + TextResourceContents, + Tool, ToolListChangedNotification, ) +from mcp_types.version import LATEST_MODERN_VERSION from mcp.client import Client from mcp.client._transport import TransportStreams @@ -30,8 +50,12 @@ ClientResponseCache, InMemoryResponseCacheStore, ) +from mcp.client.streamable_http import streamable_http_client from mcp.server import Server, ServerRequestContext +from mcp.server.caching import CacheHint +from mcp.shared.exceptions import MCPError from mcp.shared.session import RequestResponder +from tests.interaction._connect import BASE_URL, mounted_app pytestmark = pytest.mark.anyio @@ -329,3 +353,610 @@ async def collect(message: IncomingMessage) -> None: assert "Response cache eviction failed; the notification is still delivered" in [ record.message for record in caplog.records ] + + +# --- The cacheable verbs --- + + +class _ManualClock: + """Injected wall clock: tests advance `now` instead of sleeping.""" + + def __init__(self) -> None: + self.now = 1_000_000.0 + + def __call__(self) -> float: + return self.now + + +def _varying_tools_server(*, ttl_ms: int = 60_000) -> tuple[Server[Any], list[str | None]]: + """In-process server whose every tools/list fetch returns a distinct tool name + `t`, so a served entry is distinguishable from a refetch by payload, not just + by handler count. The fetch log records each request's cursor.""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + return ListToolsResult(tools=[Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"})]) + + server = Server("varying", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=ttl_ms)}) + return server, fetches + + +def _tool_names(result: ListToolsResult) -> list[str]: + return [tool.name for tool in result.tools] + + +async def test_a_second_list_tools_within_the_ttl_is_served_from_the_cache() -> None: + """SEP-2549: a result carrying a `ttlMs` hint is reusable until it expires — the + second `list_tools` is served from the cache without reaching the server.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + first = await client.list_tools() + second = await client.list_tools() + + assert fetches == [None] + assert second == first + + +async def test_an_expired_entry_is_refetched() -> None: + """An entry is fresh strictly within its `ttlMs`: once the (injected) clock passes + expiry, the next `list_tools` fetches again and serves the new listing.""" + clock = _ManualClock() + server, fetches = _varying_tools_server(ttl_ms=60_000) + + async with Client(server, cache=CacheConfig(clock=clock)) as client: + assert _tool_names(await client.list_tools()) == ["t0"] + clock.now += 60.0 + assert _tool_names(await client.list_tools()) == ["t1"] + + assert fetches == [None, None] + + +async def test_each_list_verb_caches_independently_under_its_own_method() -> None: + """Cache keys discriminate by method (spec MUST): warming one list verb never + serves another — each of the four fetches once, and each repeat call is served + from that verb's own entry.""" + fetched: list[str] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetched.append("tools/list") + return ListToolsResult(tools=[]) + + async def list_prompts(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListPromptsResult: + fetched.append("prompts/list") + return ListPromptsResult(prompts=[]) + + async def list_resources( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourcesResult: + fetched.append("resources/list") + return ListResourcesResult(resources=[]) + + async def list_templates( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourceTemplatesResult: + fetched.append("resources/templates/list") + return ListResourceTemplatesResult(resource_templates=[]) + + hint = CacheHint(ttl_ms=60_000) + server = Server( + "all-lists", + on_list_tools=list_tools, + on_list_prompts=list_prompts, + on_list_resources=list_resources, + on_list_resource_templates=list_templates, + cache_hints={ + "tools/list": hint, + "prompts/list": hint, + "resources/list": hint, + "resources/templates/list": hint, + }, + ) + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + # First round: every verb fetches, despite the previously warmed entries. + await client.list_tools() + await client.list_prompts() + await client.list_resources() + await client.list_resource_templates() + # Second round: every verb is served from its own entry. + await client.list_tools() + await client.list_prompts() + await client.list_resources() + await client.list_resource_templates() + + assert fetched == ["tools/list", "prompts/list", "resources/list", "resources/templates/list"] + + +async def test_read_resource_caches_per_uri() -> None: + """Cache keys discriminate by result-affecting params (spec MUST): two uris cache + independently, and each repeat read is served from its own entry.""" + reads: list[str] = [] + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + reads.append(params.uri) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=params.uri)]) + + server = Server("res", on_read_resource=read, cache_hints={"resources/read": CacheHint(ttl_ms=60_000)}) + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + first_a = await client.read_resource("memo://a") + first_b = await client.read_resource("memo://b") + assert await client.read_resource("memo://a") == first_a + assert await client.read_resource("memo://b") == first_b + + assert reads == ["memo://a", "memo://b"] + + +def _paginated_tools_server() -> tuple[Server[Any], list[str | None]]: + """In-process server with a cacheable first page; the cursor `"expired"` is + rejected with INVALID_PARAMS (the spec's expired-cursor signal) and `"fail"` + with INTERNAL_ERROR (any other continuation failure).""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + cursor = params.cursor if params is not None else None + fetches.append(cursor) + if cursor is None: + first_page = Tool(name="first-page", input_schema={"type": "object"}) + return ListToolsResult(tools=[first_page], next_cursor="page-2") + if cursor == "page-2": + return ListToolsResult(tools=[Tool(name="second-page", input_schema={"type": "object"})]) + if cursor == "fail": + raise MCPError(code=INTERNAL_ERROR, message="transient failure") + raise MCPError(code=INVALID_PARAMS, message=f"Unknown cursor: {cursor!r}") + + server = Server("paginated", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}) + return server, fetches + + +async def test_cursor_continuations_neither_read_nor_write_the_cache() -> None: + """Only cursor-less calls participate in caching (SDK-defined single-page entry): + a continuation fetches despite a warm entry, and its page does not replace it.""" + server, fetches = _paginated_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["first-page"] + # Not served from the warm entry, despite cache_mode="use". + assert _tool_names(await client.list_tools(cursor="page-2")) == ["second-page"] + # The continuation page did not overwrite the cursor-less entry. + assert _tool_names(await client.list_tools()) == ["first-page"] + + assert fetches == [None, "page-2"] + + +async def test_an_expired_cursor_rejection_evicts_the_methods_entry() -> None: + """Spec SHOULD: an INVALID_PARAMS rejection of a continuation cursor means the + listing changed, so the cached first page is evicted and refetched next time.""" + server, fetches = _paginated_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + await client.list_tools() + with pytest.raises(MCPError) as exc_info: + await client.list_tools(cursor="expired") + assert exc_info.value.code == INVALID_PARAMS + await client.list_tools() + + assert fetches == [None, "expired", None] + + +async def test_an_expired_cursor_rejection_under_bypass_does_not_evict() -> None: + """`cache_mode="bypass"` means no cache side-effects at all: the same + INVALID_PARAMS rejection leaves the warm entry in place.""" + server, fetches = _paginated_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + await client.list_tools() + with pytest.raises(MCPError) as exc_info: + await client.list_tools(cursor="expired", cache_mode="bypass") + assert exc_info.value.code == INVALID_PARAMS + await client.list_tools() # still served from the warm entry + + assert fetches == [None, "expired"] + + +async def test_a_non_cursor_error_on_a_continuation_does_not_evict() -> None: + """Only INVALID_PARAMS signals cursor expiry: a continuation failing with any + other code re-raises without disturbing the warm entry.""" + server, fetches = _paginated_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + await client.list_tools() + with pytest.raises(MCPError) as exc_info: + await client.list_tools(cursor="fail") + assert exc_info.value.code == INTERNAL_ERROR + await client.list_tools() # still served from the warm entry + + assert fetches == [None, "fail"] + + +async def test_bypass_neither_serves_nor_disturbs_a_warm_entry() -> None: + """`cache_mode="bypass"` fetches fresh without reading the warm entry and without + storing the fetched result over it.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["t0"] + assert _tool_names(await client.list_tools(cache_mode="bypass")) == ["t1"] + # The bypass fetch neither served nor replaced the entry. + assert _tool_names(await client.list_tools()) == ["t0"] + + assert fetches == [None, None] + + +async def test_refresh_skips_the_read_and_stores_the_refetched_result() -> None: + """`cache_mode="refresh"` ignores the warm entry, fetches, and re-stores: the + following plain call serves the refreshed listing.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["t0"] + assert _tool_names(await client.list_tools(cache_mode="refresh")) == ["t1"] + assert _tool_names(await client.list_tools()) == ["t1"] + + assert fetches == [None, None] + + +async def test_refresh_storing_a_ttl_zero_result_purges_the_warm_entry() -> None: + """A refresh whose refetched result is uncacheable (`ttlMs: 0`) purges the warm + entry instead of leaving it to be served again — the refetch superseded it.""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + ttl_ms = 60_000 if len(fetches) == 1 else 0 + tool = Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"}) + return ListToolsResult(tools=[tool], ttl_ms=ttl_ms) + + server = Server("flip", on_list_tools=list_tools) + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["t0"] + assert _tool_names(await client.list_tools(cache_mode="refresh")) == ["t1"] + # t0 must not resurface: the refresh purged it, and t1 (ttl 0) was never stored. + assert _tool_names(await client.list_tools()) == ["t2"] + + assert fetches == [None, None, None] + + +async def test_cache_mode_is_inert_when_caching_is_disabled() -> None: + """With `cache=False` the verbs accept `cache_mode` but every call goes to the + server — no reads, no writes, no eviction machinery. SDK-defined off switch.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=False) as client: + await client.list_tools() + await client.list_tools(cache_mode="use") + await client.list_tools(cache_mode="refresh") + + assert fetches == [None, None, None] + + +@pytest.mark.parametrize( + "seed", + [{"request_state": "round-2"}, {"input_responses": {"ask": ElicitResult(action="decline")}}], + ids=["request_state", "input_responses"], +) +async def test_a_seeded_read_resource_skips_the_cache_and_ignores_cache_mode(seed: dict[str, Any]) -> None: + """Spec MUST: results of requests carrying `inputResponses` or `requestState` are + never cached. A seeded read is a resumption: it is not served from the warm entry + under "use", does not purge it under "refresh", and stores nothing — the final + plain read still serves the original entry.""" + reads = 0 + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + nonlocal reads + reads += 1 + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{reads}")], ttl_ms=60_000) + + server = Server("res", on_read_resource=read) + + def text(result: ReadResourceResult) -> str: + content = result.contents[0] + assert isinstance(content, TextResourceContents) + return content.text + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert text(await client.read_resource("memo://a")) == "v1" + assert text(await client.read_resource("memo://a", **seed)) == "v2" + assert text(await client.read_resource("memo://a", **seed, cache_mode="refresh")) == "v3" + # The warm v1 entry survived both seeded calls: nothing read, written, or purged. + assert text(await client.read_resource("memo://a")) == "v1" + + assert reads == 3 + + +async def test_a_terminal_read_reached_through_driver_rounds_is_never_cached() -> None: + """Spec MUST: the driver's retry rounds carry `inputResponses`, so a terminal + result reached through them is not cached — a repeat read goes back to the wire + (and drives the rounds again).""" + seeded_rounds: list[bool] = [] + ask = ElicitRequest( + params=ElicitRequestFormParams( + message="What is your name?", + requested_schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + ) + + async def read( + ctx: ServerRequestContext, params: types.ReadResourceRequestParams + ) -> ReadResourceResult | InputRequiredResult: + seeded_rounds.append(params.input_responses is not None) + if params.input_responses is not None: + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text="terminal")], ttl_ms=60_000) + return InputRequiredResult(input_requests={"ask": ask}) + + async def elicitation_callback( + context: Any, params: types.ElicitRequestParams + ) -> types.ElicitResult | types.ErrorData: + return ElicitResult(action="accept", content={"name": "Ada"}) + + server = Server("gated", on_read_resource=read) + + with anyio.fail_after(5): + async with Client( + server, elicitation_callback=elicitation_callback, cache=CacheConfig(clock=_ManualClock()) + ) as client: + first = await client.read_resource("memo://gated") + second = await client.read_resource("memo://gated") + + assert isinstance(first.contents[0], TextResourceContents) and first.contents[0].text == "terminal" + assert second == first + # Two wire rounds per call: the second call was not served from the cache. + assert seeded_rounds == [False, True, False, True] + + +async def test_a_refresh_that_resolves_to_input_required_purges_the_warm_entry() -> None: + """SDK-defined supersession rule: a refresh whose unseeded first round comes back + input_required cannot store its driven terminal result (the rounds carry + `inputResponses` — spec MUST), but it still purges the warm entry — the pre-flip + value must not resurface on the next plain read.""" + reads = 0 + ask = ElicitRequest( + params=ElicitRequestFormParams( + message="What is your name?", + requested_schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + ) + + async def read( + ctx: ServerRequestContext, params: types.ReadResourceRequestParams + ) -> ReadResourceResult | InputRequiredResult: + nonlocal reads + reads += 1 + # The resource starts plain and then flips to requiring input. + if reads > 1 and params.input_responses is None: + return InputRequiredResult(input_requests={"ask": ask}) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{reads}")], ttl_ms=60_000) + + async def elicitation_callback( + context: Any, params: types.ElicitRequestParams + ) -> types.ElicitResult | types.ErrorData: + return ElicitResult(action="accept", content={"name": "Ada"}) + + server = Server("flipping", on_read_resource=read) + + def text(result: ReadResourceResult) -> str: + content = result.contents[0] + assert isinstance(content, TextResourceContents) + return content.text + + with anyio.fail_after(5): + async with Client( + server, elicitation_callback=elicitation_callback, cache=CacheConfig(clock=_ManualClock()) + ) as client: + assert text(await client.read_resource("memo://a")) == "v1" # cached for 60s + assert text(await client.read_resource("memo://a", cache_mode="refresh")) == "v3" + # v1 must not resurface: the refresh purged it, and the driven terminal + # result (v3) was never stored — the plain read drives fresh rounds. + assert text(await client.read_resource("memo://a")) == "v5" + + assert reads == 5 + + +def _output_schema_server(call_result: CallToolResult) -> tuple[Server[Any], list[str | None]]: + """In-process server whose one tool declares an output schema; `call_tool` returns + the canned `call_result` so tests choose whether it satisfies that schema.""" + fetches: list[str | None] = [] + tool = Tool( + name="run", + input_schema={"type": "object"}, + output_schema={"type": "object", "properties": {"n": {"type": "integer"}}, "required": ["n"]}, + ) + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + return ListToolsResult(tools=[tool]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "run" + return call_result + + server = Server( + "schemas", + on_list_tools=list_tools, + on_call_tool=call_tool, + cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}, + ) + return server, fetches + + +async def test_a_listing_served_from_a_shared_store_rebuilds_output_schemas() -> None: + """A fresh client whose first `list_tools` is served from a pre-warmed shared + store absorbs the served listing into the session: `call_tool` validates its + structured output against the absorbed schema without ever fetching the listing + from the server (the fetch log stays at the warming client's one entry).""" + call_result = CallToolResult(content=[TextContent(text="ok")], structured_content={"n": 1}) + server, fetches = _output_schema_server(call_result) + config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc", clock=_ManualClock()) + + async with Client(server, cache=config) as warming: + listing = await warming.list_tools() + + async with Client(server, cache=config) as fresh: + assert await fresh.list_tools() == listing # served from the shared store + result = await fresh.call_tool("run", {}) + + assert result.structured_content == {"n": 1} + # One wire fetch total: the fresh client's listing AND the validation schema both + # came from the served entry (a starved schema cache would have re-listed here). + assert fetches == [None] + + +async def test_validation_from_a_served_listing_rejects_missing_structured_content() -> None: + """The schema absorbed from a served listing is enforced, not just present: a tool + result without structured content fails validation in the fresh client, again + without any wire refetch of the listing.""" + server, fetches = _output_schema_server(CallToolResult(content=[TextContent(text="ok")])) + config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc", clock=_ManualClock()) + + async with Client(server, cache=config) as warming: + await warming.list_tools() + + async with Client(server, cache=config) as fresh: + await fresh.list_tools() + with pytest.raises(RuntimeError) as exc_info: + await fresh.call_tool("run", {}) + + assert str(exc_info.value) == snapshot("Tool run has an output schema but did not return structured content") + assert fetches == [None] + + +async def test_a_cache_hit_listing_still_mirrors_x_mcp_headers_on_tools_call() -> None: + """A fresh client serving tools/list from a pre-warmed shared store still mirrors + `x-mcp-header` arguments into `Mcp-Param-*` headers on a later `tools/call`: the + arg→header maps are rebuilt from the served listing. Asserted at the wire (over + the in-process HTTP bridge) because the client never surfaces outgoing headers.""" + tool = Tool( + name="run", + input_schema={"type": "object", "properties": {"region": {"type": "string", "x-mcp-header": "Region"}}}, + ) + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + return ListToolsResult(tools=[tool], ttl_ms=60_000) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "run" + return CallToolResult(content=[TextContent(text="ok")]) + + server = Server("headers", on_list_tools=list_tools, on_call_tool=call_tool) + + posts: list[httpx.Request] = [] + + async def on_request(request: httpx.Request) -> None: + posts.append(request) + + config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc") + discover = DiscoverResult( + supported_versions=[LATEST_MODERN_VERSION], + capabilities=ServerCapabilities(), + server_info=Implementation(name="srv", version="0"), + ) + + with anyio.fail_after(5): + async with mounted_app(server, on_request=on_request) as (http, _): + warming = Client( + streamable_http_client(f"{BASE_URL}/mcp", http_client=http), + mode=LATEST_MODERN_VERSION, + prior_discover=discover, + cache=config, + ) + async with warming: + await warming.list_tools() + fresh = Client( + streamable_http_client(f"{BASE_URL}/mcp", http_client=http), + mode=LATEST_MODERN_VERSION, + prior_discover=discover, + cache=config, + ) + async with fresh: + await fresh.list_tools() + await fresh.call_tool("run", {"region": "us-west1"}) + + # Exactly one tools/list reached the wire: the fresh client served from the store. + assert [json.loads(request.content)["method"] for request in posts] == ["tools/list", "tools/call"] + assert posts[-1].headers["mcp-param-region"] == "us-west1" + + +async def test_a_tools_list_changed_notification_makes_the_next_list_refetch() -> None: + """Spec SHOULD: a list_changed notification invalidates the cached listing — the + next `list_tools` goes back to the server. Runs on a legacy session (the only + in-process transport that delivers standalone notifications) with `default_ttl_ms` + providing the cached entry, proving eviction is era-independent.""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + return ListToolsResult(tools=[Tool(name="touch", input_schema={"type": "object"})]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "touch" + await ctx.session.send_tool_list_changed() + return CallToolResult(content=[TextContent(text="ok")]) + + server = Server("notify", on_list_tools=list_tools, on_call_tool=call_tool) + + # The wrap evicts before delegating, so delivery here implies eviction completed. + delivered = anyio.Event() + + async def on_message(message: IncomingMessage) -> None: + assert isinstance(message, ToolListChangedNotification) # the only message this server emits + delivered.set() + + client = Client(server, mode="legacy", cache=CacheConfig(default_ttl_ms=60_000), message_handler=on_message) + async with client: + await client.list_tools() + await client.list_tools() + assert fetches == [None] # cached via default_ttl_ms on the legacy session + await client.call_tool("touch", {}) + with anyio.fail_after(5): + await delivered.wait() + await client.list_tools() + + assert fetches == [None, None] + + +async def test_a_resource_updated_notification_evicts_that_uris_read_entry() -> None: + """Spec SHOULD: `notifications/resources/updated` invalidates the cached read for + its uri. This is also the uri-form agreement proof: the entry stored under the + string passed to `read_resource` is the one the notification's `params.uri` + evicts — the next read of that uri refetches.""" + uri = "memo://cached" + reads: list[str] = [] + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + reads.append(params.uri) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{len(reads)}")]) + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + return ListToolsResult(tools=[Tool(name="poke", input_schema={"type": "object"})]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "poke" + await ctx.session.send_resource_updated(uri) + return CallToolResult(content=[TextContent(text="ok")]) + + server = Server("updates", on_read_resource=read, on_list_tools=list_tools, on_call_tool=call_tool) + + delivered: list[str] = [] + seen = anyio.Event() + + async def on_message(message: IncomingMessage) -> None: + assert isinstance(message, ResourceUpdatedNotification) # the only message this server emits + delivered.append(message.params.uri) + seen.set() + + client = Client(server, mode="legacy", cache=CacheConfig(default_ttl_ms=60_000), message_handler=on_message) + async with client: + await client.read_resource(uri) + await client.read_resource(uri) + assert reads == [uri] # cached via default_ttl_ms on the legacy session + await client.call_tool("poke", {}) + with anyio.fail_after(5): + await seen.wait() + await client.read_resource(uri) + + # The notification carried the exact string the entry was stored under. + assert delivered == [uri] + assert reads == [uri, uri] From 8fae9cf0bf2df55acf891ae3e4c87be28af888f1 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:10:03 +0000 Subject: [PATCH 06/18] Treat negative inbound ttlMs as zero at the client parse seams --- src/mcp/client/session.py | 22 +++++- tests/client/test_client_caching.py | 103 ++++++++++++++++++++++++++++ tests/client/test_session.py | 31 +++++++++ 3 files changed, 155 insertions(+), 1 deletion(-) diff --git a/src/mcp/client/session.py b/src/mcp/client/session.py index fd56c3940..4998fadaa 100644 --- a/src/mcp/client/session.py +++ b/src/mcp/client/session.py @@ -55,6 +55,19 @@ logger = logging.getLogger("client") +def _clamp_inbound_ttl(raw: dict[str, Any]) -> None: + """Floor a negative inbound `ttlMs` to 0, in place (2026-07-28 caching SHOULD). + + Runs before the surface validation, whose `ge=0` would otherwise fail the + whole call over one bad hint. Emit-side strictness is untouched — only a + misbehaving peer reaches this. Floats are floored too; bools are not numbers + here and are left for the validation to reject. + """ + ttl = raw.get("ttlMs") + if isinstance(ttl, int | float) and not isinstance(ttl, bool) and ttl < 0: + raw["ttlMs"] = 0 + + def _preconnect_stamp(data: dict[str, Any], opts: CallOptions) -> None: # initialize/discover forbid cancellation; other pre-handshake requests (lowlevel # ClientSession callers may skip the handshake entirely) keep the courtesy cancel. @@ -331,6 +344,7 @@ async def send_request( if metadata.on_resumption_token_update is not None: opts["on_resumption_token"] = metadata.on_resumption_token_update raw = await self._dispatcher.send_raw_request(method, data.get("params"), opts) + _clamp_inbound_ttl(raw) # Literal fallback covers pre-handshake and stateless; matches runner.py. version = self._negotiated_version or "2025-11-25" try: @@ -458,7 +472,13 @@ async def send_discover(self, version: str) -> dict[str, Any]: "cancel_on_abandon": False, "headers": {MCP_PROTOCOL_VERSION_HEADER: version, MCP_METHOD_HEADER: data["method"]}, } - return await self._dispatcher.send_raw_request(data["method"], data.get("params"), opts) + raw = await self._dispatcher.send_raw_request(data["method"], data.get("params"), opts) + # Clamping here (not in the callers) covers both discover() and the + # mode='auto' probe — un-floored, a negative ttl fails DiscoverResult + # validation in the probe, which reads as "not modern evidence" and + # silently downgrades the connection to the legacy handshake. + _clamp_inbound_ttl(raw) + return raw async def discover(self) -> types.DiscoverResult: """Probe `server/discover` and adopt the result. diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 2d62a8511..1da5ba91a 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -8,6 +8,8 @@ import json import time +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager from types import TracebackType from typing import Any @@ -54,6 +56,8 @@ from mcp.server import Server, ServerRequestContext from mcp.server.caching import CacheHint from mcp.shared.exceptions import MCPError +from mcp.shared.memory import MessageStream, create_client_server_memory_streams +from mcp.shared.message import SessionMessage from mcp.shared.session import RequestResponder from tests.interaction._connect import BASE_URL, mounted_app @@ -960,3 +964,102 @@ async def on_message(message: IncomingMessage) -> None: # The notification carried the exact string the entry was stored under. assert delivered == [uri] assert reads == [uri, uri] + + +# --- The inbound ttlMs clamp (parse seam) --- + + +@pytest.mark.parametrize("wire_ttl", [-5, -5.0]) +async def test_a_negative_inbound_ttl_is_served_as_zero_and_never_cached(wire_ttl: int | float) -> None: + """Spec SHOULD (2026-07-28 caching): a negative `ttlMs` is treated as 0 — the + call succeeds instead of failing the `ge=0` wire validation, and a zero ttl is + never stored, so the next call goes back to the server. The peer is scripted + over raw streams because an SDK server cannot emit a negative ttl (server-side + `ge=0` enforcement).""" + listings_served = 0 + + async def scripted_server(streams: MessageStream) -> None: + nonlocal listings_served + server_read, server_write = streams + async for message in server_read: + assert isinstance(message, SessionMessage) + frame = message.message + assert isinstance(frame, types.JSONRPCRequest) + if frame.method == "server/discover": + result: dict[str, Any] = { + "supportedVersions": [LATEST_MODERN_VERSION], + "capabilities": {}, + "serverInfo": {"name": "negative-ttl", "version": "0.0.1"}, + "resultType": "complete", + "ttlMs": 0, + } + else: + assert frame.method == "tools/list" + listings_served += 1 + result = {"resultType": "complete", "tools": [], "ttlMs": wire_ttl, "cacheScope": "private"} + await server_write.send(SessionMessage(types.JSONRPCResponse(jsonrpc="2.0", id=frame.id, result=result))) + + @asynccontextmanager + async def scripted_transport() -> AsyncIterator[TransportStreams]: + async with ( + create_client_server_memory_streams() as ((client_read, client_write), server_streams), + anyio.create_task_group() as tg, + ): + tg.start_soon(scripted_server, server_streams) + yield client_read, client_write + tg.cancel_scope.cancel() + + with anyio.fail_after(5): + async with Client(scripted_transport(), mode="auto") as client: + first = await client.list_tools() + second = await client.list_tools() + + assert first.ttl_ms == 0 + assert second.ttl_ms == 0 + assert listings_served == 2 # the clamped-to-zero ttl was never stored: the second call re-fetched + + +async def test_a_negative_discover_ttl_still_connects_modern_in_auto_mode() -> None: + """Spec SHOULD (2026-07-28 caching) — silent-downgrade regression: before the + parse-seam clamp, a negative `ttlMs` on `server/discover` failed `DiscoverResult` + validation inside the mode='auto' probe, which reads as "not modern evidence" and + silently fell back to the legacy initialize handshake. Clamped, the probe adopts + the modern era and the result carries `ttl_ms == 0`.""" + methods_seen: list[str] = [] + + async def scripted_server(streams: MessageStream) -> None: + server_read, server_write = streams + async for message in server_read: + assert isinstance(message, SessionMessage) + frame = message.message + assert isinstance(frame, types.JSONRPCRequest) + methods_seen.append(frame.method) + # A legacy downgrade would send `initialize` next; fail loudly instead. + assert frame.method == "server/discover" + result: dict[str, Any] = { + "supportedVersions": [LATEST_MODERN_VERSION], + "capabilities": {}, + "serverInfo": {"name": "negative-ttl", "version": "0.0.1"}, + "resultType": "complete", + "ttlMs": -5, + } + await server_write.send(SessionMessage(types.JSONRPCResponse(jsonrpc="2.0", id=frame.id, result=result))) + + @asynccontextmanager + async def scripted_transport() -> AsyncIterator[TransportStreams]: + async with ( + create_client_server_memory_streams() as ((client_read, client_write), server_streams), + anyio.create_task_group() as tg, + ): + tg.start_soon(scripted_server, server_streams) + yield client_read, client_write + tg.cancel_scope.cancel() + + with anyio.fail_after(5): + async with Client(scripted_transport(), mode="auto") as client: + assert client.protocol_version == LATEST_MODERN_VERSION + discover = client.session.discover_result + assert discover is not None + assert discover.ttl_ms == 0 + + assert methods_seen == ["server/discover"] diff --git a/tests/client/test_session.py b/tests/client/test_session.py index 83893e36f..b6ddb40a9 100644 --- a/tests/client/test_session.py +++ b/tests/client/test_session.py @@ -1661,6 +1661,37 @@ async def test_discover_reraises_unsupported_version_with_malformed_error_data() assert [m for m, _ in dispatcher.calls] == ["server/discover"] +# --- inbound ttlMs clamp --- + + +@pytest.mark.anyio +async def test_a_positive_inbound_ttl_reaches_the_result_unchanged() -> None: + """SDK-defined: the inbound clamp only floors negative `ttlMs` values — a valid + positive hint passes through to the typed result untouched.""" + listing: dict[str, Any] = {"resultType": "complete", "tools": [], "ttlMs": 60_000, "cacheScope": "private"} + dispatcher = _ScriptedDispatcher(_discover_result_dict(), listing) + with anyio.fail_after(5): + async with ClientSession(dispatcher=dispatcher) as session: + await session.discover() + result = await session.list_tools() + assert result.ttl_ms == 60_000 + + +@pytest.mark.anyio +@pytest.mark.parametrize("wire_ttl", [True, False]) +async def test_a_boolean_inbound_ttl_is_not_clamped_only_coerced_by_validation(wire_ttl: bool) -> None: + """SDK-defined: `bool` is an `int` subclass but the clamp does not treat it as a + number — the value reaches validation untouched, where pydantic's lax mode + coerces it (True -> 1, False -> 0) rather than rejecting it.""" + listing: dict[str, Any] = {"resultType": "complete", "tools": [], "ttlMs": wire_ttl, "cacheScope": "private"} + dispatcher = _ScriptedDispatcher(_discover_result_dict(), listing) + with anyio.fail_after(5): + async with ClientSession(dispatcher=dispatcher) as session: + await session.discover() + result = await session.list_tools() + assert result.ttl_ms == int(wire_ttl) + + @pytest.mark.anyio async def test_session_call_tool_returns_input_required_result_when_opted_in() -> None: """`ClientSession.call_tool(..., allow_input_required=True)` surfaces the From 98941acf63d6f5f5d5312a779e91de0e8b40a4f8 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:19:06 +0000 Subject: [PATCH 07/18] Document the client response cache --- docs/advanced/caching.md | 61 ++++++++-- docs/migration.md | 4 + docs_src/caching/tutorial003.py | 35 ++++-- src/mcp/client/caching.py | 5 + tests/client/test_client_caching.py | 61 ++++++++++ tests/docs_src/test_caching.py | 174 ++++++++++++++++++++++++++-- 6 files changed, 317 insertions(+), 23 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index f53a3096b..53ea0b89a 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -37,19 +37,63 @@ One caveat on paginated lists: the protocol requires the **same `cacheScope` on ## What the client sees -On the client, the hints arrive as plain fields on every cacheable result — `ttl_ms` and `cache_scope`, already parsed: +On a 2026-07-28 session, `Client` honors the hints for you: it has a built-in response cache, on by default. A result that arrives carrying a `ttlMs` is stored, and an identical call within that TTL is served from the cache — no round trip. A result that carries *no* hint is not cached: hint-less results get `CacheConfig.default_ttl_ms`, which defaults to `0` (immediately stale), so a server that declares nothing sees exactly the call-for-call traffic it always did. -```python title="client.py" hl_lines="15" +```python title="client.py" hl_lines="28 30 33" --8<-- "docs_src/caching/tutorial003.py" ``` -The SDK parses; it does not (yet) act. There is no built-in response cache: calling `list_tools()` twice makes two round trips, whatever the TTL said. The spec makes honoring optional — a client that ignores the hints entirely is fully conformant — so until the SDK grows a response cache, the supported path is to read the fields and do your own bookkeeping: +Four calls, three fetches. The second call found a fresh entry and never reached the server; advancing the (injected) clock past the TTL made the third fetch again; the fourth said `cache_mode="refresh"`. That kwarg exists on the five caching verbs — `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, `read_resource`: -* **Freshness** is `now < t_received + ttl_ms / 1000`: record the clock when the response arrives, and treat the result as reusable until the TTL runs out. `ttl_ms == 0` means *immediately stale* — don't reuse it at all. -* **Scope is a sharing rule, not a suggestion.** A `"private"` result may be reused only within the same authorization context — same access token, same cache. Never put `"private"` results in a cache shared across users. -* **Notifications beat TTL.** If the server sends `list_changed` while your copy is still fresh, the copy is stale now — re-fetch. +* `"use"` (the default) serves a fresh entry if there is one, and stores the fetch if not. +* `"refresh"` never serves: it fetches and stores the result, replacing whatever was cached. +* `"bypass"` makes the round trip without touching the cache at all — no read, no write. -Against an **older server** (pre-2026 protocol), the fields are simply absent from the wire, and the models show their conservative defaults: `ttl_ms == 0`, `cache_scope == "private"` — stale and unshared, the right assumption for a server that declared nothing. If you need to distinguish "the server said 0" from "the server said nothing", check `"ttl_ms" in result.model_fields_set`: it's only set when the field actually arrived. +To turn caching off entirely, construct with `Client(server, cache=False)`: every call is a round trip again, and `cache_mode`, while still accepted, does nothing. + +Scope is honored automatically too — `"private"` entries are keyed to the cache's *partition* (below); `"public"` ones may opt into wider sharing — and **notifications beat TTL**: a `list_changed` notification evicts the matching cached listing, and `resources/updated` evicts the cached read for its URI, however fresh they were. + +### Configuring it: `CacheConfig` + +```python +from mcp.client import CacheConfig + +client = Client("https://api.example.com/mcp", cache=CacheConfig(default_ttl_ms=5_000)) +``` + +* `store` — where entries live. The default is a fresh in-memory store per client; pass your own `ResponseCacheStore` implementation (Redis-backed, say) to share a cache across clients or processes. A custom store **requires** an explicit `partition`. +* `partition` — the authorization-context label that keeps one principal's `"private"` entries from being served to another within a shared store. +* `target_id` — explicit server identity, for custom transports and in-process servers (below). +* `default_ttl_ms` — TTL applied to results that carry no `ttlMs` hint. The default `0` leaves hint-less results uncached. +* `share_public` — serve server-asserted-`"public"` entries across partitions (below). Off by default. +* `clock` — the wall-clock source, epoch seconds. Inject one, as the example above does, and expiry tests need no sleeping. + +!!! warning "Partition = verified principal" + Derive `partition` from a **verified credential** — a validated token's subject, for example. Never from request-supplied data, and never from the server URL (server identity is a separate key axis). The SDK is a library with no authentication of its own: whoever constructs the `CacheConfig` — the deployment, not the tenant — is the trust anchor. A multi-tenant gateway mints one `CacheConfig` per authenticated principal. + + The partition is also fixed for the `Client`'s lifetime. If the connection's authorization context changes mid-session — a re-authentication as a different principal, say — the cache does not follow; construct a new `Client` for the new principal. + +Cache keys also carry the **server's identity**: the URL string you dialed, with any `user:pass@` userinfo stripped and otherwise byte-exact. No case folding, no query reordering, no trailing-slash cleanup — under-normalizing only costs sharing, while over-normalizing could merge two tenants (`?tenant=a` vs `?tenant=b`), so superficially different URLs simply don't share entries. When there is no URL — an in-process server, or a `Transport` instance — the client gets a random per-instance identity instead; set `CacheConfig.target_id` to name the server (with a custom store this is required, and construction says so). The identity is sha256-hashed before it enters key material, so a URL carrying secrets in its query string never appears in store keys — don't log the pre-hash form yourself, either. + +!!! warning "`share_public` trusts the server, fleet-wide" + By default even `"public"` entries stay within their partition. `share_public=True` serves entries the server marked `cacheScope: "public"` to **every** partition using the store — trusting the server's classification on behalf of all of them. A server that stamps `"public"` on per-tenant data (by bug or by malice) then leaks one tenant's response to the others. The flag is deliberately constructor-level only: the per-call `cache_mode` can narrow caching, but nothing per-call can widen sharing. + +### What the cache never does + +* **Session-tier calls bypass it.** `client.session.list_tools()` and friends always make the round trip; the cache lives on the `Client` verbs. +* **`server/discover` stays out of it.** The discover result is delivered once, at connect, and never enters the response cache — even when it carries a `ttlMs`. If you persist one yourself to skip the reconnect probe ([`prior_discover`](../client/protocol-versions.md#reconnecting-with-prior_discover)), its freshness is your bookkeeping: `DiscoverResult` carries `ttl_ms` and `cache_scope`, already parsed, for exactly that purpose. +* **Continuation pages are never cached.** Only cursor-less calls participate. A continuation page rejected for an expired cursor does *evict* the cached listing — the listing changed under it. +* **Multi-round-trip reads are never cached.** A `read_resource` seeded with `input_responses`/`request_state`, or one that resolves through input rounds, never enters the cache (a spec MUST). +* **Notification eviction needs notifications.** Eviction is only as good as the transport's delivery — the modern in-process path (`Client(server)` with the default `mode="auto"`) does not deliver standalone notifications today. +* **No stale-if-error.** An expired entry is never served because the refetch failed; the error propagates. +* **No coalescing.** Two concurrent identical calls are two fetches. +* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction — accepted, and bounded by the cache's 24-hour TTL cap. + +### Reading the hints yourself + +The hints are also plain fields on every cacheable result — `result.ttl_ms` and `result.cache_scope`, already parsed — if you want to layer your own bookkeeping on top of (or instead of) the built-in cache. + +Against an **older server** (pre-2026 protocol), the fields are simply absent from the wire, and the models show their conservative defaults: `ttl_ms == 0`, `cache_scope == "private"` — stale and unshared, the right assumption for a server that declared nothing. The cache treats a legacy session the same way: hints are never consulted there (whatever keys appear on the wire), only `default_ttl_ms` applies, and its default of `0` caches nothing — a pre-2026 connection behaves exactly as it did before the cache existed. If you need to distinguish "the server said 0" from "the server said nothing", check `"ttl_ms" in result.model_fields_set`: it's only set when the field actually arrived. ## Older clients @@ -61,4 +105,5 @@ Clients on pre-2026 protocol versions never see either field — the SDK strips * `cache_hints={method: CacheHint(...)}` at construction (both `MCPServer` and `Server`) sets server-wide values per method. * A handler that sets the fields on its result overrides the map, per field. * `"public"` is a promise that the result is identical for every caller. It is not access control. -* Clients read the hints as `result.ttl_ms` / `result.cache_scope` and own the caching decision themselves — the SDK has no built-in response cache yet. +* `Client` honors the hints automatically: its response cache is on by default, serves fresh entries instead of refetching, and caches nothing for servers (or sessions) that provide no hints. +* Per call, `cache_mode="refresh"` refetches and `"bypass"` skips the cache; `cache=False` at construction turns it off entirely. diff --git a/docs/migration.md b/docs/migration.md index 516cd8b18..c9908a401 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -427,6 +427,10 @@ On `ClientSession`, `call_tool` / `get_prompt` / `read_resource` still return th For protocol 2026-07-28 over Streamable HTTP, a tool's input-schema property may carry an `x-mcp-header` annotation. When a tool the client has listed is called, each annotated argument is mirrored into an `Mcp-Param-` request header (string verbatim, integer as decimal, boolean as `true`/`false`, base64-sentinel-wrapped when not header-safe; `null`/absent arguments are omitted). The argument is also left in the request body. `list_tools` caches a tool's annotations, so list a tool before calling it to enable mirroring; a tool the client never listed emits no `Mcp-Param-*` headers. Other transports ignore the annotation. +### `Client` verbs may serve cached responses ([SEP-2549](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2549)) + +On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to the cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. Servers that send no hints — including every pre-2026 server — see identical call-for-call behavior, because hint-less results are not cached. Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md). + ### Server extensions API ([SEP-2133](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2133)) `MCPServer` now accepts opt-in extensions that bundle MCP behaviour behind a diff --git a/docs_src/caching/tutorial003.py b/docs_src/caching/tutorial003.py index 77ade546b..5a17dbcdb 100644 --- a/docs_src/caching/tutorial003.py +++ b/docs_src/caching/tutorial003.py @@ -1,15 +1,34 @@ +from typing import Any + +from mcp_types import ListToolsResult, PaginatedRequestParams, Tool + from mcp import Client -from mcp.server import CacheHint, MCPServer +from mcp.client import CacheConfig +from mcp.server import CacheHint, Server, ServerRequestContext + +fetches = 0 +now = 1_000_000.0 + -mcp = MCPServer("Weather", cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}) +async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: + global fetches + fetches += 1 + return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})]) -@mcp.tool() -def forecast(city: str) -> str: - return f"Sunny in {city}" +server = Server( + "Weather", + on_list_tools=list_tools, + cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}, +) async def main() -> None: - async with Client(mcp) as client: - tools = await client.list_tools() - print(f"{len(tools.tools)} tools, fresh for {tools.ttl_ms / 1000:.0f}s, scope={tools.cache_scope}") + global now + async with Client(server, cache=CacheConfig(clock=lambda: now)) as client: + await client.list_tools() # fetch 1 + await client.list_tools() # fresh for 60s: served from the cache + now += 60.0 + await client.list_tools() # the TTL ran out: fetch 2 + await client.list_tools(cache_mode="refresh") # skip the cache read: fetch 3 + print(f"4 calls, {fetches} fetches") diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py index 2e50cbb42..63052989b 100644 --- a/src/mcp/client/caching.py +++ b/src/mcp/client/caching.py @@ -137,6 +137,11 @@ class CacheConfig: authentication of its own: whoever constructs the `CacheConfig` - the deployment, not the tenant - is the trust anchor. Multi-tenant gateways mint one `CacheConfig` per authenticated principal. + + The partition is fixed for the `Client`'s lifetime: if the connection's + authorization context changes mid-session (a re-authentication as a + different principal), the cache does not follow - construct a new + `Client` for the new principal. """ target_id: str | None = None diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 1da5ba91a..739fcf0bd 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -6,6 +6,7 @@ The coordinator's own behavior is covered in `test_caching.py`. """ +import hashlib import json import time from collections.abc import AsyncIterator @@ -148,6 +149,16 @@ def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: assert _private_arm(bare) == _private_arm(with_password) == _private_arm(with_token) +def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None: + """The arm carries sha256(url-sans-userinfo), not the URL itself, so a secret + in the query string never appears in store keys. SDK-defined; pins the docs' + secrets-never-in-keys claim — raw-URL key material would fail here.""" + client = Client("https://user:pass@example.com/mcp?api_key=SECRET") + + arm_id = hashlib.sha256(b"https://example.com/mcp?api_key=SECRET").hexdigest() + assert _private_arm(client) == json.dumps(["private", arm_id, ""]) + + def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: """URL identity is byte-exact outside userinfo — `?tenant=a` and `?tenant=b` must never share entries (over-normalization would merge tenants). SDK-defined.""" @@ -966,6 +977,56 @@ async def on_message(message: IncomingMessage) -> None: assert reads == [uri, uri] +async def test_the_modern_in_process_path_drops_the_eviction_notification() -> None: + """Pins the documented transport gap: the default in-process connection + (mode="auto", DirectDispatcher) does not deliver standalone server notifications, + so a tools/list_changed emitted mid-call never reaches the cache - the warm entry + survives and the next `list_tools` is still served from it. Delivery on this path + would happen inline within the awaited `call_tool`, so asserting after it returns + is race-free. If this test starts failing, the path gained delivery: flip the + `docs/advanced/caching.md` eviction caveat and the legacy-mode notification tests.""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + return ListToolsResult(tools=[Tool(name="touch", input_schema={"type": "object"})]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "touch" + await ctx.session.send_tool_list_changed() + return CallToolResult(content=[TextContent(text="ok")]) + + server = Server( + "notify", + on_list_tools=list_tools, + on_call_tool=call_tool, + cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}, + ) + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + await client.list_tools() + await client.call_tool("touch", {}) + await client.list_tools() # still served from the warm entry: no eviction arrived + + assert fetches == [None] + + +async def test_a_discover_result_never_enters_the_response_cache() -> None: + """SDK ruling (documented): the response cache covers the five `Client` verbs + only. The connect-time server/discover result is never stored, even when it + carries a `ttlMs` hint - a persisted `prior_discover`'s freshness is the user's + bookkeeping (`DiscoverResult` carries the parsed hints for it).""" + server = Server("hinted", cache_hints={"server/discover": CacheHint(ttl_ms=60_000)}) + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + discover = client.session.discover_result + assert discover is not None + assert discover.ttl_ms == 60_000 # the hint arrived with the probe result... + store = _coordinator(client)._store + assert isinstance(store, InMemoryResponseCacheStore) + assert store._entries == {} # ...and nothing entered the cache + + # --- The inbound ttlMs clamp (parse seam) --- diff --git a/tests/docs_src/test_caching.py b/tests/docs_src/test_caching.py index bc2feb9ac..74973ebf0 100644 --- a/tests/docs_src/test_caching.py +++ b/tests/docs_src/test_caching.py @@ -1,13 +1,19 @@ """`docs/advanced/caching.md`: every claim the page makes, proved against the real SDK.""" +from collections.abc import Mapping from typing import Any, cast +import anyio import pytest from inline_snapshot import snapshot +from mcp_types import INTERNAL_ERROR, ListToolsResult, PaginatedRequestParams, Tool from docs_src.caching import tutorial001, tutorial002, tutorial003 -from mcp import Client -from mcp.server import CacheHint, MCPServer +from mcp import Client, MCPError +from mcp.client import CacheConfig +from mcp.client.caching import InMemoryResponseCacheStore +from mcp.server import CacheHint, MCPServer, Server, ServerRequestContext +from mcp.server.caching import CacheableMethod # See test_index.py for why this is a per-module mark and not a conftest hook. pytestmark = [pytest.mark.anyio, pytest.mark.filterwarnings("error::mcp.MCPDeprecationWarning")] @@ -55,16 +61,170 @@ async def test_the_handler_value_wins_over_the_map_per_field() -> None: assert tools.cache_scope == "public" -async def test_the_client_program_on_the_page_reads_the_hints(capsys: pytest.CaptureFixture[str]) -> None: - """tutorial003: `main()` is the literal client program on the page - the hints - arrive as parsed fields on the result.""" +async def test_the_client_program_on_the_page_makes_three_fetches_for_four_calls( + capsys: pytest.CaptureFixture[str], +) -> None: + """tutorial003: `main()` is the literal client program on the page - the second + call is served from the cache, the clock advance expires the entry, and + `cache_mode="refresh"` skips the read, so four calls cost three fetches.""" await tutorial003.main() - assert capsys.readouterr().out == "1 tools, fresh for 60s, scope=public\n" + assert capsys.readouterr().out == "4 calls, 3 fetches\n" + + +def _counting_tools_server(*, ttl_ms: int | None = 60_000) -> tuple[Server[Any], list[str | None]]: + """In-process server whose every tools/list fetch returns a distinct tool name + `t`, so a served cache entry is distinguishable from a refetch by payload. + `ttl_ms=None` sends no hints at all.""" + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + return ListToolsResult(tools=[Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"})]) + + hints: Mapping[CacheableMethod, CacheHint] | None = None + if ttl_ms is not None: + hints = {"tools/list": CacheHint(ttl_ms=ttl_ms)} + return Server("counting", on_list_tools=list_tools, cache_hints=hints), fetches + + +async def test_caching_is_on_by_default_the_second_call_makes_no_fetch() -> None: + """The page's claim: with no `cache=` argument at all, a result carrying a `ttlMs` + hint is stored and the identical call within the TTL never reaches the server.""" + server, fetches = _counting_tools_server() + async with Client(server) as client: + first = await client.list_tools() + second = await client.list_tools() + assert fetches == [None] + assert second == first + + +async def test_a_hintless_result_is_not_cached_by_default() -> None: + """The page's claim: `default_ttl_ms` defaults to 0, so a server that declares + nothing sees exactly the call-for-call traffic it always did.""" + server, fetches = _counting_tools_server(ttl_ms=None) + async with Client(server) as client: + await client.list_tools() + await client.list_tools() + assert fetches == [None, None] + + +async def test_cache_false_makes_every_call_a_round_trip() -> None: + """The page's claim: `cache=False` disables caching entirely - two calls are two + fetches even though the server's hint allowed a minute of reuse.""" + server, fetches = _counting_tools_server() + async with Client(server, cache=False) as client: + await client.list_tools() + await client.list_tools() + assert fetches == [None, None] + + +async def test_refresh_refetches_and_replaces_the_cached_entry() -> None: + """The page's claim: `cache_mode="refresh"` never serves - it fetches and stores + the result, which the next plain call is then served from.""" + server, fetches = _counting_tools_server() + async with Client(server) as client: + await client.list_tools() + refreshed = await client.list_tools(cache_mode="refresh") + served = await client.list_tools() + assert fetches == [None, None] + assert [tool.name for tool in refreshed.tools] == ["t1"] + assert served == refreshed + + +async def test_bypass_fetches_without_reading_or_writing_the_cache() -> None: + """The page's claim: `cache_mode="bypass"` makes the round trip without touching + the cache - it neither serves the warm entry nor replaces it.""" + server, fetches = _counting_tools_server() + async with Client(server) as client: + first = await client.list_tools() + bypassed = await client.list_tools(cache_mode="bypass") + served = await client.list_tools() + assert fetches == [None, None] + assert [tool.name for tool in bypassed.tools] == ["t1"] + assert served == first + + +async def test_an_expired_entry_is_not_revived_when_the_refetch_fails() -> None: + """The page's claim (SDK ruling, no stale-if-error): once the entry has expired, + a failing refetch propagates the server's error instead of serving the expired + entry.""" + now = 1_000_000.0 + fetches: list[None] = [] + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(None) + if len(fetches) > 1: + raise MCPError(code=INTERNAL_ERROR, message="backend down") + return ListToolsResult(tools=[Tool(name="t0", input_schema={"type": "object"})]) + + server = Server("flaky", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}) + async with Client(server, cache=CacheConfig(clock=lambda: now)) as client: + await client.list_tools() + now += 60.0 # the entry is now expired, so the next call must refetch + with pytest.raises(MCPError) as exc: + await client.list_tools() + assert exc.value.code == INTERNAL_ERROR + assert len(fetches) == 2 + + +async def test_two_concurrent_identical_calls_are_two_fetches() -> None: + """The page's claim (SDK ruling, no coalescing): a second identical call issued + while the first fetch is still in flight makes its own fetch instead of waiting + on the first. The handler barrier releases only once both calls are inside it, + so the test passes only if the two fetches were genuinely concurrent.""" + both_fetching = anyio.Event() + fetches: list[None] = [] + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(None) + if len(fetches) == 2: + both_fetching.set() + with anyio.fail_after(5): + await both_fetching.wait() + return ListToolsResult(tools=[Tool(name="t", input_schema={"type": "object"})]) + + server = Server("concurrent", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}) + async with Client(server) as client: + async with anyio.create_task_group() as tg: + tg.start_soon(client.list_tools) + tg.start_soon(client.list_tools) + assert len(fetches) == 2 + + +async def test_a_session_tier_call_always_makes_the_round_trip() -> None: + """The page's claim: the cache lives on the `Client` verbs - `client.session` + calls bypass it even when a fresh entry is sitting in the store.""" + server, fetches = _counting_tools_server() + async with Client(server) as client: + await client.list_tools() + await client.session.list_tools() + assert fetches == [None, None] + + +async def test_a_custom_store_requires_a_partition() -> None: + """The page's claim: passing your own store without a `partition` raises at + construction.""" + with pytest.raises(ValueError) as exc: + CacheConfig(store=InMemoryResponseCacheStore()) + assert str(exc.value) == snapshot("a custom store requires an explicit partition") + + +async def test_a_custom_store_with_an_in_process_server_requires_target_id() -> None: + """The page's claim: with no URL to derive a server identity from, a custom store + needs `CacheConfig.target_id` - and construction says so.""" + server, _ = _counting_tools_server() + with pytest.raises(ValueError) as exc: + Client(server, cache=CacheConfig(store=InMemoryResponseCacheStore(), partition="user-1")) + assert str(exc.value) == snapshot( + "a custom cache store requires CacheConfig.target_id when the server is not a URL: in-process servers " + "and Transport instances get a random per-client identity, so their entries in a shared store could " + "never be served to another client" + ) async def test_the_wire_presence_check_the_page_recommends_works() -> None: """The page's claim: `"ttl_ms" in result.model_fields_set` distinguishes a server that sent the field from one that said nothing (model defaults).""" - async with Client(tutorial003.mcp) as client: + async with Client(tutorial001.mcp) as client: tools = await client.list_tools() assert "ttl_ms" in tools.model_fields_set From c689c20a1bf73fdfa589b3eb900de1f708eab0a3 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:44:56 +0000 Subject: [PATCH 08/18] Add end-to-end hardening tests for the client response cache --- tests/client/test_client_caching.py | 447 +++++++++++++++++++++++++++- 1 file changed, 442 insertions(+), 5 deletions(-) diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 739fcf0bd..9660edb55 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -3,18 +3,21 @@ identity guard, the notification-eviction message-handler wrap, the lazy negotiated-version supplier, and the five cacheable verbs (the `_cached_fetch` choke point, the `read_resource` sibling, and the tools/list absorption seam). -The coordinator's own behavior is covered in `test_caching.py`. +Cross-cutting end-to-end hardening (eviction completeness, partition isolation, +deep-copy isolation, era-gate injection, write/eviction races) lives at the +bottom. The coordinator's own behavior is covered in `test_caching.py`. """ import hashlib import json import time -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Awaitable, Callable from contextlib import asynccontextmanager from types import TracebackType -from typing import Any +from typing import Any, Literal import anyio +import anyio.lowlevel import httpx import mcp_types as types import pytest @@ -34,7 +37,9 @@ ListResourceTemplatesResult, ListToolsResult, ReadResourceResult, + ResourceListChangedNotification, ResourceUpdatedNotification, + ResourceUpdatedNotificationParams, ServerCapabilities, ServerNotification, TextContent, @@ -383,7 +388,9 @@ def __call__(self) -> float: return self.now -def _varying_tools_server(*, ttl_ms: int = 60_000) -> tuple[Server[Any], list[str | None]]: +def _varying_tools_server( + *, ttl_ms: int = 60_000, scope: Literal["public", "private"] = "private" +) -> tuple[Server[Any], list[str | None]]: """In-process server whose every tools/list fetch returns a distinct tool name `t`, so a served entry is distinguishable from a refetch by payload, not just by handler count. The fetch log records each request's cursor.""" @@ -393,7 +400,9 @@ async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestPa fetches.append(params.cursor if params is not None else None) return ListToolsResult(tools=[Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"})]) - server = Server("varying", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=ttl_ms)}) + server = Server( + "varying", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=ttl_ms, scope=scope)} + ) return server, fetches @@ -1124,3 +1133,431 @@ async def scripted_transport() -> AsyncIterator[TransportStreams]: assert discover.ttl_ms == 0 assert methods_seen == ["server/discover"] + + +# --- Hardening e2e --- + + +def _versioned_read_server(*, ttl_ms: int = 60_000) -> tuple[Server[Any], list[str]]: + """In-process server whose every resources/read fetch returns a distinct payload + `v`, so a served entry is distinguishable from a refetch. The read log records + each request's uri.""" + reads: list[str] = [] + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + reads.append(params.uri) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{len(reads)}")], ttl_ms=ttl_ms) + + return Server("versioned-reads", on_read_resource=read), reads + + +def _resource_text(result: ReadResourceResult) -> str: + content = result.contents[0] + assert isinstance(content, TextResourceContents) + return content.text + + +async def test_each_notification_evicts_exactly_its_entries_end_to_end() -> None: + """Spec SHOULD (notifications invalidate) plus its negative space, end to end. + + Steps: + 1. Prime all four list verbs and two resource reads; a second round of calls + is served entirely from the cache. + 2. tools/list_changed -> only tools/list refetches. + 3. resources/list_changed -> resources/list AND resources/templates/list + refetch; tools, prompts, and both reads stay served. + 4. resources/updated(X) -> only the X read refetches; Y and every list stay + served. + + Runs on a legacy session (the in-process transport that delivers standalone + notifications) with `default_ttl_ms` providing the cached entries. + """ + uri_x, uri_y = "memo://x", "memo://y" + fetched: list[str] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetched.append("tools/list") + return ListToolsResult(tools=[Tool(name="notify", input_schema={"type": "object"})]) + + async def list_prompts(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListPromptsResult: + fetched.append("prompts/list") + return ListPromptsResult(prompts=[]) + + async def list_resources( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourcesResult: + fetched.append("resources/list") + return ListResourcesResult(resources=[]) + + async def list_templates( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourceTemplatesResult: + fetched.append("resources/templates/list") + return ListResourceTemplatesResult(resource_templates=[]) + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + fetched.append(f"resources/read {params.uri}") + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text="body")]) + + async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult: + assert params.name == "notify" + kind = (params.arguments or {})["kind"] + if kind == "tools": + await ctx.session.send_tool_list_changed() + elif kind == "resources": + await ctx.session.send_resource_list_changed() + else: + assert kind == "updated-x" + await ctx.session.send_resource_updated(uri_x) + return CallToolResult(content=[TextContent(text="sent")]) + + server = Server( + "notifier", + on_list_tools=list_tools, + on_list_prompts=list_prompts, + on_list_resources=list_resources, + on_list_resource_templates=list_templates, + on_read_resource=read, + on_call_tool=call_tool, + ) + + delivered: list[IncomingMessage] = [] + eviction_done = [anyio.Event() for _ in range(3)] + + async def on_message(message: IncomingMessage) -> None: + # The wrap evicts before delegating, so each event implies its eviction completed. + delivered.append(message) + eviction_done[len(delivered) - 1].set() + + client = Client( + server, + mode="legacy", + cache=CacheConfig(default_ttl_ms=60_000, clock=_ManualClock()), + message_handler=on_message, + ) + + async with client: + + async def served_round() -> list[str]: + """Call every cacheable verb once; return the calls that reached the server.""" + before = len(fetched) + await client.list_tools() + await client.list_prompts() + await client.list_resources() + await client.list_resource_templates() + await client.read_resource(uri_x) + await client.read_resource(uri_y) + return fetched[before:] + + assert await served_round() == [ + "tools/list", + "prompts/list", + "resources/list", + "resources/templates/list", + f"resources/read {uri_x}", + f"resources/read {uri_y}", + ] + assert await served_round() == [] # everything primed and served + + await client.call_tool("notify", {"kind": "tools"}) + with anyio.fail_after(5): + await eviction_done[0].wait() + assert await served_round() == ["tools/list"] + + await client.call_tool("notify", {"kind": "resources"}) + with anyio.fail_after(5): + await eviction_done[1].wait() + assert await served_round() == ["resources/list", "resources/templates/list"] + + await client.call_tool("notify", {"kind": "updated-x"}) + with anyio.fail_after(5): + await eviction_done[2].wait() + assert await served_round() == [f"resources/read {uri_x}"] + + assert delivered == [ + ToolListChangedNotification(), + ResourceListChangedNotification(), + ResourceUpdatedNotification(params=ResourceUpdatedNotificationParams(uri=uri_x)), + ] + + +async def test_private_entries_never_cross_partitions_between_clients_sharing_a_store() -> None: + """Spec MUST (`"private"` never crosses authorization contexts), end to end: two + clients sharing one store and server identity but holding different partitions + each fetch their own listing - the second client is never served the first's + private-scoped entry.""" + server, fetches = _varying_tools_server() + store = InMemoryResponseCacheStore() + + def config(partition: str) -> CacheConfig: + return CacheConfig(store=store, partition=partition, target_id="svc", clock=_ManualClock()) + + async with Client(server, cache=config("tenant-a")) as tenant_a: + assert _tool_names(await tenant_a.list_tools()) == ["t0"] + async with Client(server, cache=config("tenant-b")) as tenant_b: + assert _tool_names(await tenant_b.list_tools()) == ["t1"] # fetched, not tenant-a's entry + + assert fetches == [None, None] + + +async def test_a_server_stamped_public_entry_does_not_cross_partitions_by_default() -> None: + """SDK security default (deviates from the ts SDK), end to end: even when the + server stamps `cacheScope: "public"`, the default config keys the public arm by + partition - a same-partition client is served from the store, a different- + partition client fetches its own listing.""" + server, fetches = _varying_tools_server(scope="public") + store = InMemoryResponseCacheStore() + + def config(partition: str) -> CacheConfig: + return CacheConfig(store=store, partition=partition, target_id="svc", clock=_ManualClock()) + + async with Client(server, cache=config("tenant-a")) as tenant_a: + assert _tool_names(await tenant_a.list_tools()) == ["t0"] + async with Client(server, cache=config("tenant-a")) as same_partition: + assert _tool_names(await same_partition.list_tools()) == ["t0"] # served from the store + async with Client(server, cache=config("tenant-b")) as tenant_b: + assert _tool_names(await tenant_b.list_tools()) == ["t1"] # fetched + + assert fetches == [None, None] + + +async def test_share_public_serves_a_server_stamped_public_entry_across_partitions() -> None: + """SDK-defined opt-in, end to end: with `share_public=True` the public arm drops + the partition, so the second tenant's first list_tools is served from the first + tenant's server-asserted-public entry without a fetch.""" + server, fetches = _varying_tools_server(scope="public") + store = InMemoryResponseCacheStore() + + def config(partition: str) -> CacheConfig: + return CacheConfig(store=store, partition=partition, target_id="svc", share_public=True, clock=_ManualClock()) + + async with Client(server, cache=config("tenant-a")) as tenant_a: + assert _tool_names(await tenant_a.list_tools()) == ["t0"] + async with Client(server, cache=config("tenant-b")) as tenant_b: + assert _tool_names(await tenant_b.list_tools()) == ["t0"] # served across partitions + + assert fetches == [None] + + +async def test_same_partition_clients_share_read_entries_through_the_store() -> None: + """SDK-defined sharing, end to end: two clients with the same store, server + identity, and partition share `resources/read` entries - the second client's + first read is served from the store without invoking the handler. (The + tools/list case, including its absorbed derived state, is pinned by the + shared-store absorption tests above.)""" + server, reads = _versioned_read_server() + store = InMemoryResponseCacheStore() + + def config() -> CacheConfig: + return CacheConfig(store=store, partition="p", target_id="svc", clock=_ManualClock()) + + async with Client(server, cache=config()) as first: + first_result = await first.read_resource("memo://a") + async with Client(server, cache=config()) as second: + assert await second.read_resource("memo://a") == first_result + + assert reads == ["memo://a"] + + +async def test_mutating_returned_results_never_corrupts_the_cached_entry() -> None: + """SDK-defined deep-copy isolation, both directions, end to end: mutating the + result a verb returned (the very object the write deep-copied from) and mutating + a served hit (the object the read deep-copied out) both leave the stored entry + untouched - every later call serves the pristine listing from the single fetch.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + first = await client.list_tools() + first.tools[0].name = "tampered-after-fetch" + second = await client.list_tools() # cache hit, unaffected by the mutation + assert _tool_names(second) == ["t0"] + second.tools[0].name = "tampered-after-serve" + assert _tool_names(await client.list_tools()) == ["t0"] # still pristine + + assert fetches == [None] + + +async def test_a_legacy_peer_injecting_cache_hints_caches_nothing() -> None: + """SDK-defined era gate, end to end: `ttlMs`/`cacheScope` are 2026-07-28 + assertions, but a 2025 peer can still put the keys on the wire. On a legacy + session with the default config nothing is cached - the second list_tools + reaches the peer and the store stays empty on both arms. The peer is scripted + over raw streams because an SDK server strips the hint fields when serializing + for a 2025 session, so the injection is not expressible through the server API.""" + listings_served = 0 + + async def scripted_server(streams: MessageStream) -> None: + nonlocal listings_served + server_read, server_write = streams + async for message in server_read: + assert isinstance(message, SessionMessage) + frame = message.message + if isinstance(frame, types.JSONRPCNotification): + assert frame.method == "notifications/initialized" + continue + assert isinstance(frame, types.JSONRPCRequest) + if frame.method == "initialize": + result: dict[str, Any] = { + "protocolVersion": "2025-11-25", + "capabilities": {}, + "serverInfo": {"name": "legacy-injector", "version": "0.0.1"}, + } + else: + assert frame.method == "tools/list" + listings_served += 1 + result = {"tools": [], "ttlMs": 60_000, "cacheScope": "public"} + await server_write.send(SessionMessage(types.JSONRPCResponse(jsonrpc="2.0", id=frame.id, result=result))) + + @asynccontextmanager + async def scripted_transport() -> AsyncIterator[TransportStreams]: + async with ( + create_client_server_memory_streams() as ((client_read, client_write), server_streams), + anyio.create_task_group() as tg, + ): + tg.start_soon(scripted_server, server_streams) + yield client_read, client_write + tg.cancel_scope.cancel() + + with anyio.fail_after(5): + async with Client(scripted_transport(), mode="legacy", cache=CacheConfig(clock=_ManualClock())) as client: + await client.list_tools() + await client.list_tools() + store = _coordinator(client)._store + assert isinstance(store, InMemoryResponseCacheStore) + assert store._entries == {} # neither arm holds an entry + + assert listings_served == 2 + + +class _CancelOnSetStore(InMemoryResponseCacheStore): + """Store whose next `set` awaits a one-shot hook before committing, modelling an + async store whose commit a cancellation interrupts.""" + + def __init__(self) -> None: + super().__init__() + self.before_set: Callable[[], Awaitable[None]] | None = None + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + if self.before_set is not None: + hook, self.before_set = self.before_set, None + await hook() + await super().set(key, entry) + + +async def test_a_verb_cancelled_mid_write_leaves_no_stale_arm_pair() -> None: + """SDK-defined no-stale-pair invariant, end to end: a verb call cancelled while + its cache write is mid-set (after the opposite-arm delete) leaves at most one + entry for the key - here zero - so the superseded entry cannot be served. + + Steps: + 1. The first list_tools stores a public-scoped entry. + 2. A refresh call fetches a private-scoped result; its write deletes the + public arm first, then the store's `set` is cancelled before committing. + 3. Both arms are empty - never two entries answering for one key - and the + next call refetches. + """ + fetches: list[str | None] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + scope: Literal["public", "private"] = "public" if len(fetches) == 1 else "private" + tool = Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"}) + return ListToolsResult(tools=[tool], ttl_ms=60_000, cache_scope=scope) + + server = Server("scope-flip", on_list_tools=list_tools) + store = _CancelOnSetStore() + client = Client(server, cache=CacheConfig(store=store, partition="p", target_id="svc", clock=_ManualClock())) + + async with client: + assert _tool_names(await client.list_tools()) == ["t0"] + assert len(store._entries) == 1 # the public-arm entry + + with anyio.CancelScope() as scope: + + async def cancel_mid_commit() -> None: + scope.cancel() + await anyio.lowlevel.checkpoint() # the cancellation is delivered here, inside `set` + + store.before_set = cancel_mid_commit + await client.list_tools(cache_mode="refresh") + assert scope.cancelled_caught + + # The write deleted the opposite (public) arm before the cancelled set could + # commit: zero entries, and in particular not the stale pre-refresh one. + assert store._entries == {} + assert _tool_names(await client.list_tools()) == ["t2"] # nothing cached: refetched + + assert fetches == [None, None, None] + + +async def test_an_eviction_landing_mid_fetch_discards_that_fetchs_write() -> None: + """Spec-aligned race rule, end to end: a tools/list_changed notification that + arrives while the tools/list fetch it concerns is still in flight discards that + fetch's cache write - the store is empty after the call returns and the next + list_tools refetches (and then caches normally). The server emits the + notification mid-fetch and waits for the client-side eviction before responding + (the handler wrap delegates only after evicting), so the interleaving is + deterministic, not scheduler-dependent.""" + fetches: list[str | None] = [] + evicted = anyio.Event() + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetches.append(params.cursor if params is not None else None) + if len(fetches) == 1: + await ctx.session.send_tool_list_changed() + with anyio.fail_after(5): + await evicted.wait() + return ListToolsResult(tools=[Tool(name=f"t{len(fetches) - 1}", input_schema={"type": "object"})]) + + async def on_message(message: IncomingMessage) -> None: + assert isinstance(message, ToolListChangedNotification) # the only message this server emits + evicted.set() + + server = Server("racer", on_list_tools=list_tools) + client = Client( + server, + mode="legacy", + cache=CacheConfig(default_ttl_ms=60_000, clock=_ManualClock()), + message_handler=on_message, + ) + + async with client: + assert _tool_names(await client.list_tools()) == ["t0"] + # Empty proves the write was SKIPPED, not stored-then-evicted: the eviction + # completed strictly before the response (the handler waited for it) and the + # write runs strictly after - had it landed, the entry would still be here. + store = _coordinator(client)._store + assert isinstance(store, InMemoryResponseCacheStore) + assert store._entries == {} + assert _tool_names(await client.list_tools()) == ["t1"] # refetched... + assert _tool_names(await client.list_tools()) == ["t1"] # ...and that fetch cached normally + + assert fetches == [None, None] + + +async def test_read_resource_bypass_neither_serves_nor_disturbs_a_warm_entry() -> None: + """`cache_mode="bypass"` on `read_resource` fetches fresh without reading the + warm entry and without storing over it - the following plain read still serves + the original value. SDK-defined mode semantics (the list-verb counterpart is + pinned above).""" + server, reads = _versioned_read_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _resource_text(await client.read_resource("memo://a")) == "v1" + assert _resource_text(await client.read_resource("memo://a", cache_mode="bypass")) == "v2" + assert _resource_text(await client.read_resource("memo://a")) == "v1" # warm entry intact + + assert reads == ["memo://a", "memo://a"] + + +async def test_read_resource_refresh_refetches_and_restores() -> None: + """`cache_mode="refresh"` on `read_resource` skips the warm entry, fetches, and + re-stores: the following plain read serves the refreshed value.""" + server, reads = _versioned_read_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _resource_text(await client.read_resource("memo://a")) == "v1" + assert _resource_text(await client.read_resource("memo://a", cache_mode="refresh")) == "v2" + assert _resource_text(await client.read_resource("memo://a")) == "v2" # the refreshed value re-stored + + assert reads == ["memo://a", "memo://a"] From e3bb7121b08768bd7fda9f195655d4632a337c1e Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:53:53 +0000 Subject: [PATCH 09/18] Cover float negative ttlMs on the discover seam in the auto-mode test --- tests/client/test_client_caching.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 9660edb55..e07dbbed1 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -1089,12 +1089,14 @@ async def scripted_transport() -> AsyncIterator[TransportStreams]: assert listings_served == 2 # the clamped-to-zero ttl was never stored: the second call re-fetched -async def test_a_negative_discover_ttl_still_connects_modern_in_auto_mode() -> None: +@pytest.mark.parametrize("wire_ttl", [-5, -5.0]) +async def test_a_negative_discover_ttl_still_connects_modern_in_auto_mode(wire_ttl: int | float) -> None: """Spec SHOULD (2026-07-28 caching) — silent-downgrade regression: before the parse-seam clamp, a negative `ttlMs` on `server/discover` failed `DiscoverResult` validation inside the mode='auto' probe, which reads as "not modern evidence" and silently fell back to the legacy initialize handshake. Clamped, the probe adopts - the modern era and the result carries `ttl_ms == 0`.""" + the modern era and the result carries `ttl_ms == 0` — for float negatives too, + the same as the tools/list seam (both call the shared clamp).""" methods_seen: list[str] = [] async def scripted_server(streams: MessageStream) -> None: @@ -1111,7 +1113,7 @@ async def scripted_server(streams: MessageStream) -> None: "capabilities": {}, "serverInfo": {"name": "negative-ttl", "version": "0.0.1"}, "resultType": "complete", - "ttlMs": -5, + "ttlMs": wire_ttl, } await server_write.send(SessionMessage(types.JSONRPCResponse(jsonrpc="2.0", id=frame.id, result=result))) From b81b7dddb7e8402e1f89d056c96bdf16645bb45a Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:54:07 +0000 Subject: [PATCH 10/18] Document eviction timing, refetch policy, shared-store races, and the TTL cap --- docs/advanced/caching.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index 53ea0b89a..c8eadccc4 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -85,9 +85,13 @@ Cache keys also carry the **server's identity**: the URL string you dialed, with * **Continuation pages are never cached.** Only cursor-less calls participate. A continuation page rejected for an expired cursor does *evict* the cached listing — the listing changed under it. * **Multi-round-trip reads are never cached.** A `read_resource` seeded with `input_responses`/`request_state`, or one that resolves through input rounds, never enters the cache (a spec MUST). * **Notification eviction needs notifications.** Eviction is only as good as the transport's delivery — the modern in-process path (`Client(server)` with the default `mode="auto"`) does not deliver standalone notifications today. +* **Eviction is eventual, not instantaneous.** Wire-path notifications are dispatched from spawned tasks, so a call racing a notification's arrival may be served the pre-eviction entry once more; the window is bounded by dispatch latency, and the eviction still lands. * **No stale-if-error.** An expired entry is never served because the refetch failed; the error propagates. +* **No early re-fetch.** A stored entry is served until its TTL expires and the next call after that pays the round trip — nothing refreshes in the background. * **No coalescing.** Two concurrent identical calls are two fetches. -* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction — accepted, and bounded by the cache's 24-hour TTL cap. +* **No TTL beyond 24 hours.** A larger `ttlMs` — server-sent or configured — is clamped down on store (`mcp.client.caching.MAX_TTL_MS`), bounding how long any entry, however generously hinted, can be served. +* On a **shared store**, clients race each other. Each client drops its own write when an eviction overtook the fetch in flight, but a *co-tenant* client can still write back an entry that an eviction it never saw had removed; and that race bookkeeping is itself bounded — past 4096 tracked keys the oldest key's guard is dropped first. Both windows are accepted, and closed by the TTL cap above. +* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction — accepted, and likewise bounded by the TTL cap. ### Reading the hints yourself From efac31f989896f988961d6a2e63b7090caa573b9 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 17:08:46 +0000 Subject: [PATCH 11/18] Tighten response cache: session guard, identity and meta handling, refresh purge --- docs/advanced/caching.md | 8 +- src/mcp/client/__init__.py | 13 +++- src/mcp/client/caching.py | 29 ++++++- src/mcp/client/client.py | 35 +++++++-- tests/client/test_caching.py | 52 +++++++++++++ tests/client/test_client_caching.py | 116 ++++++++++++++++++++++++++++ 6 files changed, 241 insertions(+), 12 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index c8eadccc4..e797e72f5 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -49,9 +49,13 @@ Four calls, three fetches. The second call found a fresh entry and never reached * `"refresh"` never serves: it fetches and stores the result, replacing whatever was cached. * `"bypass"` makes the round trip without touching the cache at all — no read, no write. +One rule sits above `"use"`: **calls carrying `meta` always reach the server.** A request with `meta` set (a progress token, tracing fields) expects a wire request, so under `cache_mode="use"` it is treated as `"refresh"` — the cache read is skipped, and the fetched result still replaces the cached entry. `"bypass"` and an explicit `"refresh"` behave as they always do. + To turn caching off entirely, construct with `Client(server, cache=False)`: every call is a round trip again, and `cache_mode`, while still accepted, does nothing. -Scope is honored automatically too — `"private"` entries are keyed to the cache's *partition* (below); `"public"` ones may opt into wider sharing — and **notifications beat TTL**: a `list_changed` notification evicts the matching cached listing, and `resources/updated` evicts the cached read for its URI, however fresh they were. +Scope is honored automatically too — `"private"` entries are keyed to the cache's *partition* (below); `"public"` ones may opt into wider sharing — and **notifications beat TTL** for the exact entries they name: a `list_changed` notification evicts the matching cached listing, and `resources/updated` evicts the cached read stored under exactly its URI, however fresh they were. + +One caveat on `resources/updated`: eviction is exact-URI only. The store contract has no enumerate or scan operation (same as the reference TypeScript implementation), so a notification carrying a *sub*-resource URI does not evict a cached read of its parent. If your server signals sub-resources this way, refetch the parent with `cache_mode="refresh"`. ### Configuring it: `CacheConfig` @@ -61,7 +65,7 @@ from mcp.client import CacheConfig client = Client("https://api.example.com/mcp", cache=CacheConfig(default_ttl_ms=5_000)) ``` -* `store` — where entries live. The default is a fresh in-memory store per client; pass your own `ResponseCacheStore` implementation (Redis-backed, say) to share a cache across clients or processes. A custom store **requires** an explicit `partition`. +* `store` — where entries live. The default is a fresh in-memory store per client; pass your own `ResponseCacheStore` implementation (Redis-backed, say) to share a cache across clients or processes — the contract types (`ResponseCacheStore`, `CacheKey`, `CacheEntry`, and the default `InMemoryResponseCacheStore`) are importable from `mcp.client`. A lookup may issue up to two sequential store `get`s (the private arm, then the public one), so size a remote store's latency expectations accordingly. A custom store **requires** an explicit `partition`. * `partition` — the authorization-context label that keeps one principal's `"private"` entries from being served to another within a shared store. * `target_id` — explicit server identity, for custom transports and in-process servers (below). * `default_ttl_ms` — TTL applied to results that carry no `ttlMs` hint. The default `0` leaves hint-less results uncached. diff --git a/src/mcp/client/__init__.py b/src/mcp/client/__init__.py index 30df52737..b7823f5ef 100644 --- a/src/mcp/client/__init__.py +++ b/src/mcp/client/__init__.py @@ -2,17 +2,28 @@ from mcp.client._input_required import InputRequiredRoundsExceededError from mcp.client._transport import Transport -from mcp.client.caching import CacheConfig, CacheMode +from mcp.client.caching import ( + CacheConfig, + CacheEntry, + CacheKey, + CacheMode, + InMemoryResponseCacheStore, + ResponseCacheStore, +) from mcp.client.client import Client from mcp.client.context import ClientRequestContext from mcp.client.session import ClientSession __all__ = [ "CacheConfig", + "CacheEntry", + "CacheKey", "CacheMode", "Client", "ClientRequestContext", "ClientSession", + "InMemoryResponseCacheStore", "InputRequiredRoundsExceededError", + "ResponseCacheStore", "Transport", ] diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py index 63052989b..f46bd0c04 100644 --- a/src/mcp/client/caching.py +++ b/src/mcp/client/caching.py @@ -103,6 +103,10 @@ class ResponseCacheStore(Protocol): no rehydration hook to rebuild it from serialized data. An entry that comes back in the wrong shape (e.g. with a plain-dict value) degrades to a cache miss, never an error. + + A cache lookup may issue up to two sequential `get` calls - the private + arm, then the public one - so remote-store implementers should size + latency expectations accordingly. """ async def get(self, key: CacheKey) -> CacheEntry | None: ... @@ -119,8 +123,8 @@ class CacheConfig: """Configuration for a `Client`'s response cache. Raises: - ValueError: If a custom `store` is given without a `partition`, or if - `default_ttl_ms` is negative. + ValueError: If a custom `store` is given without a `partition`, if + `target_id` is an empty string, or if `default_ttl_ms` is negative. """ store: ResponseCacheStore | None = None @@ -146,7 +150,9 @@ class CacheConfig: target_id: str | None = None """Explicit server-identity override, for custom transports and proxies - where the SDK cannot derive an identity from a server URL.""" + where the SDK cannot derive an identity from a server URL. Must be + non-empty when provided - an empty string would collapse distinct servers + onto one identity.""" default_ttl_ms: int = 0 """Time-to-live, in milliseconds, applied to results that carry no `ttlMs` @@ -172,6 +178,8 @@ class CacheConfig: def __post_init__(self) -> None: if self.store is not None and not self.partition: raise ValueError("a custom store requires an explicit partition") + if self.target_id == "": + raise ValueError("target_id must be a non-empty string or omitted") if self.default_ttl_ms < 0: raise ValueError(f"default_ttl_ms must be >= 0, got {self.default_ttl_ms}") @@ -200,7 +208,14 @@ async def get(self, key: CacheKey) -> CacheEntry | None: return self._entries.get(key) async def set(self, key: CacheKey, entry: CacheEntry) -> None: - if self._max_read_entries and key.method == "resources/read" and key not in self._entries: + if ( + self._max_read_entries + and key.method == "resources/read" + and key not in self._entries + # Strictly below the cap the read-key subset cannot be at the cap, + # so the scan only runs when an eviction is actually possible. + and len(self._entries) >= self._max_read_entries + ): # dict preserves insertion order and replacement keeps position, so # the dict itself is the FIFO ledger - no parallel structure to drift. read_keys = [k for k in self._entries if k.method == "resources/read"] @@ -337,6 +352,12 @@ async def write( # Opposite arm first: a failed (or cancelled) delete aborts before the # set, leaving a miss - never two arms answering for one key. if not await self._delete(opposite): + # The fetch superseded whatever the own arm holds, so it must not + # keep serving either: best-effort delete it too (shielded like the + # other cleanup deletes), degrading the key to a full miss - no + # stale pair AND no superseded entry. + with anyio.CancelScope(shield=True): + await self._delete(own) return entry = CacheEntry(value=result.model_copy(deep=True), scope=scope, expires_at=self._clock() + ttl_ms / 1000) try: diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index 2afd2dd53..01b00b74d 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -464,6 +464,7 @@ async def _cached_fetch( method: str, *, cursor: str | None, + meta: RequestParamsMeta | None, cache_mode: CacheMode, send: Callable[[], Awaitable[_CacheableT]], absorb: Callable[[_CacheableT], _CacheableT] | None = None, @@ -476,6 +477,15 @@ async def _cached_fetch( cache = self._response_cache if cache is None or cache_mode == "bypass": return await send() # no read, no write, no eviction side-effects + # Cache participation requires a live session: a closed (or never-entered) + # client raises the no-context RuntimeError on every verb, exactly as the + # verbs did before the cache existed - never serving stale entries. + _ = self.session + if meta is not None and cache_mode == "use": + # A call carrying meta (a progress token, tracing fields) expects a + # wire request, so it is never served from the cache; the fetched + # result still replaces the entry, the same as an explicit refresh. + cache_mode = "refresh" if cursor is not None: # Continuation pages never read or write the (cursor-less) entry, but an # expired-cursor rejection signals the listing changed since the entry was @@ -506,11 +516,13 @@ async def list_resources( ) -> ListResourcesResult: """List available resources from the server. - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); + calls carrying `meta` always reach the server. """ return await self._cached_fetch( "resources/list", cursor=cursor, + meta=meta, cache_mode=cache_mode, send=lambda: self.session.list_resources(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), ) @@ -524,11 +536,13 @@ async def list_resource_templates( ) -> ListResourceTemplatesResult: """List available resource templates from the server. - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); + calls carrying `meta` always reach the server. """ return await self._cached_fetch( "resources/templates/list", cursor=cursor, + meta=meta, cache_mode=cache_mode, send=lambda: self.session.list_resource_templates(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), ) @@ -554,7 +568,8 @@ async def read_resource( input_responses: Responses to seed the first call with (e.g. when resuming from a persisted `InputRequiredResult`). request_state: Opaque state to seed the first call with. - meta: Additional metadata for the request. + meta: Additional metadata for the request. Calls carrying `meta` + always reach the server. cache_mode: Adjusts the response cache's behavior for this call (see `CacheMode`). Seeded calls (either `input_responses` or `request_state` set) are resumptions of a multi-round-trip @@ -581,6 +596,12 @@ async def retry(r: InputResponses | None, s: str | None) -> ReadResourceResult | cache = None if seeded else self._response_cache if cache is None or cache_mode == "bypass": return await self._drive_input_required(await retry(input_responses, request_state), retry) + # Cache participation requires a live session: a closed (or never-entered) + # client raises the no-context RuntimeError here, never serving stale entries. + _ = self.session + if meta is not None and cache_mode == "use": + # Calls carrying meta always reach the server (mirrors `_cached_fetch`). + cache_mode = "refresh" if cache_mode == "use" and (hit := await cache.read("resources/read", uri)) is not None: # InputRequiredResult is never stored (only terminal first-round results # are written below), so a hit is always terminal and legitimately skips @@ -669,11 +690,13 @@ async def list_prompts( ) -> ListPromptsResult: """List available prompts from the server. - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); + calls carrying `meta` always reach the server. """ return await self._cached_fetch( "prompts/list", cursor=cursor, + meta=meta, cache_mode=cache_mode, send=lambda: self.session.list_prompts(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), ) @@ -767,11 +790,13 @@ async def list_tools( ) -> ListToolsResult: """List available tools from the server. - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`). + `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); + calls carrying `meta` always reach the server. """ return await self._cached_fetch( "tools/list", cursor=cursor, + meta=meta, cache_mode=cache_mode, send=lambda: self.session.list_tools(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), # A cache hit skips session.list_tools, so the session re-absorbs the diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py index d48a7ed5e..c6933aa8f 100644 --- a/tests/client/test_caching.py +++ b/tests/client/test_caching.py @@ -229,6 +229,15 @@ def test_a_custom_store_with_an_explicit_partition_constructs() -> None: assert config.partition == "token-subject-1" +def test_an_empty_target_id_is_rejected_at_construction() -> None: + """SDK-defined guard: an explicit empty `target_id` would hash to the one + shared `sha256("")` identity, collapsing distinct servers onto it - + rejected at construction; omit the field (None) to derive an identity.""" + with pytest.raises(ValueError) as exc: + CacheConfig(target_id="") + assert str(exc.value) == snapshot("target_id must be a non-empty string or omitted") + + def test_a_negative_default_ttl_is_rejected_at_construction() -> None: """SDK-defined guard: a negative configured TTL is a programming error, rejected at construction (negative `ttlMs` from the wire is tolerated as 0 @@ -457,6 +466,30 @@ async def clear(self) -> None: raise NotImplementedError +class _ArmDeleteFailingStore: + """In-memory store whose `delete` raises only for keys on the given arm, + modelling a write whose opposite-arm cleanup fails while everything else + works. A write hitting that failure never reaches `set`.""" + + def __init__(self, failing_arm: str) -> None: + self.inner = InMemoryResponseCacheStore() + self.failing_arm = failing_arm + + async def get(self, key: CacheKey) -> CacheEntry | None: + return await self.inner.get(key) + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + raise NotImplementedError + + async def delete(self, key: CacheKey) -> None: + if key.partition == self.failing_arm: + raise RuntimeError("store delete failed") + await self.inner.delete(key) + + async def clear(self) -> None: + raise NotImplementedError + + class _RehydratingStore: """Models a persistent store whose `get` returns what its deserializer produced - possibly not the shape `set` received.""" @@ -832,6 +865,25 @@ async def test_a_raising_opposite_arm_delete_aborts_the_write() -> None: assert await store.inner.get(CacheKey("tools/list", "", _public_arm())) is None +async def test_a_failed_opposite_arm_delete_degrades_the_key_to_a_full_miss() -> None: + """SDK error discipline: when only the opposite-arm delete fails, the write + cannot set its own arm (two arms might answer) - but the warm own-arm + entry was superseded by the fetch, so it is best-effort deleted too: both + arms read as misses, and the write itself never raises.""" + store = _ArmDeleteFailingStore(failing_arm=_public_arm()) + cache = _coordinator(store) + await store.inner.set( + CacheKey("tools/list", "", _private_arm()), + CacheEntry(value=_wire_result(), scope="private", expires_at=2_000_000.0), + ) + assert await cache.read("tools/list", "") is not None # the warm own-arm entry + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.inner.get(CacheKey("tools/list", "", _public_arm())) is None + assert await cache.read("tools/list", "") is None + + async def test_a_raising_store_set_caches_nothing_and_does_not_raise() -> None: """SDK error discipline: a `set` raise is logged and swallowed - the fetch already succeeded, the result just is not cached.""" diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index e07dbbed1..139f6a2e3 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -644,6 +644,47 @@ async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestPa assert fetches == [None, None, None] +async def test_a_list_call_carrying_meta_is_fetched_and_replaces_the_warm_entry() -> None: + """SDK-defined: a call carrying `meta` (a progress token, tracing fields) + expects a wire request, so under the default `cache_mode="use"` it behaves + as a refresh - the warm entry is not served, the handler runs, and the + fresh result replaces the entry for later meta-less calls.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["t0"] + assert _tool_names(await client.list_tools()) == ["t0"] # warm, meta-less: served + assert _tool_names(await client.list_tools(meta={"progress_token": "tok"})) == ["t1"] # meta: fetched + assert _tool_names(await client.list_tools()) == ["t1"] # the fresh result replaced the entry + + assert fetches == [None, None] + + +async def test_a_read_resource_carrying_meta_is_fetched_and_replaces_the_warm_entry() -> None: + """`read_resource` counterpart of the meta rule: a read carrying `meta` is + never served from the warm entry, and its fetched result re-stores.""" + reads: list[str] = [] + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + reads.append(params.uri) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{len(reads)}")], ttl_ms=60_000) + + server = Server("versioned-reads", on_read_resource=read) + + def text(result: ReadResourceResult) -> str: + content = result.contents[0] + assert isinstance(content, TextResourceContents) + return content.text + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert text(await client.read_resource("memo://a")) == "v1" + assert text(await client.read_resource("memo://a")) == "v1" # warm, meta-less: served + assert text(await client.read_resource("memo://a", meta={"progress_token": "tok"})) == "v2" # meta: fetched + assert text(await client.read_resource("memo://a")) == "v2" # the fresh result replaced the entry + + assert reads == ["memo://a", "memo://a"] + + async def test_cache_mode_is_inert_when_caching_is_disabled() -> None: """With `cache=False` the verbs accept `cache_mode` but every call goes to the server — no reads, no writes, no eviction machinery. SDK-defined off switch.""" @@ -1563,3 +1604,78 @@ async def test_read_resource_refresh_refetches_and_restores() -> None: assert _resource_text(await client.read_resource("memo://a")) == "v2" # the refreshed value re-stored assert reads == ["memo://a", "memo://a"] + + +async def test_a_closed_client_raises_on_every_cacheable_verb_instead_of_serving_the_cache() -> None: + """SDK-defined: cache participation requires a live session. After the client + exits its context, each of the five cacheable verbs raises the same no-context + RuntimeError it raised before the cache existed - the still-warm entries are + never served, and nothing reaches the server.""" + fetched: list[str] = [] + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + fetched.append("tools/list") + return ListToolsResult(tools=[]) + + async def list_prompts(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListPromptsResult: + fetched.append("prompts/list") + return ListPromptsResult(prompts=[]) + + async def list_resources( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourcesResult: + fetched.append("resources/list") + return ListResourcesResult(resources=[]) + + async def list_templates( + ctx: ServerRequestContext, params: types.PaginatedRequestParams | None + ) -> ListResourceTemplatesResult: + fetched.append("resources/templates/list") + return ListResourceTemplatesResult(resource_templates=[]) + + async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: + fetched.append(f"resources/read {params.uri}") + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text="body")]) + + hint = CacheHint(ttl_ms=60_000) + server = Server( + "warm", + on_list_tools=list_tools, + on_list_prompts=list_prompts, + on_list_resources=list_resources, + on_list_resource_templates=list_templates, + on_read_resource=read, + cache_hints={ + "tools/list": hint, + "prompts/list": hint, + "resources/list": hint, + "resources/templates/list": hint, + "resources/read": hint, + }, + ) + + client = Client(server, cache=CacheConfig(clock=_ManualClock())) + async with client: + await client.list_tools() + await client.list_prompts() + await client.list_resources() + await client.list_resource_templates() + await client.read_resource("memo://a") + # The entries are warm: a repeat round is served entirely from the cache. + await client.list_tools() + await client.read_resource("memo://a") + assert len(fetched) == 5 + + with pytest.raises(RuntimeError) as exc_info: + await client.list_tools() + assert str(exc_info.value) == snapshot("Client must be used within an async context manager") + with pytest.raises(RuntimeError): + await client.list_prompts() + with pytest.raises(RuntimeError): + await client.list_resources() + with pytest.raises(RuntimeError): + await client.list_resource_templates() + with pytest.raises(RuntimeError): + await client.read_resource("memo://a") + + assert len(fetched) == 5 # nothing was served from the cache and nothing reached the server From d1cebd1b1ea84bb11201002b0c2e8021dfd39263 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 17:20:23 +0000 Subject: [PATCH 12/18] Apply configured cache hints to mapping handler results and fix unknown-key error formatting --- src/mcp/server/caching.py | 4 +- src/mcp/server/runner.py | 20 ++++++--- tests/docs_src/test_caching.py | 2 +- tests/server/test_caching.py | 81 +++++++++++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 9 deletions(-) diff --git a/src/mcp/server/caching.py b/src/mcp/server/caching.py index f8dcb558b..e20379a1d 100644 --- a/src/mcp/server/caching.py +++ b/src/mcp/server/caching.py @@ -73,7 +73,9 @@ def validate_cache_hints(cache_hints: Mapping[Any, Any] | None) -> dict[str, Cac """ if cache_hints is None: return {} - unknown = sorted(method for method in cache_hints if method not in CACHEABLE_METHODS) + # Keys come from an untyped mapping, so format via repr: a non-string key + # must produce this ValueError too, not a TypeError from sorted/join. + unknown = sorted(repr(method) for method in cache_hints if method not in CACHEABLE_METHODS) if unknown: raise ValueError(f"cache_hints keys must be cacheable methods (see CacheableMethod); got: {', '.join(unknown)}") validated: dict[str, CacheHint] = {} diff --git a/src/mcp/server/runner.py b/src/mcp/server/runner.py index 4c25a8a5b..60ad6309d 100644 --- a/src/mcp/server/runner.py +++ b/src/mcp/server/runner.py @@ -198,19 +198,27 @@ async def _inner(ctx: ServerRequestContext[LifespanT, Any]) -> HandlerResult: if isinstance(result, ErrorData): # Raise inside the chain so middleware observes the failure. raise MCPError.from_error_data(result) - # Fill cache hints on the typed result, before the serialize sieve + # Fill cache hints on the handler result, before the serialize sieve # decides whether the negotiated version carries the fields at all. - # `input_required` interim results are not `CacheableResult` models, - # so the MRTR carve-out (no hints on them) holds by shape. - if isinstance(result, CacheableResult) and (hint := self.server.cache_hints.get(method)) is not None: - result = apply_cache_hint(result, hint) + # `input_required` interim results are not `CacheableResult` models + # and mapping results declaring that shape are skipped explicitly, + # so the MRTR carve-out (no hints on them) holds on both paths. + if (hint := self.server.cache_hints.get(method)) is not None: + if isinstance(result, CacheableResult): + result = apply_cache_hint(result, hint) + elif isinstance(result, Mapping) and result.get("resultType") != "input_required": + # Same per-field precedence as `apply_cache_hint`: wire keys the + # handler put in the mapping win. Fresh dict, so a mapping the + # handler may still hold an alias to is never mutated. + result = {"ttlMs": hint.ttl_ms, "cacheScope": hint.scope, **result} # Dump and serialize inside the chain so the OpenTelemetry span (the # outermost middleware) records a failing handler return shape too. return self._serialize(method, version, result) call = self._compose_server_middleware(_inner) # `_inner` already produced the wire dict; a middleware that short-circuited - # without `call_next` is trusted to return its own well-formed result. + # without `call_next` is trusted to return its own well-formed result, + # configured cache hints included. result = _dump_result(await call(ctx)) if method == "initialize": # Commit only on chain success, so a middleware veto leaves no state. diff --git a/tests/docs_src/test_caching.py b/tests/docs_src/test_caching.py index 74973ebf0..db9d0a7dd 100644 --- a/tests/docs_src/test_caching.py +++ b/tests/docs_src/test_caching.py @@ -48,7 +48,7 @@ async def test_a_non_cacheable_method_is_rejected_at_construction() -> None: with pytest.raises(ValueError) as exc: MCPServer("Weather", cache_hints=cast(Any, {"tools/call": CacheHint(ttl_ms=1_000)})) assert str(exc.value) == snapshot( - "cache_hints keys must be cacheable methods (see CacheableMethod); got: tools/call" + "cache_hints keys must be cacheable methods (see CacheableMethod); got: 'tools/call'" ) diff --git a/tests/server/test_caching.py b/tests/server/test_caching.py index a540fe037..6552b0b26 100644 --- a/tests/server/test_caching.py +++ b/tests/server/test_caching.py @@ -6,9 +6,11 @@ import pytest from inline_snapshot import snapshot from mcp_types import ( + InputRequiredResult, ListResourcesResult, ListToolsResult, PaginatedRequestParams, + ReadResourceRequestParams, Resource, Tool, ) @@ -68,7 +70,7 @@ def test_a_non_cacheable_method_in_cache_hints_is_rejected_at_server_constructio with pytest.raises(ValueError) as exc: Server("srv", cache_hints=cast(Any, {"tools/call": CacheHint()})) assert str(exc.value) == snapshot( - "cache_hints keys must be cacheable methods (see CacheableMethod); got: tools/call" + "cache_hints keys must be cacheable methods (see CacheableMethod); got: 'tools/call'" ) @@ -81,6 +83,83 @@ def test_a_non_cache_hint_value_is_rejected_at_server_construction() -> None: assert str(exc.value) == snapshot("cache_hints['tools/list'] must be a CacheHint, got dict") +def test_a_non_string_cache_hints_key_is_rejected_with_the_unknown_key_error() -> None: + """SDK-defined: `cache_hints` is deliberately loose for config-shaped callers, + so a non-string key takes the same unknown-key ValueError as a typo - not a + TypeError from formatting the message.""" + with pytest.raises(ValueError) as exc: + Server("srv", cache_hints=cast(Any, {42: CacheHint()})) + assert str(exc.value) == snapshot("cache_hints keys must be cacheable methods (see CacheableMethod); got: 42") + + +async def test_a_dict_returning_handler_takes_the_configured_hint() -> None: + """SDK-defined: the construction-time hint also stamps a handler that returns + a raw dict for a cacheable method, so the 2026-07-28 surface (where both + fields are required) accepts it and the wire carries the hint's values.""" + hint = CacheHint(ttl_ms=60_000, scope="public") + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: + return {"tools": [], "resultType": "complete"} + + server = Server("srv", cache_hints={"tools/list": hint}) + server.add_request_handler("tools/list", PaginatedRequestParams, list_tools) + async with Client(server) as client: + result = await client.list_tools() + assert result.ttl_ms == hint.ttl_ms + assert result.cache_scope == hint.scope + + +async def test_a_dict_provided_ttl_wins_and_the_hint_fills_only_the_missing_scope() -> None: + """SDK-defined precedence, dict path: wire keys the handler put in the dict + win, mirroring `model_fields_set` semantics on the model path - the hint + fills only the absent `cacheScope`.""" + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: + return {"tools": [], "resultType": "complete", "ttlMs": 25} + + server = Server("srv", cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}) + server.add_request_handler("tools/list", PaginatedRequestParams, list_tools) + async with Client(server) as client: + result = await client.list_tools() + assert result.ttl_ms == 25 + assert result.cache_scope == "public" + + +async def test_a_dict_returning_handler_leaks_no_hint_fields_to_a_2025_session() -> None: + """SDK-defined era gate: the stamp runs version-independently, but the 2025 + serialize sieve still strips `ttlMs`/`cacheScope` from a dict result - the + client model parses them as unset, not as wire values.""" + + async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: + return {"tools": []} + + server = Server("srv", cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}) + server.add_request_handler("tools/list", PaginatedRequestParams, list_tools) + async with Client(server, mode="legacy") as client: + result = await client.list_tools() + assert "ttl_ms" not in result.model_fields_set + assert "cache_scope" not in result.model_fields_set + + +async def test_an_input_required_shaped_dict_is_never_stamped() -> None: + """Spec-mandated MRTR carve-out: an interim `input_required` result carries no + cache hints even on a hinted cacheable method. The runner's stamp skips a + dict declaring that shape (and the serialize surface would drop stray hint + keys regardless), so the full dump is exactly what the handler returned.""" + + async def read_resource(ctx: ServerRequestContext[Any], params: ReadResourceRequestParams) -> dict[str, Any]: + return {"resultType": "input_required", "requestState": "s1"} + + server = Server("srv", cache_hints={"resources/read": CacheHint(ttl_ms=60_000, scope="public")}) + server.add_request_handler("resources/read", ReadResourceRequestParams, read_resource) + async with Client(server) as client: + result = await client.session.read_resource("res://x", allow_input_required=True) + assert isinstance(result, InputRequiredResult) + assert result.model_dump(by_alias=True, exclude_none=True) == snapshot( + {"resultType": "input_required", "requestState": "s1"} + ) + + async def test_server_cache_hints_reach_the_wire_for_a_bare_handler_result() -> None: """SDK-defined: a lowlevel handler that never thinks about caching emits the server-wide hint configured at construction.""" From 21a779a418a4bce69d0ade11f277581163c4e0b9 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 17:52:45 +0000 Subject: [PATCH 13/18] Trim comments and docstrings --- src/mcp-types/mcp_types/methods.py | 7 +- src/mcp/client/caching.py | 239 +++++------------- src/mcp/client/client.py | 111 +++------ src/mcp/client/session.py | 21 +- src/mcp/server/caching.py | 3 +- src/mcp/server/runner.py | 11 +- tests/client/test_caching.py | 228 ++++-------------- tests/client/test_client_caching.py | 359 +++++++--------------------- tests/client/test_session.py | 6 +- tests/docs_src/test_caching.py | 39 +-- tests/server/test_caching.py | 21 +- tests/types/test_methods.py | 5 +- 12 files changed, 261 insertions(+), 789 deletions(-) diff --git a/src/mcp-types/mcp_types/methods.py b/src/mcp-types/mcp_types/methods.py index 0c7cd04ad..985968b1d 100644 --- a/src/mcp-types/mcp_types/methods.py +++ b/src/mcp-types/mcp_types/methods.py @@ -416,17 +416,14 @@ "server/discover", "tools/list", ] -"""The methods whose results carry `ttlMs`/`cacheScope`. Closed set: the spec -defines caching hints on exactly these six. Hand-written because a Literal -cannot be computed at runtime; tests weld it to `CACHEABLE_METHODS`.""" +"""Methods whose results carry `ttlMs`/`cacheScope`; hand-written Literal, welded to `CACHEABLE_METHODS` by tests.""" CACHEABLE_METHODS: Final[frozenset[str]] = frozenset( method for method, row in MONOLITH_RESULTS.items() if any(issubclass(arm, types.CacheableResult) for arm in (get_args(row) if isinstance(row, UnionType) else (row,))) ) -"""Runtime mirror of `CacheableMethod`, derived from `MONOLITH_RESULTS`: a -method is cacheable iff its result row has a `CacheableResult` arm.""" +"""Runtime mirror of `CacheableMethod`, derived from `MONOLITH_RESULTS`.""" # --- Parse functions --- diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py index f46bd0c04..9d31851db 100644 --- a/src/mcp/client/caching.py +++ b/src/mcp/client/caching.py @@ -1,11 +1,4 @@ -"""Client-side response caching primitives (SEP-2549, protocol revision 2026-07-28). - -Results for the cacheable methods carry `ttlMs`/`cacheScope` freshness hints; -the client honors them through a response cache configured with `CacheConfig`. -This module defines the configuration, the store contract (`ResponseCacheStore` -keyed by `CacheKey`, holding `CacheEntry` values), and the default in-process -store. Wiring into `Client` lives in `mcp.client.client`. -""" +"""Client-side response caching primitives (SEP-2549, protocol revision 2026-07-28).""" from __future__ import annotations @@ -40,31 +33,21 @@ logger = logging.getLogger(__name__) CacheMode = Literal["use", "refresh", "bypass"] -"""Per-call cache behavior: `"use"` serves fresh entries and stores fetches, -`"refresh"` skips the read but stores the fetch, `"bypass"` touches the cache -not at all.""" +"""Per-call cache behavior: `"use"` serves and stores, `"refresh"` stores +without serving, `"bypass"` skips the cache entirely.""" MAX_TTL_MS: Final[int] = 24 * 60 * 60 * 1000 -"""Upper bound on any entry's time-to-live (24 hours, in milliseconds): a -server-provided or configured `ttlMs` above it is clamped down, bounding how -long a stale entry can be served.""" +"""Cap on any entry's time-to-live (24 hours, in milliseconds); larger `ttlMs` values are clamped down.""" @dataclass(frozen=True, slots=True) class CacheKey: - """Identity of one cached response. - - Stores MUST compare keys as the `(method, params_key, partition)` field - tuple - never by flattening the fields into one delimited string, which - lets crafted values collide across field boundaries. - """ + """Identity of one cached response; compare as the field tuple, never a flattened string (collision hazard).""" method: str - """The request method, e.g. `"tools/list"`.""" params_key: str = "" - """Result-affecting params discriminator: the uri for `resources/read`, - `""` for the list methods (only cursor-less calls participate in caching).""" + """Result-affecting params discriminator: the uri for `resources/read`, `""` for the list methods.""" partition: str = "" """Coordinator-computed arm identifier; opaque to stores.""" @@ -75,13 +58,10 @@ class CacheEntry: """One cached response with its freshness and sharing metadata.""" value: Any - """The cached result. The SDK deep-copies it on write and on serve, so a - store may hold the object as-is.""" + """The cached result; the SDK deep-copies on write and on serve, so a store may hold it as-is.""" scope: Literal["public", "private"] - """The server-asserted `cacheScope`: whether the entry may be shared - across authorization contexts (`"public"`) or only reused within the one - that produced it (`"private"`).""" + """Server-asserted `cacheScope`: only `"public"` entries may be shared across authorization contexts.""" expires_at: float | None """Epoch seconds after which the entry is stale; `None` is never fresh.""" @@ -90,23 +70,12 @@ class CacheEntry: class ResponseCacheStore(Protocol): """Storage contract for the client response cache. - Keys MUST be compared as the `(method, params_key, partition)` field tuple - - no delimiter-based flattening (collision hazard). Each `Client` calls its - store from a single event loop; cross-loop sharing and per-operation - atomicity are the implementation's responsibility. Operations may raise; - the SDK degrades per its error discipline (a failing store never fails a - successful fetch). - - A store that serializes entries (any cross-process store must) is - responsible for round-tripping them: `get` returns the entry as stored, - with `value` still the result model object `set` received - the SDK has - no rehydration hook to rebuild it from serialized data. An entry that - comes back in the wrong shape (e.g. with a plain-dict value) degrades to - a cache miss, never an error. - - A cache lookup may issue up to two sequential `get` calls - the private - arm, then the public one - so remote-store implementers should size - latency expectations accordingly. + Each `Client` calls its store from a single event loop; per-operation + atomicity is the implementation's responsibility. Operations may raise - + the SDK degrades to a miss rather than failing the call. A serializing + store must round-trip `value` back to the result model object (a + wrong-shape entry is a miss, never an error). A lookup may issue two + sequential `get` calls (private arm, then public). """ async def get(self, key: CacheKey) -> CacheEntry | None: ... @@ -123,57 +92,35 @@ class CacheConfig: """Configuration for a `Client`'s response cache. Raises: - ValueError: If a custom `store` is given without a `partition`, if - `target_id` is an empty string, or if `default_ttl_ms` is negative. + ValueError: On a custom `store` without `partition`, an empty `target_id`, or a negative `default_ttl_ms`. """ store: ResponseCacheStore | None = None - """Backing store; `None` means a store-per-client `InMemoryResponseCacheStore`. + """Backing store; `None` means a per-client `InMemoryResponseCacheStore`. A custom store requires an explicit `partition`.""" partition: str = "" """Authorization-context identifier isolating `"private"`-scoped entries - within a shared store. - - Derive it from a verified credential (e.g. a validated token's subject) - - never from request-supplied data, and never from the server URL (server - identity is a separate key axis). The SDK is a library with no - authentication of its own: whoever constructs the `CacheConfig` - the - deployment, not the tenant - is the trust anchor. Multi-tenant gateways - mint one `CacheConfig` per authenticated principal. - - The partition is fixed for the `Client`'s lifetime: if the connection's - authorization context changes mid-session (a re-authentication as a - different principal), the cache does not follow - construct a new - `Client` for the new principal. - """ + within a shared store. Derive it from a verified credential - never from + request-supplied data or the server URL. Fixed for the `Client`'s + lifetime: construct a new `Client` when the principal changes.""" target_id: str | None = None - """Explicit server-identity override, for custom transports and proxies - where the SDK cannot derive an identity from a server URL. Must be - non-empty when provided - an empty string would collapse distinct servers - onto one identity.""" + """Server-identity override for custom transports and proxies where the + SDK cannot derive one from a URL; must be non-empty when provided.""" default_ttl_ms: int = 0 - """Time-to-live, in milliseconds, applied to results that carry no `ttlMs` - hint. The default `0` leaves hint-less results uncached.""" + """TTL in milliseconds for results carrying no `ttlMs` hint; the default `0` leaves them uncached.""" clock: Callable[[], float] = time.time - """Wall-clock source returning epoch seconds; injectable so expiry tests - need no sleeping.""" + """Wall-clock source returning epoch seconds; injectable for expiry tests.""" share_public: bool = False - """Serve entries the server marked `cacheScope: "public"` across every - partition using the store, instead of only within the partition that - fetched them. - - WARNING: enabling this trusts the server's public classification for every - principal sharing the store - a server that stamps `"public"` on - per-tenant data (by bug or by malice) leaks one tenant's response to the - others. It is deliberately constructor-level only, set once by the - operator: the per-call `cache_mode` kwarg can narrow caching but can never - widen sharing. - """ + """Serve server-marked `"public"` entries across every partition in the store. + + WARNING: this trusts the server's `"public"` classification for every + principal sharing the store - a mislabeled response leaks across tenants. + Constructor-level only: the per-call `cache_mode` can never widen sharing.""" def __post_init__(self) -> None: if self.store is not None and not self.partition: @@ -187,12 +134,9 @@ def __post_init__(self) -> None: class InMemoryResponseCacheStore: """Default in-process `ResponseCacheStore`. - Method bodies are synchronous (no awaits), so each operation completes - without an event-loop checkpoint and concurrent tasks can never observe a - torn write. Memory is bounded: the methods other than `resources/read` - form a small closed set of keys, and `max_read_entries` caps the - `resources/read` entries (one per uri) - storing a new read key at the cap - evicts the oldest read key, first-in-first-out. `0` disables the cap. + Method bodies are synchronous, so concurrent tasks never observe a torn + write. Non-read methods form a small closed key set; `max_read_entries` + caps the `resources/read` keys, FIFO-evicting at the cap (`0` disables it). Raises: ValueError: If `max_read_entries` is negative. @@ -212,12 +156,10 @@ async def set(self, key: CacheKey, entry: CacheEntry) -> None: self._max_read_entries and key.method == "resources/read" and key not in self._entries - # Strictly below the cap the read-key subset cannot be at the cap, - # so the scan only runs when an eviction is actually possible. + # Total size below the cap implies the read subset is below it too - skip the scan. and len(self._entries) >= self._max_read_entries ): - # dict preserves insertion order and replacement keeps position, so - # the dict itself is the FIFO ledger - no parallel structure to drift. + # Insertion order (replacement keeps position) makes the dict itself the FIFO ledger. read_keys = [k for k in self._entries if k.method == "resources/read"] if len(read_keys) >= self._max_read_entries: del self._entries[read_keys[0]] @@ -231,19 +173,11 @@ async def clear(self) -> None: _GENERATION_MAP_CAP: Final[int] = 4096 -"""Cap on the coordinator's eviction-race bookkeeping (the generation map). -At the cap, registering a new key drops the oldest one, degrading the dropped -key's race guard to the accepted co-tenant class.""" +"""Cap on the generation map; at the cap the oldest key's eviction-race guard is dropped (FIFO).""" class ClientResponseCache: - """Coordinator between the `Client` verbs and a `ResponseCacheStore`. - - Owns key construction (the scope arms), the era gate, TTL/scope - resolution, eviction, and the store error discipline. `Client` mints one - per instance; the caching verbs and the notification wrap are the only - callers. - """ + """Coordinates the `Client` caching verbs with a `ResponseCacheStore`: keys, era gate, TTL/scope, eviction.""" def __init__( self, @@ -261,41 +195,24 @@ def __init__( self._default_ttl_ms = default_ttl_ms self._clock = clock self._negotiated_version = negotiated_version - # Arms are JSON arrays so crafted arm_id/partition values cannot - # collide across field boundaries. Private entries always carry the - # partition; public entries do too unless the operator opted into - # fleet-wide sharing of server-asserted-public results. + # JSON arrays so crafted arm_id/partition values cannot collide across field boundaries. self._private_arm = json.dumps(["private", arm_id, partition]) self._public_arm = json.dumps(["public", arm_id] if share_public else ["public", arm_id, partition]) - # The generation map is the sole membership structure: a key is - # race-guarded iff registered here. + # A key is eviction-race-guarded iff registered here. self._generations: dict[tuple[str, str], int] = {} self._generation_map_cap = generation_map_cap - # Operation kinds ("get"/"set"/"delete") that warned and have not - # succeeded since; membership suppresses repeat warnings for the kind. self._warned_store_ops: set[str] = set() async def read(self, method: str, params_key: str) -> CacheableResult | None: - """Serve a fresh entry for the key, or `None`. - - Called only under `cache_mode="use"`; returns a deep copy so a served - result never aliases the stored one. - """ - # One boundary around the whole read path: a raising store `get` and - # an entry rehydrated into the wrong shape (which raises only at the - # freshness check or the copy) are the same "get" failure class - - # warned once per burst, re-armed only by a fully successful read. + """Serve a fresh entry for the key, or `None`; the served result is a deep copy.""" + # A wrong-shape entry raises as late as the copy, so the boundary wraps the whole read path. try: entry = await self._get_fresh(CacheKey(method, params_key, self._private_arm)) if entry is None: - # Stale counts as a miss for fall-through too: after a server - # scope flip (private -> public), a stale private leftover - # must not shadow a fresh public entry. + # After a scope flip, a stale private entry must not shadow a fresh public one. entry = await self._get_fresh(CacheKey(method, params_key, self._public_arm)) if entry is not None and entry.scope != "public": - # The arm routes, the scope verifies: never serve an entry the - # server scoped "private" out of the shared arm, however it - # got there. + # Never serve an entry the server scoped "private" out of the shared arm. entry = None copied: CacheableResult | None = None if entry is None else entry.value.model_copy(deep=True) except Exception: # boundary around user store code: any read-path failure is a miss, never a failed call @@ -311,14 +228,11 @@ async def _get_fresh(self, key: CacheKey) -> CacheEntry | None: return entry def capture(self, method: str, params_key: str) -> int: - """Register the key for eviction-race detection, before the fetch is - sent; the matching `write` passes the returned generation back.""" + """Register the key for eviction-race detection before the fetch; `write` takes the returned generation.""" gen_key = (method, params_key) if gen_key not in self._generations: if len(self._generations) >= self._generation_map_cap: - # FIFO overflow: drop the oldest key, degrading its race guard - # to the accepted co-tenant class (an eviction racing that - # key's in-flight fetch is no longer detected at write time). + # FIFO overflow: the dropped key's race guard degrades to the accepted co-tenant class. del self._generations[next(iter(self._generations))] self._generations[gen_key] = 0 return self._generations[gen_key] @@ -340,22 +254,15 @@ async def write( public_key = CacheKey(method, params_key, self._public_arm) if ttl_ms <= 0: if mode == "refresh": - # The refetch superseded whatever was cached; purge the warm - # entry so it cannot be served again. Shielded: a cancellation - # delivered between the two deletes would leave the opposite - # arm warm for its full TTL. + # The refetch superseded the warm entry; shielded so a cancellation cannot leave one arm warm. with anyio.CancelScope(shield=True): await self._delete(private_key) await self._delete(public_key) return own, opposite = (public_key, private_key) if scope == "public" else (private_key, public_key) - # Opposite arm first: a failed (or cancelled) delete aborts before the - # set, leaving a miss - never two arms answering for one key. + # Opposite arm first: a failed delete aborts before the set - never two arms answering for one key. if not await self._delete(opposite): - # The fetch superseded whatever the own arm holds, so it must not - # keep serving either: best-effort delete it too (shielded like the - # other cleanup deletes), degrading the key to a full miss - no - # stale pair AND no superseded entry. + # The own arm's entry is superseded too: shielded best-effort delete, degrading to a full miss. with anyio.CancelScope(shield=True): await self._delete(own) return @@ -363,35 +270,26 @@ async def write( try: await self._set(own, entry) finally: - # An eviction can land while an async store's set is committing, - # and the set can commit even when its await is cancelled (the - # request may already be on the wire) - so the re-check runs on - # every exit, and the compensating delete is shielded so the - # pending cancellation cannot abort it and resurrect the evicted - # entry for its full TTL. (A delete after a set that raised is an - # idempotent no-op.) + # An eviction can land while the set commits - even when the await + # is cancelled - so re-check on every exit; the delete is shielded + # so the pending cancellation cannot resurrect the evicted entry. if self._generation_moved(gen_key, gen_at_capture): with anyio.CancelScope(shield=True): await self._delete(own) async def evict_method(self, method: str) -> None: - """Evict the method's cursor-less entry (notification- or - cursor-expiry-driven).""" + """Evict the method's cursor-less entry.""" await self.evict_key(method, "") async def evict_key(self, method: str, params_key: str) -> None: """Evict one key from both arms.""" gen_key = (method, params_key) - # Bump before deleting so an in-flight fetch that captured earlier - # cannot write the just-evicted entry back. Only registered keys bump - # (arbitrary notification uris must not grow the map); the store - # deletes always run - a persistent store may hold warm entries this - # coordinator never captured. + # Bump first so an in-flight fetch cannot write the evicted entry back. + # Unregistered keys skip the bump (uris must not grow the map) but not + # the deletes - a persistent store may hold uncaptured entries. if gen_key in self._generations: self._generations[gen_key] += 1 - # Shielded: eviction runs in spawned notification tasks that die with - # the session - a cancellation between the two deletes would leave one - # arm serving the evicted entry until its TTL. + # Shielded: a cancellation between the deletes would leave one arm serving the evicted entry. with anyio.CancelScope(shield=True): await self._delete(CacheKey(method, params_key, self._private_arm)) await self._delete(CacheKey(method, params_key, self._public_arm)) @@ -399,10 +297,8 @@ async def evict_key(self, method: str, params_key: str) -> None: async def evict_for_notification(self, notification: ServerNotification) -> None: """Map a server notification to the entries it makes stale. - Wire-path notifications are dispatched from spawned tasks, so eviction - is eventual relative to in-flight responses: the generation bump - closes the write-back race, while a read racing the notification may - briefly serve the pre-eviction entry (accepted, latency-bounded). + Eviction is eventual (spawned-task dispatch): the generation bump closes + the write-back race; a racing read may briefly serve the old entry. """ match notification: case ToolListChangedNotification(): @@ -419,16 +315,12 @@ async def evict_for_notification(self, notification: ServerNotification) -> None pass def _resolve(self, result: CacheableResult) -> tuple[int, Literal["public", "private"]]: - # Hints count only on modern sessions: a legacy peer can also put - # `ttlMs`/`cacheScope` keys on the wire (the 2025 surfaces validate - # and discard unknown keys, so wire presence still reaches - # `model_fields_set`) - wire presence is not a peer-era signal. + # A legacy peer can also put `ttlMs`/`cacheScope` keys on the wire, so + # wire presence is not a peer-era signal - hints count only when modern. modern = self._negotiated_version() in MODERN_PROTOCOL_VERSIONS if modern and "ttl_ms" in result.model_fields_set: - # An explicit `ttlMs: 0` stays 0 (never overridden by the - # default), and negatives are unconstructible here - the model - # enforces ge=0 and the parse seam floors negative wire values - - # so only the cap applies. + # An explicit `ttlMs: 0` stays 0, and negatives are unconstructible + # upstream (model ge=0, parse-seam floor) - only the cap applies. ttl_ms = result.ttl_ms else: ttl_ms = self._default_ttl_ms @@ -436,9 +328,7 @@ def _resolve(self, result: CacheableResult) -> tuple[int, Literal["public", "pri return min(ttl_ms, MAX_TTL_MS), scope def _generation_moved(self, gen_key: tuple[str, str], gen_at_capture: int) -> bool: - # A key FIFO-dropped from the map can no longer be checked; the guard - # fails open (the accepted co-tenant race class) rather than - # discarding the fetch. + # A FIFO-dropped key fails open (the accepted co-tenant race) rather than discarding the fetch. return self._generations.get(gen_key, gen_at_capture) != gen_at_capture async def _set(self, key: CacheKey, entry: CacheEntry) -> bool: @@ -460,11 +350,8 @@ async def _delete(self, key: CacheKey) -> bool: return True def _warn_store_failure(self, kind: Literal["get", "set", "delete"]) -> None: - # One warning per failure burst, tracked per operation kind: armed by - # the kind's first failure, re-armed only when that same kind succeeds. - # A dead store warns once, not once per request - and a store where - # only `set` is broken warns once too, instead of its healthy deletes - # re-arming the warning every write cycle. + # One warning per failure burst, per op kind; re-armed only when that + # same kind succeeds, so a healthy delete cannot re-arm a broken set. if kind not in self._warned_store_ops: self._warned_store_ops.add(kind) logger.warning("Response cache store operation failed; continuing without the cache", exc_info=True) diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index 01b00b74d..311213a5b 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -130,9 +130,7 @@ def _connected(value: _T | None) -> _T: def _strip_userinfo(url: str) -> str: """Drop any userinfo from the URL's authority component; byte-exact otherwise. - Cache identity must never over-normalize (case-folding or query rewriting could - merge distinct servers, e.g. `?tenant=a` vs `?tenant=b`), and credentials must - never enter cache-key material — userinfo removal is the single permitted rewrite. + Credentials must not enter cache-key material; any further normalization could merge distinct servers. """ parts = urlsplit(url) if "@" not in parts.netloc: @@ -141,13 +139,7 @@ def _strip_userinfo(url: str) -> str: def _evicting_message_handler(cache: ClientResponseCache, user_handler: MessageHandlerFnT | None) -> MessageHandlerFnT: - """Wrap the session message handler with cache eviction on server notifications. - - Eviction runs before delegation, inside its own boundary, so a cache fault can - never suppress delivery. Every item — notification, `RequestResponder`, or - transport `Exception` — then reaches the user's handler; with none supplied, the - wrapper performs the same bare checkpoint `ClientSession` installs by default. - """ + """Wrap the session message handler with cache eviction on server notifications.""" async def handler( message: RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception, @@ -276,15 +268,10 @@ async def main(): """Client-side response caching for the SEP-2549 cacheable methods (2026-07-28). `None` (the default) honors server `ttlMs`/`cacheScope` hints with a per-client - in-memory store; results carrying no hints are not cached. Pass a `CacheConfig` - to customize (shared store, partition, default TTL), or `False` to disable - caching entirely. The cacheable verbs (`list_tools`, `list_prompts`, - `list_resources`, `list_resource_templates`, `read_resource`) take a per-call - `cache_mode` to narrow caching for one call; with `cache=False` it is inert. - - Construction raises `ValueError` for a `CacheConfig` with a custom `store` when - no server identity can be derived (an in-process server or a `Transport` - instance) — set `CacheConfig.target_id` to name the server.""" + in-memory store; pass a `CacheConfig` to customize, or `False` to disable. The + cacheable verbs take a per-call `cache_mode` (see `CacheMode`); calls carrying + `meta` always reach the server. A `CacheConfig` with a custom `store` requires + `target_id` when the server is not a URL (no identity can be derived).""" _entered: bool = field(init=False, default=False) _session: ClientSession | None = field(init=False, default=None) @@ -315,10 +302,7 @@ def __post_init__(self) -> None: if self.cache is not False: config = self.cache if self.cache is not None else CacheConfig() - # Server identity, in resolution order: explicit override, server URL - # (userinfo stripped, byte-exact otherwise), per-Client random. Only the - # hash below leaves this scope — the raw identity may carry credentials - # in its query string and must never be logged or stored. + # Only the hash below leaves this scope - the raw identity may carry credentials; never log or store it. target_id = config.target_id if target_id is None and isinstance(self.server, str): target_id = _strip_userinfo(self.server) @@ -337,8 +321,7 @@ def __post_init__(self) -> None: default_ttl_ms=config.default_ttl_ms, clock=config.clock, share_public=config.share_public, - # Lazy: the era is unknown until __aenter__'s handshake, and the - # session is unpublished outside the context manager. + # Lazy: the negotiated version is unknown until __aenter__'s handshake. negotiated_version=lambda: self._session.protocol_version if self._session is not None else None, ) @@ -471,25 +454,18 @@ async def _cached_fetch( ) -> _CacheableT: """Serve one of the four list verbs through the response cache. - `send` performs the fetch via the session; `absorb` (tools/list only) - re-applies session-side derived state to a served cache hit. + `absorb` (tools/list only) re-applies session-side derived state to a served cache hit. """ cache = self._response_cache if cache is None or cache_mode == "bypass": - return await send() # no read, no write, no eviction side-effects - # Cache participation requires a live session: a closed (or never-entered) - # client raises the no-context RuntimeError on every verb, exactly as the - # verbs did before the cache existed - never serving stale entries. + return await send() + # A closed (or never-entered) client must raise, never serve cached entries. _ = self.session if meta is not None and cache_mode == "use": - # A call carrying meta (a progress token, tracing fields) expects a - # wire request, so it is never served from the cache; the fetched - # result still replaces the entry, the same as an explicit refresh. + # meta (a progress token, tracing fields) expects a wire request; fetch and replace the entry. cache_mode = "refresh" if cursor is not None: - # Continuation pages never read or write the (cursor-less) entry, but an - # expired-cursor rejection signals the listing changed since the entry was - # fetched, so it is evicted (spec SHOULD; over-eviction is harmless). + # Continuation pages skip the cache, but an expired cursor means the listing changed (spec SHOULD evict). try: return await send() except MCPError as e: @@ -497,9 +473,7 @@ async def _cached_fetch( await cache.evict_method(method) raise if cache_mode == "use" and (hit := await cache.read(method, "")) is not None: - # The store key carries the method, so the entry under it has `send`'s - # result type. The hit is already a private deep copy of the stored - # value, so absorption may mutate it freely. + # The hit is a private deep copy, so absorption may mutate it freely. served = cast(_CacheableT, hit) return served if absorb is None else absorb(served) gen = cache.capture(method, "") @@ -514,11 +488,7 @@ async def list_resources( meta: RequestParamsMeta | None = None, cache_mode: CacheMode = "use", ) -> ListResourcesResult: - """List available resources from the server. - - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); - calls carrying `meta` always reach the server. - """ + """List available resources from the server.""" return await self._cached_fetch( "resources/list", cursor=cursor, @@ -534,11 +504,7 @@ async def list_resource_templates( meta: RequestParamsMeta | None = None, cache_mode: CacheMode = "use", ) -> ListResourceTemplatesResult: - """List available resource templates from the server. - - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); - calls carrying `meta` always reach the server. - """ + """List available resource templates from the server.""" return await self._cached_fetch( "resources/templates/list", cursor=cursor, @@ -568,13 +534,9 @@ async def read_resource( input_responses: Responses to seed the first call with (e.g. when resuming from a persisted `InputRequiredResult`). request_state: Opaque state to seed the first call with. - meta: Additional metadata for the request. Calls carrying `meta` - always reach the server. - cache_mode: Adjusts the response cache's behavior for this call - (see `CacheMode`). Seeded calls (either `input_responses` or - `request_state` set) are resumptions of a multi-round-trip - read and ignore it entirely: no cache read, no write, no - refresh purge. + meta: Additional metadata for the request. + cache_mode: Cache behavior for this call (see `CacheMode`); seeded + calls (`input_responses` or `request_state` set) ignore it. Returns: The resource content. @@ -589,36 +551,28 @@ async def retry(r: InputResponses | None, s: str | None) -> ReadResourceResult | uri, input_responses=r, request_state=s, meta=meta, allow_input_required=True ) - # Results of requests carrying inputResponses or requestState must never be - # cached (spec MUST), and a seeded call exists to resume a specific exchange - - # serving it from the cache would skip the resumption. + # Seeded calls resume a specific exchange and must never be cached (spec MUST). seeded = input_responses is not None or request_state is not None cache = None if seeded else self._response_cache if cache is None or cache_mode == "bypass": return await self._drive_input_required(await retry(input_responses, request_state), retry) - # Cache participation requires a live session: a closed (or never-entered) - # client raises the no-context RuntimeError here, never serving stale entries. + # A closed (or never-entered) client must raise, never serve cached entries. _ = self.session if meta is not None and cache_mode == "use": # Calls carrying meta always reach the server (mirrors `_cached_fetch`). cache_mode = "refresh" if cache_mode == "use" and (hit := await cache.read("resources/read", uri)) is not None: - # InputRequiredResult is never stored (only terminal first-round results - # are written below), so a hit is always terminal and legitimately skips - # the driver. + # Only terminal first-round results are stored, so a hit legitimately skips the driver. return cast(ReadResourceResult, hit) gen = cache.capture("resources/read", uri) first = await retry(None, None) if not isinstance(first, InputRequiredResult): await cache.write("resources/read", uri, first, gen, cache_mode) elif cache_mode == "refresh": - # An input_required resolution can never be stored, but the explicit - # refresh still superseded whatever was cached: purge the warm entry - # so it cannot be served again (the same supersession rule as a - # refreshed ttl<=0 result in `ClientResponseCache.write`). + # The refresh superseded whatever was cached, but an input_required resolution + # cannot be stored: purge the warm entry so it cannot be served again. await cache.evict_key("resources/read", uri) - # A terminal result reached through driver rounds is never cached: the rounds - # carried inputResponses (the same spec MUST as the seeded skip above). + # Driver rounds carry inputResponses, so a terminal result reached through them is never cached (spec MUST). return await self._drive_input_required(first, retry) async def subscribe_resource(self, uri: str, *, meta: RequestParamsMeta | None = None) -> EmptyResult: @@ -688,11 +642,7 @@ async def list_prompts( meta: RequestParamsMeta | None = None, cache_mode: CacheMode = "use", ) -> ListPromptsResult: - """List available prompts from the server. - - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); - calls carrying `meta` always reach the server. - """ + """List available prompts from the server.""" return await self._cached_fetch( "prompts/list", cursor=cursor, @@ -788,11 +738,7 @@ async def list_tools( meta: RequestParamsMeta | None = None, cache_mode: CacheMode = "use", ) -> ListToolsResult: - """List available tools from the server. - - `cache_mode` adjusts the response cache's behavior for this call (see `CacheMode`); - calls carrying `meta` always reach the server. - """ + """List available tools from the server.""" return await self._cached_fetch( "tools/list", cursor=cursor, @@ -800,8 +746,7 @@ async def list_tools( cache_mode=cache_mode, send=lambda: self.session.list_tools(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), # A cache hit skips session.list_tools, so the session re-absorbs the - # served listing to rebuild its derived per-tool state (header maps, - # output schemas) - idempotent on the already-filtered stored value. + # served listing to rebuild its derived per-tool state. absorb=self.session._absorb_tool_listing, # pyright: ignore[reportPrivateUsage] ) diff --git a/src/mcp/client/session.py b/src/mcp/client/session.py index 4998fadaa..c76eb8fc7 100644 --- a/src/mcp/client/session.py +++ b/src/mcp/client/session.py @@ -56,13 +56,7 @@ def _clamp_inbound_ttl(raw: dict[str, Any]) -> None: - """Floor a negative inbound `ttlMs` to 0, in place (2026-07-28 caching SHOULD). - - Runs before the surface validation, whose `ge=0` would otherwise fail the - whole call over one bad hint. Emit-side strictness is untouched — only a - misbehaving peer reaches this. Floats are floored too; bools are not numbers - here and are left for the validation to reject. - """ + """Floor a negative inbound `ttlMs` to 0 before `ge=0` validation fails the call (2026-07-28 caching SHOULD).""" ttl = raw.get("ttlMs") if isinstance(ttl, int | float) and not isinstance(ttl, bool) and ttl < 0: raw["ttlMs"] = 0 @@ -473,10 +467,7 @@ async def send_discover(self, version: str) -> dict[str, Any]: "headers": {MCP_PROTOCOL_VERSION_HEADER: version, MCP_METHOD_HEADER: data["method"]}, } raw = await self._dispatcher.send_raw_request(data["method"], data.get("params"), opts) - # Clamping here (not in the callers) covers both discover() and the - # mode='auto' probe — un-floored, a negative ttl fails DiscoverResult - # validation in the probe, which reads as "not modern evidence" and - # silently downgrades the connection to the legacy handshake. + # Un-floored, a negative ttl fails the mode='auto' probe's validation and silently downgrades the handshake. _clamp_inbound_ttl(raw) return raw @@ -918,13 +909,9 @@ async def list_tools(self, *, params: types.PaginatedRequestParams | None = None return self._absorb_tool_listing(result) def _absorb_tool_listing(self, result: types.ListToolsResult) -> types.ListToolsResult: - """Filter a tool listing per the 2026 x-mcp-header MUST and rebuild the derived - per-tool state (arg→header maps, output schemas) from it. + """Filter the listing per the 2026 x-mcp-header MUST and rebuild derived per-tool state, in place. - Idempotent, so the client response cache can re-absorb a served listing: stored - values are already post-filter, making the re-filter a no-op that rebuilds the - maps and schemas from the served value. `result` is mutated in place (the cache - only ever passes a private deep copy). + Idempotent: cached values are already post-filter, so the response cache can re-absorb a served listing. """ if self._negotiated_version in MODERN_PROTOCOL_VERSIONS: # 2026-07-28: clients MUST drop tools whose x-mcp-header annotations are invalid. diff --git a/src/mcp/server/caching.py b/src/mcp/server/caching.py index e20379a1d..5e9930315 100644 --- a/src/mcp/server/caching.py +++ b/src/mcp/server/caching.py @@ -73,8 +73,7 @@ def validate_cache_hints(cache_hints: Mapping[Any, Any] | None) -> dict[str, Cac """ if cache_hints is None: return {} - # Keys come from an untyped mapping, so format via repr: a non-string key - # must produce this ValueError too, not a TypeError from sorted/join. + # repr-format keys so a non-string key raises this ValueError, not a TypeError from sorted/join. unknown = sorted(repr(method) for method in cache_hints if method not in CACHEABLE_METHODS) if unknown: raise ValueError(f"cache_hints keys must be cacheable methods (see CacheableMethod); got: {', '.join(unknown)}") diff --git a/src/mcp/server/runner.py b/src/mcp/server/runner.py index 60ad6309d..6773fd4de 100644 --- a/src/mcp/server/runner.py +++ b/src/mcp/server/runner.py @@ -200,16 +200,12 @@ async def _inner(ctx: ServerRequestContext[LifespanT, Any]) -> HandlerResult: raise MCPError.from_error_data(result) # Fill cache hints on the handler result, before the serialize sieve # decides whether the negotiated version carries the fields at all. - # `input_required` interim results are not `CacheableResult` models - # and mapping results declaring that shape are skipped explicitly, - # so the MRTR carve-out (no hints on them) holds on both paths. + # MRTR carve-out: `input_required` interim results, typed or mapping, never get hints. if (hint := self.server.cache_hints.get(method)) is not None: if isinstance(result, CacheableResult): result = apply_cache_hint(result, hint) elif isinstance(result, Mapping) and result.get("resultType") != "input_required": - # Same per-field precedence as `apply_cache_hint`: wire keys the - # handler put in the mapping win. Fresh dict, so a mapping the - # handler may still hold an alias to is never mutated. + # Hint keys first so wire keys the handler set win, matching `apply_cache_hint` precedence. result = {"ttlMs": hint.ttl_ms, "cacheScope": hint.scope, **result} # Dump and serialize inside the chain so the OpenTelemetry span (the # outermost middleware) records a failing handler return shape too. @@ -217,8 +213,7 @@ async def _inner(ctx: ServerRequestContext[LifespanT, Any]) -> HandlerResult: call = self._compose_server_middleware(_inner) # `_inner` already produced the wire dict; a middleware that short-circuited - # without `call_next` is trusted to return its own well-formed result, - # configured cache hints included. + # without `call_next` is trusted to return its own well-formed result. result = _dump_result(await call(ctx)) if method == "initialize": # Commit only on chain success, so a middleware veto leaves no state. diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py index c6933aa8f..d4d67e0c5 100644 --- a/tests/client/test_caching.py +++ b/tests/client/test_caching.py @@ -1,13 +1,6 @@ -"""`mcp.client.caching`: the `CacheConfig` construction guards, the store -contract every `ResponseCacheStore` implementation must satisfy, the default -in-memory store's bounded `resources/read` FIFO, and the `ClientResponseCache` -coordinator (scope arms, era gate, TTL/scope resolution, eviction, store error -discipline). - -The store-contract tests are parametrized over `STORE_FACTORIES`; a -third-party store implementation can be run against the same contract by -adding its factory to the list (or copying the parametrization). -""" +"""Tests for `mcp.client.caching`. The store-contract tests are parametrized +over `STORE_FACTORIES`; a third-party store can be run against the same +contract by adding its factory.""" import json import logging @@ -64,8 +57,6 @@ def _read_key(uri: str) -> CacheKey: @store_contract async def test_a_set_entry_round_trips_through_get(make_store: Callable[[], ResponseCacheStore]) -> None: - """SDK-defined contract: `get` returns an entry equal to the one `set` - stored under the same three-field key.""" store = make_store() key = CacheKey("tools/list", "", "partition-1") entry = CacheEntry(value={"tools": []}, scope="public", expires_at=1700000000.0) @@ -75,7 +66,6 @@ async def test_a_set_entry_round_trips_through_get(make_store: Callable[[], Resp @store_contract async def test_get_misses_for_a_key_never_set(make_store: Callable[[], ResponseCacheStore]) -> None: - """SDK-defined contract: an unknown key is a miss (`None`), not an error.""" store = make_store() assert await store.get(CacheKey("tools/list")) is None @@ -84,9 +74,7 @@ async def test_get_misses_for_a_key_never_set(make_store: Callable[[], ResponseC async def test_keys_differing_in_only_one_field_do_not_collide( make_store: Callable[[], ResponseCacheStore], ) -> None: - """Spec-mandated: the cache key spans the method, the result-affecting - params, and the authorization context - a store collapsing any one field - would serve a response across method, params, or principal boundaries.""" + """Spec-mandated: collapsing any key field would serve responses across method, params, or principal boundaries.""" store = make_store() base = CacheKey("resources/read", "file:///a", "partition-1") keys = [ @@ -105,8 +93,6 @@ async def test_keys_differing_in_only_one_field_do_not_collide( async def test_swapped_params_key_and_partition_values_are_distinct_keys( make_store: Callable[[], ResponseCacheStore], ) -> None: - """SDK-defined contract: identical values in different field positions are - different keys - the fields are positional, not a bag of strings.""" store = make_store() await store.set(CacheKey("m", "a", "b"), _entry("params=a")) await store.set(CacheKey("m", "b", "a"), _entry("params=b")) @@ -118,9 +104,7 @@ async def test_swapped_params_key_and_partition_values_are_distinct_keys( async def test_keys_with_field_values_that_concatenate_identically_do_not_collide( make_store: Callable[[], ResponseCacheStore], ) -> None: - """SDK-defined contract: keys MUST be compared as the field tuple, so pairs - whose fields join to the same string under any delimiter (or none) stay - distinct - flattening would let crafted values collide across boundaries.""" + """Keys compare as the field tuple - flattening would let crafted values collide across boundaries.""" store = make_store() keys = [ CacheKey("a", "b.c", "p"), @@ -141,8 +125,6 @@ async def test_keys_with_field_values_that_concatenate_identically_do_not_collid @store_contract async def test_set_replaces_the_entry_for_an_existing_key(make_store: Callable[[], ResponseCacheStore]) -> None: - """SDK-defined contract: a second `set` under the same key overwrites; the - store holds at most one entry per key.""" store = make_store() key = CacheKey("tools/list") await store.set(key, _entry("first")) @@ -152,7 +134,6 @@ async def test_set_replaces_the_entry_for_an_existing_key(make_store: Callable[[ @store_contract async def test_delete_removes_only_the_given_key(make_store: Callable[[], ResponseCacheStore]) -> None: - """SDK-defined contract: `delete` is exact - sibling keys survive.""" store = make_store() doomed = CacheKey("tools/list", "", "partition-1") survivor = CacheKey("tools/list", "", "partition-2") @@ -165,8 +146,7 @@ async def test_delete_removes_only_the_given_key(make_store: Callable[[], Respon @store_contract async def test_delete_is_idempotent(make_store: Callable[[], ResponseCacheStore]) -> None: - """SDK-defined contract: deleting an absent key is a no-op, not an error - - the SDK issues unconditional deletes during eviction.""" + """The SDK issues unconditional deletes during eviction, so deleting an absent key must be a no-op.""" store = make_store() key = CacheKey("prompts/list") await store.delete(key) @@ -180,8 +160,6 @@ async def test_delete_is_idempotent(make_store: Callable[[], ResponseCacheStore] async def test_clear_removes_every_entry_across_methods_and_partitions( make_store: Callable[[], ResponseCacheStore], ) -> None: - """SDK-defined contract: `clear` empties the store wholesale - every - method, params_key, and partition.""" store = make_store() keys = [ CacheKey("tools/list", "", "partition-1"), @@ -199,9 +177,6 @@ async def test_clear_removes_every_entry_across_methods_and_partitions( def test_cache_config_defaults_construct_an_unshared_zero_ttl_config() -> None: - """SDK-defined defaults: in-memory store minted per client, empty - partition, no identity override, hint-less results uncached, wall clock, - and public-entry sharing OFF (sharing is an explicit operator opt-in).""" config = CacheConfig() assert config.store is None assert config.partition == "" @@ -212,17 +187,13 @@ def test_cache_config_defaults_construct_an_unshared_zero_ttl_config() -> None: def test_a_custom_store_without_a_partition_is_rejected_at_construction() -> None: - """SDK-defined guard: a custom store is shareable, so omitting the - authorization-context partition would let private entries cross - principals - rejected at `CacheConfig` construction, not on first use.""" + """A custom store is shareable, so a missing partition would let private entries cross principals.""" with pytest.raises(ValueError) as exc: CacheConfig(store=InMemoryResponseCacheStore()) assert str(exc.value) == snapshot("a custom store requires an explicit partition") def test_a_custom_store_with_an_explicit_partition_constructs() -> None: - """SDK-defined: the partition guard is satisfied by any non-empty - operator-supplied principal id.""" store = InMemoryResponseCacheStore() config = CacheConfig(store=store, partition="token-subject-1") assert config.store is store @@ -230,18 +201,14 @@ def test_a_custom_store_with_an_explicit_partition_constructs() -> None: def test_an_empty_target_id_is_rejected_at_construction() -> None: - """SDK-defined guard: an explicit empty `target_id` would hash to the one - shared `sha256("")` identity, collapsing distinct servers onto it - - rejected at construction; omit the field (None) to derive an identity.""" + """An empty target_id would collapse distinct servers onto the one shared sha256("") identity.""" with pytest.raises(ValueError) as exc: CacheConfig(target_id="") assert str(exc.value) == snapshot("target_id must be a non-empty string or omitted") def test_a_negative_default_ttl_is_rejected_at_construction() -> None: - """SDK-defined guard: a negative configured TTL is a programming error, - rejected at construction (negative `ttlMs` from the wire is tolerated as 0 - at the parse seam instead).""" + """A configured negative TTL is a programming error; negative wire ttlMs is tolerated as 0 at the parse seam.""" with pytest.raises(ValueError) as exc: CacheConfig(default_ttl_ms=-1) assert str(exc.value) == snapshot("default_ttl_ms must be >= 0, got -1") @@ -251,8 +218,6 @@ def test_a_negative_default_ttl_is_rejected_at_construction() -> None: async def test_a_new_read_key_at_the_cap_evicts_the_oldest_read_key() -> None: - """SDK-defined bound: `resources/read` keys are unbounded in principle (one - per uri), so storing a new one at the cap drops the oldest, FIFO.""" store = InMemoryResponseCacheStore(max_read_entries=2) await store.set(_read_key("file:///a"), _entry("a")) await store.set(_read_key("file:///b"), _entry("b")) @@ -263,9 +228,7 @@ async def test_a_new_read_key_at_the_cap_evicts_the_oldest_read_key() -> None: async def test_replacing_a_read_key_at_the_cap_neither_evicts_nor_refreshes_its_age() -> None: - """SDK-defined: replacement is not growth (no double-count, nothing - evicted) and does not renew the key's position - eviction order is - first-insertion order (FIFO), not recency (LRU).""" + """Eviction order is first-insertion order (FIFO), not recency (LRU).""" store = InMemoryResponseCacheStore(max_read_entries=2) await store.set(_read_key("file:///a"), _entry("a")) await store.set(_read_key("file:///b"), _entry("b")) @@ -278,8 +241,7 @@ async def test_replacing_a_read_key_at_the_cap_neither_evicts_nor_refreshes_its_ async def test_only_read_keys_count_toward_the_cap_and_only_read_keys_are_evicted() -> None: - """SDK-defined: the non-read cacheable methods are a small closed key set - - they neither consume cap slots nor ever get cap-evicted.""" + """The non-read cacheable methods are a small closed key set, so they are never capped.""" store = InMemoryResponseCacheStore(max_read_entries=1) list_keys = [ CacheKey("tools/list"), @@ -301,8 +263,6 @@ async def test_only_read_keys_count_toward_the_cap_and_only_read_keys_are_evicte async def test_a_non_read_set_never_triggers_eviction_even_with_reads_at_the_cap() -> None: - """SDK-defined: only storing a NEW read key can evict - a non-read `set` - while reads sit at the cap leaves them untouched.""" store = InMemoryResponseCacheStore(max_read_entries=1) await store.set(_read_key("file:///a"), _entry("a")) await store.set(CacheKey("tools/list"), _entry("tools")) @@ -311,7 +271,6 @@ async def test_a_non_read_set_never_triggers_eviction_even_with_reads_at_the_cap async def test_a_zero_cap_disables_read_eviction() -> None: - """SDK-defined: `max_read_entries=0` means unbounded read entries.""" store = InMemoryResponseCacheStore(max_read_entries=0) uris = [f"file:///{i}" for i in range(5)] for uri in uris: @@ -321,8 +280,6 @@ async def test_a_zero_cap_disables_read_eviction() -> None: async def test_deleting_a_read_key_frees_its_cap_slot() -> None: - """SDK-defined: the cap counts live entries, so a deleted read key's slot - is reusable without evicting anything.""" store = InMemoryResponseCacheStore(max_read_entries=1) await store.set(_read_key("file:///a"), _entry("a")) await store.delete(_read_key("file:///a")) @@ -331,8 +288,6 @@ async def test_deleting_a_read_key_frees_its_cap_slot() -> None: def test_a_negative_read_cap_is_rejected_at_construction() -> None: - """SDK-defined guard: a negative cap is meaningless (0 already means - uncapped) and would otherwise evict on every read insert.""" with pytest.raises(ValueError) as exc: InMemoryResponseCacheStore(max_read_entries=-1) assert str(exc.value) == snapshot("max_read_entries must be >= 0, got -1") @@ -386,8 +341,7 @@ def _public_arm(arm_id: str = "arm", partition: str = "") -> str: def _wire_result(ttl_ms: int | None = None, cache_scope: str | None = None) -> ListToolsResult: - """A `tools/list` result as parsed off the wire; `None` omits the hint so - it stays out of `model_fields_set`.""" + """A wire-parsed `tools/list` result; `None` keeps the hint out of `model_fields_set`.""" payload: dict[str, Any] = {"tools": []} if ttl_ms is not None: payload["ttlMs"] = ttl_ms @@ -401,9 +355,7 @@ def _read_result(ttl_ms: int) -> ReadResourceResult: class _ScriptedStore: - """In-memory store that logs `(op, key)` and can await one-shot hooks - around an operation's commit, modelling an async store mid-commit when an - eviction or a cancellation lands.""" + """Logs `(op, key)` and awaits one-shot hooks around commits, modelling an async store mid-commit.""" def __init__(self) -> None: self.inner = InMemoryResponseCacheStore() @@ -438,8 +390,7 @@ async def clear(self) -> None: class _FailingStore: - """In-memory store whose operations raise while their flag is set; the - flags toggle so tests can model recovery.""" + """Operations raise while their flag is set; toggling a flag models recovery.""" def __init__(self, *, fail_get: bool = False, fail_set: bool = False, fail_delete: bool = False) -> None: self.inner = InMemoryResponseCacheStore() @@ -467,9 +418,7 @@ async def clear(self) -> None: class _ArmDeleteFailingStore: - """In-memory store whose `delete` raises only for keys on the given arm, - modelling a write whose opposite-arm cleanup fails while everything else - works. A write hitting that failure never reaches `set`.""" + """`delete` raises only for keys on the given arm, modelling a failed opposite-arm cleanup.""" def __init__(self, failing_arm: str) -> None: self.inner = InMemoryResponseCacheStore() @@ -491,8 +440,7 @@ async def clear(self) -> None: class _RehydratingStore: - """Models a persistent store whose `get` returns what its deserializer - produced - possibly not the shape `set` received.""" + """`get` returns whatever a persistent store's deserializer produced - not necessarily what `set` received.""" def __init__(self, rehydrated: Any) -> None: self.rehydrated = rehydrated @@ -515,11 +463,8 @@ async def clear(self) -> None: @pytest.mark.parametrize("version", [LEGACY_VERSION, None], ids=["legacy", "pre-negotiation"]) async def test_hints_from_a_non_modern_session_are_ignored(version: str | None) -> None: - """SDK-defined era gate: `ttlMs`/`cacheScope` are 2026-07-28 assertions. A - legacy peer can inject the keys onto the wire (the 2025 surfaces validate - and discard unknown keys, so they reach `model_fields_set`), so wire - presence is not trusted: on a non-modern session every result is - hint-absent - with the default `default_ttl_ms=0`, nothing is stored.""" + """The hints are 2026-07-28 assertions a legacy peer can still inject onto the wire (unknown keys + reach `model_fields_set`), so on a non-modern session every result is treated as hint-absent.""" store = InMemoryResponseCacheStore() cache = _coordinator(store, version=version) gen = cache.capture("tools/list", "") @@ -530,9 +475,7 @@ async def test_hints_from_a_non_modern_session_are_ignored(version: str | None) async def test_a_legacy_session_with_a_default_ttl_caches_on_the_private_arm_only() -> None: - """SDK-defined era gate: the operator's `default_ttl_ms` still applies on - legacy sessions, but an injected `cacheScope: "public"` cannot promote the - entry, and an injected `ttlMs` does not shorten (or extend) its life.""" + """The operator's default TTL still applies on legacy sessions; injected hints cannot promote or re-clock.""" store = InMemoryResponseCacheStore() clock = _ManualClock() cache = _coordinator(store, version=LEGACY_VERSION, default_ttl_ms=60_000, clock=clock) @@ -550,9 +493,7 @@ async def test_a_legacy_session_with_a_default_ttl_caches_on_the_private_arm_onl async def test_an_explicit_zero_ttl_is_not_overridden_by_the_default_ttl() -> None: - """Spec-mandated: `ttlMs: 0` means immediately stale. The configured - `default_ttl_ms` fills in only for hint-ABSENT results - an explicit 0 - stores nothing.""" + """Spec-mandated: ttlMs 0 means immediately stale; the default fills in only for hint-absent results.""" store = InMemoryResponseCacheStore() cache = _coordinator(store, default_ttl_ms=60_000) gen = cache.capture("tools/list", "") @@ -562,9 +503,6 @@ async def test_an_explicit_zero_ttl_is_not_overridden_by_the_default_ttl() -> No async def test_a_hint_absent_modern_result_uses_the_default_ttl_privately() -> None: - """SDK-defined: on a modern session a result without `ttlMs` in - `model_fields_set` gets `default_ttl_ms` and scope `"private"`, expiring - exactly when the default says.""" store = InMemoryResponseCacheStore() clock = _ManualClock() cache = _coordinator(store, default_ttl_ms=60_000, clock=clock) @@ -580,9 +518,7 @@ async def test_a_hint_absent_modern_result_uses_the_default_ttl_privately() -> N async def test_a_ttl_above_24_hours_is_clamped_to_the_cap() -> None: - """SDK-defined hardening (SEP-2549 security discussion): a server cannot - pin an entry beyond 24 hours - the stored expiry is clamped to - `MAX_TTL_MS`.""" + """SEP-2549 hardening: a server cannot pin an entry beyond `MAX_TTL_MS`.""" store = InMemoryResponseCacheStore() clock = _ManualClock() cache = _coordinator(store, clock=clock) @@ -594,9 +530,7 @@ async def test_a_ttl_above_24_hours_is_clamped_to_the_cap() -> None: async def test_a_public_result_lands_on_the_public_arm_and_clears_the_private_arm() -> None: - """Spec-mandated scope routing plus the SDK's no-stale-pair invariant: - when a key's scope flips, writing the new arm deletes the other so the two - arms never both answer.""" + """On a scope flip, writing the new arm deletes the other so the two arms never both answer.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -613,9 +547,7 @@ async def test_a_public_result_lands_on_the_public_arm_and_clears_the_private_ar async def test_arm_key_layout_is_pinned_for_shared_store_compatibility() -> None: - """SDK-defined persistence contract: arm strings are the cross-process - store key material, so their layout is pinned - JSON arrays of the scope, - the hashed server identity, and (unless `share_public`) the partition.""" + """Arm strings are cross-process store key material; changing their layout breaks shared stores.""" store = InMemoryResponseCacheStore() cache = _coordinator(store, partition="tenant-a", arm_id="abc123", default_ttl_ms=60_000) gen = cache.capture("tools/list", "") @@ -630,10 +562,8 @@ async def test_arm_key_layout_is_pinned_for_shared_store_compatibility() -> None async def test_public_entries_do_not_cross_partitions_by_default() -> None: - """SDK security default (deviates from the ts SDK): the public arm is - partition-scoped, so a server stamping `cacheScope: "public"` on - per-tenant data (bug or malice) cannot leak one tenant's response to - another through a shared store.""" + """Security default (deviates from the TypeScript SDK): a server stamping per-tenant data public + (bug or malice) cannot leak one tenant's response to another through a shared store.""" store = InMemoryResponseCacheStore() tenant_a = _coordinator(store, partition="tenant-a") tenant_b = _coordinator(store, partition="tenant-b") @@ -644,9 +574,6 @@ async def test_public_entries_do_not_cross_partitions_by_default() -> None: async def test_share_public_serves_public_entries_across_partitions_but_never_private_ones() -> None: - """SDK-defined opt-in: `share_public=True` drops the partition from the - public arm, sharing server-asserted-public entries fleet-wide. Private - entries still never cross partitions.""" store = InMemoryResponseCacheStore() tenant_a = _coordinator(store, partition="tenant-a", share_public=True) tenant_b = _coordinator(store, partition="tenant-b", share_public=True) @@ -660,9 +587,7 @@ async def test_share_public_serves_public_entries_across_partitions_but_never_pr async def test_a_private_scoped_entry_under_the_public_arm_is_not_served() -> None: - """SDK defense in depth: the arm routes, the entry's scope verifies - a - `"private"` entry sitting under the shared arm (a corrupted or pre-seeded - store) is refused, not served across the boundary.""" + """Defense in depth against a corrupted or pre-seeded store: the arm routes, the entry's scope verifies.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) await store.set( @@ -673,10 +598,8 @@ async def test_a_private_scoped_entry_under_the_public_arm_is_not_served() -> No async def test_a_stale_private_entry_does_not_shadow_a_fresh_public_one() -> None: - """SDK-defined fall-through: a stale private-arm entry is a miss for - arm-probing purposes, so after a server scope flip (private -> public, - with the public entry seeded by another client sharing the store) the - fresh public entry is served, not shadowed into a spurious miss.""" + """A stale private entry is an arm-probe miss, so the fall-through finds a public entry seeded by + another client after a server scope flip.""" store = InMemoryResponseCacheStore() clock = _ManualClock() cache = _coordinator(store, clock=clock) @@ -693,8 +616,7 @@ async def test_a_stale_private_entry_does_not_shadow_a_fresh_public_one() -> Non async def test_an_entry_without_an_expiry_is_never_fresh() -> None: - """SDK-defined: `expires_at=None` means never fresh - a store rehydrating - entries without expiry metadata yields misses, not immortal entries.""" + """Entries rehydrated without expiry metadata are misses, not immortal.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) await store.set( @@ -708,9 +630,7 @@ async def test_an_entry_without_an_expiry_is_never_fresh() -> None: async def test_write_deletes_the_opposite_arm_before_setting_its_own() -> None: - """SDK-defined ordering: the opposite arm is deleted before the own-arm - set, so a cancellation between the two operations leaves a miss - never - two arms answering for one key.""" + """Delete-then-set: a cancellation between the two operations leaves a miss, never two answering arms.""" store = _ScriptedStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -722,11 +642,8 @@ async def test_write_deletes_the_opposite_arm_before_setting_its_own() -> None: async def test_an_eviction_landing_during_an_async_set_is_compensated() -> None: - """SDK-defined TOCTOU re-check. Steps: (1) write captures, deletes the - opposite arm, and issues `set`; (2) before the store commits, an eviction - runs fully (bump + deletes, which see nothing); (3) the set commits the - now-stale entry; (4) the post-set generation re-check fires a compensating - delete, so the evicted key does not resurface.""" + """TOCTOU re-check: the eviction's deletes see nothing (the set has not committed yet), so the + post-set generation re-check must fire a compensating delete.""" store = _ScriptedStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -750,13 +667,8 @@ async def evict_mid_commit() -> None: async def test_a_cancellation_landing_as_the_set_commits_still_compensates_an_eviction() -> None: - """SDK-defined: the eviction re-check survives cancellation. Steps: (1) - write deletes the opposite arm and issues `set`; (2) before the store - commits, an eviction runs fully (its deletes see nothing) and the caller's - scope is cancelled; (3) the set commits and the cancellation is delivered - at the store's next checkpoint - a timeout firing while an async store's - set is already on the wire; (4) the shielded compensating delete still - runs, so the evicted entry is not resurrected for its full TTL.""" + """The compensating delete is shielded: a timeout firing while the store's set is already on the + wire must not resurrect the evicted entry for its full TTL.""" store = _ScriptedStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -783,9 +695,7 @@ async def evict_then_cancel() -> None: async def test_a_cancellation_during_the_refresh_purge_still_purges_both_arms() -> None: - """SDK-defined: the `mode="refresh"` purge is shielded - a cancellation - delivered between its two arm deletes must not leave the warm - opposite-arm entry that the refetch superseded.""" + """The refresh purge is shielded - a mid-purge cancellation must not leave the superseded opposite arm.""" store = _ScriptedStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -794,17 +704,15 @@ async def test_a_cancellation_during_the_refresh_purge_still_purges_both_arms() assert await store.inner.get(public_key) is not None with anyio.CancelScope() as scope: scope.cancel() - # The cancellation would be delivered at the first checkpoint after the - # first (private-arm) delete commits, skipping the warm public arm. + # Delivers at the first checkpoint after the private-arm delete commits. store.after_delete_commits = anyio.lowlevel.checkpoint await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "refresh") assert await store.inner.get(public_key) is None async def test_a_cancellation_during_an_eviction_still_evicts_both_arms() -> None: - """SDK-defined: eviction's two arm deletes are shielded - a notification - task cancelled mid-eviction (e.g. session teardown) must not leave one arm - serving the evicted entry until its TTL.""" + """Eviction's arm deletes are shielded - a notification task cancelled mid-eviction (session + teardown) must not leave one arm serving the evicted entry.""" store = _ScriptedStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -812,8 +720,7 @@ async def test_a_cancellation_during_an_eviction_still_evicts_both_arms() -> Non public_key = CacheKey("tools/list", "", _public_arm()) with anyio.CancelScope() as scope: scope.cancel() - # The cancellation would be delivered at the first checkpoint after the - # first (private-arm) delete commits, skipping the warm public arm. + # Delivers at the first checkpoint after the private-arm delete commits. store.after_delete_commits = anyio.lowlevel.checkpoint await cache.evict_method("tools/list") assert await store.inner.get(public_key) is None @@ -823,8 +730,6 @@ async def test_a_cancellation_during_an_eviction_still_evicts_both_arms() -> Non async def test_a_raising_store_get_is_a_cache_miss() -> None: - """SDK error discipline: a raising store never fails the caller - a - read-path `get` raise is a miss.""" store = _FailingStore(fail_get=True) cache = _coordinator(store) assert await cache.read("tools/list", "") is None @@ -841,12 +746,8 @@ async def test_a_raising_store_get_is_a_cache_miss() -> None: async def test_an_entry_rehydrated_into_the_wrong_shape_is_a_warned_miss( rehydrated: Any, caplog: pytest.LogCaptureFixture ) -> None: - """SDK error discipline: a persistent store has no method-to-model mapping - to rehydrate with, so its `get` may return serialized shapes (a dict where - the result model was stored, or a dict for the whole entry); the read - degrades to a warned miss instead of failing the call - and a store that - is persistently misconfigured this way is one warning burst, not one - warning per cached read.""" + """A persistent store has no method-to-model mapping, so its `get` may return serialized shapes; + the warned miss is one burst, not one warning per cached read.""" cache = _coordinator(_RehydratingStore(rehydrated)) with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): assert await cache.read("tools/list", "") is None @@ -855,8 +756,7 @@ async def test_an_entry_rehydrated_into_the_wrong_shape_is_a_warned_miss( async def test_a_raising_opposite_arm_delete_aborts_the_write() -> None: - """SDK error discipline: if the opposite-arm delete fails, setting anyway - could leave both arms populated - the write aborts with nothing cached.""" + """Setting after a failed opposite-arm delete could leave both arms populated.""" store = _FailingStore(fail_delete=True) cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -866,10 +766,7 @@ async def test_a_raising_opposite_arm_delete_aborts_the_write() -> None: async def test_a_failed_opposite_arm_delete_degrades_the_key_to_a_full_miss() -> None: - """SDK error discipline: when only the opposite-arm delete fails, the write - cannot set its own arm (two arms might answer) - but the warm own-arm - entry was superseded by the fetch, so it is best-effort deleted too: both - arms read as misses, and the write itself never raises.""" + """The fetch superseded the warm own-arm entry, so it is best-effort deleted too; the write never raises.""" store = _ArmDeleteFailingStore(failing_arm=_public_arm()) cache = _coordinator(store) await store.inner.set( @@ -885,8 +782,6 @@ async def test_a_failed_opposite_arm_delete_degrades_the_key_to_a_full_miss() -> async def test_a_raising_store_set_caches_nothing_and_does_not_raise() -> None: - """SDK error discipline: a `set` raise is logged and swallowed - the fetch - already succeeded, the result just is not cached.""" store = _FailingStore(fail_set=True) cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -895,9 +790,7 @@ async def test_a_raising_store_set_caches_nothing_and_does_not_raise() -> None: async def test_eviction_with_a_raising_delete_still_bumps_the_generation() -> None: - """SDK error discipline (bump-first): even when the store deletes raise, - the eviction's generation bump lands - an in-flight fetch captured before - the eviction cannot write back, while a fetch captured after it can.""" + """Bump-first: a fetch captured before the eviction cannot write back even when the deletes raise.""" store = _FailingStore() cache = _coordinator(store) stale_gen = cache.capture("tools/list", "") # fetch in flight when the eviction lands @@ -912,8 +805,6 @@ async def test_eviction_with_a_raising_delete_still_bumps_the_generation() -> No async def test_store_failures_warn_once_per_burst(caplog: pytest.LogCaptureFixture) -> None: - """SDK-defined logging: consecutive store failures log a single warning; a - successful operation re-arms it so the next burst warns again.""" store = _FailingStore(fail_get=True) cache = _coordinator(store) with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): @@ -929,10 +820,7 @@ async def test_store_failures_warn_once_per_burst(caplog: pytest.LogCaptureFixtu async def test_a_set_only_store_failure_warns_once_across_write_cycles(caplog: pytest.LogCaptureFixture) -> None: - """SDK-defined logging: the warning burst is tracked per operation kind - - a store where only `set` is broken warns once across write cycles, the - healthy deletes in between never re-arming it; only a `set` succeeding - re-arms the `set` warning.""" + """Bursts are tracked per operation kind - the healthy deletes between failing sets never re-arm.""" store = _FailingStore(fail_set=True) cache = _coordinator(store) with caplog.at_level(logging.WARNING, logger="mcp.client.caching"): @@ -953,9 +841,7 @@ async def test_a_set_only_store_failure_warns_once_across_write_cycles(caplog: p async def test_an_eviction_between_capture_and_write_discards_the_write() -> None: - """Spec-aligned race rule: a fetch in flight when its key is evicted must - not write the evicted entry back - the generation captured before the send - no longer matches at write time.""" + """Spec-aligned: a fetch in flight when its key is evicted must not write the evicted entry back.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -966,8 +852,6 @@ async def test_an_eviction_between_capture_and_write_discards_the_write() -> Non async def test_recapturing_a_registered_key_returns_its_current_generation() -> None: - """SDK-defined: `capture` re-reads, it does not reset - after an eviction - a new fetch captures the bumped generation and its write lands.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) gen_before = cache.capture("tools/list", "") @@ -979,11 +863,8 @@ async def test_recapturing_a_registered_key_returns_its_current_generation() -> async def test_the_generation_map_drops_the_oldest_key_at_its_cap() -> None: - """SDK-defined bound (cap parametrized small; 4096 in production): - registering a new key at the cap drops the oldest, whose race guard - degrades to the accepted co-tenant class - an eviction racing the dropped - key's in-flight fetch goes undetected and its write lands, while a - still-registered key's write is discarded.""" + """A dropped key's race guard degrades to the accepted co-tenant class - an eviction racing its + in-flight fetch goes undetected (cap is 4096 in production, parametrized small here).""" store = InMemoryResponseCacheStore() cache = _coordinator(store, generation_map_cap=2) gen_a = cache.capture("resources/read", "file:///a") @@ -1001,9 +882,7 @@ async def test_the_generation_map_drops_the_oldest_key_at_its_cap() -> None: async def test_a_refresh_resolving_uncacheable_purges_the_warm_entry() -> None: - """SDK-defined: a `cache_mode="refresh"` whose fresh result resolves to an - uncacheable TTL deletes both arms - the refetch superseded the warm entry, - which must not be served again.""" + """The refetch superseded the warm entry, which must not be served again.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) gen = cache.capture("tools/list", "") @@ -1015,9 +894,7 @@ async def test_a_refresh_resolving_uncacheable_purges_the_warm_entry() -> None: async def test_evict_key_on_an_unregistered_key_still_deletes_both_arms() -> None: - """SDK-defined: a persistent store may hold warm entries from a prior - process that this coordinator never captured - eviction always issues the - store deletes, registered or not.""" + """A persistent store may hold warm entries from a prior process this coordinator never captured.""" store = InMemoryResponseCacheStore() await store.set( CacheKey("resources/read", "file:///warm", _private_arm()), @@ -1053,10 +930,7 @@ async def test_evict_key_on_an_unregistered_key_still_deletes_both_arms() -> Non async def test_notifications_evict_exactly_their_mapped_entries( notification: ServerNotification, evicted: set[tuple[str, str]] ) -> None: - """Spec SHOULD (notifications invalidate) plus negative space: each - list_changed notification evicts its own method's entry and nothing else, - resources/list_changed co-evicts the templates list, resources/updated - evicts only the named uri, and an unrelated notification evicts nothing.""" + """Spec SHOULD: notifications invalidate - and nothing beyond their mapped entries.""" store = InMemoryResponseCacheStore() cache = _coordinator(store) seeded = [ diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 139f6a2e3..f302621cc 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -1,12 +1,6 @@ -"""`Client` wiring for the response cache: the `cache=` constructor kwarg, server -identity resolution (explicit `target_id`, URL, per-client random), the custom-store -identity guard, the notification-eviction message-handler wrap, the lazy -negotiated-version supplier, and the five cacheable verbs (the `_cached_fetch` -choke point, the `read_resource` sibling, and the tools/list absorption seam). -Cross-cutting end-to-end hardening (eviction completeness, partition isolation, -deep-copy isolation, era-gate injection, write/eviction races) lives at the -bottom. The coordinator's own behavior is covered in `test_caching.py`. -""" +"""`Client` wiring for the response cache: the `cache=` kwarg, server identity +resolution, the custom-store guard, notification eviction, and the five cacheable +verbs. The coordinator's own behavior is covered in `test_caching.py`.""" import hashlib import json @@ -79,11 +73,7 @@ def _coordinator(client: Client) -> ClientResponseCache: def _private_arm(client: Client) -> str: - """The arm string the coordinator stamps into every store key's partition field. - - Server identity is only observable through it pre-verbs; `test_caching.py` pins - the arm layout, so only equality between clients matters here. - """ + """The identity arm stamped into store keys; only equality between clients matters here.""" return _coordinator(client)._private_arm @@ -92,8 +82,7 @@ def _tools_list_key(client: Client) -> CacheKey: class _OpaqueTransport: - """Shape-only `Transport`: identity resolution happens at construction, so the - tests never enter it.""" + """Shape-only `Transport`: identity resolution happens at construction, so tests never enter it.""" async def __aenter__(self) -> TransportStreams: raise NotImplementedError @@ -105,13 +94,8 @@ async def __aexit__( def _list_changed_server() -> Server[Any]: - """In-process server whose `touch` tool emits `notifications/tools/list_changed`. - - The notification-delivery tests connect with `mode="legacy"`: the modern - in-process DirectDispatcher path has no standalone channel and drops unrelated - server notifications before they reach the client, so the legacy in-memory - stream pair is the lightest transport that actually delivers them. - """ + """Server whose `touch` tool emits tools/list_changed; connect with `mode="legacy"` — + the modern in-process path drops standalone server notifications.""" async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: return ListToolsResult(tools=[types.Tool(name="touch", input_schema={"type": "object"})]) @@ -125,17 +109,13 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara async def _warm_tools_list_entry(client: Client) -> CacheKey: - """Seed a private-arm tools/list entry directly in the client's store; eviction - deletes regardless of freshness, so the entry's payload and expiry are inert.""" + """Seed a private-arm tools/list entry directly in the store; payload and expiry are inert to eviction.""" key = _tools_list_key(client) await _coordinator(client)._store.set(key, CacheEntry(value="warm", scope="private", expires_at=None)) return key def test_an_explicit_target_id_overrides_both_url_and_in_process_identity() -> None: - """`CacheConfig.target_id` wins over every server shape: a URL client and an - in-process client given the same target_id share one cache identity, distinct - from the URL-derived one. SDK-defined resolution order.""" by_target_url = Client("https://example.com/mcp", cache=CacheConfig(target_id="svc")) by_target_inproc = Client(Server("plain"), cache=CacheConfig(target_id="svc")) by_url = Client("https://example.com/mcp") @@ -145,8 +125,7 @@ def test_an_explicit_target_id_overrides_both_url_and_in_process_identity() -> N def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: - """Stripping credentials is the single permitted URL rewrite: userinfo variants - of the same URL resolve to the identity of the bare URL. SDK-defined.""" + """Stripping userinfo is the single permitted URL rewrite.""" bare = Client("https://example.com/mcp") with_password = Client("https://user:secret@example.com/mcp") with_token = Client("https://token@example.com/mcp") @@ -155,9 +134,7 @@ def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None: - """The arm carries sha256(url-sans-userinfo), not the URL itself, so a secret - in the query string never appears in store keys. SDK-defined; pins the docs' - secrets-never-in-keys claim — raw-URL key material would fail here.""" + """Pins the docs' secrets-never-in-keys claim: a query-string secret never appears in store keys.""" client = Client("https://user:pass@example.com/mcp?api_key=SECRET") arm_id = hashlib.sha256(b"https://example.com/mcp?api_key=SECRET").hexdigest() @@ -165,8 +142,7 @@ def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: - """URL identity is byte-exact outside userinfo — `?tenant=a` and `?tenant=b` - must never share entries (over-normalization would merge tenants). SDK-defined.""" + """URL identity is byte-exact outside userinfo — over-normalization would merge tenants.""" tenant_a = Client("https://example.com/mcp?tenant=a") tenant_b = Client("https://example.com/mcp?tenant=b") @@ -174,16 +150,12 @@ def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: def test_two_clients_on_one_in_process_server_get_distinct_cache_identities() -> None: - """An in-process server has no URL, so each client gets a random per-client - identity — two clients on the same server never share entries. SDK-defined.""" server = Server("plain") assert _private_arm(Client(server)) != _private_arm(Client(server)) def test_a_transport_object_gets_a_per_client_cache_identity() -> None: - """The `Transport` protocol carries no URL, so a transport-backed client gets - the same random per-client identity as an in-process one. SDK-defined.""" transport = _OpaqueTransport() assert _private_arm(Client(transport)) != _private_arm(Client(transport)) @@ -191,9 +163,6 @@ def test_a_transport_object_gets_a_per_client_cache_identity() -> None: @pytest.mark.parametrize("make_server", [lambda: Server("plain"), _OpaqueTransport], ids=["in-process", "transport"]) def test_a_custom_store_without_a_url_or_target_id_is_rejected(make_server: Any) -> None: - """A shared store keyed by a random per-client identity would accumulate entries - no other client can ever read, so construction refuses the combination and - points at the fix.""" with pytest.raises(ValueError) as exc_info: Client(make_server(), cache=CacheConfig(store=InMemoryResponseCacheStore(), partition="p")) assert str(exc_info.value) == snapshot( @@ -204,7 +173,6 @@ def test_a_custom_store_without_a_url_or_target_id_is_rejected(make_server: Any) def test_a_custom_store_with_a_url_server_constructs_and_is_used() -> None: - """A URL provides a stable identity, so a custom store needs no `target_id`.""" store = InMemoryResponseCacheStore() client = Client("https://example.com/mcp", cache=CacheConfig(store=store, partition="p")) @@ -212,8 +180,6 @@ def test_a_custom_store_with_a_url_server_constructs_and_is_used() -> None: def test_a_custom_store_with_an_explicit_target_id_constructs_for_any_server() -> None: - """`target_id` is the documented escape hatch: it lifts the custom-store guard - even for an in-process server.""" store = InMemoryResponseCacheStore() client = Client(Server("plain"), cache=CacheConfig(store=store, partition="p", target_id="svc")) @@ -221,9 +187,6 @@ def test_a_custom_store_with_an_explicit_target_id_constructs_for_any_server() - async def test_cache_false_disables_the_cache_and_the_handler_wrap() -> None: - """`cache=False` mints no coordinator and installs the user's handler unwrapped — - today's no-cache behavior exactly.""" - async def handler(message: IncomingMessage) -> None: raise NotImplementedError @@ -235,8 +198,7 @@ async def handler(message: IncomingMessage) -> None: def test_the_default_cache_uses_a_per_client_in_memory_store() -> None: - """`cache=None` (the default) is cache-on: each client gets its own coordinator - backed by its own in-memory store, never shared between clients.""" + """`cache=None` (the default) is cache-on.""" server = Server("plain") first = Client(server) second = Client(server) @@ -246,9 +208,7 @@ def test_the_default_cache_uses_a_per_client_in_memory_store() -> None: async def test_the_negotiated_version_supplier_tracks_the_session_lifecycle() -> None: - """The era supplier returns None before connect (and again after exit) and the - negotiated version while the session is live — the era gate must never read a - stale or raising source.""" + """The era gate must never read a stale or raising source.""" client = Client(_list_changed_server()) supplier = _coordinator(client)._negotiated_version @@ -259,9 +219,7 @@ async def test_the_negotiated_version_supplier_tracks_the_session_lifecycle() -> async def test_a_list_changed_notification_evicts_without_a_user_handler() -> None: - """With no user handler the wrap is still installed: a tools/list_changed - notification deletes the warm tools/list entry from both arms. Spec SHOULD - (notifications invalidate).""" + """Spec SHOULD (notifications invalidate): the entry is deleted from both arms.""" class _EventedStore(InMemoryResponseCacheStore): """Signals once both arms of an eviction have been deleted.""" @@ -291,8 +249,7 @@ async def delete(self, key: CacheKey) -> None: async def test_a_user_handler_receives_the_notification_the_eviction_consumed() -> None: - """Eviction is a tee, not a filter: the warm entry is gone by the time the - user's handler sees the notification, and nothing else is delivered.""" + """Eviction is a tee, not a filter.""" received: list[IncomingMessage] = [] seen = anyio.Event() @@ -307,18 +264,14 @@ async def collect(message: IncomingMessage) -> None: await client.call_tool("touch", {}) with anyio.fail_after(5): await seen.wait() - # The wrap awaits the eviction before delegating, so delivery implies the - # entry is already gone. + # The wrap evicts before delegating: delivery implies the entry is gone. assert await _coordinator(client)._store.get(key) is None assert received == snapshot([ToolListChangedNotification()]) async def test_non_notification_items_pass_through_to_the_user_handler_untouched() -> None: - """The wrap delegates non-notification items verbatim and leaves the cache - alone. Transport `Exception` items only exist on stream-backed dispatchers, - which the in-process path cannot produce, so the installed handler is invoked - directly; `RequestResponder` items take this same non-notification branch.""" + """Transport `Exception` items can't occur in-process, so the installed handler is invoked directly.""" received: list[IncomingMessage] = [] async def collect(message: IncomingMessage) -> None: @@ -337,9 +290,6 @@ async def collect(message: IncomingMessage) -> None: async def test_a_raising_eviction_does_not_block_notification_delivery(caplog: pytest.LogCaptureFixture) -> None: - """The eviction boundary contains cache faults: a coordinator that raises is - logged and the user's handler still receives the notification.""" - class _ExplodingCache(ClientResponseCache): async def evict_for_notification(self, notification: ServerNotification) -> None: raise RuntimeError("cache bug") @@ -352,8 +302,7 @@ async def collect(message: IncomingMessage) -> None: seen.set() client = Client(_list_changed_server(), mode="legacy", message_handler=collect) - # The wrap reads `_response_cache` when the session is built, so swapping the - # coordinator pre-enter routes eviction through the exploding subclass. + # The wrap reads `_response_cache` at session build, so the swap must happen pre-enter. client._response_cache = _ExplodingCache( store=InMemoryResponseCacheStore(), partition="", @@ -391,9 +340,8 @@ def __call__(self) -> float: def _varying_tools_server( *, ttl_ms: int = 60_000, scope: Literal["public", "private"] = "private" ) -> tuple[Server[Any], list[str | None]]: - """In-process server whose every tools/list fetch returns a distinct tool name - `t`, so a served entry is distinguishable from a refetch by payload, not just - by handler count. The fetch log records each request's cursor.""" + """Server whose every tools/list fetch returns a distinct tool name `t`, + so a served entry is distinguishable from a refetch by payload.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -411,8 +359,7 @@ def _tool_names(result: ListToolsResult) -> list[str]: async def test_a_second_list_tools_within_the_ttl_is_served_from_the_cache() -> None: - """SEP-2549: a result carrying a `ttlMs` hint is reusable until it expires — the - second `list_tools` is served from the cache without reaching the server.""" + """SEP-2549: a result carrying a `ttlMs` hint is reusable until it expires.""" server, fetches = _varying_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -424,8 +371,7 @@ async def test_a_second_list_tools_within_the_ttl_is_served_from_the_cache() -> async def test_an_expired_entry_is_refetched() -> None: - """An entry is fresh strictly within its `ttlMs`: once the (injected) clock passes - expiry, the next `list_tools` fetches again and serves the new listing.""" + """Freshness is strict: at exactly `ttlMs` the entry is expired.""" clock = _ManualClock() server, fetches = _varying_tools_server(ttl_ms=60_000) @@ -438,9 +384,7 @@ async def test_an_expired_entry_is_refetched() -> None: async def test_each_list_verb_caches_independently_under_its_own_method() -> None: - """Cache keys discriminate by method (spec MUST): warming one list verb never - serves another — each of the four fetches once, and each repeat call is served - from that verb's own entry.""" + """Cache keys discriminate by method (spec MUST).""" fetched: list[str] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -479,12 +423,10 @@ async def list_templates( ) async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: - # First round: every verb fetches, despite the previously warmed entries. await client.list_tools() await client.list_prompts() await client.list_resources() await client.list_resource_templates() - # Second round: every verb is served from its own entry. await client.list_tools() await client.list_prompts() await client.list_resources() @@ -494,8 +436,7 @@ async def list_templates( async def test_read_resource_caches_per_uri() -> None: - """Cache keys discriminate by result-affecting params (spec MUST): two uris cache - independently, and each repeat read is served from its own entry.""" + """Cache keys discriminate by result-affecting params (spec MUST).""" reads: list[str] = [] async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: @@ -514,9 +455,8 @@ async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParam def _paginated_tools_server() -> tuple[Server[Any], list[str | None]]: - """In-process server with a cacheable first page; the cursor `"expired"` is - rejected with INVALID_PARAMS (the spec's expired-cursor signal) and `"fail"` - with INTERNAL_ERROR (any other continuation failure).""" + """Cacheable first page; cursor "expired" -> INVALID_PARAMS (the spec's expired-cursor + signal), "fail" -> INTERNAL_ERROR.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -536,23 +476,19 @@ async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestPa async def test_cursor_continuations_neither_read_nor_write_the_cache() -> None: - """Only cursor-less calls participate in caching (SDK-defined single-page entry): - a continuation fetches despite a warm entry, and its page does not replace it.""" + """Only cursor-less calls participate in caching (SDK-defined single-page entry).""" server, fetches = _paginated_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: assert _tool_names(await client.list_tools()) == ["first-page"] - # Not served from the warm entry, despite cache_mode="use". assert _tool_names(await client.list_tools(cursor="page-2")) == ["second-page"] - # The continuation page did not overwrite the cursor-less entry. - assert _tool_names(await client.list_tools()) == ["first-page"] + assert _tool_names(await client.list_tools()) == ["first-page"] # not overwritten by the continuation assert fetches == [None, "page-2"] async def test_an_expired_cursor_rejection_evicts_the_methods_entry() -> None: - """Spec SHOULD: an INVALID_PARAMS rejection of a continuation cursor means the - listing changed, so the cached first page is evicted and refetched next time.""" + """Spec SHOULD: INVALID_PARAMS on a continuation cursor means the listing changed.""" server, fetches = _paginated_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -566,8 +502,7 @@ async def test_an_expired_cursor_rejection_evicts_the_methods_entry() -> None: async def test_an_expired_cursor_rejection_under_bypass_does_not_evict() -> None: - """`cache_mode="bypass"` means no cache side-effects at all: the same - INVALID_PARAMS rejection leaves the warm entry in place.""" + """Bypass means no cache side-effects at all, eviction included.""" server, fetches = _paginated_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -581,8 +516,7 @@ async def test_an_expired_cursor_rejection_under_bypass_does_not_evict() -> None async def test_a_non_cursor_error_on_a_continuation_does_not_evict() -> None: - """Only INVALID_PARAMS signals cursor expiry: a continuation failing with any - other code re-raises without disturbing the warm entry.""" + """Only INVALID_PARAMS signals cursor expiry.""" server, fetches = _paginated_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -596,22 +530,17 @@ async def test_a_non_cursor_error_on_a_continuation_does_not_evict() -> None: async def test_bypass_neither_serves_nor_disturbs_a_warm_entry() -> None: - """`cache_mode="bypass"` fetches fresh without reading the warm entry and without - storing the fetched result over it.""" server, fetches = _varying_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: assert _tool_names(await client.list_tools()) == ["t0"] assert _tool_names(await client.list_tools(cache_mode="bypass")) == ["t1"] - # The bypass fetch neither served nor replaced the entry. - assert _tool_names(await client.list_tools()) == ["t0"] + assert _tool_names(await client.list_tools()) == ["t0"] # warm entry intact assert fetches == [None, None] async def test_refresh_skips_the_read_and_stores_the_refetched_result() -> None: - """`cache_mode="refresh"` ignores the warm entry, fetches, and re-stores: the - following plain call serves the refreshed listing.""" server, fetches = _varying_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -623,8 +552,7 @@ async def test_refresh_skips_the_read_and_stores_the_refetched_result() -> None: async def test_refresh_storing_a_ttl_zero_result_purges_the_warm_entry() -> None: - """A refresh whose refetched result is uncacheable (`ttlMs: 0`) purges the warm - entry instead of leaving it to be served again — the refetch superseded it.""" + """An uncacheable refetch still supersedes the warm entry.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -638,17 +566,14 @@ async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestPa async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: assert _tool_names(await client.list_tools()) == ["t0"] assert _tool_names(await client.list_tools(cache_mode="refresh")) == ["t1"] - # t0 must not resurface: the refresh purged it, and t1 (ttl 0) was never stored. - assert _tool_names(await client.list_tools()) == ["t2"] + assert _tool_names(await client.list_tools()) == ["t2"] # t0 purged, t1 (ttl 0) never stored assert fetches == [None, None, None] async def test_a_list_call_carrying_meta_is_fetched_and_replaces_the_warm_entry() -> None: - """SDK-defined: a call carrying `meta` (a progress token, tracing fields) - expects a wire request, so under the default `cache_mode="use"` it behaves - as a refresh - the warm entry is not served, the handler runs, and the - fresh result replaces the entry for later meta-less calls.""" + """SDK-defined: `meta` (a progress token, tracing fields) expects a wire request, + so under the default "use" the call behaves as a refresh.""" server, fetches = _varying_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -661,8 +586,6 @@ async def test_a_list_call_carrying_meta_is_fetched_and_replaces_the_warm_entry( async def test_a_read_resource_carrying_meta_is_fetched_and_replaces_the_warm_entry() -> None: - """`read_resource` counterpart of the meta rule: a read carrying `meta` is - never served from the warm entry, and its fetched result re-stores.""" reads: list[str] = [] async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: @@ -686,8 +609,6 @@ def text(result: ReadResourceResult) -> str: async def test_cache_mode_is_inert_when_caching_is_disabled() -> None: - """With `cache=False` the verbs accept `cache_mode` but every call goes to the - server — no reads, no writes, no eviction machinery. SDK-defined off switch.""" server, fetches = _varying_tools_server() async with Client(server, cache=False) as client: @@ -704,10 +625,7 @@ async def test_cache_mode_is_inert_when_caching_is_disabled() -> None: ids=["request_state", "input_responses"], ) async def test_a_seeded_read_resource_skips_the_cache_and_ignores_cache_mode(seed: dict[str, Any]) -> None: - """Spec MUST: results of requests carrying `inputResponses` or `requestState` are - never cached. A seeded read is a resumption: it is not served from the warm entry - under "use", does not purge it under "refresh", and stores nothing — the final - plain read still serves the original entry.""" + """Spec MUST: results of requests carrying `inputResponses` or `requestState` are never cached.""" reads = 0 async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: @@ -726,16 +644,13 @@ def text(result: ReadResourceResult) -> str: assert text(await client.read_resource("memo://a")) == "v1" assert text(await client.read_resource("memo://a", **seed)) == "v2" assert text(await client.read_resource("memo://a", **seed, cache_mode="refresh")) == "v3" - # The warm v1 entry survived both seeded calls: nothing read, written, or purged. - assert text(await client.read_resource("memo://a")) == "v1" + assert text(await client.read_resource("memo://a")) == "v1" # nothing read, written, or purged assert reads == 3 async def test_a_terminal_read_reached_through_driver_rounds_is_never_cached() -> None: - """Spec MUST: the driver's retry rounds carry `inputResponses`, so a terminal - result reached through them is not cached — a repeat read goes back to the wire - (and drives the rounds again).""" + """Spec MUST: the driver's retry rounds carry `inputResponses`, so their terminal result is not cached.""" seeded_rounds: list[bool] = [] ask = ElicitRequest( params=ElicitRequestFormParams( @@ -768,15 +683,12 @@ async def elicitation_callback( assert isinstance(first.contents[0], TextResourceContents) and first.contents[0].text == "terminal" assert second == first - # Two wire rounds per call: the second call was not served from the cache. - assert seeded_rounds == [False, True, False, True] + assert seeded_rounds == [False, True, False, True] # two wire rounds per call: never served async def test_a_refresh_that_resolves_to_input_required_purges_the_warm_entry() -> None: - """SDK-defined supersession rule: a refresh whose unseeded first round comes back - input_required cannot store its driven terminal result (the rounds carry - `inputResponses` — spec MUST), but it still purges the warm entry — the pre-flip - value must not resurface on the next plain read.""" + """The refresh cannot store its driven terminal result (the rounds carry + `inputResponses` — spec MUST), but it still purges the warm entry.""" reads = 0 ask = ElicitRequest( params=ElicitRequestFormParams( @@ -790,7 +702,7 @@ async def read( ) -> ReadResourceResult | InputRequiredResult: nonlocal reads reads += 1 - # The resource starts plain and then flips to requiring input. + # Starts plain, then flips to requiring input. if reads > 1 and params.input_responses is None: return InputRequiredResult(input_requests={"ask": ask}) return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=f"v{reads}")], ttl_ms=60_000) @@ -813,16 +725,14 @@ def text(result: ReadResourceResult) -> str: ) as client: assert text(await client.read_resource("memo://a")) == "v1" # cached for 60s assert text(await client.read_resource("memo://a", cache_mode="refresh")) == "v3" - # v1 must not resurface: the refresh purged it, and the driven terminal - # result (v3) was never stored — the plain read drives fresh rounds. + # v1 purged and v3 never stored: the plain read drives fresh rounds. assert text(await client.read_resource("memo://a")) == "v5" assert reads == 5 def _output_schema_server(call_result: CallToolResult) -> tuple[Server[Any], list[str | None]]: - """In-process server whose one tool declares an output schema; `call_tool` returns - the canned `call_result` so tests choose whether it satisfies that schema.""" + """One tool declaring an output schema; `call_tool` returns the canned `call_result`.""" fetches: list[str | None] = [] tool = Tool( name="run", @@ -848,10 +758,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara async def test_a_listing_served_from_a_shared_store_rebuilds_output_schemas() -> None: - """A fresh client whose first `list_tools` is served from a pre-warmed shared - store absorbs the served listing into the session: `call_tool` validates its - structured output against the absorbed schema without ever fetching the listing - from the server (the fetch log stays at the warming client's one entry).""" + """A served listing is absorbed into the session: output validation works without a wire fetch.""" call_result = CallToolResult(content=[TextContent(text="ok")], structured_content={"n": 1}) server, fetches = _output_schema_server(call_result) config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc", clock=_ManualClock()) @@ -864,15 +771,12 @@ async def test_a_listing_served_from_a_shared_store_rebuilds_output_schemas() -> result = await fresh.call_tool("run", {}) assert result.structured_content == {"n": 1} - # One wire fetch total: the fresh client's listing AND the validation schema both - # came from the served entry (a starved schema cache would have re-listed here). + # A starved schema cache would have re-listed here. assert fetches == [None] async def test_validation_from_a_served_listing_rejects_missing_structured_content() -> None: - """The schema absorbed from a served listing is enforced, not just present: a tool - result without structured content fails validation in the fresh client, again - without any wire refetch of the listing.""" + """The schema absorbed from a served listing is enforced, not just present.""" server, fetches = _output_schema_server(CallToolResult(content=[TextContent(text="ok")])) config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc", clock=_ManualClock()) @@ -889,10 +793,8 @@ async def test_validation_from_a_served_listing_rejects_missing_structured_conte async def test_a_cache_hit_listing_still_mirrors_x_mcp_headers_on_tools_call() -> None: - """A fresh client serving tools/list from a pre-warmed shared store still mirrors - `x-mcp-header` arguments into `Mcp-Param-*` headers on a later `tools/call`: the - arg→header maps are rebuilt from the served listing. Asserted at the wire (over - the in-process HTTP bridge) because the client never surfaces outgoing headers.""" + """The arg→header maps are rebuilt from a served listing. Asserted at the wire + because the client never surfaces outgoing headers.""" tool = Tool( name="run", input_schema={"type": "object", "properties": {"region": {"type": "string", "x-mcp-header": "Region"}}}, @@ -939,16 +841,14 @@ async def on_request(request: httpx.Request) -> None: await fresh.list_tools() await fresh.call_tool("run", {"region": "us-west1"}) - # Exactly one tools/list reached the wire: the fresh client served from the store. + # One tools/list on the wire: the fresh client served from the store. assert [json.loads(request.content)["method"] for request in posts] == ["tools/list", "tools/call"] assert posts[-1].headers["mcp-param-region"] == "us-west1" async def test_a_tools_list_changed_notification_makes_the_next_list_refetch() -> None: - """Spec SHOULD: a list_changed notification invalidates the cached listing — the - next `list_tools` goes back to the server. Runs on a legacy session (the only - in-process transport that delivers standalone notifications) with `default_ttl_ms` - providing the cached entry, proving eviction is era-independent.""" + """Spec SHOULD: list_changed invalidates the cached listing. Legacy session + + `default_ttl_ms` entry: eviction is era-independent.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -962,7 +862,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara server = Server("notify", on_list_tools=list_tools, on_call_tool=call_tool) - # The wrap evicts before delegating, so delivery here implies eviction completed. + # The wrap evicts before delegating: delivery implies eviction completed. delivered = anyio.Event() async def on_message(message: IncomingMessage) -> None: @@ -973,7 +873,7 @@ async def on_message(message: IncomingMessage) -> None: async with client: await client.list_tools() await client.list_tools() - assert fetches == [None] # cached via default_ttl_ms on the legacy session + assert fetches == [None] # cached via default_ttl_ms await client.call_tool("touch", {}) with anyio.fail_after(5): await delivered.wait() @@ -983,10 +883,8 @@ async def on_message(message: IncomingMessage) -> None: async def test_a_resource_updated_notification_evicts_that_uris_read_entry() -> None: - """Spec SHOULD: `notifications/resources/updated` invalidates the cached read for - its uri. This is also the uri-form agreement proof: the entry stored under the - string passed to `read_resource` is the one the notification's `params.uri` - evicts — the next read of that uri refetches.""" + """Spec SHOULD: resources/updated invalidates the cached read for its uri — + and the notification's `params.uri` must match the stored key's uri form.""" uri = "memo://cached" reads: list[str] = [] @@ -1016,25 +914,20 @@ async def on_message(message: IncomingMessage) -> None: async with client: await client.read_resource(uri) await client.read_resource(uri) - assert reads == [uri] # cached via default_ttl_ms on the legacy session + assert reads == [uri] # cached via default_ttl_ms await client.call_tool("poke", {}) with anyio.fail_after(5): await seen.wait() await client.read_resource(uri) - # The notification carried the exact string the entry was stored under. - assert delivered == [uri] + assert delivered == [uri] # the exact string the entry was stored under assert reads == [uri, uri] async def test_the_modern_in_process_path_drops_the_eviction_notification() -> None: - """Pins the documented transport gap: the default in-process connection - (mode="auto", DirectDispatcher) does not deliver standalone server notifications, - so a tools/list_changed emitted mid-call never reaches the cache - the warm entry - survives and the next `list_tools` is still served from it. Delivery on this path - would happen inline within the awaited `call_tool`, so asserting after it returns - is race-free. If this test starts failing, the path gained delivery: flip the - `docs/advanced/caching.md` eviction caveat and the legacy-mode notification tests.""" + """Pins the documented gap: the default in-process path (DirectDispatcher) drops + standalone notifications, so the warm entry survives. If this starts failing the + path gained delivery: flip the `docs/advanced/caching.md` caveat and the legacy-mode tests.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -1062,10 +955,8 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara async def test_a_discover_result_never_enters_the_response_cache() -> None: - """SDK ruling (documented): the response cache covers the five `Client` verbs - only. The connect-time server/discover result is never stored, even when it - carries a `ttlMs` hint - a persisted `prior_discover`'s freshness is the user's - bookkeeping (`DiscoverResult` carries the parsed hints for it).""" + """SDK ruling (documented): the cache covers the five verbs only — a persisted + `prior_discover`'s freshness is the user's bookkeeping.""" server = Server("hinted", cache_hints={"server/discover": CacheHint(ttl_ms=60_000)}) async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -1082,11 +973,8 @@ async def test_a_discover_result_never_enters_the_response_cache() -> None: @pytest.mark.parametrize("wire_ttl", [-5, -5.0]) async def test_a_negative_inbound_ttl_is_served_as_zero_and_never_cached(wire_ttl: int | float) -> None: - """Spec SHOULD (2026-07-28 caching): a negative `ttlMs` is treated as 0 — the - call succeeds instead of failing the `ge=0` wire validation, and a zero ttl is - never stored, so the next call goes back to the server. The peer is scripted - over raw streams because an SDK server cannot emit a negative ttl (server-side - `ge=0` enforcement).""" + """Spec SHOULD: a negative `ttlMs` is treated as 0, not a wire-validation failure. + Scripted peer: an SDK server enforces `ge=0` and cannot emit one.""" listings_served = 0 async def scripted_server(streams: MessageStream) -> None: @@ -1127,17 +1015,13 @@ async def scripted_transport() -> AsyncIterator[TransportStreams]: assert first.ttl_ms == 0 assert second.ttl_ms == 0 - assert listings_served == 2 # the clamped-to-zero ttl was never stored: the second call re-fetched + assert listings_served == 2 # the clamped-to-zero ttl was never stored @pytest.mark.parametrize("wire_ttl", [-5, -5.0]) async def test_a_negative_discover_ttl_still_connects_modern_in_auto_mode(wire_ttl: int | float) -> None: - """Spec SHOULD (2026-07-28 caching) — silent-downgrade regression: before the - parse-seam clamp, a negative `ttlMs` on `server/discover` failed `DiscoverResult` - validation inside the mode='auto' probe, which reads as "not modern evidence" and - silently fell back to the legacy initialize handshake. Clamped, the probe adopts - the modern era and the result carries `ttl_ms == 0` — for float negatives too, - the same as the tools/list seam (both call the shared clamp).""" + """Regression: pre-clamp, a negative discover `ttlMs` failed validation inside the + mode="auto" probe and silently downgraded to the legacy handshake.""" methods_seen: list[str] = [] async def scripted_server(streams: MessageStream) -> None: @@ -1147,7 +1031,7 @@ async def scripted_server(streams: MessageStream) -> None: frame = message.message assert isinstance(frame, types.JSONRPCRequest) methods_seen.append(frame.method) - # A legacy downgrade would send `initialize` next; fail loudly instead. + # A legacy downgrade would send `initialize`; fail loudly instead. assert frame.method == "server/discover" result: dict[str, Any] = { "supportedVersions": [LATEST_MODERN_VERSION], @@ -1182,9 +1066,8 @@ async def scripted_transport() -> AsyncIterator[TransportStreams]: def _versioned_read_server(*, ttl_ms: int = 60_000) -> tuple[Server[Any], list[str]]: - """In-process server whose every resources/read fetch returns a distinct payload - `v`, so a served entry is distinguishable from a refetch. The read log records - each request's uri.""" + """Server whose every read returns a distinct payload `v`, + so a served entry is distinguishable from a refetch.""" reads: list[str] = [] async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParams) -> ReadResourceResult: @@ -1201,20 +1084,8 @@ def _resource_text(result: ReadResourceResult) -> str: async def test_each_notification_evicts_exactly_its_entries_end_to_end() -> None: - """Spec SHOULD (notifications invalidate) plus its negative space, end to end. - - Steps: - 1. Prime all four list verbs and two resource reads; a second round of calls - is served entirely from the cache. - 2. tools/list_changed -> only tools/list refetches. - 3. resources/list_changed -> resources/list AND resources/templates/list - refetch; tools, prompts, and both reads stay served. - 4. resources/updated(X) -> only the X read refetches; Y and every list stay - served. - - Runs on a legacy session (the in-process transport that delivers standalone - notifications) with `default_ttl_ms` providing the cached entries. - """ + """Spec SHOULD (notifications invalidate) plus its negative space: each notification + refetches exactly its own entries — resources/list_changed also covers templates.""" uri_x, uri_y = "memo://x", "memo://y" fetched: list[str] = [] @@ -1268,7 +1139,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara eviction_done = [anyio.Event() for _ in range(3)] async def on_message(message: IncomingMessage) -> None: - # The wrap evicts before delegating, so each event implies its eviction completed. + # The wrap evicts before delegating: each event implies its eviction completed. delivered.append(message) eviction_done[len(delivered) - 1].set() @@ -1325,10 +1196,7 @@ async def served_round() -> list[str]: async def test_private_entries_never_cross_partitions_between_clients_sharing_a_store() -> None: - """Spec MUST (`"private"` never crosses authorization contexts), end to end: two - clients sharing one store and server identity but holding different partitions - each fetch their own listing - the second client is never served the first's - private-scoped entry.""" + """Spec MUST: "private" never crosses authorization contexts.""" server, fetches = _varying_tools_server() store = InMemoryResponseCacheStore() @@ -1344,10 +1212,7 @@ def config(partition: str) -> CacheConfig: async def test_a_server_stamped_public_entry_does_not_cross_partitions_by_default() -> None: - """SDK security default (deviates from the ts SDK), end to end: even when the - server stamps `cacheScope: "public"`, the default config keys the public arm by - partition - a same-partition client is served from the store, a different- - partition client fetches its own listing.""" + """SDK security default (deviates from the ts SDK): the public arm is still keyed by partition.""" server, fetches = _varying_tools_server(scope="public") store = InMemoryResponseCacheStore() @@ -1365,9 +1230,7 @@ def config(partition: str) -> CacheConfig: async def test_share_public_serves_a_server_stamped_public_entry_across_partitions() -> None: - """SDK-defined opt-in, end to end: with `share_public=True` the public arm drops - the partition, so the second tenant's first list_tools is served from the first - tenant's server-asserted-public entry without a fetch.""" + """With `share_public=True` the public arm drops the partition.""" server, fetches = _varying_tools_server(scope="public") store = InMemoryResponseCacheStore() @@ -1383,11 +1246,6 @@ def config(partition: str) -> CacheConfig: async def test_same_partition_clients_share_read_entries_through_the_store() -> None: - """SDK-defined sharing, end to end: two clients with the same store, server - identity, and partition share `resources/read` entries - the second client's - first read is served from the store without invoking the handler. (The - tools/list case, including its absorbed derived state, is pinned by the - shared-store absorption tests above.)""" server, reads = _versioned_read_server() store = InMemoryResponseCacheStore() @@ -1403,10 +1261,8 @@ def config() -> CacheConfig: async def test_mutating_returned_results_never_corrupts_the_cached_entry() -> None: - """SDK-defined deep-copy isolation, both directions, end to end: mutating the - result a verb returned (the very object the write deep-copied from) and mutating - a served hit (the object the read deep-copied out) both leave the stored entry - untouched - every later call serves the pristine listing from the single fetch.""" + """Deep-copy isolation in both directions: write-side (the fetched result) and + serve-side (the served hit).""" server, fetches = _varying_tools_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -1421,12 +1277,8 @@ async def test_mutating_returned_results_never_corrupts_the_cached_entry() -> No async def test_a_legacy_peer_injecting_cache_hints_caches_nothing() -> None: - """SDK-defined era gate, end to end: `ttlMs`/`cacheScope` are 2026-07-28 - assertions, but a 2025 peer can still put the keys on the wire. On a legacy - session with the default config nothing is cached - the second list_tools - reaches the peer and the store stays empty on both arms. The peer is scripted - over raw streams because an SDK server strips the hint fields when serializing - for a 2025 session, so the injection is not expressible through the server API.""" + """Era gate: hint keys a 2025 peer puts on the wire cache nothing. Scripted peer: + an SDK server strips the hint fields when serializing for a 2025 session.""" listings_served = 0 async def scripted_server(streams: MessageStream) -> None: @@ -1473,8 +1325,7 @@ async def scripted_transport() -> AsyncIterator[TransportStreams]: class _CancelOnSetStore(InMemoryResponseCacheStore): - """Store whose next `set` awaits a one-shot hook before committing, modelling an - async store whose commit a cancellation interrupts.""" + """Store whose next `set` awaits a one-shot hook before committing.""" def __init__(self) -> None: super().__init__() @@ -1488,17 +1339,8 @@ async def set(self, key: CacheKey, entry: CacheEntry) -> None: async def test_a_verb_cancelled_mid_write_leaves_no_stale_arm_pair() -> None: - """SDK-defined no-stale-pair invariant, end to end: a verb call cancelled while - its cache write is mid-set (after the opposite-arm delete) leaves at most one - entry for the key - here zero - so the superseded entry cannot be served. - - Steps: - 1. The first list_tools stores a public-scoped entry. - 2. A refresh call fetches a private-scoped result; its write deletes the - public arm first, then the store's `set` is cancelled before committing. - 3. Both arms are empty - never two entries answering for one key - and the - next call refetches. - """ + """No-stale-pair invariant: a cancellation between the opposite-arm delete and the + `set` commit leaves at most one entry per key, so the superseded entry cannot be served.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -1525,8 +1367,7 @@ async def cancel_mid_commit() -> None: await client.list_tools(cache_mode="refresh") assert scope.cancelled_caught - # The write deleted the opposite (public) arm before the cancelled set could - # commit: zero entries, and in particular not the stale pre-refresh one. + # The opposite (public) arm was deleted before the cancelled set could commit. assert store._entries == {} assert _tool_names(await client.list_tools()) == ["t2"] # nothing cached: refetched @@ -1534,13 +1375,9 @@ async def cancel_mid_commit() -> None: async def test_an_eviction_landing_mid_fetch_discards_that_fetchs_write() -> None: - """Spec-aligned race rule, end to end: a tools/list_changed notification that - arrives while the tools/list fetch it concerns is still in flight discards that - fetch's cache write - the store is empty after the call returns and the next - list_tools refetches (and then caches normally). The server emits the - notification mid-fetch and waits for the client-side eviction before responding - (the handler wrap delegates only after evicting), so the interleaving is - deterministic, not scheduler-dependent.""" + """Spec-aligned race rule: an eviction landing mid-fetch discards that fetch's write. + The server waits for the client-side eviction before responding, so the interleaving + is deterministic, not scheduler-dependent.""" fetches: list[str | None] = [] evicted = anyio.Event() @@ -1566,9 +1403,8 @@ async def on_message(message: IncomingMessage) -> None: async with client: assert _tool_names(await client.list_tools()) == ["t0"] - # Empty proves the write was SKIPPED, not stored-then-evicted: the eviction - # completed strictly before the response (the handler waited for it) and the - # write runs strictly after - had it landed, the entry would still be here. + # Empty proves the write was skipped, not stored-then-evicted: the eviction + # completed strictly before the response, the write strictly after. store = _coordinator(client)._store assert isinstance(store, InMemoryResponseCacheStore) assert store._entries == {} @@ -1579,10 +1415,6 @@ async def on_message(message: IncomingMessage) -> None: async def test_read_resource_bypass_neither_serves_nor_disturbs_a_warm_entry() -> None: - """`cache_mode="bypass"` on `read_resource` fetches fresh without reading the - warm entry and without storing over it - the following plain read still serves - the original value. SDK-defined mode semantics (the list-verb counterpart is - pinned above).""" server, reads = _versioned_read_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -1594,8 +1426,6 @@ async def test_read_resource_bypass_neither_serves_nor_disturbs_a_warm_entry() - async def test_read_resource_refresh_refetches_and_restores() -> None: - """`cache_mode="refresh"` on `read_resource` skips the warm entry, fetches, and - re-stores: the following plain read serves the refreshed value.""" server, reads = _versioned_read_server() async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: @@ -1607,10 +1437,7 @@ async def test_read_resource_refresh_refetches_and_restores() -> None: async def test_a_closed_client_raises_on_every_cacheable_verb_instead_of_serving_the_cache() -> None: - """SDK-defined: cache participation requires a live session. After the client - exits its context, each of the five cacheable verbs raises the same no-context - RuntimeError it raised before the cache existed - the still-warm entries are - never served, and nothing reaches the server.""" + """Cache participation requires a live session.""" fetched: list[str] = [] async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: @@ -1661,7 +1488,7 @@ async def read(ctx: ServerRequestContext, params: types.ReadResourceRequestParam await client.list_resources() await client.list_resource_templates() await client.read_resource("memo://a") - # The entries are warm: a repeat round is served entirely from the cache. + # A repeat round is served entirely from the warm entries. await client.list_tools() await client.read_resource("memo://a") assert len(fetched) == 5 diff --git a/tests/client/test_session.py b/tests/client/test_session.py index b6ddb40a9..f76991f65 100644 --- a/tests/client/test_session.py +++ b/tests/client/test_session.py @@ -1666,8 +1666,6 @@ async def test_discover_reraises_unsupported_version_with_malformed_error_data() @pytest.mark.anyio async def test_a_positive_inbound_ttl_reaches_the_result_unchanged() -> None: - """SDK-defined: the inbound clamp only floors negative `ttlMs` values — a valid - positive hint passes through to the typed result untouched.""" listing: dict[str, Any] = {"resultType": "complete", "tools": [], "ttlMs": 60_000, "cacheScope": "private"} dispatcher = _ScriptedDispatcher(_discover_result_dict(), listing) with anyio.fail_after(5): @@ -1680,9 +1678,7 @@ async def test_a_positive_inbound_ttl_reaches_the_result_unchanged() -> None: @pytest.mark.anyio @pytest.mark.parametrize("wire_ttl", [True, False]) async def test_a_boolean_inbound_ttl_is_not_clamped_only_coerced_by_validation(wire_ttl: bool) -> None: - """SDK-defined: `bool` is an `int` subclass but the clamp does not treat it as a - number — the value reaches validation untouched, where pydantic's lax mode - coerces it (True -> 1, False -> 0) rather than rejecting it.""" + """SDK-defined: `bool` is an `int` subclass; the clamp skips it and pydantic's lax mode coerces it instead.""" listing: dict[str, Any] = {"resultType": "complete", "tools": [], "ttlMs": wire_ttl, "cacheScope": "private"} dispatcher = _ScriptedDispatcher(_discover_result_dict(), listing) with anyio.fail_after(5): diff --git a/tests/docs_src/test_caching.py b/tests/docs_src/test_caching.py index db9d0a7dd..58014879c 100644 --- a/tests/docs_src/test_caching.py +++ b/tests/docs_src/test_caching.py @@ -64,17 +64,14 @@ async def test_the_handler_value_wins_over_the_map_per_field() -> None: async def test_the_client_program_on_the_page_makes_three_fetches_for_four_calls( capsys: pytest.CaptureFixture[str], ) -> None: - """tutorial003: `main()` is the literal client program on the page - the second - call is served from the cache, the clock advance expires the entry, and - `cache_mode="refresh"` skips the read, so four calls cost three fetches.""" + """tutorial003: a cache hit, an expiry, and `cache_mode="refresh"` make four calls cost three fetches.""" await tutorial003.main() assert capsys.readouterr().out == "4 calls, 3 fetches\n" def _counting_tools_server(*, ttl_ms: int | None = 60_000) -> tuple[Server[Any], list[str | None]]: - """In-process server whose every tools/list fetch returns a distinct tool name - `t`, so a served cache entry is distinguishable from a refetch by payload. - `ttl_ms=None` sends no hints at all.""" + """Each tools/list fetch returns a distinct tool name, so a cache hit is + payload-distinguishable from a refetch; `ttl_ms=None` sends no hints.""" fetches: list[str | None] = [] async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: @@ -88,8 +85,6 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_caching_is_on_by_default_the_second_call_makes_no_fetch() -> None: - """The page's claim: with no `cache=` argument at all, a result carrying a `ttlMs` - hint is stored and the identical call within the TTL never reaches the server.""" server, fetches = _counting_tools_server() async with Client(server) as client: first = await client.list_tools() @@ -99,8 +94,7 @@ async def test_caching_is_on_by_default_the_second_call_makes_no_fetch() -> None async def test_a_hintless_result_is_not_cached_by_default() -> None: - """The page's claim: `default_ttl_ms` defaults to 0, so a server that declares - nothing sees exactly the call-for-call traffic it always did.""" + """`default_ttl_ms` defaults to 0, so a hintless server sees its usual call-for-call traffic.""" server, fetches = _counting_tools_server(ttl_ms=None) async with Client(server) as client: await client.list_tools() @@ -109,8 +103,6 @@ async def test_a_hintless_result_is_not_cached_by_default() -> None: async def test_cache_false_makes_every_call_a_round_trip() -> None: - """The page's claim: `cache=False` disables caching entirely - two calls are two - fetches even though the server's hint allowed a minute of reuse.""" server, fetches = _counting_tools_server() async with Client(server, cache=False) as client: await client.list_tools() @@ -119,8 +111,6 @@ async def test_cache_false_makes_every_call_a_round_trip() -> None: async def test_refresh_refetches_and_replaces_the_cached_entry() -> None: - """The page's claim: `cache_mode="refresh"` never serves - it fetches and stores - the result, which the next plain call is then served from.""" server, fetches = _counting_tools_server() async with Client(server) as client: await client.list_tools() @@ -132,8 +122,6 @@ async def test_refresh_refetches_and_replaces_the_cached_entry() -> None: async def test_bypass_fetches_without_reading_or_writing_the_cache() -> None: - """The page's claim: `cache_mode="bypass"` makes the round trip without touching - the cache - it neither serves the warm entry nor replaces it.""" server, fetches = _counting_tools_server() async with Client(server) as client: first = await client.list_tools() @@ -145,9 +133,7 @@ async def test_bypass_fetches_without_reading_or_writing_the_cache() -> None: async def test_an_expired_entry_is_not_revived_when_the_refetch_fails() -> None: - """The page's claim (SDK ruling, no stale-if-error): once the entry has expired, - a failing refetch propagates the server's error instead of serving the expired - entry.""" + """SDK ruling: no stale-if-error - the refetch failure propagates.""" now = 1_000_000.0 fetches: list[None] = [] @@ -160,7 +146,7 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar server = Server("flaky", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}) async with Client(server, cache=CacheConfig(clock=lambda: now)) as client: await client.list_tools() - now += 60.0 # the entry is now expired, so the next call must refetch + now += 60.0 # past the 60s TTL with pytest.raises(MCPError) as exc: await client.list_tools() assert exc.value.code == INTERNAL_ERROR @@ -168,10 +154,8 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_two_concurrent_identical_calls_are_two_fetches() -> None: - """The page's claim (SDK ruling, no coalescing): a second identical call issued - while the first fetch is still in flight makes its own fetch instead of waiting - on the first. The handler barrier releases only once both calls are inside it, - so the test passes only if the two fetches were genuinely concurrent.""" + """SDK ruling: no coalescing. The handler barrier releases only once both + calls are inside it, so the test passes only if the fetches were concurrent.""" both_fetching = anyio.Event() fetches: list[None] = [] @@ -192,8 +176,7 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_a_session_tier_call_always_makes_the_round_trip() -> None: - """The page's claim: the cache lives on the `Client` verbs - `client.session` - calls bypass it even when a fresh entry is sitting in the store.""" + """The cache lives on the `Client` verbs; `client.session` sits below it.""" server, fetches = _counting_tools_server() async with Client(server) as client: await client.list_tools() @@ -202,16 +185,12 @@ async def test_a_session_tier_call_always_makes_the_round_trip() -> None: async def test_a_custom_store_requires_a_partition() -> None: - """The page's claim: passing your own store without a `partition` raises at - construction.""" with pytest.raises(ValueError) as exc: CacheConfig(store=InMemoryResponseCacheStore()) assert str(exc.value) == snapshot("a custom store requires an explicit partition") async def test_a_custom_store_with_an_in_process_server_requires_target_id() -> None: - """The page's claim: with no URL to derive a server identity from, a custom store - needs `CacheConfig.target_id` - and construction says so.""" server, _ = _counting_tools_server() with pytest.raises(ValueError) as exc: Client(server, cache=CacheConfig(store=InMemoryResponseCacheStore(), partition="user-1")) diff --git a/tests/server/test_caching.py b/tests/server/test_caching.py index 6552b0b26..abfcfba97 100644 --- a/tests/server/test_caching.py +++ b/tests/server/test_caching.py @@ -84,18 +84,14 @@ def test_a_non_cache_hint_value_is_rejected_at_server_construction() -> None: def test_a_non_string_cache_hints_key_is_rejected_with_the_unknown_key_error() -> None: - """SDK-defined: `cache_hints` is deliberately loose for config-shaped callers, - so a non-string key takes the same unknown-key ValueError as a typo - not a - TypeError from formatting the message.""" + """A non-string key takes the same unknown-key ValueError as a typo, not a TypeError from message formatting.""" with pytest.raises(ValueError) as exc: Server("srv", cache_hints=cast(Any, {42: CacheHint()})) assert str(exc.value) == snapshot("cache_hints keys must be cacheable methods (see CacheableMethod); got: 42") async def test_a_dict_returning_handler_takes_the_configured_hint() -> None: - """SDK-defined: the construction-time hint also stamps a handler that returns - a raw dict for a cacheable method, so the 2026-07-28 surface (where both - fields are required) accepts it and the wire carries the hint's values.""" + """The stamp covers raw-dict results too - 2026-07-28 requires both fields on the wire.""" hint = CacheHint(ttl_ms=60_000, scope="public") async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: @@ -110,9 +106,7 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_a_dict_provided_ttl_wins_and_the_hint_fills_only_the_missing_scope() -> None: - """SDK-defined precedence, dict path: wire keys the handler put in the dict - win, mirroring `model_fields_set` semantics on the model path - the hint - fills only the absent `cacheScope`.""" + """Dict path mirrors the model path's `model_fields_set` precedence: present wire keys win.""" async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: return {"tools": [], "resultType": "complete", "ttlMs": 25} @@ -126,9 +120,7 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_a_dict_returning_handler_leaks_no_hint_fields_to_a_2025_session() -> None: - """SDK-defined era gate: the stamp runs version-independently, but the 2025 - serialize sieve still strips `ttlMs`/`cacheScope` from a dict result - the - client model parses them as unset, not as wire values.""" + """The stamp runs version-independently; the 2025 serialize sieve strips the fields.""" async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams) -> dict[str, Any]: return {"tools": []} @@ -142,10 +134,7 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def test_an_input_required_shaped_dict_is_never_stamped() -> None: - """Spec-mandated MRTR carve-out: an interim `input_required` result carries no - cache hints even on a hinted cacheable method. The runner's stamp skips a - dict declaring that shape (and the serialize surface would drop stray hint - keys regardless), so the full dump is exactly what the handler returned.""" + """Spec carve-out: interim `input_required` results carry no cache hints, even on a hinted method.""" async def read_resource(ctx: ServerRequestContext[Any], params: ReadResourceRequestParams) -> dict[str, Any]: return {"resultType": "input_required", "requestState": "s1"} diff --git a/tests/types/test_methods.py b/tests/types/test_methods.py index 237578e52..342720c32 100644 --- a/tests/types/test_methods.py +++ b/tests/types/test_methods.py @@ -549,10 +549,7 @@ def test_built_in_maps_are_immutable(): def test_cacheable_methods_mirror_the_cacheable_method_literal(): - """Spec-mandated set (SEP-2549): the hand-written `CacheableMethod` Literal and - `CACHEABLE_METHODS` (derived from which `MONOLITH_RESULTS` rows have a - `CacheableResult` arm) name the same methods - if the schema gains or loses a - cacheable result, this weld breaks.""" + """SEP-2549 weld: the hand-written Literal and the set derived from `MONOLITH_RESULTS` must agree.""" assert methods.CACHEABLE_METHODS == frozenset(get_args(methods.CacheableMethod)) From 804043bfd7c56b434a5aff93a80e59f466c1840e Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 18:02:55 +0000 Subject: [PATCH 14/18] Address review feedback: drop tutorial globals, plain-ASCII docs prose, remove section header --- docs/advanced/caching.md | 68 ++++++++++++++--------------- docs/migration.md | 2 +- docs_src/caching/tutorial003.py | 26 ++++++----- src/mcp-types/mcp_types/methods.py | 2 - tests/client/test_client_caching.py | 16 +++---- 5 files changed, 58 insertions(+), 56 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index e797e72f5..27b04f186 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -4,56 +4,56 @@ Every result a server returns for `tools/list`, `prompts/list`, `resources/list` The server doesn't cache anything. The fields are a *declaration*: "this tool list is the same for everyone and won't change for a minute." A client (or a gateway in front of you) may then skip the round trip. Honoring the hints is the client's choice; emitting them is the server's job, and the SDK does it for you. -Out of the box every result says `ttlMs: 0, cacheScope: "private"` — immediately stale, never shared. That is always safe and always conformant. If your lists really are stable and identical for all callers, say so at construction: +Out of the box every result says `ttlMs: 0, cacheScope: "private"`: immediately stale, never shared. That is always safe and always conformant. If your lists really are stable and identical for all callers, say so at construction: ```python title="server.py" hl_lines="5-8" --8<-- "docs_src/caching/tutorial001.py" ``` -* The map is keyed by **method name** — the six cacheable methods are the only legal keys. The parameter is typed `Mapping[CacheableMethod, CacheHint]`, so your editor autocompletes the keys and flags a typo before you run; anything that slips past the type checker raises at construction. +* The map is keyed by **method name**, and the six cacheable methods are the only legal keys. The parameter is typed `Mapping[CacheableMethod, CacheHint]`, so your editor autocompletes the keys and flags a typo before you run; anything that slips past the type checker raises at construction. * A method you don't mention keeps the defaults. The map is a set of overrides, not a manifest. * `CacheHint(ttl_ms=5_000)` left `scope` unset, so it stays `"private"`: five seconds of freshness, per caller. Scope and TTL are independent decisions. -* `"server/discover"` is a legal key too — the handshake result is cacheable like any list. +* `"server/discover"` is a legal key too, since the handshake result is cacheable like any list. !!! warning - `cacheScope: "public"` means *anyone* may be served your cached response — a shared + `cacheScope: "public"` means *anyone* may be served your cached response. A shared gateway will happily hand one user's result to another, even when the request was authenticated. Mark a result `"public"` only when it is identical for every caller, and never use `cacheScope` as access control: it is a label, not a lock. ## Per-handler override -On the low-level `Server`, handlers build their results by hand — and `ttl_ms` / `cache_scope` are just fields on the result models. A handler that sets them explicitly always wins over the constructor map, field by field: +On the low-level `Server`, handlers build their results by hand, and `ttl_ms` / `cache_scope` are just fields on the result models. A handler that sets them explicitly always wins over the constructor map, field by field: ```python title="server.py" hl_lines="11 17" --8<-- "docs_src/caching/tutorial002.py" ``` -The handler said `ttl_ms=1_000` and nothing about scope. On the wire: `ttlMs: 1000` (the handler's, not the map's `60_000`) and `cacheScope: "public"` (the map's — the handler left it unset). Explicit beats configured, configured beats default — per field, so a handler can pin one field and leave the other to the server-wide policy. +The handler said `ttl_ms=1_000` and nothing about scope. On the wire: `ttlMs: 1000` (the handler's, not the map's `60_000`) and `cacheScope: "public"` (the map's, because the handler left it unset). Explicit beats configured, and configured beats default. This holds per field, so a handler can pin one field and leave the other to the server-wide policy. This is also the escape hatch for dynamics the constructor can't know: a handler that filters `resources/read` per user can return `cache_scope="private"` for one URI from an otherwise-public server. -One caveat on paginated lists: the protocol requires the **same `cacheScope` on every page** of one list. The constructor map satisfies that by construction — it's keyed by method, not by page. But a handler that overrides the scope itself owns that consistency: override it on *every* page, never only when a cursor is present, or page one and page two will disagree. +One caveat on paginated lists: the protocol requires the **same `cacheScope` on every page** of one list. The constructor map satisfies that by construction, since it's keyed by method, not by page. But a handler that overrides the scope itself owns that consistency: override it on *every* page, never only when a cursor is present, or page one and page two will disagree. ## What the client sees -On a 2026-07-28 session, `Client` honors the hints for you: it has a built-in response cache, on by default. A result that arrives carrying a `ttlMs` is stored, and an identical call within that TTL is served from the cache — no round trip. A result that carries *no* hint is not cached: hint-less results get `CacheConfig.default_ttl_ms`, which defaults to `0` (immediately stale), so a server that declares nothing sees exactly the call-for-call traffic it always did. +On a 2026-07-28 session, `Client` honors the hints for you: it has a built-in response cache, on by default. A result that arrives carrying a `ttlMs` is stored, and an identical call within that TTL is served from the cache with no round trip. A result that carries *no* hint is not cached: hint-less results get `CacheConfig.default_ttl_ms`, which defaults to `0` (immediately stale), so a server that declares nothing sees exactly the call-for-call traffic it always did. -```python title="client.py" hl_lines="28 30 33" +```python title="client.py" hl_lines="32 34 37" --8<-- "docs_src/caching/tutorial003.py" ``` -Four calls, three fetches. The second call found a fresh entry and never reached the server; advancing the (injected) clock past the TTL made the third fetch again; the fourth said `cache_mode="refresh"`. That kwarg exists on the five caching verbs — `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, `read_resource`: +Four calls, three fetches. The second call found a fresh entry and never reached the server; advancing the (injected) clock past the TTL made the third fetch again; the fourth said `cache_mode="refresh"`. That kwarg exists on the five caching verbs (`list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, `read_resource`): * `"use"` (the default) serves a fresh entry if there is one, and stores the fetch if not. * `"refresh"` never serves: it fetches and stores the result, replacing whatever was cached. -* `"bypass"` makes the round trip without touching the cache at all — no read, no write. +* `"bypass"` makes the round trip without touching the cache at all: no read, no write. -One rule sits above `"use"`: **calls carrying `meta` always reach the server.** A request with `meta` set (a progress token, tracing fields) expects a wire request, so under `cache_mode="use"` it is treated as `"refresh"` — the cache read is skipped, and the fetched result still replaces the cached entry. `"bypass"` and an explicit `"refresh"` behave as they always do. +One rule sits above `"use"`: **calls carrying `meta` always reach the server.** A request with `meta` set (a progress token, tracing fields) expects a wire request, so under `cache_mode="use"` it is treated as `"refresh"`: the cache read is skipped, and the fetched result still replaces the cached entry. `"bypass"` and an explicit `"refresh"` behave as they always do. To turn caching off entirely, construct with `Client(server, cache=False)`: every call is a round trip again, and `cache_mode`, while still accepted, does nothing. -Scope is honored automatically too — `"private"` entries are keyed to the cache's *partition* (below); `"public"` ones may opt into wider sharing — and **notifications beat TTL** for the exact entries they name: a `list_changed` notification evicts the matching cached listing, and `resources/updated` evicts the cached read stored under exactly its URI, however fresh they were. +Scope is honored automatically too: `"private"` entries are keyed to the cache's *partition* (below), while `"public"` ones may opt into wider sharing. And **notifications beat TTL** for the exact entries they name: a `list_changed` notification evicts the matching cached listing, and `resources/updated` evicts the cached read stored under exactly its URI, however fresh they were. One caveat on `resources/updated`: eviction is exact-URI only. The store contract has no enumerate or scan operation (same as the reference TypeScript implementation), so a notification carrying a *sub*-resource URI does not evict a cached read of its parent. If your server signals sub-resources this way, refetch the parent with `cache_mode="refresh"`. @@ -65,51 +65,51 @@ from mcp.client import CacheConfig client = Client("https://api.example.com/mcp", cache=CacheConfig(default_ttl_ms=5_000)) ``` -* `store` — where entries live. The default is a fresh in-memory store per client; pass your own `ResponseCacheStore` implementation (Redis-backed, say) to share a cache across clients or processes — the contract types (`ResponseCacheStore`, `CacheKey`, `CacheEntry`, and the default `InMemoryResponseCacheStore`) are importable from `mcp.client`. A lookup may issue up to two sequential store `get`s (the private arm, then the public one), so size a remote store's latency expectations accordingly. A custom store **requires** an explicit `partition`. -* `partition` — the authorization-context label that keeps one principal's `"private"` entries from being served to another within a shared store. -* `target_id` — explicit server identity, for custom transports and in-process servers (below). -* `default_ttl_ms` — TTL applied to results that carry no `ttlMs` hint. The default `0` leaves hint-less results uncached. -* `share_public` — serve server-asserted-`"public"` entries across partitions (below). Off by default. -* `clock` — the wall-clock source, epoch seconds. Inject one, as the example above does, and expiry tests need no sleeping. +* `store`: where entries live. The default is a fresh in-memory store per client; pass your own `ResponseCacheStore` implementation (Redis-backed, say) to share a cache across clients or processes. The contract types (`ResponseCacheStore`, `CacheKey`, `CacheEntry`, and the default `InMemoryResponseCacheStore`) are importable from `mcp.client`. A lookup may issue up to two sequential store `get`s (the private arm, then the public one), so size a remote store's latency expectations accordingly. A custom store **requires** an explicit `partition`. +* `partition`: the authorization-context label that keeps one principal's `"private"` entries from being served to another within a shared store. +* `target_id`: explicit server identity, for custom transports and in-process servers (below). +* `default_ttl_ms`: TTL applied to results that carry no `ttlMs` hint. The default `0` leaves hint-less results uncached. +* `share_public`: serve server-asserted-`"public"` entries across partitions (below). Off by default. +* `clock`: the wall-clock source, in epoch seconds. Inject one, as the example above does, and expiry tests need no sleeping. !!! warning "Partition = verified principal" - Derive `partition` from a **verified credential** — a validated token's subject, for example. Never from request-supplied data, and never from the server URL (server identity is a separate key axis). The SDK is a library with no authentication of its own: whoever constructs the `CacheConfig` — the deployment, not the tenant — is the trust anchor. A multi-tenant gateway mints one `CacheConfig` per authenticated principal. + Derive `partition` from a **verified credential**, such as a validated token's subject. Never derive it from request-supplied data, and never from the server URL (server identity is a separate key axis). The SDK is a library with no authentication of its own: the trust anchor is whoever constructs the `CacheConfig`, which is the deployment, not the tenant. A multi-tenant gateway mints one `CacheConfig` per authenticated principal. - The partition is also fixed for the `Client`'s lifetime. If the connection's authorization context changes mid-session — a re-authentication as a different principal, say — the cache does not follow; construct a new `Client` for the new principal. + The partition is also fixed for the `Client`'s lifetime. If the connection's authorization context changes mid-session (a re-authentication as a different principal, say), the cache does not follow; construct a new `Client` for the new principal. -Cache keys also carry the **server's identity**: the URL string you dialed, with any `user:pass@` userinfo stripped and otherwise byte-exact. No case folding, no query reordering, no trailing-slash cleanup — under-normalizing only costs sharing, while over-normalizing could merge two tenants (`?tenant=a` vs `?tenant=b`), so superficially different URLs simply don't share entries. When there is no URL — an in-process server, or a `Transport` instance — the client gets a random per-instance identity instead; set `CacheConfig.target_id` to name the server (with a custom store this is required, and construction says so). The identity is sha256-hashed before it enters key material, so a URL carrying secrets in its query string never appears in store keys — don't log the pre-hash form yourself, either. +Cache keys also carry the **server's identity**: the URL string you dialed, with any `user:pass@` userinfo stripped and otherwise byte-exact. No case folding, no query reordering, no trailing-slash cleanup. Under-normalizing only costs sharing, while over-normalizing could merge two tenants (`?tenant=a` vs `?tenant=b`), so superficially different URLs simply don't share entries. When there is no URL (an in-process server, or a `Transport` instance), the client gets a random per-instance identity instead; set `CacheConfig.target_id` to name the server (with a custom store this is required, and construction says so). The identity is sha256-hashed before it enters key material, so a URL carrying secrets in its query string never appears in store keys. Don't log the pre-hash form yourself, either. !!! warning "`share_public` trusts the server, fleet-wide" - By default even `"public"` entries stay within their partition. `share_public=True` serves entries the server marked `cacheScope: "public"` to **every** partition using the store — trusting the server's classification on behalf of all of them. A server that stamps `"public"` on per-tenant data (by bug or by malice) then leaks one tenant's response to the others. The flag is deliberately constructor-level only: the per-call `cache_mode` can narrow caching, but nothing per-call can widen sharing. + By default even `"public"` entries stay within their partition. `share_public=True` serves entries the server marked `cacheScope: "public"` to **every** partition using the store, trusting the server's classification on behalf of all of them. A server that stamps `"public"` on per-tenant data (by bug or by malice) then leaks one tenant's response to the others. The flag is deliberately constructor-level only: the per-call `cache_mode` can narrow caching, but nothing per-call can widen sharing. ### What the cache never does * **Session-tier calls bypass it.** `client.session.list_tools()` and friends always make the round trip; the cache lives on the `Client` verbs. -* **`server/discover` stays out of it.** The discover result is delivered once, at connect, and never enters the response cache — even when it carries a `ttlMs`. If you persist one yourself to skip the reconnect probe ([`prior_discover`](../client/protocol-versions.md#reconnecting-with-prior_discover)), its freshness is your bookkeeping: `DiscoverResult` carries `ttl_ms` and `cache_scope`, already parsed, for exactly that purpose. -* **Continuation pages are never cached.** Only cursor-less calls participate. A continuation page rejected for an expired cursor does *evict* the cached listing — the listing changed under it. +* **`server/discover` stays out of it.** The discover result is delivered once, at connect, and never enters the response cache, even when it carries a `ttlMs`. If you persist one yourself to skip the reconnect probe ([`prior_discover`](../client/protocol-versions.md#reconnecting-with-prior_discover)), its freshness is your bookkeeping: `DiscoverResult` carries `ttl_ms` and `cache_scope`, already parsed, for exactly that purpose. +* **Continuation pages are never cached.** Only cursor-less calls participate. A continuation page rejected for an expired cursor does *evict* the cached listing, because the listing changed under it. * **Multi-round-trip reads are never cached.** A `read_resource` seeded with `input_responses`/`request_state`, or one that resolves through input rounds, never enters the cache (a spec MUST). -* **Notification eviction needs notifications.** Eviction is only as good as the transport's delivery — the modern in-process path (`Client(server)` with the default `mode="auto"`) does not deliver standalone notifications today. +* **Notification eviction needs notifications.** Eviction is only as good as the transport's delivery, and the modern in-process path (`Client(server)` with the default `mode="auto"`) does not deliver standalone notifications today. * **Eviction is eventual, not instantaneous.** Wire-path notifications are dispatched from spawned tasks, so a call racing a notification's arrival may be served the pre-eviction entry once more; the window is bounded by dispatch latency, and the eviction still lands. * **No stale-if-error.** An expired entry is never served because the refetch failed; the error propagates. -* **No early re-fetch.** A stored entry is served until its TTL expires and the next call after that pays the round trip — nothing refreshes in the background. +* **No early re-fetch.** A stored entry is served until its TTL expires and the next call after that pays the round trip; nothing refreshes in the background. * **No coalescing.** Two concurrent identical calls are two fetches. -* **No TTL beyond 24 hours.** A larger `ttlMs` — server-sent or configured — is clamped down on store (`mcp.client.caching.MAX_TTL_MS`), bounding how long any entry, however generously hinted, can be served. -* On a **shared store**, clients race each other. Each client drops its own write when an eviction overtook the fetch in flight, but a *co-tenant* client can still write back an entry that an eviction it never saw had removed; and that race bookkeeping is itself bounded — past 4096 tracked keys the oldest key's guard is dropped first. Both windows are accepted, and closed by the TTL cap above. -* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction — accepted, and likewise bounded by the TTL cap. +* **No TTL beyond 24 hours.** A larger `ttlMs`, whether server-sent or configured, is clamped down on store (`mcp.client.caching.MAX_TTL_MS`), bounding how long any entry, however generously hinted, can be served. +* On a **shared store**, clients race each other. Each client drops its own write when an eviction overtook the fetch in flight, but a *co-tenant* client can still write back an entry that an eviction it never saw had removed; and that race bookkeeping is itself bounded: past 4096 tracked keys the oldest key's guard is dropped first. Both windows are accepted, and closed by the TTL cap above. +* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction. Accepted, and likewise bounded by the TTL cap. ### Reading the hints yourself -The hints are also plain fields on every cacheable result — `result.ttl_ms` and `result.cache_scope`, already parsed — if you want to layer your own bookkeeping on top of (or instead of) the built-in cache. +The hints are also plain fields on every cacheable result (`result.ttl_ms` and `result.cache_scope`, already parsed), in case you want to layer your own bookkeeping on top of (or instead of) the built-in cache. -Against an **older server** (pre-2026 protocol), the fields are simply absent from the wire, and the models show their conservative defaults: `ttl_ms == 0`, `cache_scope == "private"` — stale and unshared, the right assumption for a server that declared nothing. The cache treats a legacy session the same way: hints are never consulted there (whatever keys appear on the wire), only `default_ttl_ms` applies, and its default of `0` caches nothing — a pre-2026 connection behaves exactly as it did before the cache existed. If you need to distinguish "the server said 0" from "the server said nothing", check `"ttl_ms" in result.model_fields_set`: it's only set when the field actually arrived. +Against an **older server** (pre-2026 protocol), the fields are simply absent from the wire, and the models show their conservative defaults: `ttl_ms == 0` and `cache_scope == "private"`, stale and unshared, the right assumption for a server that declared nothing. The cache treats a legacy session the same way: hints are never consulted there (whatever keys appear on the wire), only `default_ttl_ms` applies, and its default of `0` caches nothing, so a pre-2026 connection behaves exactly as it did before the cache existed. If you need to distinguish "the server said 0" from "the server said nothing", check `"ttl_ms" in result.model_fields_set`: it's only set when the field actually arrived. ## Older clients -Clients on pre-2026 protocol versions never see either field — the SDK strips them at serialization for those connections. Configure your hints once; there is nothing version-specific to write. +Clients on pre-2026 protocol versions never see either field; the SDK strips them at serialization for those connections. Configure your hints once; there is nothing version-specific to write. ## Recap -* Six methods carry `ttlMs`/`cacheScope`; the SDK defaults them to `0`/`"private"` — stale and unshared, always safe. +* Six methods carry `ttlMs`/`cacheScope`; the SDK defaults them to `0`/`"private"`, stale and unshared, always safe. * `cache_hints={method: CacheHint(...)}` at construction (both `MCPServer` and `Server`) sets server-wide values per method. * A handler that sets the fields on its result overrides the map, per field. * `"public"` is a promise that the result is identical for every caller. It is not access control. diff --git a/docs/migration.md b/docs/migration.md index c9908a401..b29852cb5 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -429,7 +429,7 @@ For protocol 2026-07-28 over Streamable HTTP, a tool's input-schema property may ### `Client` verbs may serve cached responses ([SEP-2549](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2549)) -On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to the cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. Servers that send no hints — including every pre-2026 server — see identical call-for-call behavior, because hint-less results are not cached. Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md). +On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to the cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. Servers that send no hints, including every pre-2026 server, see identical call-for-call behavior, because hint-less results are not cached. Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md). ### Server extensions API ([SEP-2133](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2133)) diff --git a/docs_src/caching/tutorial003.py b/docs_src/caching/tutorial003.py index 5a17dbcdb..830f3085e 100644 --- a/docs_src/caching/tutorial003.py +++ b/docs_src/caching/tutorial003.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Any from mcp_types import ListToolsResult, PaginatedRequestParams, Tool @@ -6,29 +7,32 @@ from mcp.client import CacheConfig from mcp.server import CacheHint, Server, ServerRequestContext -fetches = 0 -now = 1_000_000.0 +@dataclass +class Demo: + fetches: int = 0 + now: float = 1_000_000.0 -async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: - global fetches - fetches += 1 - return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})]) + async def list_tools( + self, ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None + ) -> ListToolsResult: + self.fetches += 1 + return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})]) +demo = Demo() server = Server( "Weather", - on_list_tools=list_tools, + on_list_tools=demo.list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}, ) async def main() -> None: - global now - async with Client(server, cache=CacheConfig(clock=lambda: now)) as client: + async with Client(server, cache=CacheConfig(clock=lambda: demo.now)) as client: await client.list_tools() # fetch 1 await client.list_tools() # fresh for 60s: served from the cache - now += 60.0 + demo.now += 60.0 await client.list_tools() # the TTL ran out: fetch 2 await client.list_tools(cache_mode="refresh") # skip the cache read: fetch 3 - print(f"4 calls, {fetches} fetches") + print(f"4 calls, {demo.fetches} fetches") diff --git a/src/mcp-types/mcp_types/methods.py b/src/mcp-types/mcp_types/methods.py index 985968b1d..f49c158d9 100644 --- a/src/mcp-types/mcp_types/methods.py +++ b/src/mcp-types/mcp_types/methods.py @@ -406,8 +406,6 @@ """Monolith result model (or two-arm union) per request method.""" -# --- Cacheable methods --- - CacheableMethod = Literal[ "prompts/list", "resources/list", diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index f302621cc..2627a841a 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -94,8 +94,8 @@ async def __aexit__( def _list_changed_server() -> Server[Any]: - """Server whose `touch` tool emits tools/list_changed; connect with `mode="legacy"` — - the modern in-process path drops standalone server notifications.""" + """Server whose `touch` tool emits tools/list_changed; connect with `mode="legacy"` + because the modern in-process path drops standalone server notifications.""" async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: return ListToolsResult(tools=[types.Tool(name="touch", input_schema={"type": "object"})]) @@ -142,7 +142,7 @@ def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: - """URL identity is byte-exact outside userinfo — over-normalization would merge tenants.""" + """URL identity is byte-exact outside userinfo; over-normalization would merge tenants.""" tenant_a = Client("https://example.com/mcp?tenant=a") tenant_b = Client("https://example.com/mcp?tenant=b") @@ -688,7 +688,7 @@ async def elicitation_callback( async def test_a_refresh_that_resolves_to_input_required_purges_the_warm_entry() -> None: """The refresh cannot store its driven terminal result (the rounds carry - `inputResponses` — spec MUST), but it still purges the warm entry.""" + `inputResponses`, a spec MUST), but it still purges the warm entry.""" reads = 0 ask = ElicitRequest( params=ElicitRequestFormParams( @@ -793,7 +793,7 @@ async def test_validation_from_a_served_listing_rejects_missing_structured_conte async def test_a_cache_hit_listing_still_mirrors_x_mcp_headers_on_tools_call() -> None: - """The arg→header maps are rebuilt from a served listing. Asserted at the wire + """The arg-to-header maps are rebuilt from a served listing. Asserted at the wire because the client never surfaces outgoing headers.""" tool = Tool( name="run", @@ -883,7 +883,7 @@ async def on_message(message: IncomingMessage) -> None: async def test_a_resource_updated_notification_evicts_that_uris_read_entry() -> None: - """Spec SHOULD: resources/updated invalidates the cached read for its uri — + """Spec SHOULD: resources/updated invalidates the cached read for its uri, and the notification's `params.uri` must match the stored key's uri form.""" uri = "memo://cached" reads: list[str] = [] @@ -955,7 +955,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara async def test_a_discover_result_never_enters_the_response_cache() -> None: - """SDK ruling (documented): the cache covers the five verbs only — a persisted + """SDK ruling (documented): the cache covers the five verbs only; a persisted `prior_discover`'s freshness is the user's bookkeeping.""" server = Server("hinted", cache_hints={"server/discover": CacheHint(ttl_ms=60_000)}) @@ -1085,7 +1085,7 @@ def _resource_text(result: ReadResourceResult) -> str: async def test_each_notification_evicts_exactly_its_entries_end_to_end() -> None: """Spec SHOULD (notifications invalidate) plus its negative space: each notification - refetches exactly its own entries — resources/list_changed also covers templates.""" + refetches exactly its own entries, and resources/list_changed also covers templates.""" uri_x, uri_y = "memo://x", "memo://y" fetched: list[str] = [] From fb1c510f5d94fac288d37af516acc98b01f30347 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 18:09:00 +0000 Subject: [PATCH 15/18] Keep the tutorial handler a plain function with a separate state holder --- docs/advanced/caching.md | 2 +- docs_src/caching/tutorial003.py | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index 27b04f186..75e4a89d6 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -39,7 +39,7 @@ One caveat on paginated lists: the protocol requires the **same `cacheScope` on On a 2026-07-28 session, `Client` honors the hints for you: it has a built-in response cache, on by default. A result that arrives carrying a `ttlMs` is stored, and an identical call within that TTL is served from the cache with no round trip. A result that carries *no* hint is not cached: hint-less results get `CacheConfig.default_ttl_ms`, which defaults to `0` (immediately stale), so a server that declares nothing sees exactly the call-for-call traffic it always did. -```python title="client.py" hl_lines="32 34 37" +```python title="client.py" hl_lines="33 35 38" --8<-- "docs_src/caching/tutorial003.py" ``` diff --git a/docs_src/caching/tutorial003.py b/docs_src/caching/tutorial003.py index 830f3085e..d50a89610 100644 --- a/docs_src/caching/tutorial003.py +++ b/docs_src/caching/tutorial003.py @@ -9,30 +9,31 @@ @dataclass -class Demo: +class DemoState: fetches: int = 0 now: float = 1_000_000.0 - async def list_tools( - self, ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None - ) -> ListToolsResult: - self.fetches += 1 - return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})]) + +state = DemoState() + + +async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult: + state.fetches += 1 + return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})]) -demo = Demo() server = Server( "Weather", - on_list_tools=demo.list_tools, + on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")}, ) async def main() -> None: - async with Client(server, cache=CacheConfig(clock=lambda: demo.now)) as client: + async with Client(server, cache=CacheConfig(clock=lambda: state.now)) as client: await client.list_tools() # fetch 1 await client.list_tools() # fresh for 60s: served from the cache - demo.now += 60.0 + state.now += 60.0 await client.list_tools() # the TTL ran out: fetch 2 await client.list_tools(cache_mode="refresh") # skip the cache read: fetch 3 - print(f"4 calls, {demo.fetches} fetches") + print(f"4 calls, {state.fetches} fetches") From ef7fdff17fb0ce19980e76179bc24f51e7299b91 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 18:59:49 +0000 Subject: [PATCH 16/18] Era-scope cache arms and harden store interaction paths --- docs/advanced/caching.md | 2 +- src/mcp/client/caching.py | 114 +++++++++----- tests/client/test_caching.py | 234 +++++++++++++++++++++------- tests/client/test_client_caching.py | 22 ++- 4 files changed, 274 insertions(+), 98 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index 75e4a89d6..8bdb5b3bd 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -95,7 +95,7 @@ Cache keys also carry the **server's identity**: the URL string you dialed, with * **No coalescing.** Two concurrent identical calls are two fetches. * **No TTL beyond 24 hours.** A larger `ttlMs`, whether server-sent or configured, is clamped down on store (`mcp.client.caching.MAX_TTL_MS`), bounding how long any entry, however generously hinted, can be served. * On a **shared store**, clients race each other. Each client drops its own write when an eviction overtook the fetch in flight, but a *co-tenant* client can still write back an entry that an eviction it never saw had removed; and that race bookkeeping is itself bounded: past 4096 tracked keys the oldest key's guard is dropped first. Both windows are accepted, and closed by the TTL cap above. -* On a **shared persistent store**, a session that negotiated a different protocol era than the entry's writer may be served the writer's entry until TTL or eviction. Accepted, and likewise bounded by the TTL cap. +* **No serving across protocol eras.** Entries are scoped to the negotiated protocol version: on a shared persistent store, a session never serves an entry written under a different negotiated version (the same listing genuinely differs by era, since the SDK strips the 2026 fields for older sessions). Eviction likewise touches only the current era's entries; another era's entries simply age out by TTL. ### Reading the hints yourself diff --git a/src/mcp/client/caching.py b/src/mcp/client/caching.py index 9d31851db..a464accd1 100644 --- a/src/mcp/client/caching.py +++ b/src/mcp/client/caching.py @@ -10,6 +10,7 @@ from typing import Any, Final, Literal, Protocol import anyio +import anyio.lowlevel from mcp_types import ( CacheableResult, PromptListChangedNotification, @@ -135,35 +136,32 @@ class InMemoryResponseCacheStore: """Default in-process `ResponseCacheStore`. Method bodies are synchronous, so concurrent tasks never observe a torn - write. Non-read methods form a small closed key set; `max_read_entries` - caps the `resources/read` keys, FIFO-evicting at the cap (`0` disables it). + write. `max_entries` caps the whole store, evicting least-recently-used + at the cap (`0` disables it); `get` and `set` both refresh recency, so a + hot entry survives churn from other keys. Raises: - ValueError: If `max_read_entries` is negative. + ValueError: If `max_entries` is negative. """ - def __init__(self, *, max_read_entries: int = 512) -> None: - if max_read_entries < 0: - raise ValueError(f"max_read_entries must be >= 0, got {max_read_entries}") - self._max_read_entries = max_read_entries + def __init__(self, *, max_entries: int = 1024) -> None: + if max_entries < 0: + raise ValueError(f"max_entries must be >= 0, got {max_entries}") + self._max_entries = max_entries self._entries: dict[CacheKey, CacheEntry] = {} async def get(self, key: CacheKey) -> CacheEntry | None: - return self._entries.get(key) + entry = self._entries.get(key) + if entry is not None: + # Pop-and-reinsert moves the key to the back: the dict's insertion order is the LRU ledger. + self._entries[key] = self._entries.pop(key) + return entry async def set(self, key: CacheKey, entry: CacheEntry) -> None: - if ( - self._max_read_entries - and key.method == "resources/read" - and key not in self._entries - # Total size below the cap implies the read subset is below it too - skip the scan. - and len(self._entries) >= self._max_read_entries - ): - # Insertion order (replacement keeps position) makes the dict itself the FIFO ledger. - read_keys = [k for k in self._entries if k.method == "resources/read"] - if len(read_keys) >= self._max_read_entries: - del self._entries[read_keys[0]] + self._entries.pop(key, None) self._entries[key] = entry + if self._max_entries and len(self._entries) > self._max_entries: + del self._entries[next(iter(self._entries))] async def delete(self, key: CacheKey) -> None: self._entries.pop(key, None) @@ -175,6 +173,10 @@ async def clear(self) -> None: _GENERATION_MAP_CAP: Final[int] = 4096 """Cap on the generation map; at the cap the oldest key's eviction-race guard is dropped (FIFO).""" +_STORE_CLEANUP_TIMEOUT: Final[float] = 5 +"""Bound for must-complete store cleanup deletes (mirrors the dispatcher's final-write bound); +a wedged store delete must not hold client teardown uncancellably.""" + class ClientResponseCache: """Coordinates the `Client` caching verbs with a `ResponseCacheStore`: keys, era gate, TTL/scope, eviction.""" @@ -190,27 +192,43 @@ def __init__( share_public: bool, negotiated_version: Callable[[], str | None], generation_map_cap: int = _GENERATION_MAP_CAP, + store_cleanup_timeout: float = _STORE_CLEANUP_TIMEOUT, ) -> None: self._store = store + self._partition = partition + self._arm_id = arm_id + self._share_public = share_public self._default_ttl_ms = default_ttl_ms self._clock = clock self._negotiated_version = negotiated_version - # JSON arrays so crafted arm_id/partition values cannot collide across field boundaries. - self._private_arm = json.dumps(["private", arm_id, partition]) - self._public_arm = json.dumps(["public", arm_id] if share_public else ["public", arm_id, partition]) # A key is eviction-race-guarded iff registered here. self._generations: dict[tuple[str, str], int] = {} self._generation_map_cap = generation_map_cap + self._store_cleanup_timeout = store_cleanup_timeout self._warned_store_ops: set[str] = set() + def _arm(self, scope: Literal["public", "private"]) -> str: + # JSON arrays so crafted arm_id/partition values cannot collide across field boundaries. + # The negotiated version era-scopes every arm: a session never serves an entry written + # under a different protocol era (its content differs - sieve-stripped fields, header + # filtering). Every caller runs post-connect; were that ever untrue, the supplier's + # None still partitions harmlessly. + fields: list[str | None] = [scope, self._negotiated_version(), self._arm_id] + if scope == "private" or not self._share_public: + fields.append(self._partition) + return json.dumps(fields) + async def read(self, method: str, params_key: str) -> CacheableResult | None: """Serve a fresh entry for the key, or `None`; the served result is a deep copy.""" + # A hit completes without any other yielding await, so checkpoint here: a poll + # loop over a fresh entry must not starve spawned tasks (eviction dispatch). + await anyio.lowlevel.checkpoint() # A wrong-shape entry raises as late as the copy, so the boundary wraps the whole read path. try: - entry = await self._get_fresh(CacheKey(method, params_key, self._private_arm)) + entry = await self._get_fresh(CacheKey(method, params_key, self._arm("private"))) if entry is None: # After a scope flip, a stale private entry must not shadow a fresh public one. - entry = await self._get_fresh(CacheKey(method, params_key, self._public_arm)) + entry = await self._get_fresh(CacheKey(method, params_key, self._arm("public"))) if entry is not None and entry.scope != "public": # Never serve an entry the server scoped "private" out of the shared arm. entry = None @@ -250,49 +268,52 @@ async def write( if self._generation_moved(gen_key, gen_at_capture): return # the key was evicted while the fetch was in flight ttl_ms, scope = self._resolve(result) - private_key = CacheKey(method, params_key, self._private_arm) - public_key = CacheKey(method, params_key, self._public_arm) + private_key = CacheKey(method, params_key, self._arm("private")) + public_key = CacheKey(method, params_key, self._arm("public")) if ttl_ms <= 0: if mode == "refresh": - # The refetch superseded the warm entry; shielded so a cancellation cannot leave one arm warm. - with anyio.CancelScope(shield=True): - await self._delete(private_key) - await self._delete(public_key) + # The refetch superseded the warm entry, which a cancellation must not leave serving. + await self._cleanup_delete(private_key, public_key) return own, opposite = (public_key, private_key) if scope == "public" else (private_key, public_key) # Opposite arm first: a failed delete aborts before the set - never two arms answering for one key. if not await self._delete(opposite): - # The own arm's entry is superseded too: shielded best-effort delete, degrading to a full miss. - with anyio.CancelScope(shield=True): - await self._delete(own) + # The own arm's entry is superseded too: best-effort delete, degrading to a full miss. + await self._cleanup_delete(own) return entry = CacheEntry(value=result.model_copy(deep=True), scope=scope, expires_at=self._clock() + ttl_ms / 1000) try: - await self._set(own, entry) + if not await self._set(own, entry): + # The fetch superseded any pre-existing own-arm entry, and the failed set + # left it in place: purge it (mirrors the opposite-arm-failure path). + await self._cleanup_delete(own) finally: # An eviction can land while the set commits - even when the await - # is cancelled - so re-check on every exit; the delete is shielded + # is cancelled - so re-check on every exit; the delete must complete # so the pending cancellation cannot resurrect the evicted entry. if self._generation_moved(gen_key, gen_at_capture): - with anyio.CancelScope(shield=True): - await self._delete(own) + await self._cleanup_delete(own) async def evict_method(self, method: str) -> None: """Evict the method's cursor-less entry.""" await self.evict_key(method, "") async def evict_key(self, method: str, params_key: str) -> None: - """Evict one key from both arms.""" + """Evict one key from both arms. + + Only the current era's arms are touched; other-era entries in a persistent store age out by TTL. + """ gen_key = (method, params_key) # Bump first so an in-flight fetch cannot write the evicted entry back. # Unregistered keys skip the bump (uris must not grow the map) but not # the deletes - a persistent store may hold uncaptured entries. if gen_key in self._generations: self._generations[gen_key] += 1 - # Shielded: a cancellation between the deletes would leave one arm serving the evicted entry. - with anyio.CancelScope(shield=True): - await self._delete(CacheKey(method, params_key, self._private_arm)) - await self._delete(CacheKey(method, params_key, self._public_arm)) + # Must complete: a cancellation between the deletes would leave one arm serving the evicted entry. + await self._cleanup_delete( + CacheKey(method, params_key, self._arm("private")), + CacheKey(method, params_key, self._arm("public")), + ) async def evict_for_notification(self, notification: ServerNotification) -> None: """Map a server notification to the entries it makes stale. @@ -340,6 +361,15 @@ async def _set(self, key: CacheKey, entry: CacheEntry) -> bool: self._warned_store_ops.discard("set") return True + async def _cleanup_delete(self, *keys: CacheKey) -> None: + # Must-complete cleanup: shielded so a pending cancellation cannot skip the deletes, + # bounded so a wedged store delete cannot hold client teardown uncancellably. + with anyio.move_on_after(self._store_cleanup_timeout, shield=True) as scope: + for key in keys: + await self._delete(key) + if scope.cancelled_caught: + logger.warning("Response cache store delete timed out; the entry will age out by TTL") + async def _delete(self, key: CacheKey) -> bool: try: await self._store.delete(key) diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py index d4d67e0c5..3e4024c80 100644 --- a/tests/client/test_caching.py +++ b/tests/client/test_caching.py @@ -214,11 +214,11 @@ def test_a_negative_default_ttl_is_rejected_at_construction() -> None: assert str(exc.value) == snapshot("default_ttl_ms must be >= 0, got -1") -# --- InMemoryResponseCacheStore read cap --- +# --- InMemoryResponseCacheStore LRU cap --- -async def test_a_new_read_key_at_the_cap_evicts_the_oldest_read_key() -> None: - store = InMemoryResponseCacheStore(max_read_entries=2) +async def test_a_new_entry_past_the_cap_evicts_the_least_recently_used_one() -> None: + store = InMemoryResponseCacheStore(max_entries=2) await store.set(_read_key("file:///a"), _entry("a")) await store.set(_read_key("file:///b"), _entry("b")) await store.set(_read_key("file:///c"), _entry("c")) @@ -227,51 +227,42 @@ async def test_a_new_read_key_at_the_cap_evicts_the_oldest_read_key() -> None: assert await store.get(_read_key("file:///c")) == _entry("c") -async def test_replacing_a_read_key_at_the_cap_neither_evicts_nor_refreshes_its_age() -> None: - """Eviction order is first-insertion order (FIFO), not recency (LRU).""" - store = InMemoryResponseCacheStore(max_read_entries=2) +async def test_a_get_refreshes_an_entrys_recency() -> None: + """Eviction order is recency (LRU), not insertion order: serving an entry keeps it alive.""" + store = InMemoryResponseCacheStore(max_entries=2) await store.set(_read_key("file:///a"), _entry("a")) await store.set(_read_key("file:///b"), _entry("b")) - await store.set(_read_key("file:///a"), _entry("a-replaced")) - assert await store.get(_read_key("file:///a")) == _entry("a-replaced") - assert await store.get(_read_key("file:///b")) == _entry("b") - await store.set(_read_key("file:///c"), _entry("c")) - assert await store.get(_read_key("file:///a")) is None - assert await store.get(_read_key("file:///b")) == _entry("b") + assert await store.get(_read_key("file:///a")) == _entry("a") # a is now the most recent + await store.set(_read_key("file:///c"), _entry("c")) # evicts b, not a + assert await store.get(_read_key("file:///a")) == _entry("a") + assert await store.get(_read_key("file:///b")) is None + assert await store.get(_read_key("file:///c")) == _entry("c") -async def test_only_read_keys_count_toward_the_cap_and_only_read_keys_are_evicted() -> None: - """The non-read cacheable methods are a small closed key set, so they are never capped.""" - store = InMemoryResponseCacheStore(max_read_entries=1) - list_keys = [ - CacheKey("tools/list"), - CacheKey("prompts/list"), - CacheKey("resources/list"), - CacheKey("resources/templates/list"), - CacheKey("server/discover"), - ] - for key in list_keys: - await store.set(key, _entry(key.method)) +async def test_replacing_an_entry_at_the_cap_refreshes_its_recency_without_evicting() -> None: + store = InMemoryResponseCacheStore(max_entries=2) await store.set(_read_key("file:///a"), _entry("a")) - for key in list_keys: - assert await store.get(key) == _entry(key.method) await store.set(_read_key("file:///b"), _entry("b")) - assert await store.get(_read_key("file:///a")) is None - assert await store.get(_read_key("file:///b")) == _entry("b") - for key in list_keys: - assert await store.get(key) == _entry(key.method) + await store.set(_read_key("file:///a"), _entry("a-replaced")) # still two entries; a is now the most recent + await store.set(_read_key("file:///c"), _entry("c")) # evicts b + assert await store.get(_read_key("file:///a")) == _entry("a-replaced") + assert await store.get(_read_key("file:///b")) is None + assert await store.get(_read_key("file:///c")) == _entry("c") -async def test_a_non_read_set_never_triggers_eviction_even_with_reads_at_the_cap() -> None: - store = InMemoryResponseCacheStore(max_read_entries=1) - await store.set(_read_key("file:///a"), _entry("a")) +async def test_a_touched_list_entry_survives_read_key_churn_through_the_cap() -> None: + """The reason the cap is LRU over all entries: a hot list singleton each principal + keeps re-reading must survive churn from per-uri resources/read keys.""" + store = InMemoryResponseCacheStore(max_entries=3) await store.set(CacheKey("tools/list"), _entry("tools")) - assert await store.get(_read_key("file:///a")) == _entry("a") + for i in range(10): + assert await store.get(CacheKey("tools/list")) == _entry("tools") # each serve re-touches it + await store.set(_read_key(f"file:///{i}"), _entry(i)) assert await store.get(CacheKey("tools/list")) == _entry("tools") -async def test_a_zero_cap_disables_read_eviction() -> None: - store = InMemoryResponseCacheStore(max_read_entries=0) +async def test_a_zero_cap_disables_eviction() -> None: + store = InMemoryResponseCacheStore(max_entries=0) uris = [f"file:///{i}" for i in range(5)] for uri in uris: await store.set(_read_key(uri), _entry(uri)) @@ -279,18 +270,18 @@ async def test_a_zero_cap_disables_read_eviction() -> None: assert await store.get(_read_key(uri)) == _entry(uri) -async def test_deleting_a_read_key_frees_its_cap_slot() -> None: - store = InMemoryResponseCacheStore(max_read_entries=1) +async def test_deleting_an_entry_frees_its_cap_slot() -> None: + store = InMemoryResponseCacheStore(max_entries=1) await store.set(_read_key("file:///a"), _entry("a")) await store.delete(_read_key("file:///a")) await store.set(_read_key("file:///b"), _entry("b")) assert await store.get(_read_key("file:///b")) == _entry("b") -def test_a_negative_read_cap_is_rejected_at_construction() -> None: +def test_a_negative_cap_is_rejected_at_construction() -> None: with pytest.raises(ValueError) as exc: - InMemoryResponseCacheStore(max_read_entries=-1) - assert str(exc.value) == snapshot("max_read_entries must be >= 0, got -1") + InMemoryResponseCacheStore(max_entries=-1) + assert str(exc.value) == snapshot("max_entries must be >= 0, got -1") # --- ClientResponseCache coordinator --- @@ -319,6 +310,7 @@ def _coordinator( share_public: bool = False, version: str | None = MODERN_VERSION, generation_map_cap: int = 4096, + store_cleanup_timeout: float = 5, ) -> ClientResponseCache: return ClientResponseCache( store=store, @@ -329,15 +321,16 @@ def _coordinator( share_public=share_public, negotiated_version=lambda: version, generation_map_cap=generation_map_cap, + store_cleanup_timeout=store_cleanup_timeout, ) -def _private_arm(arm_id: str = "arm", partition: str = "") -> str: - return json.dumps(["private", arm_id, partition]) +def _private_arm(arm_id: str = "arm", partition: str = "", era: str | None = MODERN_VERSION) -> str: + return json.dumps(["private", era, arm_id, partition]) -def _public_arm(arm_id: str = "arm", partition: str = "") -> str: - return json.dumps(["public", arm_id, partition]) +def _public_arm(arm_id: str = "arm", partition: str = "", era: str | None = MODERN_VERSION) -> str: + return json.dumps(["public", era, arm_id, partition]) def _wire_result(ttl_ms: int | None = None, cache_scope: str | None = None) -> ListToolsResult: @@ -439,6 +432,35 @@ async def clear(self) -> None: raise NotImplementedError +class _WedgingDeleteStore: + """Once `wedged` flips, every `delete` blocks forever (an Event nothing sets), + modelling a remote store with no socket timeout of its own.""" + + before_set_commits: Callable[[], Awaitable[None]] + """Awaited before `set` commits; assigned by the one test whose write reaches `set`.""" + + def __init__(self, *, wedged: bool = False) -> None: + self.inner = InMemoryResponseCacheStore() + self.wedged = wedged + self.deletes_started = 0 + + async def get(self, key: CacheKey) -> CacheEntry | None: + raise NotImplementedError + + async def set(self, key: CacheKey, entry: CacheEntry) -> None: + await self.before_set_commits() + await self.inner.set(key, entry) + + async def delete(self, key: CacheKey) -> None: + self.deletes_started += 1 + if self.wedged: + await anyio.Event().wait() + await self.inner.delete(key) + + async def clear(self) -> None: + raise NotImplementedError + + class _RehydratingStore: """`get` returns whatever a persistent store's deserializer produced - not necessarily what `set` received.""" @@ -470,8 +492,8 @@ async def test_hints_from_a_non_modern_session_are_ignored(version: str | None) gen = cache.capture("tools/list", "") await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") assert await cache.read("tools/list", "") is None - assert await store.get(CacheKey("tools/list", "", _private_arm())) is None - assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + assert await store.get(CacheKey("tools/list", "", _private_arm(era=version))) is None + assert await store.get(CacheKey("tools/list", "", _public_arm(era=version))) is None async def test_a_legacy_session_with_a_default_ttl_caches_on_the_private_arm_only() -> None: @@ -481,14 +503,52 @@ async def test_a_legacy_session_with_a_default_ttl_caches_on_the_private_arm_onl cache = _coordinator(store, version=LEGACY_VERSION, default_ttl_ms=60_000, clock=clock) gen = cache.capture("tools/list", "") await cache.write("tools/list", "", _wire_result(ttl_ms=5, cache_scope="public"), gen, "use") - private_entry = await store.get(CacheKey("tools/list", "", _private_arm())) + private_entry = await store.get(CacheKey("tools/list", "", _private_arm(era=LEGACY_VERSION))) assert private_entry is not None assert private_entry.scope == "private" - assert await store.get(CacheKey("tools/list", "", _public_arm())) is None + assert await store.get(CacheKey("tools/list", "", _public_arm(era=LEGACY_VERSION))) is None clock.now += 1.0 # well past the injected 5ms; the default 60s governs assert await cache.read("tools/list", "") == _wire_result(ttl_ms=5, cache_scope="public") +async def test_entries_never_cross_negotiated_eras_on_a_shared_store() -> None: + """Arms fold in the negotiated version: the same listing genuinely differs by era + (the SDK strips the 2026 fields for legacy sessions), so a 2025-negotiated session + is never served an entry a 2026 session wrote - on either arm - nor vice versa.""" + store = InMemoryResponseCacheStore() + modern = _coordinator(store, partition="p", default_ttl_ms=60_000) + legacy = _coordinator(store, partition="p", version=LEGACY_VERSION, default_ttl_ms=60_000) + + gen = modern.capture("tools/list", "") + await modern.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") # public arm + private_result = ListPromptsResult.model_validate({"prompts": [], "ttlMs": 60_000}) + gen = modern.capture("prompts/list", "") + await modern.write("prompts/list", "", private_result, gen, "use") # private arm + assert await legacy.read("tools/list", "") is None + assert await legacy.read("prompts/list", "") is None + + gen = legacy.capture("resources/read", "file:///a") + await legacy.write("resources/read", "file:///a", _read_result(ttl_ms=60_000), gen, "use") + assert await legacy.read("resources/read", "file:///a") is not None # cached for legacy itself... + assert await modern.read("resources/read", "file:///a") is None # ...but invisible across the era boundary + + +async def test_coordinators_negotiating_the_same_era_share_entries_through_the_store() -> None: + """Era scoping splits eras only: same-era clients sharing a store still share both arms.""" + store = InMemoryResponseCacheStore() + writer = _coordinator(store, partition="p") + reader = _coordinator(store, partition="p") + + gen = writer.capture("tools/list", "") + await writer.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") + private_result = ListPromptsResult.model_validate({"prompts": [], "ttlMs": 60_000}) + gen = writer.capture("prompts/list", "") + await writer.write("prompts/list", "", private_result, gen, "use") + + assert await reader.read("tools/list", "") == _wire_result(ttl_ms=60_000, cache_scope="public") + assert await reader.read("prompts/list", "") == private_result + + # --- Coordinator: TTL and scope resolution --- @@ -550,15 +610,19 @@ async def test_arm_key_layout_is_pinned_for_shared_store_compatibility() -> None """Arm strings are cross-process store key material; changing their layout breaks shared stores.""" store = InMemoryResponseCacheStore() cache = _coordinator(store, partition="tenant-a", arm_id="abc123", default_ttl_ms=60_000) + assert cache._arm("private") == snapshot('["private", "2026-07-28", "abc123", "tenant-a"]') + assert cache._arm("public") == snapshot('["public", "2026-07-28", "abc123", "tenant-a"]') + shared = _coordinator(store, partition="tenant-a", arm_id="abc123", share_public=True) + assert shared._arm("public") == snapshot('["public", "2026-07-28", "abc123"]') + # And entries genuinely land under those strings. gen = cache.capture("tools/list", "") await cache.write("tools/list", "", _wire_result(), gen, "use") - assert await store.get(CacheKey("tools/list", "", snapshot('["private", "abc123", "tenant-a"]'))) is not None + assert await store.get(CacheKey("tools/list", "", '["private", "2026-07-28", "abc123", "tenant-a"]')) is not None await cache.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") - assert await store.get(CacheKey("tools/list", "", snapshot('["public", "abc123", "tenant-a"]'))) is not None - shared = _coordinator(store, partition="tenant-a", arm_id="abc123", share_public=True) + assert await store.get(CacheKey("tools/list", "", '["public", "2026-07-28", "abc123", "tenant-a"]')) is not None gen = shared.capture("tools/list", "") await shared.write("tools/list", "", _wire_result(ttl_ms=60_000, cache_scope="public"), gen, "use") - assert await store.get(CacheKey("tools/list", "", snapshot('["public", "abc123"]'))) is not None + assert await store.get(CacheKey("tools/list", "", '["public", "2026-07-28", "abc123"]')) is not None async def test_public_entries_do_not_cross_partitions_by_default() -> None: @@ -726,6 +790,54 @@ async def test_a_cancellation_during_an_eviction_still_evicts_both_arms() -> Non assert await store.inner.get(public_key) is None +# --- Coordinator: bounded must-complete cleanup --- +# These tests inject a tiny `store_cleanup_timeout` because the bound itself is the +# behavior under test; the wedged delete only ever blocks for that injected bound. + + +async def test_evict_key_with_a_wedged_store_delete_returns_at_the_cleanup_bound( + caplog: pytest.LogCaptureFixture, +) -> None: + """A store delete that never completes cannot make eviction - and with it client + teardown - hang uncancellably: the must-complete cleanup is bounded, the remaining + deletes are abandoned, and the unreaped entries age out by TTL.""" + store = _WedgingDeleteStore(wedged=True) + cache = _coordinator(store, store_cleanup_timeout=0.01) + with caplog.at_level(logging.WARNING, logger="mcp.client.caching"), anyio.fail_after(5): + await cache.evict_key("tools/list", "") + assert store.deletes_started == 1 # the second arm's delete was abandoned with the first + assert caplog.messages == snapshot(["Response cache store delete timed out; the entry will age out by TTL"]) + + +async def test_a_refresh_purge_with_a_wedged_store_delete_returns_at_the_cleanup_bound() -> None: + store = _WedgingDeleteStore(wedged=True) + cache = _coordinator(store, store_cleanup_timeout=0.01) + gen = cache.capture("tools/list", "") + with anyio.fail_after(5): + await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "refresh") + assert store.deletes_started == 1 + + +async def test_an_eviction_mid_set_with_a_wedged_store_delete_returns_at_the_cleanup_bound() -> None: + """The post-set compensating delete is bounded like every other must-complete delete; + the entry it could not reap stays in the store and ages out by TTL.""" + store = _WedgingDeleteStore() + cache = _coordinator(store, store_cleanup_timeout=0.01) + gen = cache.capture("tools/list", "") + + async def wedge_then_evict() -> None: + store.wedged = True + await cache.evict_method("tools/list") # its own cleanup hits the bound too + + store.before_set_commits = wedge_then_evict + with anyio.fail_after(5): + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + # Opposite-arm delete, the eviction's first delete, the compensating delete. + assert store.deletes_started == 3 + # The accepted degradation: the unreaped entry stays until its TTL expires. + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is not None + + # --- Coordinator: store error discipline --- @@ -789,6 +901,22 @@ async def test_a_raising_store_set_caches_nothing_and_does_not_raise() -> None: assert await cache.read("tools/list", "") is None +async def test_a_failed_set_purges_the_pre_existing_own_arm_entry() -> None: + """The fetch superseded the warm own-arm entry, and the failed set left it in place: + without the purge it would keep serving the superseded value for its full TTL.""" + store = _FailingStore() + cache = _coordinator(store) + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") + assert await cache.read("tools/list", "") is not None # the warm own-arm entry + store.fail_set = True + gen = cache.capture("tools/list", "") + await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") # the caller's fetch is unaffected + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is None + assert await store.inner.get(CacheKey("tools/list", "", _public_arm())) is None + assert await cache.read("tools/list", "") is None + + async def test_eviction_with_a_raising_delete_still_bumps_the_generation() -> None: """Bump-first: a fetch captured before the eviction cannot write back even when the deletes raise.""" store = _FailingStore() diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 2627a841a..8168e85ca 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -74,7 +74,7 @@ def _coordinator(client: Client) -> ClientResponseCache: def _private_arm(client: Client) -> str: """The identity arm stamped into store keys; only equality between clients matters here.""" - return _coordinator(client)._private_arm + return _coordinator(client)._arm("private") def _tools_list_key(client: Client) -> CacheKey: @@ -138,7 +138,8 @@ def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None client = Client("https://user:pass@example.com/mcp?api_key=SECRET") arm_id = hashlib.sha256(b"https://example.com/mcp?api_key=SECRET").hexdigest() - assert _private_arm(client) == json.dumps(["private", arm_id, ""]) + # The era slot is None pre-connect; only the identity hash matters here. + assert _private_arm(client) == json.dumps(["private", None, arm_id, ""]) def test_urls_differing_only_in_query_have_distinct_cache_identities() -> None: @@ -1276,6 +1277,23 @@ async def test_mutating_returned_results_never_corrupts_the_cached_entry() -> No assert fetches == [None] +async def test_a_cache_hit_still_yields_to_the_event_loop() -> None: + """A hit completes without a wire await, so the verb checkpoints explicitly: a poll + loop over a fresh entry would otherwise starve spawned tasks (eviction dispatch). + Pinned by calling a warm verb inside an already-cancelled scope: only a yield can + observe the cancellation.""" + server, fetches = _varying_tools_server() + + async with Client(server, cache=CacheConfig(clock=_ManualClock())) as client: + assert _tool_names(await client.list_tools()) == ["t0"] # warm the entry + with anyio.CancelScope() as scope: + scope.cancel() + await client.list_tools() # would be a hit; must yield and observe the cancellation + assert scope.cancelled_caught + + assert fetches == [None] # the cancelled call neither fetched nor served + + async def test_a_legacy_peer_injecting_cache_hints_caches_nothing() -> None: """Era gate: hint keys a 2025 peer puts on the wire cache nothing. Scripted peer: an SDK server strips the hint fields when serializing for a 2025 session.""" From af36ead79e9a54579ecc3aaef53c24e01cef3ab7 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 19:02:47 +0000 Subject: [PATCH 17/18] Strip userinfo textually and address review notes on the docs --- docs/advanced/caching.md | 2 +- docs/migration.md | 2 +- docs_src/caching/tutorial003.py | 3 ++- src/mcp/client/client.py | 9 +++++---- tests/client/test_client_caching.py | 16 ++++++++++++++++ 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md index 8bdb5b3bd..ba979ccc1 100644 --- a/docs/advanced/caching.md +++ b/docs/advanced/caching.md @@ -39,7 +39,7 @@ One caveat on paginated lists: the protocol requires the **same `cacheScope` on On a 2026-07-28 session, `Client` honors the hints for you: it has a built-in response cache, on by default. A result that arrives carrying a `ttlMs` is stored, and an identical call within that TTL is served from the cache with no round trip. A result that carries *no* hint is not cached: hint-less results get `CacheConfig.default_ttl_ms`, which defaults to `0` (immediately stale), so a server that declares nothing sees exactly the call-for-call traffic it always did. -```python title="client.py" hl_lines="33 35 38" +```python title="client.py" hl_lines="34 36 39" --8<-- "docs_src/caching/tutorial003.py" ``` diff --git a/docs/migration.md b/docs/migration.md index b29852cb5..047626ee2 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -429,7 +429,7 @@ For protocol 2026-07-28 over Streamable HTTP, a tool's input-schema property may ### `Client` verbs may serve cached responses ([SEP-2549](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2549)) -On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to the cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. Servers that send no hints, including every pre-2026 server, see identical call-for-call behavior, because hint-less results are not cached. Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md). +On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to the cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. With the default configuration, servers that send no hints, including every pre-2026 server, see identical call-for-call behavior, because hint-less results are not cached (a `CacheConfig.default_ttl_ms` above zero caches them too). Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md). ### Server extensions API ([SEP-2133](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2133)) diff --git a/docs_src/caching/tutorial003.py b/docs_src/caching/tutorial003.py index d50a89610..29c168c9f 100644 --- a/docs_src/caching/tutorial003.py +++ b/docs_src/caching/tutorial003.py @@ -30,10 +30,11 @@ async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestPar async def main() -> None: + start = state.fetches async with Client(server, cache=CacheConfig(clock=lambda: state.now)) as client: await client.list_tools() # fetch 1 await client.list_tools() # fresh for 60s: served from the cache state.now += 60.0 await client.list_tools() # the TTL ran out: fetch 2 await client.list_tools(cache_mode="refresh") # skip the cache read: fetch 3 - print(f"4 calls, {state.fetches} fetches") + print(f"4 calls, {state.fetches - start} fetches") diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index 311213a5b..2eca19f1f 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -9,7 +9,7 @@ from contextlib import AsyncExitStack from dataclasses import KW_ONLY, dataclass, field from typing import Any, Literal, TypeVar, cast -from urllib.parse import urlsplit, urlunsplit +from urllib.parse import urlsplit import anyio import anyio.lowlevel @@ -132,10 +132,11 @@ def _strip_userinfo(url: str) -> str: Credentials must not enter cache-key material; any further normalization could merge distinct servers. """ - parts = urlsplit(url) - if "@" not in parts.netloc: + netloc = urlsplit(url).netloc # raw authority bytes (urlsplit case-folds only `.scheme`), so slicing is exact + if "@" not in netloc: return url - return urlunsplit(parts._replace(netloc=parts.netloc.rpartition("@")[2])) + start = url.index("//") + 2 + return url[:start] + netloc.rpartition("@")[2] + url[start + len(netloc) :] def _evicting_message_handler(cache: ClientResponseCache, user_handler: MessageHandlerFnT | None) -> MessageHandlerFnT: diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index 8168e85ca..c58080e75 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -133,6 +133,22 @@ def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: assert _private_arm(bare) == _private_arm(with_password) == _private_arm(with_token) +@pytest.mark.parametrize( + ("with_userinfo", "bare"), + [ + ("HTTPS://a@X.example/mcp", "HTTPS://X.example/mcp"), + ("https://u@h/p?", "https://h/p?"), + ("https://u@h/p#", "https://h/p#"), + ], + ids=["scheme-case", "empty-query", "empty-fragment"], +) +def test_stripping_userinfo_changes_no_other_byte_of_the_url(with_userinfo: str, bare: str) -> None: + """The removed `userinfo@` is the only byte difference: no scheme case-folding, no dropped + empty `?`/`#` delimiters. A userinfo-free URL passes through untouched, so arm equality + proves the stripped form is byte-identical to the bare URL.""" + assert _private_arm(Client(with_userinfo)) == _private_arm(Client(bare)) + + def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None: """Pins the docs' secrets-never-in-keys claim: a query-string secret never appears in store keys.""" client = Client("https://user:pass@example.com/mcp?api_key=SECRET") From b44a8917065ec086d8398fd6872abaa94e367c26 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Mon, 29 Jun 2026 21:02:47 +0000 Subject: [PATCH 18/18] Address review feedback on identity stripping and stale tool-map pruning --- src/mcp/client/client.py | 26 +++-- src/mcp/client/session.py | 16 +++- tests/client/test_caching.py | 16 ++-- tests/client/test_client.py | 94 +++++++++++++++++++ tests/client/test_client_caching.py | 43 ++++++++- .../transports/test_hosting_http_modern.py | 3 +- 6 files changed, 176 insertions(+), 22 deletions(-) diff --git a/src/mcp/client/client.py b/src/mcp/client/client.py index 2eca19f1f..638ea63a9 100644 --- a/src/mcp/client/client.py +++ b/src/mcp/client/client.py @@ -9,7 +9,6 @@ from contextlib import AsyncExitStack from dataclasses import KW_ONLY, dataclass, field from typing import Any, Literal, TypeVar, cast -from urllib.parse import urlsplit import anyio import anyio.lowlevel @@ -132,11 +131,19 @@ def _strip_userinfo(url: str) -> str: Credentials must not enter cache-key material; any further normalization could merge distinct servers. """ - netloc = urlsplit(url).netloc # raw authority bytes (urlsplit case-folds only `.scheme`), so slicing is exact - if "@" not in netloc: + # Pure text, no urlsplit: it strips embedded tab/CR/LF before parsing, which would misalign slices. + sep = url.find("//") + if sep == -1: return url - start = url.index("//") + 2 - return url[:start] + netloc.rpartition("@")[2] + url[start + len(netloc) :] + start = sep + 2 + end = len(url) + for delimiter in "/?#": + if (found := url.find(delimiter, start)) != -1: + end = min(end, found) + authority = url[start:end] + if "@" not in authority: + return url + return url[:start] + authority.rpartition("@")[2] + url[end:] def _evicting_message_handler(cache: ClientResponseCache, user_handler: MessageHandlerFnT | None) -> MessageHandlerFnT: @@ -746,9 +753,12 @@ async def list_tools( meta=meta, cache_mode=cache_mode, send=lambda: self.session.list_tools(params=PaginatedRequestParams(cursor=cursor, _meta=meta)), - # A cache hit skips session.list_tools, so the session re-absorbs the - # served listing to rebuild its derived per-tool state. - absorb=self.session._absorb_tool_listing, # pyright: ignore[reportPrivateUsage] + # A cache hit skips session.list_tools, so the session re-absorbs the served + # listing to rebuild its derived per-tool state. Hits are cursorless, but a + # cached page 1 can carry next_cursor - never prune on a partial listing. + absorb=lambda hit: self.session._absorb_tool_listing( # pyright: ignore[reportPrivateUsage] + hit, complete=hit.next_cursor is None + ), ) @deprecated("The roots capability is deprecated as of 2026-07-28 (SEP-2577).", category=MCPDeprecationWarning) diff --git a/src/mcp/client/session.py b/src/mcp/client/session.py index c76eb8fc7..6a2298ad9 100644 --- a/src/mcp/client/session.py +++ b/src/mcp/client/session.py @@ -906,12 +906,14 @@ async def list_tools(self, *, params: types.PaginatedRequestParams | None = None types.ListToolsRequest(params=params), types.ListToolsResult, ) - return self._absorb_tool_listing(result) + complete = (params is None or params.cursor is None) and result.next_cursor is None + return self._absorb_tool_listing(result, complete=complete) - def _absorb_tool_listing(self, result: types.ListToolsResult) -> types.ListToolsResult: + def _absorb_tool_listing(self, result: types.ListToolsResult, *, complete: bool) -> types.ListToolsResult: """Filter the listing per the 2026 x-mcp-header MUST and rebuild derived per-tool state, in place. Idempotent: cached values are already post-filter, so the response cache can re-absorb a served listing. + `complete` (an uncursored single-page listing) prunes per-tool state down to the listing's tools. """ if self._negotiated_version in MODERN_PROTOCOL_VERSIONS: # 2026-07-28: clients MUST drop tools whose x-mcp-header annotations are invalid. @@ -928,11 +930,17 @@ def _absorb_tool_listing(self, result: types.ListToolsResult) -> types.ListTools kept.append(tool) result.tools = kept - # Cache tool output schemas for future validation - # Note: don't clear the cache, as we may be using a cursor + # Cache tool output schemas for future validation; cursor pages only ever add. for tool in result.tools: self._tool_output_schemas[tool.name] = tool.output_schema + if complete: + # The listing is the full tool universe, so state for unlisted tools is stale + # (the server dropped them, or a shared-cache writer's filter did). + names = {tool.name for tool in result.tools} + self._x_mcp_header_maps = {k: v for k, v in self._x_mcp_header_maps.items() if k in names} + self._tool_output_schemas = {k: v for k, v in self._tool_output_schemas.items() if k in names} + return result @deprecated("The roots capability is deprecated as of 2026-07-28 (SEP-2577).", category=MCPDeprecationWarning) diff --git a/tests/client/test_caching.py b/tests/client/test_caching.py index 3e4024c80..dc445a6ec 100644 --- a/tests/client/test_caching.py +++ b/tests/client/test_caching.py @@ -432,6 +432,8 @@ async def clear(self) -> None: raise NotImplementedError +# The lax pragmas here and in the wedged-store tests: 3.11's settrace-based coverage loses +# tracing in frames resumed after the coordinator's bounded-shield cleanup cancellation. class _WedgingDeleteStore: """Once `wedged` flips, every `delete` blocks forever (an Event nothing sets), modelling a remote store with no socket timeout of its own.""" @@ -449,7 +451,7 @@ async def get(self, key: CacheKey) -> CacheEntry | None: async def set(self, key: CacheKey, entry: CacheEntry) -> None: await self.before_set_commits() - await self.inner.set(key, entry) + await self.inner.set(key, entry) # pragma: lax no cover async def delete(self, key: CacheKey) -> None: self.deletes_started += 1 @@ -805,8 +807,10 @@ async def test_evict_key_with_a_wedged_store_delete_returns_at_the_cleanup_bound cache = _coordinator(store, store_cleanup_timeout=0.01) with caplog.at_level(logging.WARNING, logger="mcp.client.caching"), anyio.fail_after(5): await cache.evict_key("tools/list", "") - assert store.deletes_started == 1 # the second arm's delete was abandoned with the first - assert caplog.messages == snapshot(["Response cache store delete timed out; the entry will age out by TTL"]) + assert store.deletes_started == 1 # pragma: lax no cover # the second arm's delete was abandoned with the first + assert caplog.messages == snapshot( # pragma: lax no cover + ["Response cache store delete timed out; the entry will age out by TTL"] + ) async def test_a_refresh_purge_with_a_wedged_store_delete_returns_at_the_cleanup_bound() -> None: @@ -815,7 +819,7 @@ async def test_a_refresh_purge_with_a_wedged_store_delete_returns_at_the_cleanup gen = cache.capture("tools/list", "") with anyio.fail_after(5): await cache.write("tools/list", "", _wire_result(ttl_ms=0), gen, "refresh") - assert store.deletes_started == 1 + assert store.deletes_started == 1 # pragma: lax no cover async def test_an_eviction_mid_set_with_a_wedged_store_delete_returns_at_the_cleanup_bound() -> None: @@ -833,9 +837,9 @@ async def wedge_then_evict() -> None: with anyio.fail_after(5): await cache.write("tools/list", "", _wire_result(ttl_ms=60_000), gen, "use") # Opposite-arm delete, the eviction's first delete, the compensating delete. - assert store.deletes_started == 3 + assert store.deletes_started == 3 # pragma: lax no cover # The accepted degradation: the unreaped entry stays until its TTL expires. - assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is not None + assert await store.inner.get(CacheKey("tools/list", "", _private_arm())) is not None # pragma: lax no cover # --- Coordinator: store error discipline --- diff --git a/tests/client/test_client.py b/tests/client/test_client.py index a6a9ac6ea..820478f3f 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -506,6 +506,100 @@ async def on_list_tools( assert [t.name for t in result.tools] == ["ok", "dropme"] +_RETIRED_TOOL = Tool( + name="retired", + input_schema={"type": "object", "properties": {"region": {"type": "string", "x-mcp-header": "Region"}}}, + output_schema={"type": "object"}, +) +_SURVIVOR_TOOL = Tool(name="survivor", input_schema={"type": "object"}) + + +def _scripted_listing_server(listings: list[ListToolsResult]) -> Server: + """Serves the given listings in order, one per tools/list request.""" + + async def on_list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + return listings.pop(0) + + return Server("test", on_list_tools=on_list_tools) + + +async def test_a_complete_listing_prunes_per_tool_state_for_tools_it_no_longer_contains() -> None: + """SDK-defined: a complete (uncursored, cursorless) listing is the full tool universe, so the + header map and output schema derived from an earlier listing of a now-absent tool are dropped.""" + server = _scripted_listing_server( + [ + ListToolsResult(tools=[_RETIRED_TOOL, _SURVIVOR_TOOL]), + ListToolsResult(tools=[_SURVIVOR_TOOL]), + ] + ) + + with anyio.fail_after(5): + async with Client(server) as client: + await client.session.list_tools() + assert set(client.session._x_mcp_header_maps) == {"retired", "survivor"} + assert set(client.session._tool_output_schemas) == {"retired", "survivor"} + + await client.session.list_tools() + assert set(client.session._x_mcp_header_maps) == {"survivor"} + assert set(client.session._tool_output_schemas) == {"survivor"} + + +async def test_a_complete_listing_prunes_output_schemas_on_a_legacy_session_too() -> None: + """SDK-defined: the prune is era-independent -- legacy sessions cache output schemas the same + way (their header-map dict just stays empty, since the x-mcp-header filter is 2026-only).""" + server = _scripted_listing_server( + [ + ListToolsResult(tools=[_RETIRED_TOOL, _SURVIVOR_TOOL]), + ListToolsResult(tools=[_SURVIVOR_TOOL]), + ] + ) + + with anyio.fail_after(5): + async with Client(server, mode="legacy") as client: + await client.session.list_tools() + assert set(client.session._tool_output_schemas) == {"retired", "survivor"} + assert client.session._x_mcp_header_maps == {} + + await client.session.list_tools() + assert set(client.session._tool_output_schemas) == {"survivor"} + + +async def test_a_listing_with_a_next_cursor_prunes_no_per_tool_state() -> None: + """SDK-defined: a first page carrying next_cursor is not the full universe -- state for tools + expected on later pages must survive it.""" + server = _scripted_listing_server( + [ + ListToolsResult(tools=[_RETIRED_TOOL, _SURVIVOR_TOOL]), + ListToolsResult(tools=[_SURVIVOR_TOOL], next_cursor="2"), + ] + ) + + with anyio.fail_after(5): + async with Client(server) as client: + await client.session.list_tools() + await client.session.list_tools() + assert set(client.session._x_mcp_header_maps) == {"retired", "survivor"} + assert set(client.session._tool_output_schemas) == {"retired", "survivor"} + + +async def test_a_cursor_page_fetch_prunes_no_per_tool_state() -> None: + """SDK-defined: a continuation page is partial even when it ends the pagination (no + next_cursor) -- only an uncursored single-page listing prunes.""" + server = _scripted_listing_server( + [ + ListToolsResult(tools=[_RETIRED_TOOL, _SURVIVOR_TOOL]), + ListToolsResult(tools=[_SURVIVOR_TOOL]), + ] + ) + + with anyio.fail_after(5): + async with Client(server) as client: + await client.session.list_tools() + await client.session.list_tools(params=types.PaginatedRequestParams(cursor="2")) + assert set(client.session._x_mcp_header_maps) == {"retired", "survivor"} + assert set(client.session._tool_output_schemas) == {"retired", "survivor"} + + def test_client_rejects_handshake_era_mode_at_construction() -> None: """A handshake-era protocol-version string passed as `mode=` is rejected by `__post_init__` with a hint to use `mode='legacy'` — the version-pin path is diff --git a/tests/client/test_client_caching.py b/tests/client/test_client_caching.py index c58080e75..708d83db4 100644 --- a/tests/client/test_client_caching.py +++ b/tests/client/test_client_caching.py @@ -139,16 +139,26 @@ def test_userinfo_variants_of_a_server_url_share_one_cache_identity() -> None: ("HTTPS://a@X.example/mcp", "HTTPS://X.example/mcp"), ("https://u@h/p?", "https://h/p?"), ("https://u@h/p#", "https://h/p#"), + ("https://u\tser:p@h.example/p", "https://h.example/p"), + ("https://u:p@h.example/pa\tth", "https://h.example/pa\tth"), ], - ids=["scheme-case", "empty-query", "empty-fragment"], + ids=["scheme-case", "empty-query", "empty-fragment", "tab-in-userinfo", "tab-in-path"], ) def test_stripping_userinfo_changes_no_other_byte_of_the_url(with_userinfo: str, bare: str) -> None: """The removed `userinfo@` is the only byte difference: no scheme case-folding, no dropped - empty `?`/`#` delimiters. A userinfo-free URL passes through untouched, so arm equality - proves the stripped form is byte-identical to the bare URL.""" + empty `?`/`#` delimiters, and control characters - which urlsplit would silently strip, + misaligning any parser-derived slice - stay byte-exact outside the removed span. A + userinfo-free URL passes through untouched, so arm equality proves the stripped form is + byte-identical to the bare URL.""" assert _private_arm(Client(with_userinfo)) == _private_arm(Client(bare)) +def test_a_url_without_an_authority_passes_through_unchanged() -> None: + """No `//` means no authority span, so an `@` elsewhere strips nothing.""" + arm_id = hashlib.sha256(b"mailto:a@b").hexdigest() + assert _private_arm(Client("mailto:a@b")) == json.dumps(["private", None, arm_id, ""]) + + def test_the_server_url_is_sha256_hashed_before_it_enters_key_material() -> None: """Pins the docs' secrets-never-in-keys claim: a query-string secret never appears in store keys.""" client = Client("https://user:pass@example.com/mcp?api_key=SECRET") @@ -863,6 +873,33 @@ async def on_request(request: httpx.Request) -> None: assert posts[-1].headers["mcp-param-region"] == "us-west1" +async def test_a_shared_store_hit_prunes_a_header_map_the_writers_filter_dropped() -> None: + """Cached listings are post-filter: when another client's refresh wrote a listing whose + filter dropped tool `x` (its annotation went invalid), a hit on that entry must prune the + reader's stale arg-to-header map, or it would keep emitting Mcp-Param-* headers for `x`.""" + valid = {"type": "object", "properties": {"region": {"type": "string", "x-mcp-header": "Region"}}} + invalid = {"type": "object", "properties": {"region": {"type": "string", "x-mcp-header": "bad name"}}} + schema = valid + + async def list_tools(ctx: ServerRequestContext, params: types.PaginatedRequestParams | None) -> ListToolsResult: + return ListToolsResult(tools=[Tool(name="x", input_schema=schema)]) + + server = Server("filtering", on_list_tools=list_tools, cache_hints={"tools/list": CacheHint(ttl_ms=60_000)}) + config = CacheConfig(store=InMemoryResponseCacheStore(), partition="p", target_id="svc", clock=_ManualClock()) + + with anyio.fail_after(5): + async with Client(server, cache=config) as reader, Client(server, cache=config) as writer: + await reader.list_tools() # fetches while `x` is valid; the reader holds its header map + assert "x" in reader.session._x_mcp_header_maps + + schema = invalid + await writer.list_tools(cache_mode="refresh") # the writer's filter drops `x`; the entry is replaced + + served = await reader.list_tools() # hit on the writer's entry + assert served.tools == [] + assert "x" not in reader.session._x_mcp_header_maps + + async def test_a_tools_list_changed_notification_makes_the_next_list_refetch() -> None: """Spec SHOULD: list_changed invalidates the cached listing. Legacy session + `default_ttl_ms` entry: eviction is era-independent.""" diff --git a/tests/interaction/transports/test_hosting_http_modern.py b/tests/interaction/transports/test_hosting_http_modern.py index a8f1f53c7..3feed4fed 100644 --- a/tests/interaction/transports/test_hosting_http_modern.py +++ b/tests/interaction/transports/test_hosting_http_modern.py @@ -511,7 +511,8 @@ async def test_modern_client_stops_mirroring_after_a_re_list_drops_the_tool() -> bad_schema = {"type": "object", "properties": {"a": {"type": "string", "x-mcp-header": "bad name"}}} valid = Tool(name="run", input_schema=schema) invalid = Tool(name="run", input_schema=bad_schema) - listings = iter([valid, invalid]) + # Three pages: the call after the drop re-lists once because the prune also cleared `run`'s schema entry. + listings = iter([valid, invalid, invalid]) async def list_tools(ctx: ServerRequestContext, params: PaginatedRequestParams | None) -> ListToolsResult: return ListToolsResult(tools=[next(listings)], ttl_ms=0, cache_scope="public")