Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 87 additions & 2 deletions .github/actions/conformance/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
json-schema-ref-no-deref - Connect, list tools (no $ref deref)
request-metadata - Connect with all callbacks; client stamps _meta
http-standard-headers - Connect, call a tool (Mcp-* headers checked)
http-invalid-tool-headers - List tools, call every surfaced tool (x-mcp-header filter)
elicitation-sep1034-client-defaults - Elicitation with default accept callback
sep-2322-client-request-state - Drive the manual MRTR retry surface
auth/client-credentials-jwt - Client credentials with private_key_jwt
auth/client-credentials-basic - Client credentials with client_secret_basic
auth/* - Authorization code flow (default for auth scenarios)
Expand Down Expand Up @@ -296,6 +298,43 @@ async def run_http_standard_headers(server_url: str) -> None:
logger.debug(f"add_numbers result: {result}")


def _stub_required_args(input_schema: dict[str, Any]) -> dict[str, Any]:
"""Minimal arguments satisfying a tool inputSchema's required list."""
by_type: dict[str, Any] = {
"string": "x",
"integer": 0,
"number": 0,
"boolean": False,
"object": {},
"array": [],
"null": None,
}
properties = input_schema.get("properties", {})
return {name: by_type.get(properties.get(name, {}).get("type"), "x") for name in input_schema.get("required", [])}


@register("http-invalid-tool-headers")
async def run_http_invalid_tool_headers(server_url: str) -> None:
    """Call every tool that survives the SDK's list_tools filtering (SEP-2243).

    The harness mock advertises a single valid tool alongside several whose
    x-mcp-header annotations are malformed (empty, non-primitive type,
    duplicate, invalid chars). The scenario passes when valid_tool is called
    and the malformed ones are not, so a conforming client drops them from
    the list_tools result and this loop never reaches them. Because the
    scenario sets allowClientError, a failure on one call is logged and the
    loop moves on instead of aborting the run.
    """
    async with Client(server_url, mode=client_mode()) as client:
        surfaced = (await client.list_tools()).tools
        logger.debug(f"Surfaced tools: {[t.name for t in surfaced]}")
        for tool in surfaced:
            try:
                # Building the stub arguments stays inside the try so a bad
                # schema is logged and skipped just like a failed call.
                stub_args = _stub_required_args(tool.input_schema)
                await client.call_tool(tool.name, stub_args)
            except Exception:
                logger.exception(f"call_tool({tool.name!r}) failed")


@register("elicitation-sep1034-client-defaults")
async def run_elicitation_defaults(server_url: str) -> None:
"""Connect with elicitation callback that applies schema defaults."""
Expand All @@ -305,6 +344,53 @@ async def run_elicitation_defaults(server_url: str) -> None:
logger.debug(f"test_client_elicitation_defaults result: {result}")


@register("sep-2322-client-request-state")
async def run_mrtr_client(server_url: str) -> None:
    """Drive the manual MRTR retry surface against the SEP-2322 client mock.

    The mock speaks the modern lifecycle (server/discover, no initialize) and
    inspects the wire params of each tools/call round, so this exercises the
    explicit allow_input_required=True path rather than an auto-loop: round 1
    receives an InputRequiredResult, the fixture fulfils the elicitation
    locally, then round 2 retries with input_responses + the echoed
    request_state. Passing request_state straight off the typed result -- a
    str when the server sent one, None when it didn't -- lets the
    serializer's exclude_none drop the key in the no-state case without a
    branch here. The unrelated call between rounds proves MRTR params don't
    leak across tools, and the no-result-type call must parse as a complete
    CallToolResult with no retry.

    Args:
        server_url: URL of the conformance mock server to connect to.

    Raises:
        AssertionError: If a first-round call does not return an
            InputRequiredResult, or the final call does not return a
            CallToolResult.
    """
    async with Client(server_url, mode=client_mode()) as client:
        await client.list_tools()
        # The same locally fulfilled elicitation answer is reused for both retries.
        confirm = {"confirm": types.ElicitResult(action="accept", content={"confirmed": True})}

        # Round 1 for the state-echo tool: expect a request for further input.
        r1 = await client.call_tool("test_mrtr_echo_state", {}, allow_input_required=True)
        assert isinstance(r1, types.InputRequiredResult)

        # Deliberately interleaved: MRTR params must not leak into another tool's call.
        await client.call_tool("test_mrtr_unrelated", {})

        # Round 2: retry with the answers plus the request_state from round 1.
        await client.call_tool(
            "test_mrtr_echo_state",
            {},
            input_responses=confirm,
            request_state=r1.request_state,
            allow_input_required=True,
        )

        # Same two rounds against the tool that sends no state; request_state
        # is then None and is passed through unchanged.
        r2 = await client.call_tool("test_mrtr_no_state", {}, allow_input_required=True)
        assert isinstance(r2, types.InputRequiredResult)
        await client.call_tool(
            "test_mrtr_no_state",
            {},
            input_responses=confirm,
            request_state=r2.request_state,
            allow_input_required=True,
        )

        # A response with no result type must parse as a complete result.
        result = await client.call_tool("test_mrtr_no_result_type", {})
        assert isinstance(result, types.CallToolResult)


@register("auth/client-credentials-jwt")
async def run_client_credentials_jwt(server_url: str) -> None:
"""Client credentials flow with private_key_jwt authentication."""
Expand Down Expand Up @@ -441,8 +527,7 @@ def main() -> None:
asyncio.run(run_auth_code_client(server_url))
else:
# Unhandled scenarios:
# - http-custom-headers (SEP-2243 / S8: Mcp-Param-* emission)
print(f"Unknown scenario: {scenario}", file=sys.stderr)
sys.exit(1)
else:
Expand Down
37 changes: 4 additions & 33 deletions .github/actions/conformance/expected-failures.2026-07-28.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,19 @@
# milestone.

client:
# --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) ---
# SEP-2322 (multi-round-trip requests): client does not echo requestState /
# handle IncompleteResult yet.
- sep-2322-client-request-state
# SEP-2243 (HTTP standardization): no client Mcp-Param-* support yet — needs the
# tool-schema-cache vs per-call tool_definition design (S8).
- http-custom-headers
- http-invalid-tool-headers
# auth/enterprise-managed-authorization (SEP-990) is in the 2025 baseline but
# NOT here: the harness skips it as inapplicable at --spec-version 2026-07-28
# (it is an extension scenario not carried into the 2026 wire), so it is
# neither run nor evaluated on this leg.

server:
# --- Carried-forward 2025-era scenarios still failing on the 2026 wire ---
# The stateless 2026 path now reaches handlers for plain request/response
# scenarios; tools-call-with-progress still fails because the stateless
# server has no channel for server→client progress notifications.
- tools-call-with-progress
# SEP-2106 (JSON Schema 2020-12 in tool inputSchema): the fixture tool's
# schema has none of the 2020-12 keywords the scenario checks. The scenario
# is in `--suite all` but not `--suite active`, so this is the only leg that
# runs it; it fails identically at 2025-11-25 (not a 2026-path regression).
- json-schema-2020-12

# --- Draft scenarios (same failures and reasons as the `--suite draft` leg) ---
# SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented.
- input-required-result-basic-elicitation
- input-required-result-basic-sampling
- input-required-result-basic-list-roots
- input-required-result-request-state
- input-required-result-multiple-input-requests
- input-required-result-multi-round
# SEP-2322 (multi-round-trip requests / IncompleteResult): the prompt pipeline
# cannot return InputRequiredResult from MCPServer yet (tools/call can).
- input-required-result-non-tool-request
- input-required-result-result-type
- input-required-result-tampered-state
- input-required-result-capability-check
# SEP-2243 (HTTP header standardization): Mcp-Method / Mcp-Name cross-check
# against the request body is not implemented.
- http-header-validation
# WARNING-only entries: these scenarios emit no FAILURE checks but the
# expected-failures evaluator counts WARNINGs as failures (the summary line
# only shows passed/failed, not warnings, so a local re-probe can mis-read
# these as stale).
- input-required-result-missing-input-response
- input-required-result-validate-input
28 changes: 4 additions & 24 deletions .github/actions/conformance/expected-failures.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@

client:
# --- Draft-spec scenarios (in `--suite draft`, also part of `--suite all`) ---
# SEP-2322 (multi-round-trip requests): client does not echo requestState /
# handle IncompleteResult yet.
- sep-2322-client-request-state
# SEP-2243 (HTTP standardization): no client Mcp-Param-* support yet — needs the
# tool-schema-cache vs per-call tool_definition design (S8).
- http-custom-headers
- http-invalid-tool-headers

# --- Pre-existing scenarios that fail on checks added after conformance 0.1.15 ---
# SEP-990 (enterprise-managed authorization extension): no fixture handler /
Expand All @@ -26,23 +23,6 @@ client:

server:
# --- Draft-spec scenarios (in `--suite draft`; the `active` suite is green) ---
# SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented.
- input-required-result-basic-elicitation
- input-required-result-basic-sampling
- input-required-result-basic-list-roots
- input-required-result-request-state
- input-required-result-multiple-input-requests
- input-required-result-multi-round
# SEP-2322 (multi-round-trip requests / IncompleteResult): the prompt pipeline
# cannot return InputRequiredResult from MCPServer yet (tools/call can).
- input-required-result-non-tool-request
- input-required-result-result-type
- input-required-result-tampered-state
- input-required-result-capability-check
# SEP-2243 (HTTP header standardization): Mcp-Method / Mcp-Name cross-check
# against the request body is not implemented.
- http-header-validation
# WARNING-only entries: these scenarios emit no FAILURE checks but the
# expected-failures evaluator counts WARNINGs as failures (the summary line
# only shows passed/failed, not warnings, so a local re-probe can mis-read
# these as stale).
- input-required-result-missing-input-response
- input-required-result-validate-input
36 changes: 7 additions & 29 deletions .github/workflows/conformance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,10 @@ permissions:

env:
# Pinned conformance harness package spec (passed verbatim to `npx --yes`).
# Use a published version, e.g. @modelcontextprotocol/conformance@0.2.0-alpha.5.
# Use a published version, e.g. @modelcontextprotocol/conformance@0.2.0-alpha.7.
# Bump deliberately and reconcile both
# .github/actions/conformance/expected-failures*.yml files in the same change.
#
# TODO: replace with @modelcontextprotocol/conformance@0.2.0-alpha.5 once
# https://github.com/modelcontextprotocol/conformance/pull/357 publishes, and
# drop CONFORMANCE_PKG_SHA256 plus the fetch-and-verify step below.
CONFORMANCE_PKG: "https://pkg.pr.new/@modelcontextprotocol/conformance@65fcd39"
CONFORMANCE_PKG_SHA256: "9a381d7083f8be2fe7ae44efeca54530f18c61425805ddaf9cd88915efcc1574"
CONFORMANCE_PKG: "@modelcontextprotocol/conformance@0.2.0-alpha.7"

jobs:
server-conformance:
Expand All @@ -39,19 +34,6 @@ jobs:
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: 24
- name: Fetch and verify conformance harness
# Only when CONFORMANCE_PKG is a URL: download, check the recorded
# sha256, and re-point CONFORMANCE_PKG at the verified local tarball.
# When CONFORMANCE_PKG is a registry spec, this step is a no-op (npm's
# own integrity check applies).
run: |
case "$CONFORMANCE_PKG" in
https://*)
curl -fsSL "$CONFORMANCE_PKG" -o /tmp/conformance.tgz
echo "$CONFORMANCE_PKG_SHA256 /tmp/conformance.tgz" | sha256sum -c -
echo "CONFORMANCE_PKG=file:/tmp/conformance.tgz" >> "$GITHUB_ENV"
;;
esac
- run: uv sync --frozen --all-extras --package mcp-everything-server
- name: Run server conformance (active suite)
run: >-
Expand Down Expand Up @@ -83,26 +65,22 @@ jobs:
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: 24
- name: Fetch and verify conformance harness
run: |
case "$CONFORMANCE_PKG" in
https://*)
curl -fsSL "$CONFORMANCE_PKG" -o /tmp/conformance.tgz
echo "$CONFORMANCE_PKG_SHA256 /tmp/conformance.tgz" | sha256sum -c -
echo "CONFORMANCE_PKG=file:/tmp/conformance.tgz" >> "$GITHUB_ENV"
;;
esac
- run: uv sync --frozen --all-extras --package mcp
- name: Run client conformance (all suite)
# The harness runs all scenarios via unbounded Promise.all; with 40
# scenarios on a 2-core runner the slowest one (sse-retry, which has a
# real-time SSE reconnect wait) needs more than the 30s default budget.
run: >-
npx --yes "$CONFORMANCE_PKG" client
--command 'uv run --frozen python .github/actions/conformance/client.py'
--suite all
--timeout 60000
--expected-failures ./.github/actions/conformance/expected-failures.yml
- name: Run client conformance (2026-07-28 wire, all suite)
run: >-
npx --yes "$CONFORMANCE_PKG" client
--command 'uv run --frozen python .github/actions/conformance/client.py'
--suite all
--timeout 60000
--spec-version 2026-07-28
--expected-failures ./.github/actions/conformance/expected-failures.2026-07-28.yml
3 changes: 2 additions & 1 deletion docs/migration.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ Version 2 of the MCP Python SDK introduces several breaking changes to improve t

### `MCPServer.call_tool()` returns `CallToolResult`

`MCPServer.call_tool()` now returns a `CallToolResult` (or an
`InputRequiredResult` when a multi-round tool requests further input). It previously
advertised `Sequence[ContentBlock] | dict[str, Any]` and leaked the internal
conversion shapes (a bare content sequence or a `(content, structured_content)`
tuple), forcing callers to re-assemble a `CallToolResult` themselves.
Expand Down
Loading
Loading