diff --git a/.github/actions/codeclone/README.md b/.github/actions/codeclone/README.md index 5b8e1838..28ea25fc 100644 --- a/.github/actions/codeclone/README.md +++ b/.github/actions/codeclone/README.md @@ -1,6 +1,7 @@ # CodeClone GitHub Action -Baseline-aware structural code quality analysis for Python with: +**Structural Change Controller for AI-assisted Python development** — CI +integration with: - configurable CI gating - SARIF upload for GitHub Code Scanning @@ -82,9 +83,9 @@ jobs: | `python-version` | `3.14` | Python version used to run the action | -| `package-version` | `2.0.2` | CodeClone version from PyPI for remote installs; ignored when the action runs from the checked-out CodeClone repo | +| `package-version` | `2.1.0a1` | CodeClone version from PyPI for remote installs; ignored when the action runs from the checked-out CodeClone repo | | `path` | `.` | Project root to analyze | -| `json-path` | `.cache/codeclone/report.json` | JSON report output path | +| `json-path` | `.codeclone/report.json` | JSON report output path | | `sarif` | `true` | Generate SARIF and try to upload it | -| `sarif-path` | `.cache/codeclone/report.sarif` | SARIF output path | +| `sarif-path` | `.codeclone/report.sarif` | SARIF output path | | `pr-comment` | `true` | Post or update a PR summary comment | | `fail-on-new` | `true` | Fail if new clone groups are detected | | `fail-on-new-metrics` | `false` | Fail if metrics regress vs baseline | @@ -182,4 +183,4 @@ Local/self-repo validation: - [CodeClone repository](https://github.com/orenlab/codeclone) - [Documentation](https://orenlab.github.io/codeclone/) -- [SARIF integration](https://orenlab.github.io/codeclone/sarif/) +- [SARIF integration](https://orenlab.github.io/codeclone/guide/integrations/sarif/export/) diff --git a/.github/actions/codeclone/_action_impl.py b/.github/actions/codeclone/_action_impl.py index bfb2d364..692f8a28 100644 --- a/.github/actions/codeclone/_action_impl.py +++ b/.github/actions/codeclone/_action_impl.py @@ -25,7 +25,7 @@ from typing import Literal COMMENT_MARKER = "" -DEFAULT_CODECLONE_PACKAGE_VERSION = "2.0.2"
+DEFAULT_CODECLONE_PACKAGE_VERSION = "2.1.0a1" @dataclass(frozen=True, slots=True) diff --git a/.github/actions/codeclone/action.yml b/.github/actions/codeclone/action.yml index eff91fde..e8ab50a6 100644 --- a/.github/actions/codeclone/action.yml +++ b/.github/actions/codeclone/action.yml @@ -18,7 +18,7 @@ inputs: package-version: description: "CodeClone version from PyPI for remote installs (ignored when the action runs from the checked-out CodeClone repo)" required: false - default: "2.0.2" + default: "2.1.0a1" path: description: "Project root" @@ -28,7 +28,7 @@ inputs: json-path: description: "Canonical JSON report output path" required: false - default: ".cache/codeclone/report.json" + default: ".codeclone/report.json" sarif: description: "Generate SARIF and upload to Code Scanning" @@ -38,7 +38,7 @@ inputs: sarif-path: description: "SARIF output path" required: false - default: ".cache/codeclone/report.sarif" + default: ".codeclone/report.sarif" pr-comment: description: "Post or update a PR summary comment" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 85c616a5..acba6ccd 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -39,7 +39,7 @@ jobs: uses: actions/configure-pages@v5 - name: Build docs site - run: uv run --with mkdocs --with mkdocs-material mkdocs build --strict + run: uv run --with zensical==0.0.43 zensical build --clean --strict - name: Generate sample report artifacts run: uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live diff --git a/.github/workflows/integrations.yml b/.github/workflows/integrations.yml new file mode 100644 index 00000000..4e16f7ae --- /dev/null +++ b/.github/workflows/integrations.yml @@ -0,0 +1,126 @@ +name: integrations +run-name: integrations • ${{ github.event_name }} • ${{ github.ref_name }} + +# Tests the IDE/agent integration surfaces that the main `tests` workflow does +# not cover: the Node-based VS Code extension and Claude 
Desktop bundle, plus +# the Codex and Cursor plugin contract tests. Runs only when an integration +# surface (or the shared MCP tool snapshot the manifests are pinned to) changes. + +on: + push: + branches: [ "**" ] + paths: + - "extensions/**" + - "plugins/**" + - "tests/test_codex_plugin.py" + - "tests/test_cursor_plugin.py" + - "tests/test_cursor_plugin_hooks.py" + - "tests/fixtures/contract_snapshots/mcp_tool_schemas.json" + - ".github/workflows/integrations.yml" + pull_request: + paths: + - "extensions/**" + - "plugins/**" + - "tests/test_codex_plugin.py" + - "tests/test_cursor_plugin.py" + - "tests/test_cursor_plugin_hooks.py" + - "tests/fixtures/contract_snapshots/mcp_tool_schemas.json" + - ".github/workflows/integrations.yml" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: integrations-${{ github.ref }} + cancel-in-progress: true + +jobs: + vscode-extension: + name: VS Code extension + runs-on: ubuntu-latest + defaults: + run: + working-directory: extensions/vscode-codeclone + steps: + - name: Checkout + uses: actions/checkout@v6.0.2 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: npm + cache-dependency-path: extensions/vscode-codeclone/package-lock.json + + - name: Install dependencies + run: npm ci + + - name: Build, typecheck, and syntax-check + run: npm run check + + - name: Unit tests + run: npm test + + claude-desktop-bundle: + name: Claude Desktop bundle + runs-on: ubuntu-latest + defaults: + run: + working-directory: extensions/claude-desktop-codeclone + steps: + - name: Checkout + uses: actions/checkout@v6.0.2 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: npm + cache-dependency-path: extensions/claude-desktop-codeclone/package-lock.json + + - name: Install dependencies + run: npm ci + + - name: Syntax-check launcher and server + run: npm run check + + - name: Unit tests + run: npm test + + - name: Bundle build smoke + run: npm 
run pack + + plugins: + name: Codex and Cursor plugins + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6.0.2 + + - name: Set up Python + uses: actions/setup-python@v6.2.0 + with: + python-version: "3.14" + allow-prereleases: true + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync --extra dev --extra mcp + + - name: Plugin contract tests + run: >- + uv run pytest -q + tests/test_codex_plugin.py + tests/test_cursor_plugin.py + tests/test_cursor_plugin_hooks.py + + - name: Validate bundled plugin manifests + run: | + python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json > /dev/null + python3 -m json.tool plugins/codeclone/.mcp.json > /dev/null + python3 -m json.tool .agents/plugins/marketplace.json > /dev/null diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6acb4a4d..ea61c7a3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 +35,7 @@ jobs: enable-cache: true - name: Install dependencies - run: uv sync --extra dev --extra mcp + run: uv sync --all-extras - name: Run tests # Smoke CLI tests intentionally disable subprocess coverage collection @@ -67,10 +67,10 @@ jobs: enable-cache: true - name: Install dependencies - run: uv sync --extra dev --extra mcp + run: uv sync --all-extras - name: Ruff run: uv run ruff check . - name: Mypy - run: uv run mypy . 
+ run: uv run mypy diff --git a/.gitignore b/.gitignore index 71bd32fd..4d93ff6f 100644 --- a/.gitignore +++ b/.gitignore @@ -22,12 +22,14 @@ site/ # Tool caches .cache/ +.codeclone/ .codeclone-cache/ .codeclone-baseline.json.bak # IDE .idea/ .vscode/ +/.cursor/ # OS .DS_Store @@ -45,3 +47,8 @@ extensions/vscode-codeclone/node_modules /scripts/refactor_guard.sh /docs/refactoring-spec.md /smoke_cli.sh +/codeclone-v2.1-roadmap.md +/specs/ +/coverage.json +/benchmarks/memory_semantic_eval.md +/scripts/commit_memory_phases.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cffc75e5..6c17738f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -default_install_hook_types: [ pre-commit, pre-push ] +default_install_hook_types: [pre-commit, pre-push] repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -10,46 +10,62 @@ repos: - id: check-added-large-files - id: check-toml - id: check-yaml + - id: check-json + - id: check-ast + - id: debug-statements + - id: check-case-conflict + - id: check-symlinks + - id: destroyed-symlinks + - id: detect-private-key + - id: mixed-line-ending + args: [--fix=lf] + - id: forbid-submodules - repo: local hooks: - id: ruff-format name: Ruff (format) - entry: ruff format . + entry: uv run ruff format . language: system pass_filenames: false - types: [ python ] - stages: [ pre-commit ] + stages: [pre-commit] - id: ruff-check name: Ruff (lint) - entry: ruff check . + entry: uv run ruff check . 
language: system pass_filenames: false - types: [ python ] - stages: [ pre-commit ] + stages: [pre-commit] - id: mypy name: Mypy - entry: mypy + entry: uv run mypy language: system pass_filenames: false - types: [ python ] - stages: [ pre-commit ] + always_run: true + stages: [pre-commit] - id: codeclone name: CodeClone - entry: codeclone + entry: uv run codeclone language: system pass_filenames: false - args: [ ".", "--ci" ] - types: [ python ] - stages: [ pre-commit ] + args: [".", "--ci"] + always_run: true + stages: [pre-commit] - id: pytest name: Pytest - entry: pytest -q + entry: uv run pytest -q --cov=codeclone --cov-report=term-missing --cov-fail-under=99 language: system pass_filenames: false - types: [ python ] - stages: [ pre-push ] + always_run: true + stages: [pre-push] + + - id: lint-admonitions + name: Lint admonitions + entry: uv run scripts/lint_admonitions.py docs/ --fix + language: system + pass_filenames: false + files: ^docs/.*\.md$ + stages: [pre-commit] diff --git a/AGENTS.md b/AGENTS.md index 4b285479..17ba6abd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,20 +1,29 @@ # AGENTS.md — CodeClone (AI Agent Playbook) This document is the **source of truth** for agent operating rules in this repository. -It is optimized for **determinism**, **CI stability**, and **reproducible changes**. +It is optimized for **explicit scope**, **determinism**, **CI stability**, and +**reproducible, human-reviewable changes**. For architecture, module ownership, and runtime behavior, the **current repository code is the source of truth**. If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. -> Repository goal: maximize **honesty**, **reproducibility**, **determinism**, and **precision** for real‑world CI -> usage. +**CodeClone** is a deterministic **Structural Change Controller** for +AI-assisted Python development.
It starts before a diff exists: an agent +declares intent, CodeClone maps the structural blast radius, bounds the edit, +verifies the resulting patch against one canonical report, and leaves an +auditable receipt. + +> Repository goal: make AI-assisted structural change **explicit**, **bounded**, +> **remembered**, and **verifiable** without turning LLM output into truth. --- ## 1) Operating principles (non‑negotiable) -1. **Do not break CI contracts.** - - Treat baseline, cache, and report formats as **public APIs**. +1. **Do not break public contracts.** + - Treat controller workflow semantics, baseline, analysis cache, canonical + report formats, Engineering Memory schemas/governance, documented MCP + payloads, and published client behavior as **public APIs**. - Any contract change must be **versioned**, documented, and accompanied by tests. 2. **Determinism > cleverness.** @@ -34,57 +43,120 @@ If AGENTS.md and code diverge, follow code and update AGENTS.md accordingly. 6. **Fingerprint-adjacent optimization policy** - Performance work must not change AST normalization, fingerprint inputs, or clone identity semantics while - `FINGERPRINT_VERSION` remains unchanged. + `BASELINE_FINGERPRINT_VERSION` remains unchanged. - If a change in AST/core analysis can affect fingerprint bytes, clone identity, NEW vs KNOWN classification, or baseline compatibility semantics, it is not a routine optimization. It must be treated as an explicit fingerprint contract change and requires: - - `FINGERPRINT_VERSION` review or bump + - `BASELINE_FINGERPRINT_VERSION` review or bump - documentation updates - migration/release notes - explicit maintainer approval - Performance alone is never a sufficient reason to change fingerprint semantics. +7. **Control starts before the diff.** + - For repository edits, declare intent and scope before editing. + - `edit_allowed=true` is the authoritative permission signal when the + change-control surface is available. 
+ - Blast radius, do-not-touch boundaries, actual changed files, patch + verification, and the review receipt are part of the change contract. + +8. **Agent-authored code requires human ownership.** + - CodeClone accepts code written with agents and language models. + - A human contributor must inspect and understand the complete diff, verify + tests/contracts/security/provenance, and be able to maintain it. + - Substantive human review is mandatory before merge. Agent-only review, + automated approval, or green CI does not satisfy this requirement. + - Material agent assistance must be disclosed in the pull request. + --- ## 2) Quick orientation -CodeClone provides structural code quality analysis for Python. It supports: - -- **function clones** (strongest signal) -- **block clones** (sliding window of statements, may be noisy on boilerplate) -- **segment clones** (report-only unless explicitly gated) - -Key artifacts: - -- `codeclone.baseline.json` — trusted baseline snapshot (for CI comparisons) -- `.cache/codeclone/cache.json` — analysis cache (integrity-checked) -- `.cache/codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports -- `codeclone-mcp` — optional read-only MCP server (install via `codeclone[mcp]`) +CodeClone controls structural change through this deterministic lifecycle: + +1. declare intent and allowed scope; +2. inspect blast radius, review context, and do-not-touch boundaries + (`get_implementation_context` with `intent_id` after `start`, or `get_blast_radius` + for blast-only inspection — see `codeclone-implementation-context` skill in plugins); +3. make the bounded edit only after permission is granted; +4. reconcile actual changed files with declared scope; +5. verify structural deltas and review claims against one canonical report; +6. leave an auditable receipt and Patch Trail evidence. + +The controller is built on one deterministic structural analysis. 
The canonical +report includes function/block/segment clones, structural findings, quality +metrics, coverage and API-surface joins, baseline-aware novelty, and health +signals. CLI, reports, MCP, IDEs, plugins, and CI project the same facts. + +Key state and surfaces: + +- `codeclone.baseline.json` — trusted comparison snapshot for baseline-aware CI +- `.codeclone/cache.json` — integrity-checked analysis optimization, never truth +- `.codeclone/report.html|report.json|report.md|report.sarif|report.txt` — + deterministic projections of the canonical report +- `.codeclone/intents/` or configured SQLite registry — ephemeral, + lease/TTL-bound workspace coordination, never analysis truth +- `.codeclone/db/audit.sqlite3` — optional passive controller evidence +- `.codeclone/memory/engineering_memory.sqlite3` — governed Engineering Memory + with FTS, trajectory, Patch Trail, Experience, and projection-job state +- `.codeclone/memory/semantic_index.lance` — optional semantic sidecar +- `.codeclone/db/platform_observability.sqlite3` — opt-in local diagnostics for + CodeClone itself; never repository quality evidence or a gate input +- `codeclone-mcp` — optional MCP server: read-only with respect to source + files, baselines, canonical/generated reports, and analysis cache; explicit + controller, audit, memory, projection, and observability contracts may write + only their documented bounded local state (install via `codeclone[mcp]`) - `extensions/vscode-codeclone/` — stable VS Code extension as a native, read-only IDE client over `codeclone-mcp` - `extensions/claude-desktop-codeclone/` — stable Claude Desktop `.mcpb` bundle as a local install wrapper over `codeclone-mcp` +- `plugins/claude-code-codeclone/` — stable Claude Code plugin source, synchronized to the public + `orenlab/codeclone-claude-code` marketplace with bundled MCP configuration and CodeClone skills - `plugins/codeclone/` + `.agents/plugins/marketplace.json` — stable Codex plugin as a native local discovery 
layer - over `codeclone-mcp`, with a bundled CodeClone review skill -- MCP runs are in-memory only; review markers are session-local and must never - leak into baseline/cache/report artifacts -- `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline + over `codeclone-mcp`, with bundled CodeClone skills under `plugins/codeclone/skills/` +- `plugins/cursor-codeclone/` — stable Cursor plugin as a native local discovery layer over `codeclone-mcp`, with + bundled skills, rules, hooks, and an agent definition +- MCP runs are in-memory only. Review markers are session-local. Change intent + truth is session-local, with optional ephemeral workspace coordination records + under `.codeclone/intents/`; none of this may leak into + baseline/cache/report artifacts. Optional audit trail is passive evidence + state and must not affect canonical report digests, baseline trust, cache + compatibility, or finding identity. +- `docs/`, `zensical.toml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- -## 3) One command to validate your change +## 3) Validation stages -Run these locally before proposing changes: +The installed `pre-commit` stage runs hygiene checks, Ruff, Mypy, +baseline-aware `codeclone . --ci`, and the docs admonition fixer: ```bash uv run pre-commit run --all-files ``` -If you touched baseline/cache/report contracts, also run the repo’s audit runner (or the scenario script if present). -If you touched `docs/`, `mkdocs.yml`, docs publishing workflow, or sample-report generation, also run: +This does **not** run the `pre-push` pytest hook. Run it explicitly before +pushing: + +```bash +uv run pre-commit run --hook-stage pre-push --all-files +``` + +The pre-push hook and CI enforce package coverage `>=99%`: + +```bash +uv run pytest -q --cov=codeclone --cov-report=term-missing --cov-fail-under=99 +``` + +Hooks may rewrite files. Inspect `git diff` again afterward. 
Never use +`--no-verify` to bypass a failing hook. + +If you touched baseline/cache/report contracts or CLI/MCP audit surfaces, also exercise the CLI audit path +(`--audit` / `codeclone/surfaces/cli/audit.py`) or the relevant audit/MCP tests. +If you touched `docs/`, `zensical.toml`, docs publishing workflow, or sample-report generation, also run: ```bash -uv run --with mkdocs --with mkdocs-material mkdocs build --strict +uv run --with zensical==0.0.43 zensical build --clean --strict ``` If you touched the MCP surface, also run: @@ -93,6 +165,17 @@ If you touched the MCP surface, also run: uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py ``` +If you touched Engineering Memory, semantic retrieval, trajectories, +Experiences, or projection jobs, run the nearest owning modules, including the +applicable `tests/test_memory_*.py`, `tests/test_semantic_*.py`, and MCP memory +contract tests. + +If you touched Platform Observability, also run: + +```bash +uv run pytest -q tests/test_observability_*.py +``` + If you touched the VS Code extension surface, also run: ```bash @@ -131,22 +214,78 @@ python3 -m json.tool .agents/plugins/marketplace.json >/tmp/codeclone-codex-mark uv run pytest -q tests/test_codex_plugin.py ``` +If you touched the Claude Code plugin surface, also run: + +```bash +python3 -m json.tool plugins/claude-code-codeclone/.claude-plugin/plugin.json >/tmp/codeclone-claude-code-plugin.json +python3 -m json.tool plugins/claude-code-codeclone/.mcp.json >/tmp/codeclone-claude-code-mcp.json +python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json >/tmp/codeclone-claude-code-marketplace.json +claude plugin validate plugins/claude-code-codeclone +uv run pytest -q tests/test_claude_code_plugin.py +``` + +If you touched the Cursor plugin surface, also run: + +```bash +uv run pytest -q tests/test_cursor_plugin.py tests/test_cursor_plugin_hooks.py +``` + +If you touched the GitHub Action helpers, also run: + +```bash +uv run pytest 
-q tests/test_github_action_helpers.py +``` + +If you touched `scripts/sync_integrations.py`, +`scripts/integration_dist/*`, or integration distribution layouts, also run: + +```bash +uv run pytest -q tests/test_sync_integrations.py +``` + --- ## 4) Baseline contract (v2, stable) ### Versioned constants (single source of truth) -All schema/version constants live in `codeclone/contracts/__init__.py`. **Always read them from code, never copy -from another doc.** Current values (verified at write time): - -| Constant | Source | Current value | -|-----------------------------------|-----------------------------------|---------------| -| `BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.1` | -| `BASELINE_FINGERPRINT_VERSION` | `codeclone/contracts/__init__.py` | `1` | -| `CACHE_VERSION` | `codeclone/contracts/__init__.py` | `2.8` | -| `REPORT_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.11` | -| `METRICS_BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `1.2` | +Cross-surface schema/version constants live in +`codeclone/contracts/__init__.py`; subsystem-local wire versions may live with +their owning modules. 
**Always read values from code, never copy from another +doc.** Current central values (verified at write time): + +| Constant | Current value | +|------------------------------------------|-----------------| +| `BASELINE_SCHEMA_VERSION` | `2.1` | +| `BASELINE_FINGERPRINT_VERSION` | `1` | +| `CACHE_VERSION` | `2.10` | +| `REPORT_SCHEMA_VERSION` | `2.11` | +| `METRICS_BASELINE_SCHEMA_VERSION` | `1.2` | +| `ENGINEERING_MEMORY_SCHEMA_VERSION` | `1.7` | +| `SEMANTIC_INDEX_FORMAT_VERSION` | `2` | +| `PATCH_TRAIL_SCHEMA_VERSION` | `1` | +| `PLATFORM_OBSERVABILITY_SCHEMA_VERSION` | `1.1` | +| `TRAJECTORY_PROJECTION_VERSION` | `trajectory-v3` | +| `TRAJECTORY_QUALITY_SCORE_VERSION` | `2` | +| `EXPERIENCE_DISTILLATION_VERSION` | `experience-v1` | +| `IDE_GOVERNANCE_PROTOCOL_VERSION` | `2` | +| `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION` | `1.2` | +| `CORPUS_EXPORT_SCHEMA_VERSION` | `1.3` | +| `CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION` | `1` | +| `CORPUS_CONTROL_PLANE_CONTRACT_VERSION` | `1.0` | +| `CORPUS_REPRESENTATION_CONTRACT_VERSION` | `3` | +| `CORPUS_NORMALIZER_VERSION` | `1` | +| `CORPUS_EMBEDDING_CONTRACT_VERSION` | `2` | +| `CORPUS_AGENT_LABEL_CONTRACT_VERSION` | `1` | +| `CORPUS_PARTITION_MAP_VERSION` | `1` | + +Subsystem-local wire versions (not in `contracts/__init__.py`): + +| Constant | Value | Owner | +|----------------------------|-------|-----------------------------------------------------| +| `AUDIT_EVENT_CORE_VERSION` | `2` | `codeclone/audit/events.py` | +| `CONTEXT_CONTRACT_VERSION` | `1` | `codeclone/surfaces/mcp/_implementation_context.py` | +| `CALL_RESOLUTION_VERSION` | `1` | `codeclone/surfaces/mcp/_implementation_context.py` | When updating any doc that mentions a version, re-read `codeclone/contracts/__init__.py` first. Do not derive versions from another document. @@ -182,6 +321,13 @@ versions from another document. - Runtime writes baseline schema `2.1`. 
- Runtime accepts baseline schema `1.0` and `2.0`–`2.1` (governed by `_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR` in `codeclone/baseline/trust.py`). +- Baseline novelty is **baseline-relative**, not patch-relative: + `novelty="known"` means a finding fingerprint is accepted by the trusted + baseline. It does not prove that the current patch did not introduce or + reintroduce that finding. +- Patch-local regression claims require clean before-run to after-run evidence + (`compare_runs` / `check_patch_contract(mode="verify")`), not a single run's + baseline novelty. - Compatibility is tied to: - `fingerprint_version` - `python_tag` @@ -230,9 +376,33 @@ Reports come in: - SARIF (`--sarif`) - Text (`--text`) -MCP is a separate optional interface, not a report format. It must remain a -read-only agent layer over the same canonical report/baseline/cache contracts. -Session review markers are allowed only as ephemeral MCP process state. +MCP is a separate optional interface, not a report format. It must remain +read-only with respect to repository source, baselines, canonical reports, +generated reports, and analysis cache. Explicit controller/developer contracts +may maintain bounded local state: + +- session-local runs and review markers; +- ephemeral workspace intent coordination; +- optional controller audit evidence; +- governed Engineering Memory drafts and projection metadata; +- opt-in Platform Observability telemetry. + +These writes must use their owning controller, memory, audit, projection, or +observability contract. They must never alter canonical report identity, +baseline trust, cache compatibility, findings, gates, or edit authorization. +Workspace intent registry files under `.codeclone/intents/` are advisory +coordination state only, not analysis cache or report truth. 
+ +For file edits, agents should prefer the workflow tools +`start_controlled_change` and `finish_controlled_change` — they aggregate +workspace check, intent declaration, blast radius, budget, verification, +receipt, and cleanup into two calls. Use `dirty_scope_policy="continue_own_wip"` +when resuming own uncommitted scope without foreign dirty overlap. Atomic change +control tools (`manage_change_intent`, `get_blast_radius`, `check_patch_contract`, +`validate_review_claims`, `create_review_receipt`) remain available for +queue/promote/recover operations, deep inspection, and backward +compatibility with older MCP servers. Pass `patch_health_delta` to +`validate_review_claims` when using the atomic verify path. ### Report invariants @@ -294,20 +464,32 @@ No UI-only heuristics that affect gating. ## 8) How to propose changes (agent workflow) +For repository edits, follow `CLAUDE.md` / the active CodeClone change-control +skill first. No edit begins until `start_controlled_change` returns +`edit_allowed=true`. Retrieve relevant memory after scope authorization, keep +the patch inside declared boundaries, verify with the profile selected by +`finish_controlled_change`, and leave a receipt. This section describes what to +report around that controlled change, not a replacement workflow. + When you implement something: 1. **State the intent** (what user-visible issue does it solve?) -2. **List files touched** and why. -3. **Call out contracts affected**: +2. **Declare allowed files, related context, and forbidden paths.** +3. **Inspect blast radius, review context, and do-not-touch boundaries.** +4. **List actual files touched** and why. +5. **Call out contracts affected**: - baseline / cache / report schema - - CLI exit codes / messages -4. **Add/adjust tests** for: + - controller / memory / observability / MCP payloads + - CLI exit codes / messages / integration surfaces +6. 
**Add/adjust tests** for: - normal-mode behavior - CI gating behavior - determinism (identical output on rerun) - legacy/untrusted scenarios where applicable -5. Run: +7. **Run**: - `ruff`, `mypy`, `pytest` +8. **Request substantive human review** of the complete diff. Automated + analysis, agent review, receipts, and green CI are evidence, not approval. Avoid changing unrelated files (locks, roadmap) unless required. @@ -330,6 +512,16 @@ Changed-scope flags are contract-sensitive: - `--diff-against` requires `--changed-only`. - `--paths-from-git-diff` implies `--changed-only`. +Controller and workspace query flags (terminal-only; see `docs/book/11-cli.md` and +`tests/fixtures/contract_snapshots/cli_help.txt`): + +- `--blast-radius`, `--patch-verify`, `--strictness` — patch/blast-radius query +- `--session-stats`, `--audit`, `--audit-json` — workspace/audit query (read-only; + `--audit` requires `audit_enabled=true` in effective config) + +Full flag inventory and combination rules: `docs/book/11-cli.md`, +`docs/book/10-config-and-defaults.md`. + If you introduce a new exit reason, document it and add tests. --- @@ -358,8 +550,17 @@ Before cutting a release: - Don’t embed suppressions into baseline unless explicitly designed as a versioned contract. - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). - Don’t make the base `codeclone` install depend on optional MCP runtime packages. -- Don’t let MCP mutate baselines, source files, or repo state. +- Don’t edit before the controller authorizes the declared scope when the + change-control surface is available. +- Don’t let MCP mutate source files, baselines, canonical reports, generated + reports, or analysis cache data. Bounded controller, memory, projection, + audit, and observability state is allowed only through explicit owning + contracts. - Don’t let MCP re-synthesize design findings from raw metrics; read canonical `findings.groups.design` only. 
+- Don’t let Engineering Memory, trajectories, Experiences, or Platform + Observability authorize edits or override canonical report facts. +- Don’t describe agent review, receipts, or automated checks as the mandatory + human review required for merge. --- @@ -367,14 +568,23 @@ Before cutting a release: Architecture is layered, but grounded in current code (not aspirational diagrams): +- **Structural Change Controller** (`codeclone/surfaces/mcp/_session_workflow_mixin.py`, + intent/blast-radius/patch-contract/receipt helpers under + `codeclone/surfaces/mcp/`, `codeclone/analysis/blast_radius.py`, + `codeclone/budget/*`) owns pre-edit scope authorization, deterministic blast + radius, patch verification, claim validation, and review receipts over + canonical report facts. - **CLI entry + orchestration surface** (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*`) owns argument parsing, runtime/config resolution, summaries, report writes, and exit routing. + User-facing copy lives in `ui_messages/` submodules (`help`, `labels`, `runtime`, + `markers`, `formatters`, `controller`, `styling`). - **Config layer** (`codeclone/config/*`) is the single source of truth for option specs, parser construction, `pyproject.toml` loading, and CLI > pyproject > defaults resolution. - **Core orchestration** (`codeclone/core/*`) owns bootstrap → discovery → worker processing → project metrics → report/gate integration. It does not own shell UX. -- **Analysis layer** (`codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*`) - parses source, normalizes AST/CFG facts, extracts units, and prepares deterministic analysis inputs. +- **Analysis layer** (`codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*`, + `codeclone/scanner/*`) parses source, normalizes AST/CFG facts, extracts units, and prepares deterministic analysis + inputs. 
- **Clone/finding derivation layer** (`codeclone/findings/*`, `codeclone/metrics/*`) groups clones and computes structural and quality signals from already-extracted facts. - **Domain/contracts layer** (`codeclone/models.py`, `codeclone/contracts/*`, `codeclone/domain/*`) defines typed @@ -386,45 +596,91 @@ Architecture is layered, but grounded in current code (not aspirational diagrams and deterministic projections. - **HTML/UI rendering** (`codeclone/report/html/*`) renders views from canonical report/meta facts. HTML is render-only. -- **MCP agent interface** (`codeclone/surfaces/mcp/*`) exposes the same pipeline/report contracts as a deterministic, - read-only MCP surface for AI agents and MCP-capable clients. -- **Documentation/publishing surface** (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, +- **MCP agent interface** (`codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*`) + exposes the same pipeline/report contracts as a deterministic MCP surface for AI agents and MCP-capable clients, + read-only with respect to source/baseline/report/cache artifacts and stateful + only through explicit controller, memory, projection, audit, and observability + contracts. +- **Engineering Memory** (`codeclone/memory/*`, `codeclone/config/memory*.py`) + owns the local evidence-linked store, FTS/semantic retrieval, staleness, + governance, trajectory and Patch Trail projection, Experience distillation, + and coalesced projection jobs. It guides agents but never authorizes edits. +- **Platform Observability** (`codeclone/observability/*`) owns opt-in local + operation/span telemetry, normalized SQL fingerprints, bounded query + projections, and self-contained JSON/HTML diagnostics for CodeClone + development. It is never repository quality truth or a gate input. +- **Controller insights** (`codeclone/controller_insights/*`) owns shared + session-stat and audit-trail projections used by CLI and IDE-only MCP tools. 
+- **Audit trail** (`codeclone/audit/*`) stores optional passive evidence (SQLite by default via + `codeclone/surfaces/cli/audit.py` / MCP audit emit). It must not affect canonical report digests, baseline trust, + cache compatibility, or finding identity. +- **Patch budget helpers** (`codeclone/budget/*`) provide shared budget estimation for CLI/MCP patch-verify flows. +- **Documentation/publishing surface** (`docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. +- **Developer/release scripts** (`scripts/lint_admonitions.py`, + `scripts/sync_integrations.py`, `scripts/integration_dist/*`, + `scripts/launch_mcp`) provide docs hygiene, storefront synchronization, and + launcher adapters. They must remain thin and contract-tested. +- **GitHub Action surface** (`.github/actions/codeclone/*`) packages the public + composite Action over the same CLI contracts; shell inputs, timeouts, outputs, + and exit behavior are contract-sensitive. - **VS Code extension surface** (`extensions/vscode-codeclone/*`) is a native, workspace-only IDE client over `codeclone-mcp`, with baseline-aware, triage-first, source-first review UX. - **Claude Desktop bundle surface** (`extensions/claude-desktop-codeclone/*`) is a native `.mcpb` install wrapper for Claude Desktop that launches the same local `codeclone-mcp` server via local `stdio`. +- **Claude Code plugin surface** (`plugins/claude-code-codeclone/*`, + `scripts/integration_dist/marketplace.claude-code.json`) is a native + marketplace plugin over `codeclone-mcp`, with bundled skills and MCP + configuration synchronized to `orenlab/codeclone-claude-code`. - **Codex plugin surface** (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) is a native local Codex plugin - over `codeclone-mcp`, with repo-local discovery metadata and a bundled CodeClone review skill. 
+ over `codeclone-mcp`, with repo-local discovery metadata and bundled skills under `plugins/codeclone/skills/`. +- **Cursor plugin surface** (`plugins/cursor-codeclone/*`) is a native local Cursor plugin over `codeclone-mcp` with + bundled skills, rules, hooks, and an agent definition. - **Tests-as-spec** (`tests/`) lock behavior, contracts, determinism, and architecture boundaries. Non-negotiable interpretation: +- The Controller begins before the diff; intent/scope/blast radius are not + post-hoc review annotations. - Core produces facts; renderers present facts. - Baseline/cache are persistence contracts, not analysis truth. - UI/report must not invent gating semantics. - MCP reuses pipeline/report contracts and must not create a second analysis truth path. +- Engineering Memory, trajectories, Experiences, and Patch Trail are + evidence/context layers, not edit authorization or analysis truth. +- Platform Observability describes CodeClone execution cost, not repository + quality, vulnerabilities, or permission. - The VS Code extension is a guided IDE view over MCP and must not introduce a second analysis or truth path. - The Claude Desktop bundle is a local setup surface over `codeclone-mcp` and must not introduce a second server or truth path. +- The Claude Code plugin is a discovery and guidance surface over + `codeclone-mcp` and must not introduce a second analyzer, MCP server, or + truth path. - The Codex plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second analyzer, MCP server, or truth path. +- The Cursor plugin is a local discovery and guidance surface over `codeclone-mcp` and must not introduce a second + analyzer, MCP server, or truth path. ## 13) Module map Use this map to route changes to the right owner module. - `codeclone/main.py` — public CLI entrypoint only. Keep it tiny. 
+- `codeclone/analysis/blast_radius.py` — deterministic dependency/blast-radius + graph core shared by CLI/MCP controller projections; keep it independent from + MCP session policy. - `codeclone/surfaces/cli/workflow.py` — top-level CLI orchestration and exit routing. Add CLI control flow here, not in `main.py`. - `codeclone/surfaces/cli/*` — CLI support slices (startup, runtime, execution, post-run handling, summaries, - reports, changed-scope logic, baseline state, console helpers). Keep them orchestration/UX-focused. + reports, changed-scope logic, baseline state, audit rendering, console helpers). Keep them orchestration/UX-focused. - `codeclone/config/*` — parser construction, option specs/defaults, pyproject loading, config resolution. Do not duplicate option semantics elsewhere. - `codeclone/core/*` — canonical runtime pipeline and payload plumbing. Change integration flow here; do not move shell UX or HTML-only logic here. - `codeclone/analysis/*` — AST parsing, CFG/fingerprint preparation, declaration/reference collection, and unit - extraction. Change parsing/extraction semantics here; keep it independent from CLI/report/baseline UX. + extraction (`units.py`, `_module_walk.py`). Change parsing/extraction semantics here; keep it independent from + CLI/report/baseline UX. +- `codeclone/scanner/*` — Python file discovery helpers and module-name resolution used by core discovery. - `codeclone/findings/clones/grouping.py` + `codeclone/blocks/*` — clone grouping and block/segment mechanics. - `codeclone/findings/structural/detectors.py` — structural finding extraction/normalization policy; keep it factual and deterministic. @@ -449,10 +705,27 @@ Use this map to route changes to the right owner module. - `codeclone/report/gates/*` — metric-gate reason derivation over canonical metrics state. 
- `codeclone/report/*.py` (other modules) — deterministic report support slices such as explainability, suggestions, merge, overview, findings helpers, and source-kind routing. +- `codeclone/memory/*` — Engineering Memory persistence, ingest, scoped + retrieval, semantic sidecar, governance, trajectories, Patch Trail, + Experiences, and projection jobs. Memory mutations go through explicit memory + tools/workflows only — never the general source-edit workflow. - `codeclone/surfaces/mcp/service.py` — typed, in-process MCP service over the current pipeline/report contracts; - keep it deterministic and read-only except for session-local in-memory markers. + keep source/baseline/report/cache access read-only. Local mutations are + limited to documented controller, memory, projection, audit, and + observability contracts. - `codeclone/surfaces/mcp/server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. +- `codeclone/surfaces/mcp/messages/*` — MCP user-facing copy (tool/resource descriptions, help topics, workflow and + intent messages, parameter Field docs, patch-contract hints, verification copy, remediation shapes). Keep message + policy centralized like `ui_messages/`. +- `codeclone/audit/*` — audit event schema, validation, writer/reader; passive evidence only. +- `codeclone/budget/*` — patch/token budget estimation shared by CLI and MCP surfaces. +- `codeclone/controller_insights/*` — shared session-stats and audit-trail + collectors; CLI and IDE projections must reuse these rather than duplicating + insight semantics. +- `codeclone/observability/*` — developer-only instrumentation, local telemetry + persistence, bounded query views, and JSON/HTML rendering. It must remain + independent from findings, gates, baselines, memory facts, and authorization. 
- `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when touching any MCP surface. - `codeclone/contracts/*` — version constants, schema types, exit enum, URLs, and typed exceptions. Treat as contract @@ -461,14 +734,32 @@ Use this map to route changes to the right owner module. - `codeclone/domain/*.py` — centralized domain taxonomies/IDs (families, categories, source scopes, risk/severity levels); use these constants in pipeline/report/UI instead of scattering raw literals. - `codeclone/ui_messages/*` — CLI text/marker/help constants and formatter helpers. Keep message policy centralized. -- `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py` — docs-site source, +- `codeclone/report/messages/*` — report-layer user copy (glossary, suggestions, + explainability, overview, security, chrome, text/markdown/sarif projections, + gate prefixes). +- `docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py` — docs-site source, publication workflow, and live sample-report generation; keep published docs aligned with code contracts. +- `scripts/lint_admonitions.py` — deterministic MkDocs admonition/details + indentation validator/fixer used by pre-commit. +- `scripts/sync_integrations.py` + `scripts/integration_dist/*` — guarded + storefront synchronization and distribution overlays. Dry-run first; test + with `tests/test_sync_integrations.py`. +- `scripts/launch_mcp` — monorepo adapter to the shared Codex plugin launcher, + not an independent launcher implementation. +- `.github/actions/codeclone/*` — composite GitHub Action surface; pass inputs + through `env:`, keep subprocess timeouts explicit, and preserve documented + CLI/output semantics. 
- `extensions/vscode-codeclone/*` — stable VS Code extension surface; keep it baseline-aware, triage-first, source-first, and faithful to MCP/canonical report semantics rather than building a second analyzer or report model. - `extensions/claude-desktop-codeclone/*` — stable Claude Desktop bundle surface; keep it local-stdio-only, launcher-focused, and faithful to `codeclone-mcp` rather than re-implementing MCP semantics in the bundle layer. +- `plugins/claude-code-codeclone/*` — stable Claude Code plugin source; keep it + Claude-native, marketplace-installable, skills-guided, and faithful to + `codeclone-mcp` rather than inventing plugin-only analysis logic. - `plugins/codeclone/*`, `.agents/plugins/marketplace.json` — stable Codex plugin surface; keep it Codex-native, conservative-first, skills-guided, and faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. +- `plugins/cursor-codeclone/*` — stable Cursor plugin surface; keep it Cursor-native, skills/rules/hooks-guided, and + faithful to `codeclone-mcp` rather than inventing plugin-only analysis logic. - `tests/` — executable specification: architecture rules, contracts, goldens, invariants, regressions. ## 14) Dependency direction @@ -484,6 +775,12 @@ Dependency direction is enforceable and partially test-guarded (`tests/test_arch and findings must also stay independent from config/report-builder wiring. - `codeclone.models` may import only `codeclone.contracts` from local modules. - `codeclone.domain.*` must remain leaf domain modules. +- `codeclone.memory.*` may import `codeclone.contracts`, `codeclone.utils`, blast-radius helpers under + `codeclone/analysis/`, and report document types as needed for ingestion. It must NOT import `codeclone.surfaces.*` + or `codeclone.ui_messages`. +- `codeclone.observability.*` is diagnostics-only and must not become a + dependency that changes analysis, findings, gates, baselines, memory facts, + or authorization. 
Operational rules: @@ -494,6 +791,8 @@ Operational rules: - Persistence semantics (baseline/cache trust/integrity) must stay in persistence/domain modules, not in render/UI layers. - MCP may depend on pipeline/report/contracts, but core/persistence/report layers must not depend on MCP modules. +- Controller insights are shared projections; CLI/MCP render them but must not + fork their collection semantics. ## 15) Suppression policy @@ -506,8 +805,8 @@ Inline suppressions are explicit local policy, not analysis truth. - inline comment on the declaration header closing line for multiline signatures - Binding is target-specific (`filepath`, `qualname`, declaration span, kind). No file-wide/global implicit scope. - Unknown/malformed directives are ignored safely; analysis must not fail because of suppression syntax issues. -- Current active semantic effect is dead-code suppression (`dead-code`) through `extractor.py` → - `DeadCandidate.suppressed_rules` → `metrics/dead_code.py`. +- Current active semantic effect is dead-code suppression (`dead-code`) through + `codeclone/analysis/_module_walk.py` → `DeadCandidate.suppressed_rules` → `codeclone/metrics/dead_code.py`. - Suppressed dead-code findings are excluded from active dead-code findings and health impact, but remain observable in report surfaces where implemented (JSON summary/details, text/markdown/html, CLI counters). - Suppressions must not silently alter unrelated finding families. @@ -518,19 +817,27 @@ Prefer explicit inline suppressions for runtime/dynamic false positives instead If you change a contract-sensitive zone, route docs/tests/approval deliberately. 
-| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | -|-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------| -| Baseline schema/trust/integrity (`codeclone/baseline/clone_baseline.py`, `codeclone/baseline/trust.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | -| Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | -| Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract 
impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | -| CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | -| Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | -| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | -| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | 
`tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | -| VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/book/21-vscode-extension.md`, `docs/vscode-extension.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | -| Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/book/22-claude-desktop-bundle.md`, `docs/claude-desktop-bundle.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | -| Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/book/23-codex-plugin.md`, `docs/codex-plugin.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | 
`python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, `tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | -| Docs site / sample report publication (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/README.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `mkdocs build --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | +| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | 
+|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------| +| Baseline schema/trust/integrity (`codeclone/baseline/clone_baseline.py`, `codeclone/baseline/trust.py`) | `docs/book/07-baseline.md`, `docs/book/24-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | +| Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/08-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | +| Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/05-report.md` (+ `docs/book/06-html-render.md` 
if rendering contract impacted), `docs/guide/integrations/sarif/` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | +| CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/11-cli.md`, `docs/book/09-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | +| Structural Change Controller (intent, blast radius, patch contract, hygiene, claims, receipts, Patch Trail) | `docs/book/12-structural-change-controller/`, `docs/guide/change-control/`, `docs/book/14-claim-guard.md`, MCP/plugin guidance, `README.md`, `CHANGELOG.md` | Controller/intent/verification/claim/receipt tests in `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/test_verification_profile.py`, `tests/test_patch_trail_*.py`, plus tool-schema snapshots when payloads change | edit authorization, scope/hygiene, verification profile, claim semantics, receipt or Patch Trail contract changes | workflow tool payloads, status transitions, permission signals, verification/receipt schemas change | +| Fingerprint-adjacent analysis (`codeclone/analysis/units.py`, `codeclone/analysis/_module_walk.py`, `codeclone/analysis/cfg.py`, `codeclone/analysis/normalizer.py`, `codeclone/findings/clones/grouping.py`) | `docs/book/03-core-pipeline.md`, `docs/book/04-cfg-semantics.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / 
fingerprint inputs change | +| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, `codeclone/analysis/_module_walk.py` dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/17-dead-code-contract.md`, `docs/book/05-report.md`, and interface docs if surfaced (`11-cli`, `06-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/25-mcp-interface/`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/book/24-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, `tests/fixtures/contract_snapshots/mcp_tool_schemas.json`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, workflow tool payloads, repository-read-only semantics, optional-dependency packaging behavior change | public MCP tool names, workflow tool payloads, resource URIs, launcher/install behavior, or response semantics change | +| Engineering Memory, semantic retrieval, trajectories, Experiences, projection jobs (`codeclone/memory/*`, `codeclone/config/memory*.py`) | `docs/book/13-engineering-memory/`, trajectory/Experience guides, `docs/book/25-mcp-interface/`, `docs/book/11-cli.md`, plugin skills, `CHANGELOG.md` | Applicable `tests/test_memory_*.py`, `tests/test_semantic_*.py`, projection/trajectory/Experience tests, MCP memory tests, and tool-schema snapshots when payloads change | schema/governance transitions, retrieval/fusion semantics, trajectory quality, Experience promotion, or worker lifecycle change | memory/semantic/projection versions, SQLite DDL, CLI/MCP payloads, 
ranking/filter/governance semantics change | +| Platform Observability (`codeclone/observability/*`, CLI trace, MCP bounded slicer, worker instrumentation) | `docs/book/26-platform-observability.md`, `docs/guide/observability/diagnostics.md`, config/MCP docs, `CHANGELOG.md` when user-visible | `tests/test_observability_*.py`, plus worker/memory/MCP tests for changed instrumentation boundaries | privacy/trust boundary, persisted schema, correlation, payload-size, SQL fingerprint, or public projection changes | `PLATFORM_OBSERVABILITY_SCHEMA_VERSION`, CLI/MCP section payloads, persistence or collection semantics change | +| Controller audit and insights (`codeclone/audit/*`, `codeclone/controller_insights/*`, CLI/MCP session/audit surfaces) | Controller, CLI/config, retention, MCP, and integration docs; `CHANGELOG.md` when public | `tests/test_audit_*.py`, `tests/test_controller_insights.py`, CLI/MCP projection tests | audit event core/schema, retention, token/payload footprint, or shared collector semantics change | audit schema/event core, `--audit`/`--session-stats`, IDE-only insight payloads change | +| VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/guide/integrations/vscode/setup.md`, `docs/book/integrations/vs-code-extension.md`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | +| Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | 
`docs/guide/integrations/claude-desktop/setup.md`, `docs/book/integrations/claude-desktop-bundle.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | +| Claude Code plugin surface (`plugins/claude-code-codeclone/*`, `scripts/integration_dist/marketplace.claude-code.json`) | `docs/guide/integrations/claude-code/setup.md`, `docs/book/integrations/claude-code-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/claude-code-codeclone/.claude-plugin/plugin.json`, `python3 -m json.tool plugins/claude-code-codeclone/.mcp.json`, `python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json`, `claude plugin validate plugins/claude-code-codeclone`, `tests/test_claude_code_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, launcher behavior, or marketplace metadata change | documented Claude Code install/discovery/runtime behavior or plugin manifest/marketplace semantics change | +| Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/guide/integrations/codex/setup.md`, `docs/book/integrations/codex-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, 
`tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | +| Cursor plugin surface (`plugins/cursor-codeclone/*`) | `docs/guide/integrations/cursor/install-and-skills.md`, `docs/book/integrations/cursor-plugin.md`, `docs/guide/mcp/`, `docs/book/02-architecture-map.md`, `docs/index.md`, `CHANGELOG.md` | `tests/test_cursor_plugin.py`, `tests/test_cursor_plugin_hooks.py` | plugin discovery/runtime model, bundled MCP config, bundled skill/rule/hook behavior, or plugin metadata change | documented Cursor plugin install/discovery/runtime behavior or plugin manifest semantics change | +| GitHub Action surface (`.github/actions/codeclone/*`) | Action README, main README/getting-started/CI docs, `CHANGELOG.md` when user-visible | `tests/test_github_action_helpers.py`, shell/action smoke for changed workflow behavior | input interpolation, command construction, timeout, output, or exit behavior changes | public action inputs/outputs/runtime behavior changes | +| Storefront sync and distribution overlays (`scripts/sync_integrations.py`, `scripts/integration_dist/*`, launcher copy rules) | `docs/releasing.md`, affected integration docs/READMEs, `CHANGELOG.md` when publish behavior changes | `tests/test_sync_integrations.py`, then target-native package/test smoke after sync | deletion/copy boundary, target layout, launcher override, denylist, manifest provenance, or dirty-source policy changes | distribution layout, copied source set, `SYNC_MANIFEST.json`, storefront launcher/metadata semantics change | +| Docs site / sample report publication (`docs/`, `zensical.toml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/index.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior 
changes | `zensical build --clean --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | Golden rule: do not “fix” failures by snapshot refresh unless the underlying contract change is intentional, documented, and approved. @@ -541,33 +848,57 @@ Treat tests as specification with explicit intent: - **Unit tests** — module-level behavior and edge conditions (e.g., `tests/test_cfg.py`, `tests/test_normalize.py`, `tests/test_metrics_modules.py`, `tests/test_suppressions.py`). -- **Contract tests** — baseline/cache/report/CLI public semantics (e.g., `tests/test_baseline.py`, - `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`). +- **Contract tests** — controller, baseline/cache/report/CLI/MCP/Memory public + semantics (e.g., `tests/test_mcp_service.py`, `tests/test_baseline.py`, + `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, + `tests/test_memory_compact_contract.py`). - **Golden tests** — snapshot sentinels for stable outputs (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`). - **Determinism/invariant tests** — ordering, branch-path invariants, and canonical stability (e.g., - `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`). + `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`, + `tests/test_semantic_determinism_gate.py`). - **Scenario/regression tests** — multi-step integration and process-level behavior (e.g., - `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py`). + `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, + `tests/test_memory_projection_jobs.py`, `tests/test_sync_integrations.py`). 
+- **Developer diagnostics tests** — observer configuration, correlation, + persistence, query, rendering, MCP, and worker chain behavior + (`tests/test_observability_*.py`). Policy: - Expand the closest taxonomy bucket when changing behavior. - If a change touches a public surface, include/adjust contract tests, not only unit tests. - Goldens validate intended contract shifts; they are not a substitute for reasoning or routing. +- Put tests in the owning behavior module. Do not create generic + coverage-uplift or miscellaneous dumping-ground test files. +- Coverage is a guardrail, not a reason to execute lines without asserting + behavior. ## 18) Public vs internal surfaces ### Public / contract-sensitive surfaces +- Structural Change Controller intent, permission, scope/hygiene, blast-radius, + verification-profile, claim, receipt, and Patch Trail semantics. - CLI flags, defaults, exit codes, and stable script-facing messages. - Baseline schema/trust semantics/integrity compatibility (`BASELINE_SCHEMA_VERSION` contract family). - Cache schema/status/profile compatibility/integrity (`CACHE_VERSION` contract family). - Canonical report JSON schema/payload semantics (`REPORT_SCHEMA_VERSION` contract family). - Documented report projections and their machine/user-facing semantics (HTML/Markdown/SARIF/Text). -- Documented MCP launcher/install behavior, tool names, resource URIs, and read-only semantics. +- Documented MCP launcher/install behavior, tool names, resource URIs, and + repository-read-only semantics. +- Documented MCP workflow tools, verification profiles, workspace intent + coordination, queue/promote semantics, and review receipt payloads. +- Engineering Memory schema, governance transitions, retrieval/filter/ranking + semantics, semantic sidecar format, trajectory quality, Experience promotion, + projection jobs, and CLI/MCP payloads. 
+- Platform Observability environment contract, local schema/privacy boundary, + CLI trace output, bounded MCP sections, and correlation behavior. +- Controller audit/event-core and shared session/audit insight payloads. - Session-local MCP review state semantics (`mark_finding_reviewed`, `exclude_reviewed`) as documented public behavior. - Documented VS Code extension behavior: commands, views, setup guidance, trusted-workspace model, and its baseline-aware triage workflow over MCP. +- Documented Claude Desktop, Claude Code, Codex, Cursor, GitHub Action, and + storefront-sync install/runtime/package semantics. - Documented finding families/kinds/ids and suppression-facing report fields. - Metrics baseline schema/compatibility where used by CI/gating. - Benchmark schema/outputs if consumed as a reproducible contract surface. @@ -667,6 +998,17 @@ These rules exist because of real incidents in this repo. They are non-negotiabl - Before starting work, run `git status` and review uncommitted/untracked changes. They may belong to a parallel agent or to the maintainer; do not delete or overwrite them without explicit approval. +### Human review boundary + +- Agents may author substantial contributions, but they do not own merge + approval. +- A human contributor must inspect and understand the complete diff, verify + tests/contracts/security/licensing/provenance, and accept maintenance + responsibility. +- Material agent assistance must be disclosed in the pull request. +- Never describe agent review, CodeClone findings, receipts, or CI as a + substitute for substantive human review. + ### Documentation hygiene - Every doc claim about code (schema version, module path, function name, MCP tool count, exit code, @@ -693,8 +1035,9 @@ These rules exist because of real incidents in this repo. They are non-negotiabl `codeclone/report/html/assets/*`) are imported, not duplicated locally inside `codeclone/report/html/sections/*`. 
If you need a helper that doesn't exist, add it to the shared module. -- Glossary terms used in stat-card labels live in `codeclone/report/html/widgets/glossary.py`. Adding a - new label without a glossary entry is a contract gap. +- Glossary term definitions live in `codeclone/report/messages/glossary.py`; + `codeclone/report/html/widgets/glossary.py` renders HTML tooltips from that + catalog. Adding a new stat-card label without a glossary entry is a contract gap. ### Conflict avoidance @@ -717,14 +1060,18 @@ These rules exist because of real incidents in this repo. They are non-negotiabl ## 21) Minimal checklist for PRs (agents) +- [ ] Intent and scope were declared before editing; `edit_allowed=true` was observed when available. +- [ ] Actual changed files match declared scope; required verification and receipt completed. - [ ] Change is deterministic. - [ ] Contracts preserved or versioned. -- [ ] Tests added for new behavior. -- [ ] `ruff`, `mypy`, `pytest` green. +- [ ] Tests were added to the owning test module for new behavior. +- [ ] Pre-commit and pre-push/coverage validation are green. - [ ] CLI messages remain helpful and stable (don’t break scripts). - [ ] Reports contain provenance fields and reflect trust model correctly. - [ ] Golden snapshots were **not** updated just to satisfy failing tests. - [ ] If any golden snapshot changed, the corresponding contract change is intentional, documented, and approved. +- [ ] Material agent assistance is disclosed. +- [ ] A human reviewed and understood the complete diff before merge. 
--- diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c3ee8d6..f5ae6599 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,197 @@ -# Changelog +Changelog + +[2.1.0a1] - Unreleased + +2.1.0a1 opens the CodeClone 2.1 alpha line with intent-first structural +change control, Engineering Memory, trajectory and experience layers, semantic +retrieval, Platform Observability, native agent integrations, and a reorganized +documentation site. + +Added + +* Structural Change Controller. The new + start_controlled_change / finish_controlled_change workflow reduces the + governed agent edit cycle from 7–11 MCP calls to 3–4. It combines workspace + checks, intent declaration, blast-radius mapping, bounded edit scope, patch + verification, review-claim validation, and deterministic review receipts. + CodeClone now exposes 33 agent-visible MCP tools by default. +* Live Implementation Context. The new read-only + get_implementation_context tool projects bounded structural facts for + repo-relative paths from one existing run. It reports workspace freshness, + cache origin, imports/importers, public surface, blast radius, and test + anchors, with separate deterministic digests for the off-report context + artifact and the exact bounded projection. Active intents add explicit + allowed/review/do-not-touch boundaries, while impact mode adds transitive + dependency context and baseline-sensitive findings. Engineering Memory, + tests, docs, trajectories, and Experiences remain lane-separated evidence. + Exact qualname subjects resolve through an off-report Unit and API-surface + location index, with unknown symbols reported explicitly rather than guessed. + Zero-argument queries now resolve active intent scope or bounded live dirty + paths, related module roles collapse with explicit relation tags, and one + safety-first global budget reports all ordinary and safety omissions. 
+ Cache schema 2.9 adds a separate, rebuildable per-function relationship-fact + projection without changing Unit serialization or canonical report identity. + Cross-module calls and resolved non-call references are now attributed to + their caller with production/test lanes; conservative caller-scope shadow + guards keep ambiguous imported names as unresolved call observations. + Intra-module functions, same-module class methods, and self/cls receiver + methods now resolve against the enclosing module and class (keyed on the + actual first-parameter name, never a hardcoded self, and never for + staticmethods), only when the target definition exists; cache schema 2.10. + Per-function relationship facts now aggregate across files (cold and cached) + onto the analysis result and the MCP run record, off the canonical report. + get_implementation_context now projects call_context (callers, callees, + references, test_callers) from those facts with relation_kind x + resolution_status evidence tags, separate production and test caller lanes, + unresolved call observations, and a complete/partial/unavailable + call_graph_status; relationship records are bound into context_artifact_digest. + contract mode returns a truth-map (definition_sites, version_constants, + contract_tests, memory_conflicts) and persistence/serialization path callers + that are emitted only with a typed or memory-backed anchor and are otherwise + not_available rather than name- or directory-guessed. + Context evidence never authorizes edits; edit_allowed remains authoritative. +* Change-intent lifecycle and multi-agent coordination. + manage_change_intent supports declare, check, clear, queue, promote, and + recover operations. Renewable leases, ownership classification, optional + SQLite coordination, retention, workspace hygiene, and recoverable-intent + handling make concurrent agent work explicit and auditable. +* Engineering Memory. 
A local SQLite knowledge graph stores typed, + evidence-linked repository facts such as contracts, decisions, risks, test + anchors, prior changes, and git provenance. Agents receive ranked, + scope-aware context through get_relevant_memory and + query_engineering_memory; drafts remain human-governed and can be approved + through the CLI or VS Code Memory view. Memory never authorizes edits or + overrides the canonical report, gates, or Patch Trail. +* Trajectory Memory and Patch Trail. Audit-derived trajectories preserve + agent workflows, declared scope, actual changed paths, verification outcomes, + incidents, citations, and review evidence. The current trajectory-v3 + projection adds quality passports, complexity scoring, anomaly detection, + agent profiles, dashboards, semantic retrieval, and deterministic Patch Trail + summaries. Engineering Memory schema 1.7 persists trajectory and Patch + Trail evidence. +* Experience Layer. Deterministic experience-v1 patterns are distilled + from canonical trajectories across all outcomes and exposed through a + separate advisory retrieval lane. Experiences retain supporting evidence and + agent-diversity facets, but never become authority automatically; + promote_experience creates a human-governed memory draft. +* Semantic memory retrieval. Optional LanceDB-backed hybrid search combines + FTS5/BM25 and vector retrieval using deterministic Reciprocal Rank Fusion. + Local embeddings are available through codeclone[semantic-local] with + BAAI/bge-small-en-v1.5. Semantic indexing is lazy, failure-tolerant, and + eventually consistent rather than synchronously rebuilt after every finish. +* Platform Observability. Opt-in, development-only telemetry traces + CodeClone’s own CLI, MCP, analysis, database, semantic-index, and projection + worker activity. The local observer captures timings, RSS/CPU, MCP payload and + token pressure, DB query counts and shapes, causal worker chains, and costly + no-ops. 
JSON/HTML views provide a diagnostic cockpit, while + query_platform_observability exposes bounded MCP sections for development + agents. Observability never affects reports, gates, baselines, memory facts, + or edit authorization. +* IDE and agent integrations. The VS Code extension gains Engineering + Memory governance, trajectory dashboards, controller audit views, and + workspace session statistics. Native integrations are available for Claude + Desktop, Claude Code, Codex, and Cursor. Claude Code now has a dedicated + marketplace plugin and storefront, separate from the Desktop `.mcpb` bundle. + The Cursor plugin includes skills, rules, fail-closed preToolUse enforcement, + scoped workspace-intent checks, and a structural-review agent. +* Controller and diagnostic CLI surfaces. Added blast-radius, patch + verification, session statistics, controller audit, memory trajectory, + anomaly, agent-profile, semantic-search, and Platform Observability commands. +* Documentation and edition model. Documentation is reorganized into a + thematic 00–26 contract book with unified integration guides, dedicated + chapters for the Controller, Engineering Memory, trajectories, Experiences, + and Platform Observability, plus explicit Open Source / Team / Enterprise + retention and capability tiers. +* MCP schemas now include parameter-level descriptions and deterministic + next_tool guidance. Workspace hygiene warnings, audit events, token-budget + tracking, and documentation-contract linting were also added. +* **Corpus Analytics (intent lane, Slice 1).** Optional offline clustering of + historical change-control intents via `codeclone analytics …`. + Requires `codeclone[analytics]`. 
Reads audit + trajectory (+ optional registry + overlay), writes SQLite/LanceDB artifacts under `.codeclone/analytics/`, and + exports inspectable JSON/HTML with sweep comparison, cluster diagnostics, + noise exploration, explicit heuristic recommendation vs maintainer selection, + and runtime observability spans. Analytics embeddings and their lifecycle are + separate from the Engineering Memory semantic index; + `[tool.codeclone.analytics]` configures paths and clustering defaults. +* **Corpus Analytics interpretability (Slice 1.1).** JSON export schema `1.2` + and the self-contained HTML report now separate formal technical validity + from human interpretation. Valid runs expose dominant-cluster ratios, + bounded representative/boundary/noise previews, numeric summaries, + categorical correlations, small-cluster provenance completeness, and + explicit preview disclosure. Invalid and failed runs remain inspectable in a + limited diagnostic mode without partition metrics, previews, score, or rank; + sweep comparison includes every persisted candidate. Representation contract + `3` materializes explicit trajectory, Patch Trail, and registry-overlay + presence facts for new snapshots without adding live registry state to source + identity. +* **Corpus Analytics profiles and selection control (Slice 1.2).** Added + versioned bundled and repository-local profile manifests, finite + profile-scoped sweeps, separate suitability and profile-aware ranking, + immutable profile batch receipts, configurable ordinary sweep grids, manual + clustering parameters, and append-only maintainer selection events. Store + schema is now `1.2`; JSON export schema `1.3` adds control-plane contract + `1.0`, profile context/summary, and active selection without changing + technical-validity semantics. + +Changed + +* The default project workspace moved from .cache/codeclone/ to + .codeclone/; legacy locations now produce a migration warning. 
+* Documentation builds now use Zensical with strict, clean builds. +* pydantic is now a base dependency. +* LCOM4 excludes Protocol methods and Pydantic validation/serialization hooks; + computed_field remains part of cohesion analysis. +* Repository test coverage is enforced at >=99%. + +Fixed + +* Durable memory writes. Engineering Memory now uses + synchronous=FULL, preserving committed drafts across unclean MCP process + exits. Intent and audit stores retain recovery-oriented + synchronous=NORMAL. +* Atomic memory ingestion. persist_batch no longer commits records and + subjects mid-batch; it now defers the commit so a later failure in the same + batch rolls back the whole ingestion instead of leaving half-written records + behind. Standalone store writes keep their previous commit-on-write behavior. +* Observable best-effort failures. The non-fatal audit-event writer and the + best-effort finish-payload memory proposer no longer swallow exceptions with + zero signal; each now increments an observability counter (audit.emit_dropped, + memory.propose_candidate_dropped) on its fallback path, so silent drops stay + countable in the cockpit. Both remain non-fatal and the telemetry never + re-raises. +* Compact implementation-context misses. get_implementation_context no longer + emits the full empty facet scaffolding (structural_context, budget_summary, + dataflow, call_context, uncertainties) when an explicit symbol query resolves + nothing. The subject_not_found response now returns only the unresolved + subject, a slim provenance block, the projection digest, and an actionable + next_steps list, so a miss does not burn agent context. +* Memory lifecycle correctness. Draft records are no longer marked stale + before human promotion. Trajectory rebuilds now deduplicate superseded + projections, repoint evidence, remove stale workflow rows, and preserve + bounded claim-validation citations. +* Workspace hygiene and intent attribution. 
Finish blocks only on missing + evidence or foreign dirty overlap. Out-of-scope dirt is advisory, + continue_own_wip supports resuming owned work, queued foreign intents no + longer create false overlaps, and recoverable intents do not grant foreign + attribution. +* Patch verification correctness. Identical before/after runs are rejected + for structural and governance profiles. Negative health deltas now surface a + regression advisory, and Claim Guard warns when review text overstates patch + quality. +* Semantic retrieval correctness and cost. Hybrid search now preserves + lexical and vector relevance through RRF instead of allowing metadata ranking + to suppress strong matches. Per-source vector retrieval prevents dense lanes + from crowding out other sources. Embedding providers load lazily, failures + preserve documented fallback behavior, and redundant projection jobs are + coalesced or deferred. +* Architecture and import boundaries. Blast-radius graph logic moved into + codeclone/analysis/blast_radius.py, removing the CLI-to-MCP dependency + violation. +* Regression accuracy. respect_pyproject=false no longer reports + golden-fixture clone groups as false new regressions. Documentation URLs, + plugin references, and contract tests were updated after the documentation + reorganization. ## [2.0.2] - 2026-05-19 @@ -715,7 +908,7 @@ codeclone . --update-baseline ### Cache & Security -- Cache default moved to `/.cache/codeclone/cache.json` with legacy path warning. +- Cache default moved to `/.codeclone/cache.json` with legacy path warning. - Cache schema moved to compact signed payload format (`CACHE_VERSION=1.2`) with relative file keys and fixed-array entries for faster IO and smaller files. - Cache integrity uses constant-time signature checks and deep schema validation. 
diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..be0e8b72 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,416 @@ +# CodeClone — Claude Code Directives + +## Identity + +CodeClone: deterministic structural controller for Python. +Full architecture, contracts, and agent playbook → `AGENTS.md`. +Code is the implementation source of truth. If docs and code diverge, +follow code for implementation decisions and report the divergence. + +## Default role + +**Specs and validation only.** Do not edit production code unless the +user explicitly permits it for a specific task. "Реализуй" / "Implement" +is explicit permission. "Проверь" / "Validate" is not. + +When permitted to edit code, follow the change control workflow below. +Applies to any tracked file in the target repository. Task type (coverage, CI, +docs-only) does not skip `start`. Spec edits count too. When CodeClone MCP is +available, read the bundled **`codeclone-change-control`** skill for the full +pipeline (tool tiers, decision tables, profiles). + +## Change control workflow + +The protocol below is mandatory, but the visible workflow depends on the +patch type: + +- **Python structural / governance config**: full before/after workflow. +- **Documentation-only**: lightweight verify; no after-run required when the + controller derives `documentation_only`. +- **Blocked follow-up**: queue intent behind foreign active; promote before + editing. +- **Read-only / spec validation**: no edit workflow unless repository files + change. + +Do not skip, replace, reorder, or approximate the required steps for the +derived workflow profile. Steps explicitly marked as optional or +profile-dependent may be skipped only under the stated conditions. +If a required MCP call fails or is unavailable, stop and report the blocker +instead of continuing as a normal edit. + +Before editing any repository files: + +1. 
`analyze_repository(root="")` + — if a valid recent run for the same absolute root already exists, skip +2. `start_controlled_change(root="", scope={...}, intent="...")` + — returns blast radius, budget, workspace state, intent_id + — if `status: "needs_analysis"`, run `analyze_repository` first + — if `status: "queued"`, do not edit; wait for promotion + — if `concurrent_intents` non-empty without queue, narrow scope or ask + — if start blocks only because declared scope is already dirty and you are + resuming known WIP with no foreign overlap, retry with + `dirty_scope_policy="continue_own_wip"`; finish must still prove scope +3. `get_relevant_memory(root=..., scope=... or intent_id=...)` — after + `edit_allowed=true` (see **Memory-aware workflow**) +4. Edit within declared scope only +5. `analyze_repository(root="")` + — after-run; required for Python structural and governance config changes. + May be skipped for documentation-only and other non-Python patches when + `finish` can verify from changed-file evidence +6. **Engineering Memory write (MCP) when required** — before step 7, not in chat + (see **Before `finish`: incident / complexity memory**) +7. `finish_controlled_change(intent_id=..., changed_files=[...], after_run_id=...)` + — returns scope check, verification, receipt, and clears intent + — finish **reconciles evidence with the start-time dirty snapshot and the + full git tree**: under-reported in-scope dirty → + `finish_block_reason: missing_evidence`; live foreign in-scope overlap → + `foreign_dirty_overlap`; unattributed out-of-scope dirt blocks finish only + when `CODECLONE_STRICT_FINISH` is truthy (`finish_block_reason: + own_unscoped_dirty`). Otherwise new/modified/unknown unattributed out-of-scope + dirt and unchanged preexisting unscoped dirty are **advisory** — finish may + return `accepted_with_external_changes`. Foreign active/stale dirty outside + your scope → `foreign_attributed_outside_scope` (ignored). 
**Recoverable** + (dead PID) intents do not grant foreign attribution + — if `status: "unverified"`, the intent stays active; follow `next_step` + (e.g. run `analyze_repository` with a **new** run_id — identical before/after + runs return `after_run_not_new` for Python structural patches), then call + `finish` again on the **same `intent_id`** with the missing evidence + — if `status: "violated"` (scope), the intent stays active; either + remove out-of-scope changes and retry `finish`, or expand scope via + `start_controlled_change` with a wider scope + — if `user_action_required: true`, stop and escalate to the user + — `auto_clear=true` by default; intent cleared only on accepted + +Workflow profiles determine which steps are needed: + +- **Python structural / governance config**: + `analyze` → `start` → `get_relevant_memory` → edit → `analyze` → + `record_candidate` (if required) → `finish(after_run_id=...)` +- **Documentation-only / non-Python**: + `analyze` → `start` → `get_relevant_memory` → edit → + `record_candidate` (if required) → `finish(changed_files=[...])` + For `non_python_patch`, report controller-stated limitations and do not + present the result as full structural verification. + +### Memory-aware workflow + +Engineering Memory is a local SQLite store of evidence-linked repository facts. +Full playbook: `docs/book/13-engineering-memory/index.md`. MCP help: +`help(topic="engineering_memory")`. + +**Chat is not memory.** Text in this conversation is ephemeral (context shrink, +new session, new MCP process). Anything the next agent must remember belongs in +Engineering Memory via MCP — never “I noted it in the summary” alone. + +**Bootstrap:** default `mcp_sync_policy=bootstrap_if_missing` auto-creates the +store from the latest MCP run on `get_relevant_memory`. Explicit refresh: +`manage_engineering_memory(action="refresh_from_run")`. CLI `memory init` remains +for CI/offline. 
Human approval is still required for agent drafts (VS Code Memory +view — not MCP, not `codeclone memory approve`). + +After `start_controlled_change` returns `edit_allowed: true`: + +1. Call `get_relevant_memory(root="", scope=... or intent_id=...)`. + **`root` is required** — `intent_id` alone fails MCP validation. +2. Read contract warnings, stale decisions, and `contradiction_note` alerts +3. Use `query_engineering_memory(mode=for_path)` or `mode=search` for drill-down +4. Do NOT ignore stale memory warnings — they indicate changed context +5. Do NOT treat `draft`, `inferred`, or excluded stale records as established facts +6. If memory contains a `contradiction_note` for your scope, surface it to + the user before editing + +**Scope and token hygiene:** never use project root as memory scope. Compress +`record_candidate` statements to one durable fact (target ≤300 chars; +`validate_claims` warns above 500; hard limit 1000). List responses default to +compact previews. Treat `records[]`, `experiences[]`, and `trajectories[]` as +separate evidence lanes: records = asserted knowledge, trajectories = episodic +workflow evidence, experiences = advisory patterns, `coverage` = visibility +metadata. **Scores are lane-local — never compare `relevance_score` across +lanes; `for_path` and plain (non-semantic) search are unranked.** +`subject_count` / `subjects_truncated` means more subjects exist, not that +evidence disappeared. Use `mode=get` or `detail_level=full` for complete +subjects, agent facets, trajectory contracts, steps, evidence ids, and +payloads; `patch_trail_summary` rides each trajectory (never duplicated at the +payload root). 
+ +### Before `finish`: incident / complexity memory (MANDATORY) + +**Do not call `finish_controlled_change` until this check passes.** + +If the edit cycle involved **any** of the following, you **must** write at least +one durable note through MCP **before** step 7 (`finish`): + +| Trigger | Examples | +|----------------|--------------------------------------------------------------------------------------------------------------| +| **Incident** | verify/hygiene surprise, `unverified`/`violated` recovery, workaround, blocked step, foreign intent friction | +| **Complexity** | non-obvious root cause, multi-file debug, near `do_not_touch`, acted on stale/contradiction memory | +| **Decision** | tradeoff, integration quirk, “next agent must not repeat X” | + +**Skip** only trivial edits (typo, one obvious line, nothing to relearn). + +**How to write (MCP only):** + +```text +manage_engineering_memory( + action=record_candidate, + record_type=risk_note | change_rationale, + statement="", + subject_path="
" +) +``` + +Or batch several notes with `finish_controlled_change(..., propose_memory=true)`. +Optional: `manage_engineering_memory(action=validate_claims, text=...)` on +`claims_text` before finish. + +| Other writes | Tool | +|----------------------------------|----------------------------------------------------------| +| During edit (stable observation) | `record_candidate` (same as above) | +| After accepted patch | `finish(..., propose_memory=true)` → `memory_candidates` | + +Agents **cannot** call `approve` / `reject` / `archive` via MCP. Ask the user to +use the CodeClone VS Code **Memory** view to promote drafts. + +Memory cannot authorize edits, expand scope, or override findings. + +Queue/promote workflow (when `start` returns `status: "queued"`): + +1. `start_controlled_change(on_conflict="queue")` → `status: "queued"` +2. Wait for foreign intent to clear +3. `manage_change_intent(action="promote", intent_id=...)` + — edit only after promote returns `status: "active"` + — if `before_run_evicted`: re-analyze and re-start + +### Atomic workflow (fallback) + +When `start_controlled_change` / `finish_controlled_change` are unavailable, +use the atomic path in the change-control skill. Do not mix primary and atomic +verification in one cycle. + +### Rules + +- Prefer `start_controlled_change` / `finish_controlled_change` over + the atomic workflow. Use atomic tools only for queue/promote/recover + or when the workflow tools are unavailable. +- Do not mix workflow and atomic verification paths in the same edit + cycle. Queue/promote/recover operations via `manage_change_intent` + are allowed alongside workflow tools because workflow tools do not + expose those administrative transitions. +- `start_controlled_change` does not run analysis. Ensure a valid run + exists before calling it. +- `finish_controlled_change` does not run analysis. For Python + structural and governance config changes, run `analyze_repository` + after editing and pass `after_run_id`. 
+- MUST NOT edit files without declaring intent first — including `tests/**/*.py`. +- MUST NOT silently expand scope. If the fix requires files outside the + declared scope, stop before editing them. Expand scope only after user + approval unless the user already explicitly allowed expansion. Call + `start_controlled_change` again with the expanded scope to get a fresh + intent with updated blast radius and budget. Continue only when the + expanded intent is active. Do not edit extra files based on blast-radius + context alone. +- MUST NOT edit while intent is `queued`. Promote first. +- `do_not_touch` is a hard boundary. `review_context` is context, not a ban. +- Do not update baselines, analysis cache, or generated reports. +- When `finish` or verify returns a `next_step` hint, follow it — do not + invent a different recovery path. +- CodeClone findings are the source of truth — do not reinterpret. +- If `finish_controlled_change` returns `status: "unverified"` or + `"violated"`, do not claim the patch is verified. +- Leaving an active or recoverable own intent behind is a blocked cleanup, not + a completed task. +- Live foreign intent means **stop**, not kill. Never suggest killing + a process without explicit user confirmation that the PID is abandoned. +- **MUST NOT** call `finish_controlled_change` after a non-trivial cycle without + `record_candidate` (or `propose_memory=true`) when incident/complexity/decision + triggers applied — chat summaries do not count. +- **MUST NOT** treat assistant chat text as Engineering Memory. + +### User escalation policy + +Run routine controller steps automatically. Queue blocked follow-up work +automatically when it can wait — do not ask before queueing. 
+ +Ask the user only when: + +- scope expansion is required and was not already explicitly allowed by + the user; +- a `do_not_touch` path must be touched; +- a live foreign intent overlaps and queue is not appropriate; +- patch contract returned `violated`, or returned `unverified` and the + agent cannot execute the deterministic `next_step`; +- baseline, analysis cache, canonical reports, or generated state would + be modified; +- recovery or reset of another agent's intent is needed. + +Routine controller work is automatic. Boundary decisions require the user. + +**Edit permission (MCP workflow):** do not edit unless +`start_controlled_change` returned `status == "active"` **and** +`edit_allowed == true`. Workflow `status: "blocked"` is not persisted +registry lifecycle — clear abandoned blocked intents via +`manage_change_intent(action="clear")`. Finish `reason=workspace_hygiene` means +evidence/scope/git/start snapshot disagree — read `finish_block_reason` +(`missing_evidence`, `foreign_dirty_overlap`, or `own_unscoped_dirty` only +under `CODECLONE_STRICT_FINISH`). Advisory hygiene fields such as +`new_unattributed_unscoped_dirty` are not block reasons — they may appear in +`external_changes` or `accepted_with_external_changes`. Widen scope, fix +evidence, reconcile the tree, or coordinate foreign in-scope overlap. Do not +bypass with atomic verify. + +### Completion gate + +Do not say "done", "implemented", "validated", "verified", "ready", or +equivalent unless all of these are true: + +1. `finish_controlled_change` returned `status: "accepted"` (or + `"accepted_with_external_changes"`); OR, in the atomic fallback + workflow, `manage_change_intent(action="check")` returned `clean` or + `expanded`, `check_patch_contract(mode="verify")` returned `accepted`, + and `manage_change_intent(action="clear")` succeeded; +2. `scope_check.status` is `"clean"` or `"expanded"`; +3. 
`intent_cleared` is `true` in the finish response; OR + `manage_change_intent(action="clear")` succeeded; +4. if `claims` is present in the finish response and `claims.valid` is + `false`, report the warnings — do not suppress; +5. claim validation was handled by `finish_controlled_change` when + `review_text` was provided and `claim_validation_recommended` was + `true`; for atomic workflow, final summary claims passed + `validate_review_claims` with `patch_health_delta` from verify unless + `claim_validation_recommended` was explicitly `false`. + +If status is `accepted_with_external_changes`, report the external-change +advisory instead of presenting the patch as fully clean. + +If any item cannot be completed, report `BLOCKED` or `UNVERIFIED`, include the +`intent_id`, and state the exact missing step. Do not present the work as +finished. + +### Verification profiles + +The controller derives a **verification profile** from actual changed files. +The profile determines which structural checks apply. The agent does not choose +the profile — it is computed by `finish_controlled_change` (through +`check_patch_contract(mode="verify")` internally), or directly by +`check_patch_contract(mode="verify")` in the atomic workflow. 
+ +| Profile | When | `after_run` required | Structural checks | +|-------------------------|-------------------------------------------------------------------------------------------------------|----------------------|-------------------| +| `python_structural` | any `.py` / `.pyi` touched | yes | all | +| `governance_config` | config files only (pyproject.toml, CI, Dockerfile…) | yes | not applicable | +| `documentation_only` | only docs files (`.md`, `.rst`, LICENSE…) | no | not applicable | +| `non_python_patch` | other files, no Python / docs | no | not applicable | +| `state_artifact_change` | CodeClone state artifacts touched (`codeclone.baseline.json`, `.codeclone/**`, `.cache/codeclone/**`) | no (violated) | not applicable | + +Key rules: + +- **`start` is always required** before edit; lighter profiles only affect + after-run / verify, not intent declaration. +- If **any** Python source, governance configuration, baseline, cache, or + generated state files were touched, the lightweight path is not accepted. +- Documentation-only patches can verify without `after_run_id` + when `changed_files` or `diff_ref` evidence is provided. +- Other non-Python patches may verify without `after_run_id`, but only + with controller-reported limitations. Do not present this as full + structural verification. +- The agent MUST NOT claim which profile applies — CodeClone decides. +- Receipts use "not applicable" for skipped structural checks, never "passed". +- Claim Guard may reject or warn on claims that exceed the derived profile. + For documentation-only patches, "no Python files touched" is allowed; + "no structural regressions" requires structural evidence from an after-run. +- `novelty="known"` is baseline-relative, not patch-relative. Do not infer that + a patch did not introduce/reintroduce a finding from baseline novelty alone; + patch-local regression claims require clean before-run to after-run evidence. 
+ +### When to skip + +- Read-only tasks (analysis, validation, research) +- CodeClone MCP not available and the task is read-only. For repository + edits that require change control, stop and report the blocker. +- User explicitly says analysis-only + +## Spec writing discipline + +Specs are disposable implementation briefs, not documentation. +They are deleted after implementation and validation. + +### Invariants + +- **One model per decision.** If the spec describes alternative + approaches, choose one and close the others. Never leave two + incompatible paths in the same section. +- **Verify against code.** Every function signature, data model, and + behavior claim in the spec must be verified against current code + before writing. Read the source, do not assume. +- **No aspirational APIs.** If a function doesn't exist yet, say so. + Do not describe it as if it does. +- **Decision table for state machines.** If the spec introduces states + or classifications, provide an exhaustive decision table. Every + input combination must map to exactly one output. +- **Dependency direction explicit.** List what each new file imports + and what imports it. Verify against the architecture rules in + `AGENTS.md` §14. + +### Self-check before delivery + +Before presenting a spec, verify: + +1. Are there two conflicting approaches in the same spec? → pick one. +2. Does every code snippet match the actual codebase API? → read source. +3. Is every state transition deterministic? → write the decision table. +4. Can the implementer follow this without interpreting ambiguity? → if + unclear, it's wrong. + +## Validation discipline + +When validating an implementation against a spec: + +1. Read all implementation files (not just grep). +2. Cross-reference every spec requirement against code. +3. Run the relevant tests: `uv run pytest -q <test-paths>`. +4. Run `uv run pre-commit run --all-files` if the user asks to commit. +5. Check MCP tool visibility if a new tool was added. +6. 
Report: conformant / improved / divergent / missing — with evidence. + +## Verification commands + +```bash +# Always +uv run pre-commit run --all-files + +# MCP changes +uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py + +# Full suite +uv run pytest -q +``` + +See `AGENTS.md` §3 for surface-specific commands. + +## Hard boundaries + +- Never update golden snapshots merely to "fix" tests. Snapshot updates + require explicit user approval and a contract/schema change rationale. +- Never change fingerprint semantics without `FINGERPRINT_VERSION` review. +- Never make base `codeclone` depend on MCP runtime packages. +- Never let MCP mutate baselines, source files, canonical reports, or + analysis cache. Ephemeral coordination state (workspace intents) and + audit trail under `.codeclone/` are allowed only through the + controller and audit contracts. +- Never iterate sets/dicts without sorting when output order matters. +- Never introduce `Any` in core/domain code without narrowing it immediately. +- Never create `*.md` specs inside `docs/` — use `specs/` directory. +- Version constants live in `codeclone/contracts/__init__.py` — always + read from there, never copy from another doc. + +## Commit style + +``` +feat(scope): short imperative description + +Optional body with context. +``` + +Scopes: `mcp`, `cli`, `core`, `baseline`, `cache`, `report`, `html`, +`metrics`, `docs`, `vscode`, `codex`, `claude-desktop`. +Prefixes: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd105f66..c1244803 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,263 +1,674 @@ # Contributing to CodeClone -Thank you for your interest in contributing to **CodeClone**. +Thank you for contributing to **CodeClone**. -CodeClone provides **structural code quality analysis** for Python, including clone detection, -quality metrics, baseline-aware CI governance, and an optional MCP agent interface. 
+**CodeClone** is a deterministic **Structural Change Controller** for +AI-assisted Python development. -Contributions are welcome — especially those that improve **signal quality**, **CFG semantics**, -and **real-world CI usability**. +It starts before a diff exists: an agent declares intent, CodeClone maps the +structural blast radius, bounds the edit, verifies the resulting patch against +one canonical report, and leaves an auditable receipt. ---- +```text +intent → blast radius → bounded edit → patch check → review receipt +``` -## Project Philosophy +CodeClone combines structural analysis, baseline-aware CI, Engineering Memory, +agent trajectories, MCP tooling, and IDE integrations without turning LLM +output into truth. It is not an AI reviewer guessing whether code is safe; it +is a deterministic control layer for structural change. + +Contributions are welcome when they preserve the project's central guarantees: +honesty, reproducibility, determinism, explainability, and safe use in real +CI environments. + +## Source of Truth + +Before changing code, read: + +- [`AGENTS.md`](AGENTS.md) for repository-wide operating rules, module + ownership, change routing, and required validation; +- the [architecture map](docs/book/02-architecture-map.md) for current + boundaries and dependency direction; +- [testing as specification](docs/book/23-testing-as-spec.md) for contract and + test ownership; +- the relevant contract chapter under [`docs/book/`](docs/book/). + +The current repository code is the source of truth for implementation +behavior. Version constants must be read from +`codeclone/contracts/__init__.py`, not copied from this file or another +document. If contributor documentation and code diverge, align the +documentation as part of the change. + +## Project Principles + +- **Determinism over cleverness.** Identical inputs and versions must produce + stable findings, ordering, identities, and canonical payloads. 
+- **Control starts before the diff.** Intent, scope, blast radius, and + do-not-touch boundaries are part of the change contract, not review + commentary added afterward. +- **Evidence over inference.** Core analysis produces facts and metrics; + renderers and clients present them without inventing new gating semantics. +- **Low noise over inflated recall.** Detection changes must account for both + false positives and false negatives. +- **One analysis truth.** CLI, reports, MCP, extensions, and plugins project the + same canonical pipeline and report contracts. +- **Contracts are public APIs.** Baselines, cache compatibility, report + schemas, CLI behavior, MCP payloads, and published integration behavior + require deliberate compatibility handling. +- **Safety first.** Treat source code, paths, configuration, baselines, caches, + and external tool input as untrusted. + +Changes that increase unexplained noise, introduce nondeterminism, weaken +contract boundaries, or silently change trusted artifacts are unlikely to be +accepted. + +## AI-Assisted Contributions + +CodeClone accepts contributions written with coding agents, language models, +and other automated development tools. Agent assistance is welcome, including +substantial agent-authored code, but it does not transfer responsibility away +from people. + +Every AI-assisted contribution must meet all of the following requirements: + +- A human contributor must inspect the complete diff, understand it, and be + able to explain and maintain it. +- A human must verify the relevant tests, contract implications, security + properties, generated artifacts, and documentation before requesting review. +- A substantive human review is mandatory before merge. Agent-only review, + automated approval, or a passing CI run does not satisfy this requirement. +- Material agent assistance should be disclosed in the pull request + description, including what the agent produced or changed. 
+- The contributor must verify provenance, licensing, and third-party rights for + generated code, text, fixtures, and assets. +- Secrets, private prompts, credentials, unrelated user data, and unreviewed + generated output must not be committed. +- Do not submit code that no human can confidently explain, test, or support. + +CodeClone's Structural Change Controller, Engineering Memory, review receipts, +claim validation, and Platform Observability can strengthen review evidence. +They do not replace human engineering judgment or human approval. + +## Where to Contribute + +Contributions are especially useful in: + +- Structural Change Controller intent, scope, blast-radius, patch-contract, + claim-validation, and receipt workflows; +- Engineering Memory retrieval, semantic indexing, trajectories, Patch Trail, + Experiences, governance, and projection jobs; +- Platform Observability instrumentation and developer diagnostics; +- AST normalization, CFG construction, and structural extraction; +- clone grouping, explainability, and false-positive reduction; +- complexity, coupling, cohesion, dependency, dead-code, coverage, adoption, + API-surface, and health metrics; +- baseline, cache, canonical report, and deterministic renderer contracts; +- MCP tools, resources, messages, and transport behavior; +- VS Code, Claude Desktop, Claude Code, Codex, Cursor, and GitHub Action surfaces; +- performance work that preserves fingerprint and canonical-output semantics; +- documentation, examples, tests, and real-world CI scenarios. + +Use the module ownership table in [`AGENTS.md`](AGENTS.md) and the +[architecture map](docs/book/02-architecture-map.md) to route changes to the +correct layer. + +## Contribution Workflow + +1. Confirm the user-visible problem and identify the owning module. +2. Classify whether the change affects a versioned or public contract. +3. Read the nearest tests and normative documentation before editing. +4. 
Keep the patch narrowly scoped and preserve unrelated work in the tree. +5. Add tests in the test module that owns the behavior. Do not create generic + coverage-uplift or miscellaneous test dumping grounds. +6. Update documentation when behavior, configuration, commands, payloads, or + public integration surfaces change. +7. Run the relevant focused checks, then the repository validation required + below. +8. Review the final diff as a human-readable change, not merely as passing + automation. + +When CodeClone MCP change control is available, contributors and coding agents +should use `start_controlled_change` before editing and +`finish_controlled_change` after verification. These tools bind intent, scope, +blast radius, patch budget, verification, and the review receipt. The atomic +tools remain available for deeper inspection and recovery. See the +[Structural Change Controller](docs/book/12-structural-change-controller/index.md). -Core principles: +## Reporting Bugs -- **Low noise over high recall** -- **Structural and control-flow similarity**, not semantic equivalence -- **Deterministic and explainable behavior** -- Optimized for **CI usage** and architectural analysis +Use the appropriate GitHub issue template. Include: + +- a minimal reproducer, preferably source text rather than screenshots; +- CodeClone and Python versions; +- the command, configuration, and relevant optional extras; +- expected and actual behavior; +- whether a baseline, cache, coverage XML, MCP client, memory store, semantic + sidecar, projection worker, or observability store was involved; +- sanitized logs or payload excerpts where useful. -If a change increases false positives, reduces determinism, or weakens explainability, -it is unlikely to be accepted. 
+Classify the affected area when possible: change control/blast radius, +Engineering Memory/trajectories, analysis/CFG, normalization, clones, metrics, +baseline/cache/report, CLI, MCP, observability, documentation, or a +client/integration surface. + +For false positives, explain why the detected code is architecturally distinct +in control flow, responsibilities, or structure. Naming, comments, and +formatting alone are not sufficient evidence. + +For a suspected Platform Observability issue, include the operation or +correlation ID and a bounded, sanitized JSON projection when possible. Never +attach raw repository secrets or private source unnecessarily. + +## Design-Sensitive Changes + +### Analysis, CFG, and fingerprints + +For AST normalization, CFG, extraction, or clone identity changes, describe: + +- current and proposed behavior; +- concrete positive and negative examples; +- expected false-positive and false-negative impact; +- determinism implications; +- baseline and fingerprint compatibility implications. + +Performance work must not change normalization, fingerprint inputs, clone +identity, or NEW-versus-KNOWN classification while +`BASELINE_FINGERPRINT_VERSION` is unchanged. Fingerprint-adjacent changes +require explicit maintainer approval, version review, migration/release notes, +tests, and documentation. ---- +### Golden tests + +Golden tests are contract sentinels. Do not update snapshots merely to make a +failure disappear. A golden update is acceptable only when the contract change +is intentional, reviewed, documented, and versioned where required. + +### Security and safety + +- Preserve path validation and repository-root containment. +- Keep normal-mode fail-open and gating-mode fail-closed behavior only where + the owning contract explicitly defines it. +- Add negative tests for parser, normalization, transport, path, and + persistence boundaries. 
+- Do not let UI, MCP, memory, observability, or client surfaces invent analysis + facts or authorization. + +See the [security model](docs/book/21-security-model.md). + +## Versioned Contracts + +Current values must always be verified in `codeclone/contracts/__init__.py`. +At the time this document was updated, the main contracts were: + +| Contract | Version | Primary owner | +|------------------------|--------:|-----------------------------------| +| Baseline schema | `2.1` | `codeclone/baseline/` | +| Baseline fingerprint | `1` | `codeclone/contracts/__init__.py` | +| Analysis cache | `2.10` | `codeclone/cache/` | +| Canonical report | `2.11` | `codeclone/report/document/` | +| Metrics baseline | `1.2` | `codeclone/baseline/` | +| Engineering Memory | `1.7` | `codeclone/memory/` | +| Semantic index format | `2` | `codeclone/memory/semantic/` | +| Platform Observability | `1.1` | `codeclone/observability/` | + +Any schema shape or semantic change requires version review, tests, and +documentation. Compatibility details live in +[compatibility and versioning](docs/book/24-compatibility-and-versioning.md). + +### Baseline and CI behavior + +- Baseline trust depends on schema compatibility, fingerprint version, Python + tag, generator identity, and canonical payload integrity. +- Regenerate the baseline when fingerprint compatibility or Python tag changes. +- Do not regenerate it for report-only, UI-only, cache-only, or performance-only + work that preserves fingerprint semantics. +- Untrusted baseline state fails fast with exit `2` in gating mode. +- Outside gating mode, an untrusted baseline is ignored with a warning and + comparison proceeds against an empty baseline. +- Baseline novelty is baseline-relative. Patch-local regression claims require + a clean before/after comparison. + +Public exit categories are: + +- `0`: success; +- `2`: contract or invocation error; +- `3`: analysis/quality gate failure; +- `5`: unexpected internal error. 
+ +See [baseline trust](docs/book/07-baseline.md), +[exit codes](docs/book/09-exit-codes.md), and +[metrics and gates](docs/book/16-metrics-and-quality-gates.md). + +## MCP and Agent Surfaces + +The optional `codeclone[mcp]` server is read-only with respect to source files, +baselines, canonical/generated reports, and analysis cache data. + +Explicit controller and developer contracts may maintain bounded local state: -## Areas Open for Contribution +- session-local runs and review markers; +- ephemeral workspace intent records under `.codeclone/intents/`; +- optional audit evidence under `.codeclone/db/`; +- governed Engineering Memory and projection state under `.codeclone/memory/`; +- optional Platform Observability telemetry under + `.codeclone/db/platform_observability.sqlite3`. -We especially welcome contributions in the following areas: +Engineering Memory mutations must use explicit memory tools. Agent-initiated +mutations are limited to the documented refresh, projection, and draft +proposal contracts; approval, rejection, and archival remain human-governed. +None of this state may alter canonical report identity, baseline trust, cache +compatibility, findings, or edit authorization. -- Control Flow Graph (CFG) construction and semantics -- AST normalization improvements -- Segment-level clone detection and reporting -- Quality metrics (complexity, coupling, cohesion, dead-code, dependencies) -- False-positive reduction -- HTML report UX improvements -- MCP server tools and agent workflows -- GitHub Action improvements -- Performance optimizations -- Documentation and real-world examples +Tool names, parameter fields, response shapes, resource URIs, descriptions, and +error semantics are public surfaces. Keep optional MCP dependencies lazy so the +base package and non-MCP CI do not require them. + +See the [MCP interface](docs/book/25-mcp-interface/index.md) and +[MCP contributor guide](docs/guide/mcp/README.md). 
+ +## Engineering Memory + +Engineering Memory is a local, evidence-linked knowledge store, not a second +analyzer and not analysis cache. It combines governed records with report, git, +documentation, audit, trajectory, Patch Trail, and Experience evidence. + +When changing memory behavior: + +- preserve deterministic retrieval and stable bounded payloads; +- keep FTS, semantic sidecar, trajectory, and Experience lanes explicit; +- preserve human governance for durable promoted knowledge; +- treat semantic search as optional and keep the default installation free of + vector-model dependencies; +- keep projection jobs coalesced, watermarked, observable, and independent from + analysis truth; +- test schema migration, staleness, filtering, ranking, scope, governance, and + worker lifecycle as applicable. + +Start with the [Engineering Memory chapter](docs/book/13-engineering-memory/index.md), +[trajectory and Patch Trail contract](docs/book/13-engineering-memory/trajectory-and-patch-trail.md), +[Experience Layer](docs/book/13-engineering-memory/experience-layer.md), and +[projection jobs](docs/book/13-engineering-memory/projection-jobs.md). + +## Platform Observability + +Platform Observability is an opt-in developer diagnostics surface for +CodeClone's own execution. It helps investigate slow CLI/MCP operations, +database cost, projection workers, memory pipelines, redundant work, and +cross-process correlations. + +It is disabled by default and configured only through environment variables. +It stores bounded local telemetry, normalized literal-free SQL fingerprints, +durations, counters, and optional process metrics. It does not store raw prompt +or MCP payload bodies and has no network exporter. + +Most importantly, observer data is **not** repository quality evidence. It must +never affect findings, gates, baseline trust, cache compatibility, memory facts, +permissions, or edit authorization. 
+ +Enable it for a local diagnostic run: ---- +```bash +CODECLONE_OBSERVABILITY_ENABLED=1 uv run codeclone . +uv run codeclone observability trace --root . +uv run codeclone observability trace \ + --root . \ + --last 50 \ + --html /tmp/codeclone-observer.html +``` -## Reporting Bugs +Optional process metrics require the `perf` extra and +`CODECLONE_OBSERVABILITY_PROFILE=1`. Raw payload snapshots are unsupported. +Automatic retention pruning is not currently guaranteed, so developers who +enable persistence own the lifecycle of the local SQLite database. -Please use the appropriate **GitHub Issue Template**. +Instrumentation must be initialized before instrumented stores/connections are +opened, and worker correlation IDs must be propagated rather than synthesized +independently. New spans and counters must remain numeric, bounded, +deterministic in shape, and privacy-safe. -When reporting issues related to clone detection, include: +Read the normative [Platform Observability contract](docs/book/26-platform-observability.md) +and the practical [diagnostics guide](docs/guide/observability/diagnostics.md) +before modifying instrumentation, storage, rendering, or MCP projections. -- minimal reproducible code snippets (preferred over screenshots); -- the CodeClone version; -- the Python version (`python_tag`, e.g. `cp314`); -- whether the issue is primarily: - - AST-related, - - CFG-related, - - normalization-related, - - metrics-related, - - MCP-related, - - reporting / UI-related. +## Native Clients and Integrations -Screenshots alone are usually insufficient for analysis. +VS Code, Claude Desktop, Claude Code, Codex, Cursor, and the composite GitHub +Action are clients or packaging surfaces over the same CodeClone/MCP contracts. +They must not implement a second analyzer, redefine finding semantics, or +silently drift from MCP tool schemas. 
---- +Public commands, views, manifests, bundled skills/rules/hooks, launcher +behavior, trust boundaries, packaged assets, and marketplace metadata require +surface-specific tests and documentation. -## False Positives +Architecture references: -False positives are **expected edge cases**, not necessarily bugs. +- [VS Code extension](docs/book/integrations/vs-code-extension.md) +- [Claude Desktop bundle](docs/book/integrations/claude-desktop-bundle.md) +- [Claude Code plugin](docs/book/integrations/claude-code-plugin.md) +- [Codex plugin](docs/book/integrations/codex-plugin.md) +- [Cursor plugin](docs/book/integrations/cursor-plugin.md) -When reporting a false positive: +For the GitHub Action, never interpolate `${{ inputs.* }}` directly into shell +scripts; pass values through `env:`. Keep subprocess timeouts explicit and +preserve documented output and exit semantics. -- explain **why the detected code is architecturally distinct**; -- avoid arguments based solely on naming, comments, or formatting; -- focus on **control-flow, responsibilities, or structural differences**. +## Developer Scripts -Well-argued false-positive reports are valuable and appreciated. +The top-level [`scripts/`](scripts/) directory contains developer, docs, and +release utilities. It is not a miscellaneous home for product behavior: +runtime logic belongs in the owning `codeclone/` module and scripts should stay +thin, explicit, and tested. 
---- +| Path | Purpose | Important boundary | +|----------------------------------------|--------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| +| `scripts/build_docs_example_report.py` | Analyze the repository and stage the live docs example as HTML, JSON, SARIF, and `manifest.json` | Writes generated output, by default under `site/examples/report/live`; use it only for docs example/report publication work | +| `scripts/lint_admonitions.py` | Validate MkDocs admonition/details indentation | `--fix` rewrites Markdown; review the resulting diff | +| `scripts/launch_mcp` | Monorepo adapter that delegates to the shared Codex plugin MCP launcher | Not an independent launcher implementation; keep launcher resolution in `plugins/codeclone/scripts/launch_mcp.py` | +| `scripts/sync_integrations.py` | Synchronize Codex, Claude Code, Cursor, VS Code, and Claude Desktop distribution repositories | Maintainer/release tool that deletes and recopies managed target paths; always dry-run first | +| `scripts/integration_dist/*` | Distribution-only README, `.gitignore`, and marketplace overlays used by storefront sync | Source-controlled release inputs, not generated scratch files | +| `scripts/__init__.py` | Package marker for importing script helpers in tests | Not a command-line entrypoint | -## CFG Semantics Discussions +### Docs utilities -If proposing changes to CFG semantics, include: +When changing the live sample report or its publication path: -- a description of the current behavior; -- the proposed new behavior; -- the expected impact on clone detection quality (noise/recall); -- concrete code examples; -- a note on determinism implications. 
+```bash +uv run python scripts/build_docs_example_report.py \ + --output-dir site/examples/report/live +uv run --with zensical==0.0.43 zensical build --clean --strict +``` -Such changes often require design-level discussion and may be staged across versions. +The generator runs CodeClone against the repository, stages its output in a +temporary directory, then copies only the documented artifacts to the +destination. Changes require the relevant report/HTML tests plus +`tests/test_docs_example_report.py`. ---- +Validate admonition indentation without writing: -## Security & Safety Expectations +```bash +uv run scripts/lint_admonitions.py docs/ +``` -- Assume **untrusted input** (paths and source code). -- Prefer **fail-closed in gating modes** and **fail-open in normal modes** only when explicitly intended. -- Add **negative tests** for any normalization/CFG change. -- Changes must preserve determinism and avoid introducing new false positives. +Apply its deterministic indentation repair only when needed: ---- +```bash +uv run scripts/lint_admonitions.py docs/ --fix +``` -## Baseline & CI +The pre-commit hook uses `--fix`, so docs commits must be re-reviewed after the +hook runs. -### Baseline contract (v2) +### Storefront synchronization -- The baseline schema is versioned (`meta.schema_version`, currently `2.0`). -- Compatibility/trust gates include `schema_version`, `fingerprint_version`, `python_tag`, - and `meta.generator.name`. -- Integrity is tamper-evident via `meta.payload_sha256` over canonical payload. -- The baseline may embed a `metrics` section for metrics-baseline-aware CI gating. +`scripts/sync_integrations.py` mirrors monorepo integration sources into sibling +git repositories named `codeclone-codex`, `codeclone-claude-code`, +`codeclone-cursor`, `codeclone-vscode`, and `codeclone-claude-desktop`. It also +writes a `SYNC_MANIFEST.json` containing source commit and package provenance. 
-### When baseline regeneration is required +Run it from the monorepo root and inspect a dry run first: + +```bash +uv run python scripts/sync_integrations.py \ + --dry-run \ + --all \ + --base-dir .. +``` -- Regenerate baseline with `codeclone . --update-baseline` when - `fingerprint_version` **or** `python_tag` changes. -- Regeneration is **not** required for UI/report/CLI/cache/performance-only changes - if both `fingerprint_version` and `python_tag` are unchanged. +Only after reviewing the plan should a maintainer perform a write: -### Gating behavior +```bash +uv run python scripts/sync_integrations.py --all --base-dir .. +``` -- In `--ci` (or explicit gating flags), **untrusted baseline states fail fast** as a contract error (exit 2). -- Outside gating mode, an untrusted/missing baseline is ignored with a warning and comparison proceeds - against an empty baseline. +The script refuses a dirty source tree by default, validates target repository +names and containment, rejects copied symlinks, and writes the manifest +atomically. `--allow-dirty` is an emergency override, not a normal release +workflow; its dirty provenance is recorded in the manifest. Sync each target, +inspect its diff, run its native checks, and commit/push each distribution +repository separately. -### Exit codes contract +Cursor and Claude Code have intentional launcher overrides: their monorepo +launchers are thin delegates, while standalone distributions receive the full +shared launcher implementation. Do not replace this with a blind directory +copy. 
-- **0** — success -- **2** — contract error (e.g., missing/untrusted baseline in gating, invalid output path/extension, incompatible - versions) -- **3** — gating failure (new clones detected, `--fail-threshold` exceeded) -- **5** — internal error (unexpected exception; please report) +Changes to sync logic, layouts, deny lists, launchers, or +`scripts/integration_dist/*` require: ---- +```bash +uv run pytest -q tests/test_sync_integrations.py +``` -## Versioned schemas +The full operational and post-sync checklist is in +[`docs/releasing.md`](docs/releasing.md). -CodeClone maintains several versioned schema contracts: +## Development Setup -| Schema | Current version | Owner | -|------------------|-----------------|-------------------------------------| -| Baseline | `2.1` | `codeclone/baseline.py` | -| Report | `2.8` | `codeclone/report/json_contract.py` | -| Cache | `2.4` | `codeclone/cache_io.py` | -| Metrics baseline | `1.2` | `codeclone/metrics_baseline.py` | +CodeClone supports Python 3.10 through 3.14. -Any change to schema shape or semantics requires version review, documentation, and tests. +```bash +git clone https://github.com/orenlab/codeclone.git +cd codeclone +uv sync --extra dev --extra mcp --extra token-bench +uv run pre-commit install +``` ---- +`.pre-commit-config.yaml` installs both `pre-commit` and `pre-push` hooks by +default. Do not use `--no-verify` to bypass them; fix the failure or document a +genuine infrastructure blocker for maintainers. -## MCP Interface +The semantic and performance extras are intentionally optional. Install them +only for work that needs those paths, for example: -CodeClone includes an optional **read-only MCP server** (`codeclone[mcp]`) for AI agents. +```bash +uv sync --extra dev --extra mcp --extra semantic-local --extra perf +``` -When contributing to MCP: +## Required Validation -- MCP must remain **read-only** — it must never mutate baselines, source files, or repo state. 
-- Session-local review markers are the only allowed mutable state (in-memory, ephemeral). -- MCP reuses pipeline/report contracts — do not create a second analysis truth path. -- Tool names, resource URIs, and response shapes are public surfaces — changes require tests and docs. +The pre-commit stage runs repository hygiene checks, Ruff formatting and lint, +Mypy, baseline-aware `codeclone . --ci`, and the docs admonition fixer when +matching Markdown changed: -See `docs/mcp.md` and `docs/book/20-mcp-interface.md` for details. +```bash +uv run pre-commit run --all-files +``` ---- +Some hooks modify files (`end-of-file-fixer`, trailing whitespace, line endings, +Ruff format, and docs admonition repair). Always inspect `git diff` again after +the hook completes. -## GitHub Action +The command above runs the **pre-commit stage only**. It does not run the +pre-push pytest hook. Run that stage explicitly before pushing: -CodeClone ships a composite GitHub Action (`.github/actions/codeclone/`). +```bash +uv run pre-commit run --hook-stage pre-push --all-files +``` -When contributing to the Action: +The pre-push hook executes the full test suite with package coverage of at +least 99%. Its underlying CI command is: -- Never inline `${{ inputs.* }}` in shell scripts — pass through `env:` variables. -- Prefer major-tag pinning for actions (e.g., `actions/setup-python@v5`). -- Add timeouts to all `subprocess.run` calls. +```bash +uv run pytest \ + --cov=codeclone \ + --cov-report=term-missing \ + --cov-fail-under=99 +``` ---- +CI runs this suite on Python 3.10, 3.11, 3.12, 3.13, and 3.14. A test that only +passes on the contributor's interpreter is not sufficient. -## Development Setup +Run focused tests while developing, but do not use them as a substitute for +the required full validation when the change can affect shared behavior. 
+ +### Contract-specific checks + +For MCP changes: ```bash -git clone https://github.com/orenlab/codeclone.git -cd codeclone -uv sync --all-extras --dev -uv run pre-commit install +uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py ``` -Run tests: +For Engineering Memory, semantic retrieval, trajectory, Experience, or +projection-job changes, run the nearest owning modules described in +[testing as specification](docs/book/23-testing-as-spec.md), including the +relevant `tests/test_memory_*.py`, `tests/test_semantic_*.py`, and MCP memory +contract tests. + +For Platform Observability changes: ```bash -uv run pytest +uv run pytest -q tests/test_observability_*.py ``` -Static checks: +For documentation, navigation, publishing, or sample-report changes: ```bash -uv run pre-commit run --all-files +uv run --with zensical==0.0.43 zensical build --clean --strict ``` -Build documentation (if you touched `docs/` or `mkdocs.yml`): +For VS Code extension changes: ```bash -uv run --with mkdocs --with mkdocs-material mkdocs build --strict +node --check extensions/vscode-codeclone/src/support.js +node --check extensions/vscode-codeclone/src/mcpClient.js +node --check extensions/vscode-codeclone/src/extension.js +node --test extensions/vscode-codeclone/test/*.test.js +node extensions/vscode-codeclone/test/runExtensionHost.js ``` -Run MCP tests (if you touched `mcp_service.py` or `mcp_server.py`): +If VS Code packaging metadata or assets changed, also package a `.vsix` with +`vsce package --out /tmp/codeclone.vsix`. 
+ +For Claude Desktop bundle changes: ```bash -uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py +node --check extensions/claude-desktop-codeclone/server/index.js +node --check extensions/claude-desktop-codeclone/src/launcher.js +node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs +node --test extensions/claude-desktop-codeclone/test/*.test.js +node extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs \ + --out /tmp/codeclone-claude-desktop.mcpb ``` ---- +For Codex plugin changes: -## Commit Messages +```bash +python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json \ + >/tmp/codeclone-codex-plugin.json +python3 -m json.tool plugins/codeclone/.mcp.json \ + >/tmp/codeclone-codex-mcp.json +python3 -m json.tool .agents/plugins/marketplace.json \ + >/tmp/codeclone-codex-marketplace.json +uv run pytest -q tests/test_codex_plugin.py +``` -Use the repository's existing **Conventional Commits** style: +For Claude Code plugin changes: -- format: `type(scope): imperative summary` -- keep `type` lowercase (`feat`, `fix`, `docs`, `chore`, ...) -- keep the summary short, imperative, and specific to the user-visible change -- use a narrow scope when it helps (`metrics`, `mcp,vscode`, `core,ci`, ...) 
-- split unrelated changes into separate commits instead of writing one broad summary +```bash +python3 -m json.tool \ + plugins/claude-code-codeclone/.claude-plugin/plugin.json \ + >/tmp/codeclone-claude-code-plugin.json +python3 -m json.tool plugins/claude-code-codeclone/.mcp.json \ + >/tmp/codeclone-claude-code-mcp.json +python3 -m json.tool scripts/integration_dist/marketplace.claude-code.json \ + >/tmp/codeclone-claude-code-marketplace.json +claude plugin validate plugins/claude-code-codeclone +uv run pytest -q tests/test_claude_code_plugin.py +``` -Examples from the current history: +For Cursor plugin changes: -- `fix(core,ci): harden git diff validation, make segment digests canonical, and align CI policy` -- `feat(metrics): add adoption and public API baselines with compact schema-aware storage` -- `chore(docs): align AGENTS and contract docs with current code` +```bash +uv run pytest -q tests/test_cursor_plugin.py tests/test_cursor_plugin_hooks.py +``` -If a commit needs extra context, keep the subject line concise and explain the -rest in the commit body. +For GitHub Action helper changes: ---- +```bash +uv run pytest -q tests/test_github_action_helpers.py +``` -## Code Style +The change-routing matrix in [`AGENTS.md`](AGENTS.md) is authoritative when a +change spans more than one contract or integration. + +## Test Policy + +- Put tests beside the contract they specify, using the owning module's test + file and naming conventions. +- Prefer behavior and invariant assertions over implementation-detail checks. +- Cover normal mode, gating mode, error paths, determinism, and legacy or + untrusted states where relevant. +- Public payload changes require contract tests, not only unit tests. +- Avoid sleeps, unstable filesystem ordering, machine-local paths, and + network-dependent assertions. +- Coverage is a guardrail, not a reason to create artificial test modules or + tests that merely execute lines without asserting behavior. 
+- A bug fix should normally include a regression test that fails before the + fix and passes after it. + +## Pull Requests + +A pull request should state: + +- the problem and user-visible outcome; +- files and ownership boundaries affected; +- contract, schema, baseline, cache, report, CLI, MCP, memory, observability, or + integration implications; +- tests and validation commands run; +- documentation and migration/release-note impact; +- material use of coding agents or generated content. -- Python **3.10 – 3.14** -- Type annotations are required -- `Any` should be minimized; prefer precise types and small typed helpers -- `mypy` must pass -- `ruff check` must pass -- Code must be formatted with `ruff format` -- Prefer explicit, readable logic over clever or implicit constructs +Keep unrelated refactors and generated churn out of the patch. Do not claim a +finding is new, fixed, regression-free, or patch-local without the evidence +required by the relevant contract. ---- +Maintainers may request narrower scope, stronger negative tests, before/after +evidence, or a versioned migration even when CI is green. + +## Commit Messages + +Use the repository's Conventional Commits style: + +- `type(scope): imperative summary`; +- lowercase type such as `feat`, `fix`, `docs`, `test`, or `chore`; +- a narrow scope when useful; +- separate commits for unrelated work; +- a concise subject with explanatory detail in the body when needed. 
+ +Examples: + +- `fix(memory): preserve lane filters during semantic fusion` +- `feat(observability): correlate projection worker spans` +- `docs(contributing): align developer workflow with current surfaces` + +## Code Style -## Versioning +- Python 3.10 through 3.14 +- required type annotations and precise types +- minimal use of `Any` +- `ruff format`, `ruff check`, and `mypy` must pass +- explicit, readable control flow over clever implicit behavior +- comments only where they clarify non-obvious reasoning or contracts +- deterministic sorting and serialization at all public boundaries -CodeClone follows **semantic versioning**: +Follow existing local patterns before introducing new abstractions. -- **MAJOR**: fundamental detection model changes -- **MINOR**: new detection capabilities (e.g., new detectors or major CFG/normalization behavior shifts) -- **PATCH**: bug fixes, performance improvements, and UI/UX polish +## Releases and Changelog -Any change that affects detection behavior must include documentation and tests, -and may require a `fingerprint_version` bump (and thus baseline regeneration). +User-facing features, compatibility changes, migrations, and notable developer +surfaces belong in `CHANGELOG.md`. Routine fixes made during the current +development cycle do not need individual changelog entries unless they alter a +published contract or require user action. ---- +Release work must follow [`docs/releasing.md`](docs/releasing.md), including +artifact, installation, and publishing checks for every affected surface. ## License -By contributing code to CodeClone, you agree that your contributions will be -licensed under **MPL-2.0**. +By contributing code to CodeClone, you agree that the contribution is licensed +under **MPL-2.0**. Documentation contributions are licensed under **MIT**. 
diff --git a/README.md b/README.md index b2f25858..097ef4d0 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,22 @@ srcset="https://raw.githubusercontent.com/orenlab/codeclone/main/docs/assets/codeclone-wordmark.svg" > CodeClone -

A structural review layer for Python — baseline-aware, deterministic, built for CI and AI agents

+

Structural Change Controller for AI-assisted Python development

-[![][pypi-shield]][pypi-link] [![][status-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][python-shield]][pypi-link] [![][score-shield]][score-link] [![][license-shield]][license-link] +

+ + Let agents move fast.
+ Keep structural change explicit, bounded, remembered, and verifiable. +
+

+ +[![][pypi-shield]][pypi-link] [![][status-shield]][pypi-link] [![][downloads-shield]][pypi-link] [![][python-shield]][pypi-link] [![][license-shield]][license-link] [![][tests-shield]][tests-link] [![][benchmark-shield]][benchmark-link] @@ -26,455 +33,323 @@ --- -CodeClone adds a **control layer** between analysis and CI: it **isolates structural regressions** -from historical debt, so merges are blocked only by **what actually got worse**. - -**One canonical analysis.** The same **deterministic facts** across CLI, HTML reports, -IDE, and MCP — for both **human reviewers** and **AI agents**. - -Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · -Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) - -## Features - -**Control & governance** - -- **Baseline governance** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed -- **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support -- **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report +> [!NOTE] +> This repository and the documentation site track the **unreleased v2.1.0 development line**. +> For the current stable release, use +> [CodeClone v2.0.2](https://github.com/orenlab/codeclone/tree/v2.0.2) +> or install [CodeClone 2.0.2 from PyPI](https://pypi.org/project/codeclone/2.0.2/). -**Detection & analysis** +**CodeClone** is a deterministic **Structural Change Controller** for AI-assisted Python development, built on one +canonical structural analysis of the repository. 
-- **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones -- **Structural findings** — duplicated branch families, clone guard/exit divergence, and clone-cohort drift -- **Quality metrics** — cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth - profile, dead code, health score, and overloaded-module profiling -- **Adoption & API** — type/docstring annotation coverage, public API surface inventory and baseline diff -- **Coverage Join** — fuse external Cobertura XML into the current run to surface coverage hotspots and scope gaps -- **Security Surfaces** — report-only inventory of security-relevant capability boundaries without vulnerability claims +Before editing, an agent declares intent. CodeClone maps the structural blast radius, establishes explicit edit +boundaries, and exposes the regression budget. After editing, it compares the actual patch with the declared scope, +verifies structural changes, checks review claims against report facts, and leaves an auditable receipt. -**Surfaces & integrations** - -- **MCP control surface** — triage-first agent and IDE interface over the same canonical pipeline; read-only by contract -- **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract - -**Performance** +```text +intent → blast radius → bounded edit → patch check → review receipt +``` -- **Fast** — incremental caching, parallel processing, warm-run optimization +CodeClone does not use LLM judgment to classify structural regressions or authorize edits. Structural facts come +from deterministic analysis; the same facts serve agents, human reviewers, IDEs, and CI. -## How It Works +## Install and try -
-Pipeline overview -
-CodeClone pipeline -
+Stable release: -Architecture: [Architecture narrative](https://orenlab.github.io/codeclone/architecture/) · -CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) +```bash +uv tool install codeclone +codeclone . +codeclone . --html --open-html-report +``` -## Installation +Run without installing: ```bash -# recommended -uv tool install codeclone +uvx codeclone@latest . +``` -# pip -pip install codeclone +Install the MCP server for local AI agents and IDE clients: -# with MCP server +```bash uv tool install "codeclone[mcp]" -pip install "codeclone[mcp]" +codeclone-mcp --transport stdio ``` -
-Run without install +Install the in-development 2.1 line (alpha/beta prereleases). A plain install +resolves the latest stable release; add a prerelease flag to get 2.1: ```bash -uvx codeclone@latest . +uv tool install --prerelease allow "codeclone[mcp]" # uv +pip install --pre "codeclone[mcp]" # pip ``` -
- -## Quick Start +Run the current development line from source: ```bash -codeclone . # analyze -codeclone . --html # HTML report -codeclone . --html --open-html-report # open in browser -codeclone . --json --md --sarif --text # all formats -codeclone . --ci # CI mode +git clone https://github.com/orenlab/codeclone.git +cd codeclone +uv sync --all-extras +uv run codeclone . ``` -
-More examples +## Why CodeClone -```bash -# timestamped report snapshots -codeclone . --html --json --timestamped-report-paths +AI coding agents accelerate implementation, but they also make scope expansion easier to miss. A narrow task can +quietly spread into shared helpers, tests, public APIs, configuration, and unrelated modules while the final diff +still looks reasonable. -# changed-scope gating against git diff -codeclone . --changed-only --diff-against main +Most review tools start with the completed diff. CodeClone starts with the declared intent. -# shorthand: diff source for changed-scope review -codeclone . --paths-from-git-diff HEAD~1 +```text +declare intent + → inspect structural blast radius + → establish edit boundaries + → make the change + → compare declared and actual scope + → verify structural regressions + → record the outcome ``` -
+The agent still writes the code. CodeClone makes the declared scope explicit before editing and exposes undeclared +expansion when the patch is verified. -## CI Integration +## Structural Change Controller -```bash -# 1. Generate baseline (commit to repo) -codeclone . --update-baseline +The controller reduces the governed agent workflow to four steps: -# 2. Add to CI pipeline -codeclone . --ci +```text +analyze → start → edit → finish ``` -> [!TIP] -> Run `codeclone . --update-baseline` once after install to establish your CI reference point. -> Commit the baseline file — it becomes the contract CI enforces on every push. +- **Start controlled change** — `start_controlled_change` checks workspace state, records intent, maps blast radius, + separates allowed paths from review context and do-not-touch boundaries, and returns the authoritative + `edit_allowed` permission. +- **Finish controlled change** — `finish_controlled_change` resolves the actual changed files once, checks scope, + verifies the patch against the canonical report, validates optional review claims, and produces a review receipt. +- **Patch Trail** — records declared, changed, untouched-in-declared, and boundary-held paths together with + verification and audit anchors. +- **Multi-agent coordination** — lease-bound intents, queues, recovery, and workspace hygiene make concurrent work + visible without treating advisory ownership as structural truth. -
-What --ci enables +Host integrations can enforce the permission model before file edits where the host supports hooks. Regardless of +host enforcement, finish-time verification remains deterministic. -The `--ci` preset equals `--fail-on-new --no-color --quiet`. -When a trusted metrics baseline is loaded, CI mode also enables `--fail-on-new-metrics`. +[Structural Change Controller documentation](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) -
+## One canonical report, every structural surface -### GitHub Action +CodeClone runs one deterministic structural analysis and renders its canonical report through CLI, HTML, JSON, +Markdown, SARIF, MCP, IDE integrations, GitHub Action, and CI. There is no separate analysis engine for agents. -CodeClone ships a composite GitHub Action for PR and CI workflows: +The report covers: -```yaml -- uses: orenlab/codeclone/.github/actions/codeclone@v2 - with: - fail-on-new: "true" - sarif: "true" - pr-comment: "true" +- function clones through CFG fingerprints; +- block clones through statement windows and report-only segment clones; +- clone-cohort drift, duplicated branch families, and guard/exit divergence; +- cyclomatic complexity, coupling, cohesion, dependency cycles, and dead code; +- overloaded-module and other report-only design context; +- type and docstring adoption; +- public API inventory and baseline-aware API break detection; +- external Cobertura coverage joined with structural hotspots; +- report-only security capability boundaries without vulnerability claims; +- deterministic structural health and review priorities. + +```bash +codeclone . --json --html --md --sarif --text ``` -It can run baseline-aware gating, generate JSON and SARIF reports, upload SARIF to GitHub Code Scanning, -and post or update a PR summary comment. +[How CodeClone works](https://orenlab.github.io/codeclone/guide/explanation/how-it-works/) · +[Canonical report contract](https://orenlab.github.io/codeclone/book/05-report/) -Action -docs: [.github/actions/codeclone/README.md](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) +## Baseline-aware CI -### Quality Gates +CodeClone separates accepted legacy debt from new structural regressions. ```bash -# Metrics thresholds -codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 --fail-health 60 +# Create and commit the project baseline once +codeclone . 
--update-baseline -# Structural policies -codeclone . --fail-cycles --fail-dead-code +# Gate future changes against that baseline +codeclone . --ci +``` + +The baseline is a versioned, integrity-checked contract. CI can reject newly introduced clones and baseline-aware +metric, API, and coverage regressions without requiring the existing codebase to be clean first. Absolute threshold +gates remain opt-in. -# Regression detection vs baseline +```bash codeclone . --fail-on-new-metrics +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 +codeclone . --fail-cycles --fail-dead-code +codeclone . --coverage coverage.xml --fail-on-untested-hotspots +codeclone . --api-surface --fail-on-api-break +``` -# Adoption and API governance -codeclone . --min-typing-coverage 80 --min-docstring-coverage 60 -codeclone . --fail-on-typing-regression --fail-on-docstring-regression -codeclone . --api-surface --update-metrics-baseline -codeclone . --fail-on-api-break +[Metrics and quality gates](https://orenlab.github.io/codeclone/book/16-metrics-and-quality-gates/) · +[Baseline contract](https://orenlab.github.io/codeclone/book/07-baseline/) -# Coverage Join — fuse external Cobertura XML into the review -codeclone . --coverage coverage.xml --fail-on-untested-hotspots --coverage-min 50 -``` +## Engineering Memory -Gate details: [Metrics and quality gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) +Engineering Memory gives agents durable, repository-specific context without treating model output as project truth. -### Pre-commit +The local SQLite store contains typed, evidence-linked knowledge such as contracts, architecture decisions, risks, +test anchors, public surfaces, git provenance, and prior controlled changes. Scope-aware retrieval supports the +current change, while project-wide search can combine FTS5 with optional semantic retrieval. 
-```yaml -repos: - - repo: local - hooks: - - id: codeclone - name: CodeClone - entry: codeclone - language: system - pass_filenames: false - args: [ ".", "--ci" ] - types: [ python ] +Audit-derived trajectories preserve how work actually unfolded. Trajectory passports, anomaly profiles, Patch Trail +evidence, and recurring advisory patterns called **Experiences** make previous successes and failures reusable. +Agent-created records remain drafts until a human approves them. + +```bash +codeclone memory init --root . +codeclone memory search "baseline schema" --match all +codeclone memory approve mem-12345678 --i-know-what-im-doing ``` -## MCP Control Surface +Memory can guide an agent. It cannot authorize edits, override blast radius, change a gate, or replace canonical +report facts. + +[Engineering Memory documentation](https://orenlab.github.io/codeclone/book/13-engineering-memory/) · +[Trajectories and Experiences](https://orenlab.github.io/codeclone/guide/memory/trajectories-and-experiences/) + +## AI agents and IDE integrations -Triage-first MCP server for AI agents and IDE clients, built on the same canonical pipeline as the CLI. -Read-only by contract: never mutates source, baselines, or repo state. +The MCP server is triage-first: analyze the repository, narrow the problem, inspect evidence, start a controlled +change, and finish with verification. `get_implementation_context` projects bounded, drift-aware structural context +for repo-relative paths from the existing run, with separate digests for the source artifact and exact response. +It is evidence for planning, never edit authorization. Bounded tools and resources keep the full report out of agent +context until deeper evidence is requested. ```bash -# local stdio clients codeclone-mcp --transport stdio - -# remote / HTTP-only clients codeclone-mcp --transport streamable-http ``` +Structural analysis tools do not mutate source files, baselines, generated reports, or analysis cache. 
Controller +and memory operations update only their explicit state stores. + > [!WARNING] -> Analysis tools require an absolute repository root. Relative roots such as `.` are rejected. -> Keep `stdio` as the default transport for local IDE and agent clients; HTTP exposure beyond -> loopback requires explicit `--allow-remote`. +> Analysis tools require an absolute repository root. Keep `stdio` as the default transport for local clients. +> Exposing HTTP beyond loopback requires explicit `--allow-remote`. -[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · -[MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) +| Surface | Install or source | Documentation | +|---------------------------|----------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------| +| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | [Setup](https://orenlab.github.io/codeclone/guide/integrations/vscode/setup/) | +| **Cursor plugin** | [Cursor storefront](https://github.com/orenlab/codeclone-cursor) | [Install](https://orenlab.github.io/codeclone/guide/integrations/cursor/install-and-skills/) | +| **Claude Code plugin** | [Claude Code marketplace](https://github.com/orenlab/codeclone-claude-code) | [Install](https://orenlab.github.io/codeclone/guide/integrations/claude-code/setup/) | +| **Codex plugin** | [Codex marketplace](https://github.com/orenlab/codeclone-codex) | [Install](https://orenlab.github.io/codeclone/guide/integrations/codex/setup/) | +| **Claude Desktop bundle** | [Bundle repository](https://github.com/orenlab/codeclone-claude-desktop) | [Setup](https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/) | -### Native Client Surfaces +Every client uses the same `codeclone-mcp` interface and canonical structural facts. 
-| Surface | Location | Purpose | -|---------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------| -| **VS Code extension** | [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) | Triage-first structural review in the editor | -| **Claude Desktop bundle** | [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone) | Local `.mcpb` install with pre-loaded instructions | -| **Codex plugin** | [`plugins/codeclone/`](https://github.com/orenlab/codeclone/tree/main/plugins/codeclone) | Native discovery, two skills, and MCP definition | +[MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) · +[MCP interface contract](https://orenlab.github.io/codeclone/book/25-mcp-interface/) · +[Implementation-context tools](https://orenlab.github.io/codeclone/book/25-mcp-interface/tools/implementation-context/) -All three are native clients over the same `codeclone-mcp` contract — no second analysis engine. +## Quick workflows -[VS Code extension docs](https://orenlab.github.io/codeclone/book/21-vscode-extension/) · -[Claude Desktop docs](https://orenlab.github.io/codeclone/book/22-claude-desktop-bundle/) · -[Codex plugin docs](https://orenlab.github.io/codeclone/book/23-codex-plugin/) +Review only the current Git scope: -## Configuration +```bash +codeclone . --changed-only --diff-against main +codeclone . 
--paths-from-git-diff HEAD~1 +``` -CodeClone loads project-level configuration from `pyproject.toml`: +Inspect structural blast radius or run a baseline-relative patch check: -```toml -[tool.codeclone] -min_loc = 10 -min_stmt = 6 -baseline = "codeclone.baseline.json" -golden_fixture_paths = ["tests/fixtures/golden_*"] -skip_metrics = false -quiet = false -html_out = ".cache/codeclone/report.html" -json_out = ".cache/codeclone/report.json" -md_out = ".cache/codeclone/report.md" -sarif_out = ".cache/codeclone/report.sarif" -text_out = ".cache/codeclone/report.txt" -block_min_loc = 20 -block_min_stmt = 8 -segment_min_loc = 20 -segment_min_stmt = 10 +```bash +codeclone . --blast-radius codeclone/analysis/parser.py +codeclone . --patch-verify ``` -Precedence: CLI flags > `pyproject.toml` > built-in defaults. - -Config reference: [Config and defaults](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) - -## Baseline Workflow - -Baselines capture the current duplication state. Once committed, they become the CI reference point. - -- Clones are classified as **NEW** (not in baseline) or **KNOWN** (accepted debt) -- `--update-baseline` writes both clone and metrics snapshots -- Trust is verified via `generator`, `fingerprint_version`, and `payload_sha256` -- In `--ci` mode, an untrusted baseline is a contract error (exit 2) - -Full contract: [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) - -## Exit Codes - -| Code | Meaning | -|------|-------------------------------------------------------------------------------| -| `0` | Success | -| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | -| `3` | Gating failure — new clones or metric threshold exceeded | -| `5` | Internal error | - -Contract errors (`2`) take precedence over gating failures (`3`). 
- -Full policy: [Exit codes and failure policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) - -## Reports - -| Format | Flag | Default path | -|----------|-----------|---------------------------------| -| HTML | `--html` | `.cache/codeclone/report.html` | -| JSON | `--json` | `.cache/codeclone/report.json` | -| Markdown | `--md` | `.cache/codeclone/report.md` | -| SARIF | `--sarif` | `.cache/codeclone/report.sarif` | -| Text | `--text` | `.cache/codeclone/report.txt` | - -All formats are rendered from one canonical JSON report. -`--open-html-report` opens the HTML in the default browser. -`--timestamped-report-paths` appends a UTC timestamp to default filenames. - -Report contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) · -[HTML render](https://orenlab.github.io/codeclone/book/10-html-render/) - -
-Canonical JSON report shape (v2.11) - -Full schema contract: [Report contract](https://orenlab.github.io/codeclone/book/08-report/) - -Top-level keys: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`. - -```json -{ - "report_schema_version": "2.11", - "meta": { - "codeclone_version": "2.0.2", - "project_name": "...", - "scan_root": ".", - "...": "..." - }, - "inventory": { - "files": {}, - "code": {}, - "file_registry": { - "encoding": "relative_path", - "items": [] - } - }, - "findings": { - "summary": {}, - "groups": { - "clones": { - "functions": [], - "blocks": [], - "segments": [] - }, - "structural": { - "groups": [] - }, - "dead_code": { - "groups": [] - }, - "design": { - "groups": [] - } - } - }, - "metrics": { - "summary": { - "coverage_adoption": {}, - "coverage_join": {}, - "api_surface": {} - }, - "families": { - "coverage_adoption": {}, - "coverage_join": {}, - "api_surface": {} - } - }, - "derived": { - "suggestions": [], - "overview": { - "families": {}, - "top_risks": [], - "health_snapshot": {}, - "directory_hotspots": {} - }, - "hotlists": { - "most_actionable_ids": [], - "highest_spread_ids": [], - "production_hotspot_ids": [] - } - }, - "integrity": { - "canonicalization": { - "version": "1", - "scope": "canonical_only" - }, - "digest": { - "algorithm": "sha256", - "verified": true, - "value": "..." - } - } -} +`--patch-verify` is a terminal-only controller query: it cannot combine with +`--changed-only`, `--diff-against`, or `--paths-from-git-diff`. Use changed-scope +flags for git-selected review; use `--patch-verify` alone for a trusted-baseline +budget check on the working tree. Patch-local before/after verification with +explicit changed-file evidence belongs in MCP change control (`check_patch_contract`). + +Use CodeClone in GitHub Actions: + +```yaml +- uses: orenlab/codeclone/.github/actions/codeclone@v2 + with: + fail-on-new: "true" + sarif: "true" + pr-comment: "true" ``` -
+The Action can run baseline-aware gating, publish SARIF to GitHub Code Scanning, upload reports, and maintain a PR +summary comment. -## Inline Suppressions +[GitHub Action documentation](https://orenlab.github.io/codeclone/getting-started/#github-action) -When a symbol is invoked through runtime dynamics (framework callbacks, plugin loading, reflection), -suppress the known false positive at the declaration site: +## Platform Observability -```python -# codeclone: ignore[dead-code] -def handle_exception(exc: Exception) -> None: - ... +Platform Observability is an opt-in diagnostics layer for developing CodeClone itself. It correlates CLI, MCP, +analysis, database, semantic-index, and projection-worker execution and exposes timings, RSS/CPU, query shapes, +payload pressure, causal worker chains, and costly no-ops. +It is disabled by default, stores no raw payload bodies, and cannot affect repository findings, gates, baselines, +memory facts, or edit authorization. -class Middleware: # codeclone: ignore[dead-code] - ... +```bash +CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . +codeclone observability trace --root . --html /tmp/codeclone-observer.html ``` -Suppression contract: [Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) · -[Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) - -## Benchmarking +[Platform Observability documentation](https://orenlab.github.io/codeclone/book/26-platform-observability/) -
-Reproducible Docker benchmark +## Configuration -```bash -./benchmarks/run_docker_benchmark.sh -``` +Project configuration lives in `pyproject.toml`: -The wrapper builds `benchmarks/Dockerfile`, runs isolated container benchmarks, and writes results to -`.cache/benchmarks/codeclone-benchmark.json`. +```toml +[tool.codeclone] +baseline = "codeclone.baseline.json" -Use environment overrides to pin the benchmark envelope: +min_loc = 10 +min_stmt = 6 -```bash -CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ - ./benchmarks/run_docker_benchmark.sh +block_min_loc = 20 +block_min_stmt = 8 ``` -Performance claims are backed by the reproducible benchmark workflow documented in -[Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/). +Precedence is **CLI flags > `pyproject.toml` > built-in defaults**. -
+[Configuration reference](https://orenlab.github.io/codeclone/book/10-config-and-defaults/) · +[Inline suppressions](https://orenlab.github.io/codeclone/book/19-inline-suppressions/) ## Documentation -Full docs and contract book: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) +The documentation site contains user guides, interface contracts, report and baseline schemas, configuration +reference, integration setup, and maintainer material: -Quick links: -[Baseline](https://orenlab.github.io/codeclone/book/06-baseline/) · -[Report](https://orenlab.github.io/codeclone/book/08-report/) · -[Metrics & gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) · -[MCP](https://orenlab.github.io/codeclone/book/20-mcp-interface/) · -[CLI](https://orenlab.github.io/codeclone/book/09-cli/) +**[orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/)** ## License - **Code:** MPL-2.0 (`LICENSE`) - **Documentation and docs-site content:** MIT (`LICENSE-MIT`) -Versions released before this change remain under their original license terms. 
- ## Links -- **Docs:** +- **Documentation:** +- **PyPI:** - **Issues:** - **Discussions:** -- **PyPI:** -- **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) · [MIT docs](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) · [Scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) +- **Licenses:** [MPL-2.0](https://github.com/orenlab/codeclone/blob/main/LICENSE) · [MIT documentation license](https://github.com/orenlab/codeclone/blob/main/LICENSE-MIT) · [License scope map](https://github.com/orenlab/codeclone/blob/main/LICENSES.md) - [pypi-shield]: https://img.shields.io/pypi/v/codeclone?style=flat-square&color=6366f1 [status-shield]: https://img.shields.io/pypi/status/codeclone?style=flat-square&color=6366f1 [downloads-shield]: https://img.shields.io/pypi/dm/codeclone?style=flat-square&color=6366f1 [python-shield]: https://img.shields.io/pypi/pyversions/codeclone?style=flat-square&color=6366f1 -[score-shield]: https://img.shields.io/badge/codeclone-90%20(A)-6366f1?style=flat-square [license-shield]: https://img.shields.io/badge/license-MPL--2.0-6366f1?style=flat-square [tests-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/tests.yml?branch=main&style=flat-square&label=tests [benchmark-shield]: https://img.shields.io/github/actions/workflow/status/orenlab/codeclone/benchmark.yml?style=flat-square&label=benchmark - [pypi-link]: https://pypi.org/project/codeclone/ -[score-link]: #how-it-works [license-link]: #license [tests-link]: https://github.com/orenlab/codeclone/actions/workflows/tests.yml -[benchmark-link]: #benchmarking +[benchmark-link]: https://github.com/orenlab/codeclone/actions/workflows/benchmark.yml diff --git a/SECURITY.md b/SECURITY.md index 72cf77a7..54347435 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,6 +9,7 @@ The following versions currently receive security updates: | Version | Supported | |---------|-----------| +| 2.1.x | Yes | | 2.0.x | Yes | | 1.4.x | No 
| | 1.3.x | No | @@ -43,7 +44,9 @@ CodeClone operates purely on static input and follows a conservative execution m ### Baseline and cache integrity - Baseline files are schema/type validated with size limits and tamper-evident integrity fields - (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in baseline schema `2.0`). + (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in + baseline schema `2.1`; legacy `2.0` payloads remain readable under the trust + model). - Baseline integrity is tamper-evident (audit signal), not tamper-proof cryptographic signing. An actor who can rewrite baseline content and recompute `payload_sha256` can still alter it. - Baseline hash covers canonical clone payload (`clones.functions`, `clones.blocks`, @@ -57,15 +60,20 @@ CodeClone operates purely on static input and follows a conservative execution m with explicit warning and comparison proceeds against an empty baseline. - Cache files are integrity-signed with canonical payload hashing (constant-time comparison), size-limited, and ignored on mismatch. -- Legacy cache secret files (`.cache/codeclone/.cache_secret`) are obsolete and should be removed. +- Legacy cache secret files (`.codeclone/.cache_secret`, or `.cache/codeclone/.cache_secret` in older workspace layouts) are obsolete and should be removed. ### MCP server CodeClone includes an optional read-only MCP server (`codeclone[mcp]`) that exposes analysis results over JSON-RPC (stdio transport). -- The MCP server is **read-only**: it never mutates baselines, source files, cache, or repo state. -- Session-local review markers are in-memory only and discarded on process exit. +- The MCP server is **read-only** with respect to source files, baselines, + analysis cache, and canonical report artifacts. +- Allowed repo-local writes are limited to ephemeral controller coordination + (`.codeclone/intents/`) and optional audit trail + (`.codeclone/db/audit.sqlite3` when `audit_enabled=true`).
+- Session-local review markers and in-memory run history do not survive process + exit. - Tool arguments that accept git refs (`git_diff_ref`) are validated against a strict regex to prevent command injection via `subprocess` calls. - The MCP run store is bounded (`history_limit`) with FIFO eviction to prevent unbounded diff --git a/benchmarks/mcp_token_budget.py b/benchmarks/mcp_token_budget.py new file mode 100644 index 00000000..f0586012 --- /dev/null +++ b/benchmarks/mcp_token_budget.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Standalone benchmark for MCP payload token budget estimation. + +Requires the ``codeclone[token-bench]`` extra (``tiktoken``). + +Usage:: + + uv run python benchmarks/mcp_token_budget.py +""" + +from __future__ import annotations + +import json +import sys + + +def main() -> None: + try: + from codeclone.budget.estimator import estimate_payload + except ImportError: + print( + "ERROR: tiktoken not installed. 
" + "Install with: uv pip install 'codeclone[token-bench]'", + file=sys.stderr, + ) + sys.exit(1) + + scenarios: dict[str, dict[str, object]] = { + "analyze_repository_small": _analyze_repository_small(), + "get_blast_radius_bounded": _blast_radius_bounded(), + "get_blast_radius_large_truncated": _blast_radius_large(), + "check_patch_contract_verify": _patch_contract_verify(), + "create_review_receipt_markdown": _review_receipt(), + "manage_change_intent_declare": _change_intent_declare(), + } + + results: dict[str, dict[str, int]] = {} + total_chars = 0 + total_tokens = 0 + + for name, payload in scenarios.items(): + estimate = estimate_payload(payload) + results[name] = { + "chars": estimate.characters, + "tokens": estimate.tokens, + } + total_chars += estimate.characters + total_tokens += estimate.tokens + + results["full_workflow_all_calls"] = { + "chars": total_chars, + "tokens": total_tokens, + } + + output = { + "encoder": "o200k_base", + "scenarios": results, + } + + print(json.dumps(output, indent=2)) + + +def _analyze_repository_small() -> dict[str, object]: + return { + "run_id": "abc12345", + "focus": "repository", + "version": "2.1.0a1", + "schema": "2.11", + "mode": "full", + "baseline": { + "loaded": True, + "status": "ok", + "trusted": True, + }, + "inventory": {"files": 120, "lines": 45000, "functions": 800, "classes": 90}, + "health": { + "score": 92, + "grade": "A", + "dimensions": { + "clones": 100, + "complexity": 75, + "coupling": 80, + "cohesion": 95, + "dead_code": 100, + "coverage": 85, + "dependencies": 90, + }, + }, + "findings": { + "total": 3, + "new": 1, + "known": 2, + "by_family": {"clones": 2, "dead_code": 1}, + }, + "warnings": [], + "failures": [], + } + + +def _blast_radius_bounded() -> dict[str, object]: + return { + "radius_level": "medium", + "direct_dependents": [ + { + "path": f"pkg/module_{i}.py", + "reason": "imports target", + "edge_type": "import", + } + for i in range(8) + ], + "clone_cohort_members": [ + { + "path": 
f"pkg/clone_{i}.py", + "finding_id": f"CCLONE00{i}", + "clone_type": "Type-2", + } + for i in range(3) + ], + "do_not_touch": [ + {"path": ".codeclone/**", "reason": "generated state"}, + {"path": "codeclone.baseline.json", "reason": "baseline file"}, + ], + "review_context": [ + { + "path": f"pkg/context_{i}.py", + "reason": "report-only signal", + "category": "security_boundary", + } + for i in range(5) + ], + "structural_risk": { + "hub_dependents": 8, + "cohort_spread": 3, + }, + } + + +def _blast_radius_large() -> dict[str, object]: + base = _blast_radius_bounded() + base["direct_dependents"] = [ + { + "path": f"pkg/deep/sub/module_{i}.py", + "reason": "transitive import chain via pkg.core", + "edge_type": "import", + } + for i in range(50) + ] + base["review_context"] = [ + { + "path": f"pkg/large_context_{i}.py", + "reason": f"overloaded module candidate (score={0.7 + i * 0.01:.2f})", + "category": "overloaded_module", + } + for i in range(30) + ] + return base + + +def _patch_contract_verify() -> dict[str, object]: + return { + "mode": "verify", + "status": "accepted", + "before": {"run_id": "before12", "health": 90}, + "after": {"run_id": "after123", "health": 90}, + "strictness": "ci", + "structural_delta": { + "regressions": [], + "improvements": [ + {"id": "CCLONE001", "kind": "clone_group", "severity": "medium"} + ], + "health_delta": 0, + "verdict": "stable", + }, + "worsened": [], + "scope_check": { + "status": "clean", + "declared_scope": ["pkg/a.py", "pkg/b.py"], + "actual_changed_files": ["pkg/a.py"], + "unexpected_files": [], + "forbidden_touched": [], + }, + "gate_preview": {"would_fail": False, "exit_code": 0, "reasons": []}, + "baseline_abuse": {"detected": False, "triggers": []}, + "contract_violations": [], + "blocking_violations": [], + "message": "Patch contract accepted.", + } + + +def _review_receipt() -> dict[str, object]: + return { + "format": "markdown", + "receipt": { + "verdict": "clean", + "provenance": { + "digest": "a" * 64, + 
"schema_version": "2.11", + "baseline_trust": "ok", + "run_id": "abc12345", + "root": "/repo", + }, + "scope": { + "intent_id": "intent-abc-001", + "declared_files": ["pkg/a.py", "pkg/b.py"], + "changed_files": ["pkg/a.py"], + "unexpected_files": [], + }, + "blast_radius_summary": { + "level": "low", + "direct_dependents": 2, + "clone_cohorts": 0, + "do_not_touch": 3, + }, + "reviewed_findings": [ + { + "finding_id": "CCLONE001", + "reviewed": True, + "note": "Accepted: intentional parallel implementation", + } + ], + "patch_contract": { + "status": "accepted", + "violations": [], + }, + "human_decision_points": [ + "Clone divergence in pkg/a.py:func_a acknowledged", + ], + "claims_not_made": [ + "Security Surfaces are boundary inventory, not vulnerability claims", + "Report-only signals are not CI gates", + ], + }, + } + + +def _change_intent_declare() -> dict[str, object]: + return { + "intent_id": "intent-abc-001", + "run_id": "abc12345", + "status": "active", + "scope": { + "allowed_files": ["pkg/a.py", "pkg/b.py", "tests/test_a.py"], + "allowed_related": ["pkg/utils.py"], + "forbidden": [".cache/**", "codeclone.baseline.json"], + }, + "intent": "Refactor module A and B to reduce coupling", + "guards": [ + "scope_expansion_requires_explanation", + "baseline_update_forbidden", + "new_structural_regression_forbidden", + ], + "blast_radius_summary": { + "radius_level": "medium", + "direct_dependents_count": 5, + "clone_cohort_members_count": 1, + "do_not_touch_count": 3, + }, + "concurrent_intents": [], + "workspace_registered": True, + "ttl_seconds": 3600, + } + + +if __name__ == "__main__": + main() diff --git a/codeclone.baseline.json b/codeclone.baseline.json index c6b5719f..e67b4026 100644 --- a/codeclone.baseline.json +++ b/codeclone.baseline.json @@ -2,14 +2,14 @@ "meta": { "generator": { "name": "codeclone", - "version": "2.0.0b6" + "version": "2.1.0a1" }, "schema_version": "2.1", "fingerprint_version": "1", "python_tag": "cp314", - "created_at": 
"2026-04-24T14:37:27Z", + "created_at": "2026-06-01T13:12:27Z", "payload_sha256": "a2e5e3ac672ddbc7ba95c3a9608257727a01480ef343bc6a70c168fc9355e99a", - "metrics_payload_sha256": "26ebd9e502bb4d98d97da593532395de140b2c64b03d85ab91e681f9025fedff" + "metrics_payload_sha256": "988dd9d57712603e46cbd43a05f3894b67ec73da9ba43267205ff6b17485c30c" }, "clones": { "functions": [], @@ -18,18 +18,18 @@ "metrics": { "max_complexity": 20, "high_risk_functions": [], - "max_coupling": 9, + "max_coupling": 8, "high_coupling_classes": [], "max_cohesion": 3, "low_cohesion_classes": [], "dependency_cycles": [], - "dependency_max_depth": 16, + "dependency_max_depth": 18, "dead_code_items": [], - "health_score": 90, + "health_score": 91, "health_grade": "A", "typing_param_permille": 1000, "typing_return_permille": 999, - "docstring_permille": 39, - "typing_any_count": 10 + "docstring_permille": 81, + "typing_any_count": 20 } } diff --git a/codeclone/analysis/_module_walk.py b/codeclone/analysis/_module_walk.py index 268a9ad7..288b7189 100644 --- a/codeclone/analysis/_module_walk.py +++ b/codeclone/analysis/_module_walk.py @@ -10,10 +10,16 @@ import tokenize from collections.abc import Iterator from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Literal, NamedTuple +from typing import TYPE_CHECKING, Literal, NamedTuple, TypeGuard from .. import qualnames as _qualnames -from ..models import DeadCandidate, ModuleDep +from ..models import ( + DeadCandidate, + FunctionRelationshipFacts, + ModuleDep, + RelationshipOriginLane, + RelationshipRecord, +) from .class_metrics import _node_line_span from .parser import ( _build_declaration_token_index, @@ -60,6 +66,13 @@ "validator", } ) +# Cohesion ignores declarative validation/serialization hooks because they are +# field-local framework callbacks, not instance-behavior methods. 
`computed_field` +# is deliberately excluded: it commonly reads `self.*` and participates in real +# object cohesion, so it stays in the LCOM4 graph. +_COHESION_IGNORED_PYDANTIC_HOOKS = _PYDANTIC_DECORATOR_NAMES - frozenset( + {"computed_field"} +) def _resolve_import_target( @@ -97,6 +110,9 @@ class _ModuleWalkState: default_factory=lambda: set(_NON_RUNTIME_DECORATOR_SYMBOLS) ) pydantic_module_aliases: set[str] = field(default_factory=lambda: {"pydantic"}) + cohesion_ignored_decorator_aliases: set[str] = field( + default_factory=lambda: set(_COHESION_IGNORED_PYDANTIC_HOOKS) + ) def _append_module_dep( @@ -387,6 +403,9 @@ def _collect_import_from_node( state.non_runtime_decorator_aliases.update( _matching_import_aliases(node, _PYDANTIC_DECORATOR_NAMES) ) + state.cohesion_ignored_decorator_aliases.update( + _matching_import_aliases(node, _COHESION_IGNORED_PYDANTIC_HOOKS) + ) if not collect_referenced_names or not target: return @@ -414,6 +433,388 @@ def _collect_load_reference_node( state.attr_nodes.append(node) +@dataclass(frozen=True, slots=True) +class _RelationshipImportIndex: + symbol_bindings: dict[str, frozenset[str]] + module_bindings: dict[str, frozenset[str]] + module_shadowed_names: frozenset[str] + + +def _iter_relationship_scope_nodes(body: list[ast.stmt]) -> Iterator[ast.AST]: + stack: list[ast.AST] = list(reversed(body)) + while stack: + node = stack.pop() + yield node + if isinstance( + node, + ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef | ast.Lambda, + ): + continue + stack.extend(reversed(list(ast.iter_child_nodes(node)))) + + +def _freeze_relationship_bindings( + bindings: dict[str, set[str]], +) -> dict[str, frozenset[str]]: + return { + name: frozenset(sorted(targets)) for name, targets in sorted(bindings.items()) + } + + +def _scope_declaration_binding_name(node: ast.AST) -> str | None: + if isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef): + return node.name + if isinstance(node, ast.ExceptHandler) and 
node.name: + return node.name + if isinstance(node, ast.MatchAs | ast.MatchStar) and node.name: + return node.name + return None + + +def _collect_relationship_import_index( + *, + tree: ast.AST, + module_name: str, +) -> _RelationshipImportIndex: + symbol_bindings: dict[str, set[str]] = {} + module_bindings: dict[str, set[str]] = {} + shadowed_names: set[str] = set() + if not isinstance(tree, ast.Module): + return _RelationshipImportIndex({}, {}, frozenset()) + + for node in _iter_relationship_scope_nodes(tree.body): + if isinstance(node, ast.Import): + for alias in node.names: + alias_name = alias.asname or alias.name.split(".", 1)[0] + module_bindings.setdefault(alias_name, set()).add(alias.name) + continue + if isinstance(node, ast.ImportFrom): + target = _resolve_import_target(module_name, node) + if target: + for alias in node.names: + if alias.name != "*": + alias_name = alias.asname or alias.name + symbol_bindings.setdefault(alias_name, set()).add( + f"{target}:{alias.name}" + ) + continue + if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store | ast.Del): + shadowed_names.add(node.id) + continue + declaration_name = _scope_declaration_binding_name(node) + if declaration_name is not None: + shadowed_names.add(declaration_name) + + return _RelationshipImportIndex( + symbol_bindings=_freeze_relationship_bindings(symbol_bindings), + module_bindings=_freeze_relationship_bindings(module_bindings), + module_shadowed_names=frozenset(sorted(shadowed_names)), + ) + + +def _function_parameter_names(node: _qualnames.FunctionNode) -> set[str]: + positional = [*node.args.posonlyargs, *node.args.args, *node.args.kwonlyargs] + names = {arg.arg for arg in positional} + if node.args.vararg is not None: + names.add(node.args.vararg.arg) + if node.args.kwarg is not None: + names.add(node.args.kwarg.arg) + return names + + +def _caller_local_bindings(node: _qualnames.FunctionNode) -> frozenset[str]: + bound_names = _function_parameter_names(node) + global_names: 
set[str] = set() + nonlocal_names: set[str] = set() + for scope_node in _iter_relationship_scope_nodes(node.body): + if isinstance(scope_node, ast.Name) and isinstance( + scope_node.ctx, ast.Store | ast.Del + ): + bound_names.add(scope_node.id) + elif isinstance(scope_node, ast.Import): + bound_names.update( + alias.asname or alias.name.split(".", 1)[0] + for alias in scope_node.names + ) + elif isinstance(scope_node, ast.ImportFrom): + bound_names.update( + alias.asname or alias.name + for alias in scope_node.names + if alias.name != "*" + ) + elif isinstance(scope_node, ast.Global): + global_names.update(scope_node.names) + elif isinstance(scope_node, ast.Nonlocal): + nonlocal_names.update(scope_node.names) + else: + declaration_name = _scope_declaration_binding_name(scope_node) + if declaration_name is not None: + bound_names.add(declaration_name) + bound_names.difference_update(global_names) + bound_names.difference_update(nonlocal_names) + return frozenset(sorted(bound_names)) + + +def _first_parameter_name(node: _qualnames.FunctionNode) -> str | None: + positional = [*node.args.posonlyargs, *node.args.args] + return positional[0].arg if positional else None + + +def _decorator_simple_names(node: _qualnames.FunctionNode) -> frozenset[str]: + names: set[str] = set() + for decorator in node.decorator_list: + target = decorator.func if isinstance(decorator, ast.Call) else decorator + if isinstance(target, ast.Name): + names.add(target.id) + elif isinstance(target, ast.Attribute): + names.add(target.attr) + return frozenset(names) + + +def _relationship_expression(node: ast.AST) -> str | None: + try: + expression = ast.unparse(node) + except (TypeError, ValueError): + return None + return expression or None + + +def _single_relationship_target( + targets: frozenset[str] | None, + *, + resolved_rule: str, +) -> tuple[str | None, str]: + if not targets: + return None, "unresolved_name" + if len(targets) != 1: + return None, "ambiguous_import" + return 
next(iter(targets)), resolved_rule + + +def _resolve_relationship_expression( + node: ast.expr, + *, + module_name: str, + imports: _RelationshipImportIndex, + caller_bindings: frozenset[str], + top_level_function_names: frozenset[str], + top_level_class_names: frozenset[str], + local_method_qualnames: frozenset[str], + enclosing_class_local: str | None, + receiver_name: str | None, +) -> tuple[str | None, str]: + if isinstance(node, ast.Name): + import_targets = imports.symbol_bindings.get(node.id) + if import_targets and ( + node.id in caller_bindings or node.id in imports.module_shadowed_names + ): + return None, "local_shadowing" + if import_targets: + return _single_relationship_target( + import_targets, + resolved_rule="imported_symbol", + ) + if node.id in caller_bindings: + return None, "unresolved_name" + if node.id in top_level_function_names: + return f"{module_name}:{node.id}", "same_module_function" + return None, "unresolved_name" + + if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name): + base_name = node.value.id + import_targets = imports.module_bindings.get(base_name) + if import_targets and ( + base_name in caller_bindings or base_name in imports.module_shadowed_names + ): + return None, "local_shadowing" + if import_targets: + target_module, rule = _single_relationship_target( + import_targets, + resolved_rule="imported_module_attribute", + ) + if target_module is not None: + return f"{target_module}:{node.attr}", rule + return None, rule + # The receiver parameter (self/cls) is itself a caller binding, so the + # self/cls case must precede the generic caller-shadow guard below. 
+ if ( + receiver_name is not None + and enclosing_class_local is not None + and base_name == receiver_name + ): + candidate = f"{module_name}:{enclosing_class_local}.{node.attr}" + if candidate in local_method_qualnames: + return candidate, "self_or_cls_method" + return None, "unresolved_dynamic" + if base_name in top_level_class_names and base_name not in caller_bindings: + candidate = f"{module_name}:{base_name}.{node.attr}" + if candidate in local_method_qualnames: + return candidate, "same_module_class_method" + return None, "unresolved_dynamic" + return None, "unresolved_dynamic" + + +def _relationship_record( + *, + relation_kind: Literal["call", "reference"], + origin_lane: RelationshipOriginLane, + source_qualname: str, + target_qualname: str | None, + filepath: str, + node: ast.expr, + resolution_rule: str, +) -> RelationshipRecord: + return RelationshipRecord( + relation_kind=relation_kind, + resolution_status="resolved" if target_qualname is not None else "unresolved", + origin_lane=origin_lane, + source_qualname=source_qualname, + target_qualname=target_qualname, + path=filepath, + line=max(1, int(getattr(node, "lineno", 1))), + expression=_relationship_expression(node), + resolution_rule=resolution_rule, + ) + + +def _relationship_record_sort_key( + record: RelationshipRecord, +) -> tuple[str, str, str, str, int, str, str]: + return ( + record.relation_kind, + record.origin_lane, + record.target_qualname or "", + record.path, + record.line, + record.resolution_rule or "", + record.expression or "", + ) + + +def _is_relationship_reference_node( + node: ast.AST, + *, + call_function_node_ids: set[int], +) -> TypeGuard[ast.Name | ast.Attribute]: + return ( + id(node) not in call_function_node_ids + and isinstance(node, ast.Name | ast.Attribute) + and isinstance(node.ctx, ast.Load) + ) + + +def _collect_function_relationship_facts( + *, + tree: ast.AST, + module_name: str, + filepath: str, + collector: _qualnames.QualnameCollector, + origin_lane: 
RelationshipOriginLane, +) -> tuple[FunctionRelationshipFacts, ...]: + imports = _collect_relationship_import_index( + tree=tree, + module_name=module_name, + ) + top_level_function_names = frozenset( + local_name for local_name, _node in collector.units if "." not in local_name + ) + top_level_class_names = frozenset( + class_qualname + for class_qualname, _node in collector.class_nodes + if "." not in class_qualname + ) + local_method_qualnames = frozenset( + f"{module_name}:{local_name}" + for local_name, _node in collector.units + if "." in local_name + ) + facts: list[FunctionRelationshipFacts] = [] + for local_name, function_node in collector.units: + source_qualname = f"{module_name}:{local_name}" + caller_bindings = _caller_local_bindings(function_node) + # The enclosing class of a method is the qualname segment before its own + # name; top-level functions have none. The receiver (self/cls) is the + # first parameter, but only for non-static methods — a staticmethod's + # first parameter is an ordinary value, not a receiver. + enclosing_class_local = ( + local_name.rsplit(".", 1)[0] if "." 
in local_name else None + ) + receiver_name = ( + _first_parameter_name(function_node) + if enclosing_class_local is not None + and "staticmethod" not in _decorator_simple_names(function_node) + else None + ) + scope_nodes = tuple(_iter_relationship_scope_nodes(function_node.body)) + calls = tuple(node for node in scope_nodes if isinstance(node, ast.Call)) + call_function_node_ids = { + id(descendant) for call in calls for descendant in ast.walk(call.func) + } + records: list[RelationshipRecord] = [] + for call in calls: + target_qualname, resolution_rule = _resolve_relationship_expression( + call.func, + module_name=module_name, + imports=imports, + caller_bindings=caller_bindings, + top_level_function_names=top_level_function_names, + top_level_class_names=top_level_class_names, + local_method_qualnames=local_method_qualnames, + enclosing_class_local=enclosing_class_local, + receiver_name=receiver_name, + ) + records.append( + _relationship_record( + relation_kind="call", + origin_lane=origin_lane, + source_qualname=source_qualname, + target_qualname=target_qualname, + filepath=filepath, + node=call.func, + resolution_rule=resolution_rule, + ) + ) + for node in scope_nodes: + if not _is_relationship_reference_node( + node, + call_function_node_ids=call_function_node_ids, + ): + continue + target_qualname, resolution_rule = _resolve_relationship_expression( + node, + module_name=module_name, + imports=imports, + caller_bindings=caller_bindings, + top_level_function_names=top_level_function_names, + top_level_class_names=top_level_class_names, + local_method_qualnames=local_method_qualnames, + enclosing_class_local=enclosing_class_local, + receiver_name=receiver_name, + ) + if target_qualname is not None: + records.append( + _relationship_record( + relation_kind="reference", + origin_lane=origin_lane, + source_qualname=source_qualname, + target_qualname=target_qualname, + filepath=filepath, + node=node, + resolution_rule=resolution_rule, + ) + ) + if records: + 
facts.append( + FunctionRelationshipFacts( + source_qualname=source_qualname, + relationships=tuple( + sorted(records, key=_relationship_record_sort_key) + ), + ) + ) + return tuple(sorted(facts, key=lambda item: item.source_qualname)) + + def _is_protocol_class( class_node: ast.ClassDef, *, @@ -448,6 +849,69 @@ def _is_known_pydantic_decorator( ) +def _is_cohesion_ignored_decorator( + name: str, + *, + cohesion_ignored_decorator_aliases: frozenset[str], + pydantic_module_aliases: frozenset[str], +) -> bool: + # Bare or no-asname form: the decorator name matches a known hook alias. + if name in cohesion_ignored_decorator_aliases: + return True + # Dotted form, e.g. pydantic.field_validator. + terminal = name.rsplit(".", 1)[-1] + if terminal not in _COHESION_IGNORED_PYDANTIC_HOOKS or "." not in name: + return False + module_alias = name.rsplit(".", 1)[0] + return any( + module_alias == alias or module_alias.startswith(f"{alias}.") + for alias in pydantic_module_aliases + ) + + +def _cohesion_ignored_method_names( + class_node: ast.ClassDef, + *, + protocol_symbol_aliases: frozenset[str], + protocol_module_aliases: frozenset[str], + pydantic_module_aliases: frozenset[str], + cohesion_ignored_decorator_aliases: frozenset[str], +) -> frozenset[str]: + """Return method names excluded from LCOM4 cohesion for this class. + + Protocol declarations contribute all their method names (the whole class is + an interface surface). Other classes contribute only methods decorated with + Pydantic validator/serializer hooks. ``computed_field`` is never ignored + because it commonly reads ``self.*`` and carries real cohesion. 
+ """ + methods = [ + node + for node in class_node.body + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) + ] + if _is_protocol_class( + class_node, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + ): + return frozenset(method.name for method in methods) + + ignored: set[str] = set() + for method in methods: + for decorator in method.decorator_list: + name = _decorator_expr_name(decorator) + if name is None: + continue + if _is_cohesion_ignored_decorator( + name, + cohesion_ignored_decorator_aliases=cohesion_ignored_decorator_aliases, + pydantic_module_aliases=pydantic_module_aliases, + ): + ignored.add(method.name) + break + return frozenset(ignored) + + def _is_non_runtime_candidate( node: _qualnames.FunctionNode, *, @@ -633,6 +1097,7 @@ class _ModuleWalkResult(NamedTuple): protocol_module_aliases: frozenset[str] non_runtime_decorator_aliases: frozenset[str] pydantic_module_aliases: frozenset[str] + cohesion_ignored_decorator_aliases: frozenset[str] def _collect_module_walk_data( @@ -693,6 +1158,9 @@ def _collect_module_walk_data( protocol_module_aliases=frozenset(state.protocol_module_aliases), non_runtime_decorator_aliases=frozenset(state.non_runtime_decorator_aliases), pydantic_module_aliases=frozenset(state.pydantic_module_aliases), + cohesion_ignored_decorator_aliases=frozenset( + state.cohesion_ignored_decorator_aliases + ), ) diff --git a/codeclone/analysis/blast_radius.py b/codeclone/analysis/blast_radius.py new file mode 100644 index 00000000..a778c39d --- /dev/null +++ b/codeclone/analysis/blast_radius.py @@ -0,0 +1,659 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Neutral blast-radius computation over canonical report dicts.""" + +from __future__ import annotations + +from collections import deque +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Final, Literal + +from ..paths.workspace import FORBIDDEN_WORKSPACE_GLOBS + +BlastRadiusDepth = Literal["direct", "transitive"] + +BOUNDARY_REASON_BASELINE_OR_STATE: Final = ( + "baseline, CodeClone state/cache, and generated artifacts " + "require explicit separate changes" +) +BOUNDARY_REASON_EXPLICIT_FORBIDDEN: Final = "declared forbidden path" +REVIEW_REASON_KNOWN_BASELINE_DEBT: Final = "known baseline debt outside declared origin" +REVIEW_REASON_GOLDEN_FIXTURE_SURFACE: Final = "golden fixture clone suppression surface" +REVIEW_REASON_SECURITY_BOUNDARY: Final = "report-only security boundary inventory" +REVIEW_REASON_REPORT_ONLY_DESIGN: Final = "report-only design signal" +BOUNDARY_REASON_AFFECTED_NOT_ALLOWED: Final = ( + "affected by blast radius but outside declared edit scope" +) + +GUARDRAIL_REVIEW_DEPENDENTS: Final = ( + "review direct dependents before editing public behavior" +) +GUARDRAIL_CLONE_COHORT_CONTEXT: Final = ( + "treat clone cohort members as comparison context, not automatic edit targets" +) +GUARDRAIL_HIGH_RADIUS_APPROVAL: Final = ( + "high blast radius requires explicit human scope approval" +) +GUARDRAIL_DO_NOT_TOUCH_APPROVAL: Final = ( + "do-not-touch paths require separate explicit approval" +) + +DEFAULT_DO_NOT_TOUCH_PATTERNS: Final[tuple[str, ...]] = ( + "codeclone.baseline.json", + *FORBIDDEN_WORKSPACE_GLOBS, +) +MAX_CONTEXT_ITEMS: Final[int] = 20 + + +@dataclass(frozen=True, slots=True) +class BlastRadiusResult: + run_id: str + origin: tuple[str, ...] + depth: BlastRadiusDepth + radius_level: str + direct_dependents: tuple[str, ...] + transitive_dependents: tuple[str, ...] + clone_cohort_members: tuple[str, ...] 
+ in_dependency_cycle: tuple[str, ...] + structural_risk: dict[str, list[str]] + do_not_touch: tuple[dict[str, str], ...] + review_context: tuple[dict[str, str], ...] + guardrails: tuple[str, ...] + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _as_int(value: object, default: int = 0) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str): + try: + return int(value.strip()) + except ValueError: + return default + return default + + +def _normalize_relative_path(path: object) -> str: + text = str(path).replace("\\", "/").strip() + if text == ".": + return "" + if text.startswith("./"): + text = text[2:] + return text.rstrip("/") + + +def _path_to_module(path: str) -> str: + normalized = _normalize_relative_path(path) + if not normalized.endswith(".py"): + return normalized.replace("/", ".") + without_suffix = normalized[:-3] + if without_suffix.endswith("/__init__"): + without_suffix = without_suffix[: -len("/__init__")] + if without_suffix == "__init__": + without_suffix = "" + return without_suffix.replace("/", ".").strip(".") + + +def _module_to_candidate_path(module: str) -> str: + return f"{module.replace('.', '/')}.py" if module else "" + + +def _dedupe_sorted(values: Sequence[str] | set[str]) -> tuple[str, ...]: + return tuple(sorted({value for value in values if value})) + + +def _item_path(item: Mapping[str, object]) -> str: + for key in ("relative_path", "path", "filepath", "file"): + value = _normalize_relative_path(item.get(key, "")) + if value: + return value + return "" + + +def _module_path_index(report_document: Mapping[str, object]) -> dict[str, str]: + modules: dict[str, str] = {} + 
inventory = _as_mapping(report_document.get("inventory")) + file_registry = _as_mapping(inventory.get("file_registry")) + for raw_path in _as_sequence(file_registry.get("items")): + path = _normalize_relative_path(raw_path) + module = _path_to_module(path) + if module and path: + modules.setdefault(module, path) + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + for family_name in ( + "complexity", + "coupling", + "cohesion", + "coverage_join", + "overloaded_modules", + "security_surfaces", + "api_surface", + "coverage_adoption", + ): + family = _as_mapping(families.get(family_name)) + for raw_item in _as_sequence(family.get("items")): + item = _as_mapping(raw_item) + path = _item_path(item) + module = str(item.get("module", "")).strip() or _path_to_module(path) + if module and path: + modules.setdefault(module, path) + return modules + + +def _module_to_output(module: str, module_paths: Mapping[str, str]) -> str: + return module_paths.get(module, _module_to_candidate_path(module) or module) + + +def _build_reverse_import_graph( + edges: Sequence[Mapping[str, object]], +) -> dict[str, set[str]]: + reverse: dict[str, set[str]] = {} + for edge in edges: + source = str(edge.get("source", "")).strip() + target = str(edge.get("target", "")).strip() + if source and target: + reverse.setdefault(target, set()).add(source) + return reverse + + +def _dependency_edges( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + return tuple(_as_mapping(item) for item in _as_sequence(dependencies.get("items"))) + + +def _dependency_cycles( + report_document: Mapping[str, object], +) -> tuple[tuple[str, ...], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + dependencies = 
_as_mapping(families.get("dependencies")) + cycles: list[tuple[str, ...]] = [] + for raw_cycle in _as_sequence(dependencies.get("cycles")): + cycle = tuple( + str(module).strip() + for module in _as_sequence(raw_cycle) + if str(module).strip() + ) + if cycle: + cycles.append(cycle) + return tuple(sorted(cycles, key=lambda item: (len(item), item))) + + +def _compute_direct_dependents( + *, + origin_modules: Sequence[str], + reverse_graph: Mapping[str, set[str]], +) -> tuple[str, ...]: + dependents: set[str] = set() + for module in origin_modules: + dependents.update(reverse_graph.get(module, set())) + return _dedupe_sorted(dependents) + + +def _compute_transitive_dependents( + *, + origin_modules: Sequence[str], + reverse_graph: Mapping[str, set[str]], +) -> tuple[str, ...]: + seen: set[str] = set() + queue: deque[str] = deque(origin_modules) + origin_set = set(origin_modules) + while queue: + current = queue.popleft() + for dependent in sorted(reverse_graph.get(current, set())): + if dependent in seen or dependent in origin_set: + continue + seen.add(dependent) + queue.append(dependent) + return _dedupe_sorted(seen) + + +def _clone_group_buckets( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + buckets: list[Mapping[str, object]] = [] + for bucket_name in ("functions", "blocks", "segments"): + buckets.extend( + _as_mapping(item) for item in _as_sequence(clones.get(bucket_name)) + ) + return tuple(buckets) + + +def _suppressed_clone_buckets( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + suppressed = _as_mapping(clones.get("suppressed")) + buckets: list[Mapping[str, object]] = [] + for bucket_name in ( 
+ "function", + "block", + "segment", + "functions", + "blocks", + "segments", + ): + buckets.extend( + _as_mapping(item) for item in _as_sequence(suppressed.get(bucket_name)) + ) + return tuple(buckets) + + +def _compute_clone_cohort_members( + *, + report_document: Mapping[str, object], + origin_paths: Sequence[str], +) -> tuple[str, ...]: + origin_set = set(origin_paths) + cohort_paths: set[str] = set() + for group in _clone_group_buckets(report_document): + item_paths = { + _item_path(_as_mapping(item)) for item in _as_sequence(group.get("items")) + } + item_paths.discard("") + if origin_set.intersection(item_paths): + cohort_paths.update(item_paths - origin_set) + return _dedupe_sorted(cohort_paths) + + +def _compute_cycle_membership( + *, + origin_modules: Sequence[str], + origin_by_module: Mapping[str, str], + report_document: Mapping[str, object], +) -> tuple[str, ...]: + cycle_modules = { + module for cycle in _dependency_cycles(report_document) for module in cycle + } + return _dedupe_sorted( + { + origin_by_module[module] + for module in origin_modules + if module in cycle_modules and origin_by_module.get(module) + } + ) + + +def _compute_radius_level( + *, + direct_dependents: Sequence[str], + clone_cohort_members: Sequence[str], +) -> str: + total_affected = len(direct_dependents) + len(clone_cohort_members) + if total_affected == 0: + return "low" + if total_affected <= 5: + return "medium" + return "high" + + +def _blast_zone( + *, + origin_paths: Sequence[str], + direct_dependents: Sequence[str], + transitive_dependents: Sequence[str], + clone_cohort_members: Sequence[str], +) -> set[str]: + return { + *origin_paths, + *direct_dependents, + *transitive_dependents, + *clone_cohort_members, + } + + +def _compute_risk_signals( + *, + report_document: Mapping[str, object], + blast_zone_paths: set[str], +) -> dict[str, list[str]]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + complexity = 
_as_mapping(families.get("complexity")) + coupling = _as_mapping(families.get("coupling")) + coverage_join = _as_mapping(families.get("coverage_join")) + overloaded_modules = _as_mapping(families.get("overloaded_modules")) + + high_complexity = { + _item_path(_as_mapping(item)) + for item in _as_sequence(complexity.get("items")) + if str(_as_mapping(item).get("risk", "")).strip() == "high" + and _item_path(_as_mapping(item)) in blast_zone_paths + } + high_coupling = { + _item_path(_as_mapping(item)) + for item in _as_sequence(coupling.get("items")) + if str(_as_mapping(item).get("risk", "")).strip() == "high" + and _item_path(_as_mapping(item)) in blast_zone_paths + } + low_coverage = { + _item_path(_as_mapping(item)) + for item in _as_sequence(coverage_join.get("items")) + if ( + bool(_as_mapping(item).get("coverage_hotspot")) + or bool(_as_mapping(item).get("scope_gap_hotspot")) + ) + and _item_path(_as_mapping(item)) in blast_zone_paths + } + overloaded = { + _item_path(_as_mapping(item)) + for item in _as_sequence(overloaded_modules.get("items")) + if str(_as_mapping(item).get("candidate_status", "")).strip() == "candidate" + and _item_path(_as_mapping(item)) in blast_zone_paths + } + return { + "high_complexity_in_blast_zone": list(_dedupe_sorted(high_complexity)), + "high_coupling_in_blast_zone": list(_dedupe_sorted(high_coupling)), + "low_coverage_in_blast_zone": list(_dedupe_sorted(low_coverage)), + "overloaded_modules_in_blast_zone": list(_dedupe_sorted(overloaded)), + } + + +def _finding_paths(finding: Mapping[str, object]) -> tuple[str, ...]: + return _dedupe_sorted( + {_item_path(_as_mapping(item)) for item in _as_sequence(finding.get("items"))} + ) + + +def _all_finding_groups( + report_document: Mapping[str, object], +) -> tuple[Mapping[str, object], ...]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + result: list[Mapping[str, object]] = [] + for family_payload in groups.values(): + 
family_map = _as_mapping(family_payload) + for value in family_map.values(): + result.extend(_as_mapping(item) for item in _as_sequence(value)) + return tuple(result) + + +def _append_boundary_entry( + entries: dict[str, dict[str, str]], + *, + path: str, + reason: str, + category: str, + severity: str, +) -> None: + if not path: + return + entries.setdefault( + path, + { + "path": path, + "reason": reason, + "category": category, + "severity": severity, + }, + ) + + +def _append_review_entry( + entries: dict[tuple[str, str, str], dict[str, str]], + *, + path: str, + reason: str, + category: str, + severity: str = "context", +) -> None: + if not path: + return + entries.setdefault( + (path, category, reason), + { + "path": path, + "reason": reason, + "category": category, + "severity": severity, + }, + ) + + +def _compute_change_boundaries( + *, + report_document: Mapping[str, object], + origin_paths: Sequence[str], + blast_zone_paths: set[str], + forbidden_patterns: Sequence[str], + allowed_scope: Sequence[str] = (), +) -> tuple[tuple[dict[str, str], ...], tuple[dict[str, str], ...]]: + do_not_touch_entries: dict[str, dict[str, str]] = {} + review_entries: dict[tuple[str, str, str], dict[str, str]] = {} + origin_set = set(origin_paths) + allowed_set = set(allowed_scope) + for pattern in DEFAULT_DO_NOT_TOUCH_PATTERNS: + _append_boundary_entry( + do_not_touch_entries, + path=pattern, + reason=BOUNDARY_REASON_BASELINE_OR_STATE, + category="baseline_or_generated_state", + severity="hard", + ) + for pattern in forbidden_patterns: + _append_boundary_entry( + do_not_touch_entries, + path=pattern, + reason=BOUNDARY_REASON_EXPLICIT_FORBIDDEN, + category="explicit_forbidden", + severity="hard", + ) + for group in _all_finding_groups(report_document): + if str(group.get("novelty", "")).strip() != "known": + continue + for path in _finding_paths(group): + if path in blast_zone_paths and path not in origin_set: + _append_review_entry( + review_entries, + path=path, + 
reason=REVIEW_REASON_KNOWN_BASELINE_DEBT, + category="known_baseline_debt", + ) + for group in _suppressed_clone_buckets(report_document): + for path in _finding_paths(group): + if path in blast_zone_paths: + _append_review_entry( + review_entries, + path=path, + reason=REVIEW_REASON_GOLDEN_FIXTURE_SURFACE, + category="golden_fixture_surface", + ) + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + for family_name, reason, category in ( + ( + "security_surfaces", + REVIEW_REASON_SECURITY_BOUNDARY, + "security_boundary_context", + ), + ( + "overloaded_modules", + REVIEW_REASON_REPORT_ONLY_DESIGN, + "report_only_context", + ), + ): + family = _as_mapping(families.get(family_name)) + for raw_item in _as_sequence(family.get("items")): + path = _item_path(_as_mapping(raw_item)) + if path in blast_zone_paths and path not in origin_set: + _append_review_entry( + review_entries, + path=path, + reason=reason, + category=category, + ) + if allowed_set: + for path in blast_zone_paths: + if path not in allowed_set: + _append_boundary_entry( + do_not_touch_entries, + path=path, + reason=BOUNDARY_REASON_AFFECTED_NOT_ALLOWED, + category="affected_but_not_allowed", + severity="requires_expansion", + ) + do_not_touch = tuple( + do_not_touch_entries[path] for path in sorted(do_not_touch_entries) if path + ) + review_context = tuple( + entry + for entry in sorted( + review_entries.values(), + key=lambda item: (item["path"], item["category"], item["reason"]), + ) + if entry["path"] not in do_not_touch_entries + ) + return do_not_touch, review_context + + +def _guardrails( + *, + radius_level: str, + do_not_touch: Sequence[Mapping[str, str]], +) -> tuple[str, ...]: + guardrails = [ + GUARDRAIL_REVIEW_DEPENDENTS, + GUARDRAIL_CLONE_COHORT_CONTEXT, + ] + if radius_level == "high": + guardrails.append(GUARDRAIL_HIGH_RADIUS_APPROVAL) + if do_not_touch: + guardrails.append(GUARDRAIL_DO_NOT_TOUCH_APPROVAL) + return tuple(guardrails) + + 
+def compute_blast_radius( + *, + run_id: str, + report_document: Mapping[str, object], + files: Sequence[str], + depth: BlastRadiusDepth = "direct", + forbidden_patterns: Sequence[str] = DEFAULT_DO_NOT_TOUCH_PATTERNS, + allowed_scope: Sequence[str] = (), +) -> BlastRadiusResult: + origin_paths = _dedupe_sorted( + tuple(_normalize_relative_path(path) for path in files) + ) + module_paths = _module_path_index(report_document) + origin_by_module = { + module: path + for path in origin_paths + for module in (_path_to_module(path),) + if module + } + origin_modules = tuple(sorted(origin_by_module)) + reverse_graph = _build_reverse_import_graph(_dependency_edges(report_document)) + direct_modules = _compute_direct_dependents( + origin_modules=origin_modules, + reverse_graph=reverse_graph, + ) + transitive_modules = ( + _compute_transitive_dependents( + origin_modules=origin_modules, + reverse_graph=reverse_graph, + ) + if depth == "transitive" + else () + ) + direct_dependents = _dedupe_sorted( + tuple(_module_to_output(module, module_paths) for module in direct_modules) + ) + transitive_dependents = _dedupe_sorted( + tuple( + _module_to_output(module, module_paths) + for module in transitive_modules + if module not in set(direct_modules) + ) + ) + clone_cohort_members = _compute_clone_cohort_members( + report_document=report_document, + origin_paths=origin_paths, + ) + dependency_cycle_members = _compute_cycle_membership( + origin_modules=origin_modules, + origin_by_module=origin_by_module, + report_document=report_document, + ) + radius_level = _compute_radius_level( + direct_dependents=direct_dependents, + clone_cohort_members=clone_cohort_members, + ) + zone = _blast_zone( + origin_paths=origin_paths, + direct_dependents=direct_dependents, + transitive_dependents=transitive_dependents, + clone_cohort_members=clone_cohort_members, + ) + risk = _compute_risk_signals( + report_document=report_document, + blast_zone_paths=zone, + ) + do_not_touch, review_context = 
_compute_change_boundaries( + report_document=report_document, + origin_paths=origin_paths, + blast_zone_paths=zone, + forbidden_patterns=forbidden_patterns, + allowed_scope=allowed_scope, + ) + return BlastRadiusResult( + run_id=run_id, + origin=origin_paths, + depth=depth, + radius_level=radius_level, + direct_dependents=direct_dependents, + transitive_dependents=transitive_dependents, + clone_cohort_members=clone_cohort_members, + in_dependency_cycle=dependency_cycle_members, + structural_risk=risk, + do_not_touch=do_not_touch, + review_context=review_context, + guardrails=_guardrails(radius_level=radius_level, do_not_touch=do_not_touch), + ) + + +__all__ = [ + "BOUNDARY_REASON_AFFECTED_NOT_ALLOWED", + "BOUNDARY_REASON_BASELINE_OR_STATE", + "BOUNDARY_REASON_EXPLICIT_FORBIDDEN", + "DEFAULT_DO_NOT_TOUCH_PATTERNS", + "GUARDRAIL_CLONE_COHORT_CONTEXT", + "GUARDRAIL_DO_NOT_TOUCH_APPROVAL", + "GUARDRAIL_HIGH_RADIUS_APPROVAL", + "GUARDRAIL_REVIEW_DEPENDENTS", + "MAX_CONTEXT_ITEMS", + "REVIEW_REASON_GOLDEN_FIXTURE_SURFACE", + "REVIEW_REASON_KNOWN_BASELINE_DEBT", + "REVIEW_REASON_REPORT_ONLY_DESIGN", + "REVIEW_REASON_SECURITY_BOUNDARY", + "BlastRadiusDepth", + "BlastRadiusResult", + "compute_blast_radius", +] diff --git a/codeclone/analysis/class_metrics.py b/codeclone/analysis/class_metrics.py index d343ec78..96299e54 100644 --- a/codeclone/analysis/class_metrics.py +++ b/codeclone/analysis/class_metrics.py @@ -29,6 +29,7 @@ def _class_metrics_for_node( filepath: str, module_import_names: set[str], module_class_names: set[str], + cohesion_ignored_methods: frozenset[str] = frozenset(), ) -> ClassMetrics | None: span = _node_line_span(class_node) if span is None: @@ -39,7 +40,10 @@ def _class_metrics_for_node( module_import_names=module_import_names, module_class_names=module_class_names, ) - lcom4, method_count, instance_var_count = compute_lcom4(class_node) + lcom4, method_count, instance_var_count = compute_lcom4( + class_node, + 
ignored_methods=cohesion_ignored_methods, + ) return ClassMetrics( qualname=f"{module_name}:{class_qualname}", filepath=filepath, diff --git a/codeclone/analysis/units.py b/codeclone/analysis/units.py index 8c1cba07..b4fcf7d2 100644 --- a/codeclone/analysis/units.py +++ b/codeclone/analysis/units.py @@ -34,7 +34,9 @@ from ..paths import is_test_filepath from ._module_walk import ( _build_suppression_index_for_source, + _cohesion_ignored_method_names, _collect_dead_candidates, + _collect_function_relationship_facts, _collect_module_walk_data, ) from .class_metrics import _class_metrics_for_node, _node_line_span @@ -136,6 +138,14 @@ def extract_units_and_stats_from_source( protocol_module_aliases = _walk.protocol_module_aliases non_runtime_decorator_aliases = _walk.non_runtime_decorator_aliases pydantic_module_aliases = _walk.pydantic_module_aliases + cohesion_ignored_decorator_aliases = _walk.cohesion_ignored_decorator_aliases + function_relationship_facts = _collect_function_relationship_facts( + tree=tree, + module_name=module_name, + filepath=filepath, + collector=collector, + origin_lane="test" if is_test_file else "production", + ) suppression_index = _build_suppression_index_for_source( source=source, @@ -245,6 +255,13 @@ def extract_units_and_stats_from_source( structural_findings.extend(structure_facts.structural_findings) for class_qualname, class_node in collector.class_nodes: + cohesion_ignored_methods = _cohesion_ignored_method_names( + class_node, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + pydantic_module_aliases=pydantic_module_aliases, + cohesion_ignored_decorator_aliases=cohesion_ignored_decorator_aliases, + ) class_metric = _class_metrics_for_node( module_name=module_name, class_qualname=class_qualname, @@ -252,6 +269,7 @@ def extract_units_and_stats_from_source( filepath=filepath, module_import_names=module_import_names, module_class_names=module_class_names, + 
cohesion_ignored_methods=cohesion_ignored_methods, ) if class_metric is not None: class_metrics.append(class_metric) @@ -330,6 +348,7 @@ def extract_units_and_stats_from_source( typing_coverage=typing_coverage, docstring_coverage=docstring_coverage, api_surface=api_surface, + function_relationship_facts=function_relationship_facts, ), structural_findings, ) diff --git a/codeclone/analytics/__init__.py b/codeclone/analytics/__init__.py new file mode 100644 index 00000000..e4875c65 --- /dev/null +++ b/codeclone/analytics/__init__.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Corpus analytics — derived clustering over intent historical evidence.""" + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/codeclone/analytics/agent_labels.py b/codeclone/analytics/agent_labels.py new file mode 100644 index 00000000..f5d96d74 --- /dev/null +++ b/codeclone/analytics/agent_labels.py @@ -0,0 +1,47 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from ..contracts import CORPUS_AGENT_LABEL_CONTRACT_VERSION + +_AGENT_FAMILY_RULES: tuple[tuple[str, str], ...] 
= ( + ("cursor-", "cursor"), + ("claude-", "claude"), + ("codex-", "codex"), + ("vscode-", "vscode"), + ("mcp-client", "mcp"), +) + + +def map_agent_family(agent_client_raw: str | None) -> str: + """Map raw agent client label to a deterministic agent family string.""" + if not agent_client_raw: + return "unknown" + normalized = agent_client_raw.strip().lower() + if not normalized: + return "unknown" + for prefix, family in _AGENT_FAMILY_RULES: + if normalized.startswith(prefix) or prefix in normalized: + return family + return "unknown" + + +def agent_label_contract_version() -> str: + return CORPUS_AGENT_LABEL_CONTRACT_VERSION + + +def agent_family_rules() -> Sequence[tuple[str, str]]: + return _AGENT_FAMILY_RULES + + +__all__ = [ + "agent_family_rules", + "agent_label_contract_version", + "map_agent_family", +] diff --git a/codeclone/analytics/capabilities.py b/codeclone/analytics/capabilities.py new file mode 100644 index 00000000..2b42bbbb --- /dev/null +++ b/codeclone/analytics/capabilities.py @@ -0,0 +1,63 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +from dataclasses import dataclass +from typing import Literal + +AnalyticsCapability = Literal["base", "embed", "cluster", "full"] + + +@dataclass(frozen=True, slots=True) +class CapabilityStatus: + available: bool + missing_packages: tuple[str, ...] 
+ + +def _package_available(name: str) -> bool: + try: + importlib.import_module(name) + except ImportError: + return False + return True + + +def check_capability(capability: AnalyticsCapability) -> CapabilityStatus: + if capability == "base": + return CapabilityStatus(available=True, missing_packages=()) + missing: list[str] = [] + if capability in {"embed", "full"}: + missing.extend( + package + for package in ("fastembed", "lancedb") + if not _package_available(package) + ) + if capability in {"cluster", "full"}: + missing.extend( + package + for package in ("sklearn", "hdbscan") + if not _package_available(package) + ) + return CapabilityStatus( + available=not missing, + missing_packages=tuple(sorted(set(missing))), + ) + + +def install_hint(missing_packages: tuple[str, ...]) -> str: + if not missing_packages: + return "uv sync --extra analytics" + return "uv sync --extra analytics" + + +__all__ = [ + "AnalyticsCapability", + "CapabilityStatus", + "check_capability", + "install_hint", +] diff --git a/codeclone/analytics/clustering/__init__.py b/codeclone/analytics/clustering/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/clustering/canonicalize.py b/codeclone/analytics/clustering/canonicalize.py new file mode 100644 index 00000000..4715d211 --- /dev/null +++ b/codeclone/analytics/clustering/canonicalize.py @@ -0,0 +1,110 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import math +from collections.abc import Sequence + +from ..corpus.keys import membership_digest +from .models import NOISE_LABEL, ClusterPartition + + +def canonicalize_partitions( + partitions: Sequence[ClusterPartition], + *, + coordinates: dict[str, tuple[float, ...]], +) -> tuple[ClusterPartition, ...]: + """Assign display order: size desc, medoid asc, membership_digest asc.""" + non_noise = [part for part in partitions if part.cluster_label != NOISE_LABEL] + noise = [part for part in partitions if part.cluster_label == NOISE_LABEL] + non_noise.sort( + key=lambda part: ( + -len(part.snapshot_item_ids), + medoid_item_id( + member_ids=part.snapshot_item_ids, + coordinates=coordinates, + ), + part.membership_digest, + ) + ) + canonical: list[ClusterPartition] = [] + for _display_id, part in enumerate(non_noise, start=1): + canonical.append( + ClusterPartition( + cluster_label=part.cluster_label, + snapshot_item_ids=part.snapshot_item_ids, + membership_digest=part.membership_digest, + ) + ) + canonical.extend(noise) + return tuple(canonical) + + +def display_cluster_id_map( + partitions: Sequence[ClusterPartition], +) -> dict[int, int | None]: + mapping: dict[int, int | None] = {} + display = 1 + for part in partitions: + if part.cluster_label == NOISE_LABEL: + mapping[part.cluster_label] = None + continue + mapping[part.cluster_label] = display + display += 1 + return mapping + + +def medoid_item_id( + *, + member_ids: Sequence[str], + coordinates: dict[str, tuple[float, ...]], +) -> str: + if not member_ids: + return "" + if len(member_ids) == 1: + return member_ids[0] + + def average_distance(item_id: str) -> float: + anchor = coordinates.get(item_id) + if anchor is None: + return float("inf") + total = 0.0 + count = 0 + for other_id in member_ids: + if other_id == item_id: + continue + other = coordinates.get(other_id) + if other is None: + 
continue + total += _euclidean(anchor, other) + count += 1 + return total / count if count else float("inf") + + return min(member_ids, key=lambda item_id: (average_distance(item_id), item_id)) + + +def _euclidean(left: Sequence[float], right: Sequence[float]) -> float: + return math.sqrt(float(sum((a - b) ** 2 for a, b in zip(left, right, strict=True)))) + + +def partition_membership_map( + partitions: Sequence[ClusterPartition], +) -> dict[str, str]: + mapping: dict[str, str] = {} + for part in partitions: + digest = membership_digest(list(part.snapshot_item_ids)) + for item_id in part.snapshot_item_ids: + mapping[item_id] = digest + return mapping + + +__all__ = [ + "canonicalize_partitions", + "display_cluster_id_map", + "medoid_item_id", + "partition_membership_map", +] diff --git a/codeclone/analytics/clustering/diagnostics.py b/codeclone/analytics/clustering/diagnostics.py new file mode 100644 index 00000000..65b9aa72 --- /dev/null +++ b/codeclone/analytics/clustering/diagnostics.py @@ -0,0 +1,544 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +import math +import re +from collections import Counter +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Literal + +from ..contracts import ClusterAssignmentRecord, CorpusItemRecord +from ..integrity import validate_cluster_diagnostic_refs +from .canonicalize import medoid_item_id +from .models import NOISE_LABEL, ClusterPartition + + +@dataclass(frozen=True, slots=True) +class CorrelationCell: + numerator: int + denominator: int + rate: float | None + insufficient_sample: bool + + +@dataclass(frozen=True, slots=True) +class NoiseExplorerFlags: + short_text: bool + long_text: bool + multiple_paragraphs: bool + high_conjunction_count: bool + template_match: bool + low_membership_strength: bool + + +@dataclass(frozen=True, slots=True) +class MetadataDisplayValue: + kind: Literal["unknown", "confirmed_none", "empty_collection", "value"] + display: str + + +@dataclass(frozen=True, slots=True) +class ItemPreview: + snapshot_item_id: str + source_record_id: str + source_kind: str + intent_id: str | None + normalized_text_preview: str + membership_strength: float | None + agent_family: MetadataDisplayValue + outcome: MetadataDisplayValue + quality_tier: MetadataDisplayValue + scope_check_status: MetadataDisplayValue + verification_status: MetadataDisplayValue + + +@dataclass(frozen=True, slots=True) +class NumericFieldSummary: + field: str + known_count: int + unknown_count: int + min: int | None + p25: float | None + median: float | None + p75: float | None + max: int | None + mean: float | None + buckets: dict[str, int] + + +EMPTY_MEANS_CONFIRMED_NONE_FIELDS = frozenset({"anomaly_kinds"}) +MAX_PREVIEW_CHARACTERS = 240 +_MISSING = object() + + +def cluster_size_percent(size: int, total: int) -> float: + if total <= 0: + return 0.0 + return (size / total) * 100.0 + + +def 
metadata_distribution( + items: Sequence[CorpusItemRecord], + *, + field: str, + min_sample_size: int, +) -> dict[str, CorrelationCell]: + counts: Counter[str] = Counter() + for item in items: + payload = _metadata_object(item.metadata_json) + for key in _metadata_values(payload.get(field)): + counts[key] += 1 + total = len(items) + return { + key: _cell(count, total, min_sample_size=min_sample_size) + for key, count in sorted(counts.items()) + } + + +def correlation_rate( + *, + numerator: int, + denominator: int, + min_sample_size: int, +) -> CorrelationCell: + return _cell(numerator, denominator, min_sample_size=min_sample_size) + + +def build_cluster_diagnostics( + *, + partition: ClusterPartition, + items_by_id: Mapping[str, CorpusItemRecord], + coordinates: Mapping[str, tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + total_items: int, + min_correlation_sample_size: int, +) -> dict[str, object]: + member_items = [ + items_by_id[item_id] + for item_id in partition.snapshot_item_ids + if item_id in items_by_id + ] + size = len(member_items) + medoid = medoid_item_id( + member_ids=partition.snapshot_item_ids, + coordinates=dict(coordinates), + ) + strengths = [ + membership_strengths.get(item_id) for item_id in partition.snapshot_item_ids + ] + avg_strength = _average([value for value in strengths if value is not None]) + metadata_fields = ( + "agent_family", + "outcome", + "quality_tier", + "scope_check_status", + "verification_status", + "scope_expanded", + "anomaly_kinds", + ) + distributions = { + field: { + key: { + "numerator": cell.numerator, + "denominator": cell.denominator, + "rate": cell.rate, + "insufficient_sample": cell.insufficient_sample, + } + for key, cell in metadata_distribution( + member_items, + field=field, + min_sample_size=min_correlation_sample_size, + ).items() + } + for field in metadata_fields + } + representatives = _representative_ids( + member_ids=partition.snapshot_item_ids, + medoid=medoid, + 
coordinates=coordinates, + membership_strengths=membership_strengths, + ) + boundary_items = _boundary_ids( + member_ids=partition.snapshot_item_ids, + medoid=medoid, + coordinates=coordinates, + membership_strengths=membership_strengths, + ) + diagnostics: dict[str, object] = { + "cluster_label": partition.cluster_label, + "membership_digest": partition.membership_digest, + "size": size, + "size_percent": cluster_size_percent(size, total_items), + "medoid_snapshot_item_id": medoid, + "average_membership_strength": avg_strength, + "representatives": list(representatives), + "boundary_items": list(boundary_items), + "metadata_distributions": distributions, + "min_correlation_sample_size": min_correlation_sample_size, + } + if partition.cluster_label == NOISE_LABEL: + diagnostics["noise_items"] = [ + { + "snapshot_item_id": item.snapshot_item_id, + "flags": _flags_dict( + noise_explorer_flags( + item=item, + membership_strength=membership_strengths.get( + item.snapshot_item_id + ), + ) + ), + } + for item in sorted(member_items, key=lambda entry: entry.snapshot_item_id) + ] + else: + validate_cluster_diagnostic_refs( + cluster_label=partition.cluster_label, + diagnostics=diagnostics, + items_by_id=items_by_id, + assigned_item_ids=partition.snapshot_item_ids, + ) + return diagnostics + + +def build_item_preview( + item: CorpusItemRecord, + assignment: ClusterAssignmentRecord | None, + *, + source_kind: str, + source_record_id: str, +) -> ItemPreview: + metadata = _metadata_object(item.metadata_json) + return ItemPreview( + snapshot_item_id=item.snapshot_item_id, + source_record_id=source_record_id, + source_kind=source_kind, + intent_id=item.intent_id if source_kind == "intent_historical" else None, + normalized_text_preview=truncate_preview(item.normalized_text), + membership_strength=( + assignment.membership_strength if assignment is not None else None + ), + agent_family=metadata_display_value(metadata, "agent_family"), + outcome=metadata_display_value(metadata, 
"outcome"), + quality_tier=metadata_display_value(metadata, "quality_tier"), + scope_check_status=metadata_display_value(metadata, "scope_check_status"), + verification_status=metadata_display_value(metadata, "verification_status"), + ) + + +def truncate_preview(text: str) -> str: + if len(text) <= MAX_PREVIEW_CHARACTERS: + return text + return text[: MAX_PREVIEW_CHARACTERS - 1] + "\u2026" + + +def preview_digest(preview: str) -> str: + return hashlib.sha256(preview.encode("utf-8")).hexdigest() + + +def metadata_display_value( + metadata: Mapping[str, object], + field: str, +) -> MetadataDisplayValue: + value = metadata.get(field, _MISSING) + if value is _MISSING or value is None: + return MetadataDisplayValue(kind="unknown", display="unknown") + if isinstance(value, list): + if not value: + if field in EMPTY_MEANS_CONFIRMED_NONE_FIELDS: + return MetadataDisplayValue( + kind="confirmed_none", + display="none (confirmed empty)", + ) + return MetadataDisplayValue( + kind="empty_collection", + display="empty collection", + ) + return MetadataDisplayValue( + kind="value", + display=", ".join(sorted({str(item) for item in value})), + ) + if isinstance(value, bool): + return MetadataDisplayValue( + kind="value", + display="true" if value else "false", + ) + return MetadataDisplayValue(kind="value", display=str(value)) + + +def numeric_field_summary( + items: Sequence[CorpusItemRecord], + *, + field: str, +) -> NumericFieldSummary: + values: list[int] = [] + for item in items: + if field == "description_length": + values.append(len(item.normalized_text)) + continue + value = _metadata_object(item.metadata_json).get(field) + if isinstance(value, int) and not isinstance(value, bool): + values.append(value) + values.sort() + unknown_count = len(items) - len(values) + return NumericFieldSummary( + field=field, + known_count=len(values), + unknown_count=unknown_count, + min=values[0] if values else None, + p25=linear_percentile(values, 25.0), + median=linear_percentile(values, 
50.0), + p75=linear_percentile(values, 75.0), + max=values[-1] if values else None, + mean=(sum(values) / len(values)) if values else None, + buckets=_numeric_buckets(field, values), + ) + + +def linear_percentile( + sorted_values: Sequence[float], + q: float, +) -> float | None: + if not 0.0 <= q <= 100.0: + raise ValueError("percentile q must be between 0 and 100") + if not sorted_values: + return None + if len(sorted_values) == 1: + return float(sorted_values[0]) + rank = (len(sorted_values) - 1) * (q / 100.0) + lower = math.floor(rank) + upper = math.ceil(rank) + if lower == upper: + return float(sorted_values[lower]) + lower_value = sorted_values[lower] + upper_value = sorted_values[upper] + return float(lower_value + (rank - lower) * (upper_value - lower_value)) + + +def _numeric_buckets(field: str, values: Sequence[int]) -> dict[str, int]: + if field == "description_length": + buckets = {"0-39": 0, "40-119": 0, "120-399": 0, "400+": 0} + for value in values: + if value < 40: + buckets["0-39"] += 1 + elif value < 120: + buckets["40-119"] += 1 + elif value < 400: + buckets["120-399"] += 1 + else: + buckets["400+"] += 1 + return buckets + buckets = {"0": 0, "1-3": 0, "4-10": 0, "11+": 0} + for value in values: + if value == 0: + buckets["0"] += 1 + elif value <= 3: + buckets["1-3"] += 1 + elif value <= 10: + buckets["4-10"] += 1 + else: + buckets["11+"] += 1 + return buckets + + +def noise_explorer_flags( + *, + item: CorpusItemRecord, + membership_strength: float | None, + strength_threshold: float = 0.2, +) -> NoiseExplorerFlags: + text = item.normalized_text + conjunctions = len(re.findall(r"\b(and|or|but|while|whereas)\b", text, re.I)) + return NoiseExplorerFlags( + short_text=len(text) < 40, + long_text=len(text) > 800, + multiple_paragraphs=text.count("\n\n") >= 2, + high_conjunction_count=conjunctions >= 4, + template_match=text.startswith("<"), + low_membership_strength=( + membership_strength is not None and membership_strength < strength_threshold + 
), + ) + + +def nearest_cluster_ids( + *, + cluster_label: int, + centroids: Mapping[int, tuple[float, ...]], + limit: int = 3, +) -> tuple[int, ...]: + origin = centroids.get(cluster_label) + if origin is None: + return () + distances: list[tuple[float, int]] = [] + for label, centroid in centroids.items(): + if label in (cluster_label, NOISE_LABEL): + continue + distances.append((_euclidean(origin, centroid), label)) + distances.sort(key=lambda item: (item[0], item[1])) + return tuple(label for _distance, label in distances[:limit]) + + +def compute_centroids( + *, + partitions: Sequence[ClusterPartition], + coordinates: Mapping[str, tuple[float, ...]], +) -> dict[int, tuple[float, ...]]: + centroids: dict[int, tuple[float, ...]] = {} + for partition in partitions: + if partition.cluster_label == NOISE_LABEL: + continue + vectors = [ + coordinates[item_id] + for item_id in partition.snapshot_item_ids + if item_id in coordinates + ] + if not vectors: + continue + dim = len(vectors[0]) + sums = [0.0] * dim + for vector in vectors: + for index, value in enumerate(vector): + sums[index] += value + count = float(len(vectors)) + centroids[partition.cluster_label] = tuple(value / count for value in sums) + return centroids + + +def _metadata_object(text: str) -> dict[str, object]: + try: + parsed = json.loads(text) + except json.JSONDecodeError: + return {} + return parsed if isinstance(parsed, dict) else {} + + +def _metadata_values(value: object) -> tuple[str, ...]: + if value is None: + return ("null",) + if isinstance(value, list): + normalized = tuple(sorted({str(item) for item in value})) + return normalized or ("none",) + if isinstance(value, bool): + return ("true" if value else "false",) + return (str(value),) + + +def _representative_ids( + *, + member_ids: Sequence[str], + medoid: str, + coordinates: Mapping[str, tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + limit: int = 5, +) -> tuple[str, ...]: + if not member_ids: + return () + 
ordered = sorted( + (item_id for item_id in member_ids if item_id != medoid), + key=lambda item_id: ( + -_strength(membership_strengths.get(item_id)), + _distance_from(item_id, medoid, coordinates), + item_id, + ), + ) + return tuple(([medoid] if medoid else []) + ordered)[:limit] + + +def _boundary_ids( + *, + member_ids: Sequence[str], + medoid: str, + coordinates: Mapping[str, tuple[float, ...]], + membership_strengths: Mapping[str, float | None], + limit: int = 5, +) -> tuple[str, ...]: + ordered = sorted( + member_ids, + key=lambda item_id: ( + _strength(membership_strengths.get(item_id)), + -_distance_from(item_id, medoid, coordinates), + item_id, + ), + ) + return tuple(ordered[:limit]) + + +def _strength(value: float | None) -> float: + return value if value is not None else 1.0 + + +def _distance_from( + item_id: str, + anchor_id: str, + coordinates: Mapping[str, tuple[float, ...]], +) -> float: + item = coordinates.get(item_id) + anchor = coordinates.get(anchor_id) + if item is None or anchor is None: + return float("inf") + return _euclidean(item, anchor) + + +def _flags_dict(flags: NoiseExplorerFlags) -> dict[str, bool]: + return { + "short_text": flags.short_text, + "long_text": flags.long_text, + "multiple_paragraphs": flags.multiple_paragraphs, + "high_conjunction_count": flags.high_conjunction_count, + "template_match": flags.template_match, + "low_membership_strength": flags.low_membership_strength, + } + + +def _cell(numerator: int, denominator: int, *, min_sample_size: int) -> CorrelationCell: + insufficient = denominator < min_sample_size + rate = (numerator / denominator) if denominator and not insufficient else None + return CorrelationCell( + numerator=numerator, + denominator=denominator, + rate=rate, + insufficient_sample=insufficient, + ) + + +def _average(values: Sequence[float]) -> float | None: + if not values: + return None + return sum(values) / len(values) + + +def _euclidean(left: Sequence[float], right: Sequence[float]) -> float: + 
return math.sqrt(float(sum((a - b) ** 2 for a, b in zip(left, right, strict=True)))) + + +__all__ = [ + "EMPTY_MEANS_CONFIRMED_NONE_FIELDS", + "MAX_PREVIEW_CHARACTERS", + "CorrelationCell", + "ItemPreview", + "MetadataDisplayValue", + "NoiseExplorerFlags", + "NumericFieldSummary", + "build_cluster_diagnostics", + "build_item_preview", + "cluster_size_percent", + "compute_centroids", + "correlation_rate", + "linear_percentile", + "metadata_display_value", + "metadata_distribution", + "nearest_cluster_ids", + "noise_explorer_flags", + "numeric_field_summary", + "preview_digest", + "truncate_preview", +] diff --git a/codeclone/analytics/clustering/models.py b/codeclone/analytics/clustering/models.py new file mode 100644 index 00000000..6e4e276d --- /dev/null +++ b/codeclone/analytics/clustering/models.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class ClusteringParameters: + pca_dimensions: int + min_cluster_size: int + min_samples: int + cluster_selection_method: str + + +@dataclass(frozen=True, slots=True) +class EffectiveClusteringParameters: + pca_dimensions: int + min_cluster_size: int + min_samples: int + cluster_selection_method: str + n_samples: int + n_features: int + + +@dataclass(frozen=True, slots=True) +class ClusterPartition: + cluster_label: int + snapshot_item_ids: tuple[str, ...] + membership_digest: str + + +@dataclass(frozen=True, slots=True) +class ClusteringPipelineResult: + partitions: tuple[ClusterPartition, ...] + labels: tuple[int, ...] + membership_strengths: tuple[float | None, ...] + reduced_coordinates: tuple[tuple[float, ...], ...] 
+ effective_parameters: EffectiveClusteringParameters + + +NOISE_LABEL = -1 + + +__all__ = [ + "NOISE_LABEL", + "ClusterPartition", + "ClusteringParameters", + "ClusteringPipelineResult", + "EffectiveClusteringParameters", +] diff --git a/codeclone/analytics/clustering/pipeline.py b/codeclone/analytics/clustering/pipeline.py new file mode 100644 index 00000000..0ec6e9f3 --- /dev/null +++ b/codeclone/analytics/clustering/pipeline.py @@ -0,0 +1,179 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +import math +import types +from collections.abc import Sequence +from typing import Any + +from ..corpus.keys import membership_digest +from ..exceptions import AnalyticsCapabilityError +from .models import ( + NOISE_LABEL, + ClusteringParameters, + ClusteringPipelineResult, + ClusterPartition, + EffectiveClusteringParameters, +) + + +def resolve_effective_parameters( + requested: ClusteringParameters, + *, + n_samples: int, + n_features: int, +) -> EffectiveClusteringParameters | None: + effective_pca = min(requested.pca_dimensions, n_samples - 1, n_features) + eligible = n_samples + if ( + requested.min_cluster_size > eligible + or requested.min_samples > eligible + or effective_pca < 2 + ): + return None + return EffectiveClusteringParameters( + pca_dimensions=effective_pca, + min_cluster_size=requested.min_cluster_size, + min_samples=requested.min_samples, + cluster_selection_method=requested.cluster_selection_method, + n_samples=n_samples, + n_features=n_features, + ) + + +def _l2_normalize(matrix: list[list[float]]) -> list[list[float]]: + normalized: list[list[float]] = [] + for row in matrix: + norm = math.sqrt(sum(value * value for value in row)) or 1.0 + normalized.append([value / 
norm for value in row]) + return normalized + + +def _validate_embedding_matrix(embeddings: Sequence[Sequence[float]]) -> int: + if not embeddings: + return 0 + width = len(embeddings[0]) + if width <= 0: + raise ValueError("embedding vectors must not be empty") + for index, row in enumerate(embeddings): + if len(row) != width: + raise ValueError( + f"embedding dimension mismatch at row {index}: " + f"actual={len(row)}, expected={width}" + ) + if not all(math.isfinite(float(value)) for value in row): + raise ValueError(f"embedding row {index} contains non-finite values") + return width + + +def _load_sklearn_pca() -> Any: # Any: optional sklearn import boundary + try: + decomposition = importlib.import_module("sklearn.decomposition") + except ImportError as exc: + raise AnalyticsCapabilityError( + "scikit-learn is required for analytics clustering; " + "install with: uv sync --extra analytics" + ) from exc + return decomposition.PCA + + +def _load_hdbscan() -> types.ModuleType: + try: + return importlib.import_module("hdbscan") + except ImportError as exc: + raise AnalyticsCapabilityError( + "hdbscan is required for analytics clustering; " + "install with: uv sync --extra analytics" + ) from exc + + +def run_clustering_pipeline( + *, + snapshot_item_ids: Sequence[str], + embeddings: Sequence[Sequence[float]], + requested: ClusteringParameters, + random_seed: int = 42, +) -> ClusteringPipelineResult | None: + if len(snapshot_item_ids) != len(embeddings): + msg = "snapshot_item_ids and embeddings length mismatch" + raise ValueError(msg) + if not snapshot_item_ids: + return None + n_samples = len(snapshot_item_ids) + n_features = _validate_embedding_matrix(embeddings) + effective = resolve_effective_parameters( + requested, + n_samples=n_samples, + n_features=n_features, + ) + if effective is None: + return None + + matrix = _l2_normalize([list(row) for row in embeddings]) + pca_cls = _load_sklearn_pca() + reducer = pca_cls( + n_components=effective.pca_dimensions, + 
whiten=False, + svd_solver="full", + random_state=random_seed, + ) + reduced = reducer.fit_transform(matrix) + reduced_rows = [tuple(float(value) for value in row) for row in reduced.tolist()] + + hdbscan = _load_hdbscan() + clusterer = hdbscan.HDBSCAN( + min_cluster_size=effective.min_cluster_size, + min_samples=effective.min_samples, + metric="euclidean", + cluster_selection_method=effective.cluster_selection_method, + core_dist_n_jobs=1, + ) + labels_raw = clusterer.fit_predict(reduced) + labels = tuple(int(value) for value in labels_raw.tolist()) + probabilities = getattr(clusterer, "probabilities_", None) + if probabilities is not None: + strengths: list[float | None] = [ + float(value) for value in probabilities.tolist() + ] + else: + strengths = [None for _ in labels] + + by_label: dict[int, list[str]] = {} + for item_id, label in zip(snapshot_item_ids, labels, strict=True): + by_label.setdefault(label, []).append(item_id) + + partitions: list[ClusterPartition] = [] + for label, members in sorted(by_label.items()): + ordered = sorted(members) + partitions.append( + ClusterPartition( + cluster_label=label, + snapshot_item_ids=tuple(ordered), + membership_digest=membership_digest(ordered), + ) + ) + + return ClusteringPipelineResult( + partitions=tuple(partitions), + labels=labels, + membership_strengths=tuple(strengths), + reduced_coordinates=tuple(reduced_rows), + effective_parameters=effective, + ) + + +def is_noise_label(label: int) -> bool: + return label == NOISE_LABEL + + +__all__ = [ + "is_noise_label", + "resolve_effective_parameters", + "run_clustering_pipeline", +] diff --git a/codeclone/analytics/clustering/sweep.py b/codeclone/analytics/clustering/sweep.py new file mode 100644 index 00000000..80f73a56 --- /dev/null +++ b/codeclone/analytics/clustering/sweep.py @@ -0,0 +1,245 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +from collections.abc import Sequence +from dataclasses import dataclass +from importlib.metadata import PackageNotFoundError, version +from typing import Literal + +from ...utils.json_io import json_text +from ..corpus.keys import sha256_hex +from ..profiles.models import ClusteringProfileManifest, ProfileSearchSpace +from .models import ClusteringParameters, EffectiveClusteringParameters +from .pipeline import resolve_effective_parameters + +SWEEP_PCA_DIMENSIONS = (32, 64, 128) +SWEEP_MIN_CLUSTER_SIZES = (5, 8, 12, 15) +SWEEP_MIN_SAMPLES = (1, 3, 5) +SWEEP_SELECTION_METHODS: tuple[Literal["eom", "leaf"], ...] = ("eom", "leaf") + + +@dataclass(frozen=True, slots=True) +class SweepCandidate: + requested: ClusteringParameters + effective: EffectiveClusteringParameters + dedupe_key: str + + +@dataclass(frozen=True, slots=True) +class SweepCandidateResult: + candidate: SweepCandidate + score: float + cluster_count: int + noise_fraction: float + + +def iter_sweep_candidates( + *, + n_samples: int, + n_features: int, + grid: ProfileSearchSpace | None = None, +) -> tuple[SweepCandidate, ...]: + selected_grid = grid or ProfileSearchSpace( + pca_dimensions=SWEEP_PCA_DIMENSIONS, + min_cluster_size=SWEEP_MIN_CLUSTER_SIZES, + min_samples=SWEEP_MIN_SAMPLES, + cluster_selection_method=SWEEP_SELECTION_METHODS, + ) + return iter_grid_candidates( + grid=selected_grid, + n_samples=n_samples, + n_features=n_features, + ) + + +def iter_grid_candidates( + *, + grid: ProfileSearchSpace, + n_samples: int, + n_features: int, +) -> tuple[SweepCandidate, ...]: + seen: set[str] = set() + candidates: list[SweepCandidate] = [] + for pca_dimensions in grid.pca_dimensions: + for min_cluster_size in grid.min_cluster_size: + for min_samples in grid.min_samples: + 
for method in grid.cluster_selection_method: + requested = ClusteringParameters( + pca_dimensions=pca_dimensions, + min_cluster_size=min_cluster_size, + min_samples=min_samples, + cluster_selection_method=method, + ) + effective = resolve_effective_parameters( + requested, + n_samples=n_samples, + n_features=n_features, + ) + if effective is not None: + dedupe_key = candidate_dedupe_key(effective) + if dedupe_key not in seen: + seen.add(dedupe_key) + candidates.append( + SweepCandidate( + requested=requested, + effective=effective, + dedupe_key=dedupe_key, + ) + ) + return tuple( + sorted( + candidates, + key=lambda item: ( + item.effective.pca_dimensions, + item.effective.min_cluster_size, + item.effective.min_samples, + item.effective.cluster_selection_method, + ), + ) + ) + + +def iter_profile_candidates( + *, + profile: ClusteringProfileManifest, + n_samples: int, + n_features: int, +) -> tuple[SweepCandidate, ...]: + return iter_grid_candidates( + grid=profile.primary_space, + n_samples=n_samples, + n_features=n_features, + ) + + +def candidate_dedupe_key(effective: EffectiveClusteringParameters) -> str: + return "|".join( + ( + str(effective.pca_dimensions), + str(effective.min_cluster_size), + str(effective.min_samples), + effective.cluster_selection_method, + ) + ) + + +def candidate_space_digest( + candidates: Sequence[SweepCandidate], + *, + fixed_parameters: dict[str, object] | None = None, +) -> str: + return sha256_hex( + json_text( + { + "candidate_dedupe_keys": sorted( + candidate.dedupe_key for candidate in candidates + ), + "fixed_parameters": fixed_parameters or {}, + }, + sort_keys=True, + ) + ) + + +def rank_sweep_results( + results: Sequence[SweepCandidateResult], +) -> SweepCandidateResult | None: + if not results: + return None + return min( + results, + key=lambda item: ( + -item.score, + item.candidate.effective.pca_dimensions, + item.candidate.effective.min_cluster_size, + item.candidate.effective.min_samples, + 
item.candidate.effective.cluster_selection_method, + ), + ) + + +def score_clustering_result( + *, + cluster_count: int, + noise_fraction: float, + n_samples: int, +) -> float: + if n_samples == 0: + return 0.0 + cluster_bonus = min(cluster_count, 12) / 12.0 + noise_penalty = noise_fraction + return cluster_bonus - noise_penalty + + +def run_digest( + *, + snapshot_id: str, + embedding_generation_id: str, + effective: EffectiveClusteringParameters, + random_seed: int, + algorithm_manifest: dict[str, object], +) -> str: + payload = { + "snapshot_id": snapshot_id, + "embedding_generation_id": embedding_generation_id, + "effective_parameters": { + "pca_dimensions": effective.pca_dimensions, + "min_cluster_size": effective.min_cluster_size, + "min_samples": effective.min_samples, + "cluster_selection_method": effective.cluster_selection_method, + "n_samples": effective.n_samples, + "n_features": effective.n_features, + }, + "random_seed": random_seed, + "algorithm_manifest": algorithm_manifest, + } + return sha256_hex(json_text(payload, sort_keys=True)) + + +def clustering_algorithm_manifest() -> dict[str, object]: + return { + "python_version": f"{sys.version_info.major}.{sys.version_info.minor}", + "numpy_version": _package_version("numpy"), + "scipy_version": _package_version("scipy"), + "scikit_learn_version": _package_version("scikit-learn"), + "hdbscan_version": _package_version("hdbscan"), + "vector_preprocessing": "l2_normalize", + "pca_solver": "full", + "pca_whiten": False, + "clustering_input": "pca_reduced_coordinates", + "hdbscan_implementation": "hdbscan", + "clustering_metric": "euclidean", + "hdbscan_core_dist_n_jobs": 1, + } + + +def _package_version(distribution: str) -> str: + try: + return version(distribution) + except PackageNotFoundError: + return "unknown" + + +__all__ = [ + "SWEEP_MIN_CLUSTER_SIZES", + "SWEEP_MIN_SAMPLES", + "SWEEP_PCA_DIMENSIONS", + "SWEEP_SELECTION_METHODS", + "SweepCandidate", + "SweepCandidateResult", + 
"candidate_dedupe_key", + "candidate_space_digest", + "clustering_algorithm_manifest", + "iter_grid_candidates", + "iter_profile_candidates", + "iter_sweep_candidates", + "rank_sweep_results", + "run_digest", + "score_clustering_result", +] diff --git a/codeclone/analytics/contracts.py b/codeclone/analytics/contracts.py new file mode 100644 index 00000000..8146c2b8 --- /dev/null +++ b/codeclone/analytics/contracts.py @@ -0,0 +1,224 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal, NamedTuple + +ClusteringRunStatus = Literal["pending", "running", "completed", "failed"] +ProfileBatchStatus = Literal[ + "running", + "completed", + "completed_partial", + "failed", +] +IntentRepresentationKind = Literal[ + "intent.description.v1", + "intent.description_with_frame.v1", +] +CorpusLane = Literal["intent"] + +INTENT_REPRESENTATION_DESCRIPTION = "intent.description.v1" +INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME = "intent.description_with_frame.v1" + + +@dataclass(frozen=True, slots=True) +class CorpusItemRecord: + snapshot_id: str + representation_key: str + snapshot_item_id: str + source_record_key: str + project_id: str + intent_id: str + normalized_text: str + normalized_digest: str + normalizer_version: str + representation_digest: str + metadata_json: str + registry_overlay_json: str | None + + +@dataclass(frozen=True, slots=True) +class CorpusSnapshotRecord: + snapshot_id: str + lane: CorpusLane + representation_kind: str + representation_version: str + source_stores_json: str + source_schema_versions_json: str + record_count: int + source_digest: str + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class 
EmbeddingGenerationRecord: + embedding_generation_id: str + provider_id: str + provider_package_version: str + model_id: str + model_revision: str | None + model_artifact_fingerprint: str | None + exact_model_artifact_reproducibility: bool + dimensions: int + embedding_contract_version: str + embedding_similarity_metric: str + vector_preprocessing: str + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class EmbeddingItemRecord: + embedding_generation_id: str + snapshot_item_id: str + vector_row_key: str + vector_digest: str + dimensions: int + + +@dataclass(frozen=True, slots=True) +class ClusteringRunRecord: + clustering_run_id: str + snapshot_id: str + embedding_generation_id: str + requested_parameters_json: str + effective_parameters_json: str + random_seed: int + run_digest: str + recommended_by_heuristic: bool + selected_by_maintainer: bool + status: ClusteringRunStatus + created_at_utc: str + finished_at_utc: str | None + error_message: str | None + + +@dataclass(frozen=True, slots=True) +class ClusterAssignmentRecord: + clustering_run_id: str + snapshot_item_id: str + cluster_label: int + membership_strength: float | None + membership_digest: str + + +@dataclass(frozen=True, slots=True) +class ClusterSummaryRecord: + clustering_run_id: str + cluster_label: int + display_cluster_id: int | None + membership_digest: str + size: int + diagnostics_json: str + + +@dataclass(frozen=True, slots=True) +class ProfileManifestSnapshotRecord: + profile_manifest_digest: str + profile_id: str + profile_version: str + manifest_schema_version: str + canonical_manifest_json: str + label: str + description: str + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class ProfileBatchIdentity: + profile_batch_id: str + snapshot_id: str + embedding_generation_id: str + profile_id: str + profile_version: str + profile_manifest_digest: str + candidate_space_digest: str + + +@dataclass(frozen=True, slots=True) +class ProfileBatchRecord: + profile_batch_id: 
str + snapshot_id: str + embedding_generation_id: str + profile_id: str + profile_manifest_digest: str + candidate_space_digest: str + started_at_utc: str + finished_at_utc: str | None + status: ProfileBatchStatus + candidate_count_planned: int + candidate_count_succeeded: int + candidate_count_failed: int + recommended_clustering_run_id: str | None + recommendation_rationale_json: str | None + batch_max_cluster_count: int | None + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class ProfileBatchRunRecord: + profile_batch_id: str + clustering_run_id: str + candidate_ordinal: int + candidate_dedupe_key: str + + +@dataclass(frozen=True, slots=True) +class ProfileAssessmentRecord: + profile_batch_id: str + clustering_run_id: str + profile_id: str + profile_version: str + profile_manifest_digest: str + suitable_for_profile: bool + rejection_reasons_json: str + observed_metrics_json: str | None + assessed_digest: str + + +@dataclass(frozen=True, slots=True) +class RunSelectionRecord: + selection_id: str + snapshot_id: str + embedding_generation_id: str + profile_batch_id: str | None + profile_id: str | None + profile_manifest_digest: str | None + selected_run_id: str + selected_at_utc: str + selected_by: str + rationale: str | None + supersedes_selection_id: str | None + + +class ActiveSelectionResult(NamedTuple): + record: RunSelectionRecord | None + ambiguous: bool + + +__all__ = [ + "INTENT_REPRESENTATION_DESCRIPTION", + "INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME", + "ActiveSelectionResult", + "ClusterAssignmentRecord", + "ClusterSummaryRecord", + "ClusteringRunRecord", + "ClusteringRunStatus", + "CorpusItemRecord", + "CorpusLane", + "CorpusSnapshotRecord", + "EmbeddingGenerationRecord", + "EmbeddingItemRecord", + "IntentRepresentationKind", + "ProfileAssessmentRecord", + "ProfileBatchIdentity", + "ProfileBatchRecord", + "ProfileBatchRunRecord", + "ProfileBatchStatus", + "ProfileManifestSnapshotRecord", + "RunSelectionRecord", +] diff --git 
a/codeclone/analytics/corpus/__init__.py b/codeclone/analytics/corpus/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/corpus/adapters/__init__.py b/codeclone/analytics/corpus/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/corpus/adapters/intent_historical.py b/codeclone/analytics/corpus/adapters/intent_historical.py new file mode 100644 index 00000000..73af3345 --- /dev/null +++ b/codeclone/analytics/corpus/adapters/intent_historical.py @@ -0,0 +1,480 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sqlite3 +from collections import defaultdict +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path + +from ....audit.events import repo_root_digest +from ....audit.reader import AuditRecord, read_intent_declared_records +from ....audit.validation import AUDIT_SCHEMA_VERSION, DEFAULT_AUDIT_PATH +from ....config.intent_registry import ( + IntentRegistryConfigError, + resolve_intent_registry_config, +) +from ....contracts import ( + CORPUS_NORMALIZER_VERSION, + ENGINEERING_MEMORY_SCHEMA_VERSION, + PATCH_TRAIL_SCHEMA_VERSION, +) +from ....memory.project import compute_project_id, resolve_memory_db_path +from ....memory.schema import open_memory_db_readonly +from ....memory.trajectory.agents import trajectory_agent_label +from ....memory.trajectory.anomalies import detect_trajectory_anomalies +from ....memory.trajectory.models import Trajectory +from ....memory.trajectory.patch_trail import patch_trail_from_mapping +from ....memory.trajectory.store import ( + list_trajectories_for_intent_id, + load_trajectory_patch_trail, +) +from 
....utils.json_io import json_text +from ...agent_labels import map_agent_family +from ..keys import ( + representation_key, + representation_version_for_kind, + sha256_hex, + snapshot_item_id, + source_record_key, +) +from ..normalizer import normalize_corpus_text, source_content_digest +from ..registry_overlay import read_registry_overlay +from ..representations.intent import ( + IntentRepresentationInput, + build_representation_text, + declared_constraints_from_audit_payload, + declared_path_families_from_patch_trail, +) +from ..representations.intent import ( + representation_digest as compute_representation_digest, +) +from ..trajectory_selection import ( + TRAJECTORY_SELECTION_RULE_VERSION, + scope_expanded_from_labels, + select_trajectory_for_intent, +) + + +@dataclass(frozen=True, slots=True) +class HistoricalIntentSourceItem: + project_id: str + intent_id: str + source_record_key_value: str + source_content_digest: str + provenance: dict[str, object] + metadata: dict[str, object] + registry_overlay: dict[str, object] | None + representation_input: IntentRepresentationInput + + +@dataclass(frozen=True, slots=True) +class SourceDigestItem: + source_record_key: str + source_content_digest: str + provenance_digest: str + + +def _payload_mapping(record: AuditRecord) -> dict[str, object]: + if record.payload_json: + try: + parsed = json.loads(record.payload_json) + if isinstance(parsed, dict): + return parsed + except json.JSONDecodeError: + return {} + if record.event_core_json: + try: + parsed = json.loads(record.event_core_json) + if isinstance(parsed, dict): + return parsed + except json.JSONDecodeError: + return {} + return {} + + +def _intent_description(payload: Mapping[str, object]) -> str: + value = payload.get("intent_description") + if isinstance(value, str): + return value + return "" + + +def _intent_kind(payload: Mapping[str, object]) -> str | None: + value = payload.get("intent_kind") + if isinstance(value, str) and value.strip(): + return 
value.strip() + return None + + +def _canonical_declaration( + records: Sequence[AuditRecord], +) -> tuple[AuditRecord, bool, tuple[str, ...]]: + ordered = sorted( + records, + key=lambda item: (item.audit_sequence or 0, item.event_id), + ) + canonical = ordered[0] + descriptions = { + _intent_description(_payload_mapping(item)).strip() + for item in ordered + if _intent_description(_payload_mapping(item)).strip() + } + description_conflict = len(descriptions) > 1 + declaration_event_ids = tuple(item.event_id for item in ordered) + return canonical, description_conflict, declaration_event_ids + + +def _resolved_registry_db_path( + root_path: Path, + registry_db_path: Path | None, +) -> Path | None: + if registry_db_path is not None: + return registry_db_path + try: + config = resolve_intent_registry_config(root_path) + except (IntentRegistryConfigError, OSError, ValueError): + return None + if config.backend != "sqlite": + return None + return config.storage_path + + +def extract_historical_intent_items( + *, + root_path: Path, + representation_kind: str, + audit_db_path: Path | None = None, + memory_db_path: Path | None = None, + registry_db_path: Path | None = None, +) -> tuple[HistoricalIntentSourceItem, ...]: + resolved_root = root_path.resolve() + project_id = compute_project_id(resolved_root) + digest = repo_root_digest(resolved_root) + audit_path = audit_db_path or (resolved_root / DEFAULT_AUDIT_PATH) + records = read_intent_declared_records( + db_path=audit_path, + repo_root_digest=digest, + ) + grouped: defaultdict[tuple[str, str], list[AuditRecord]] = defaultdict(list) + for record in records: + intent_id = record.intent_id + if not intent_id: + continue + grouped[(project_id, intent_id)].append(record) + + memory_path = memory_db_path or resolve_memory_db_path(resolved_root) + memory_conn = ( + open_memory_db_readonly(memory_path) if memory_path.is_file() else None + ) + if memory_conn is not None: + memory_conn.row_factory = sqlite3.Row + + 
resolved_registry_db = _resolved_registry_db_path( + resolved_root, + registry_db_path, + ) + + items: list[HistoricalIntentSourceItem] = [] + try: + for (group_project_id, intent_id), group_records in sorted(grouped.items()): + canonical, description_conflict, declaration_event_ids = ( + _canonical_declaration(group_records) + ) + payload = _payload_mapping(canonical) + description = _intent_description(payload) + if not description.strip(): + continue + trajectories: tuple[Trajectory, ...] = () + patch_trail_payload: dict[str, object] | None = None + selected_trajectory = None + discarded_ids: tuple[str, ...] = () + if memory_conn is not None: + trajectories = list_trajectories_for_intent_id( + memory_conn, + project_id=group_project_id, + intent_id=intent_id, + ) + selection = select_trajectory_for_intent(trajectories) + selected_trajectory = selection.selected + discarded_ids = selection.discarded_ids + if selected_trajectory is not None: + patch_trail_payload = load_trajectory_patch_trail( + memory_conn, + trajectory_id=selected_trajectory.id, + ) + + patch_trail_digest: str | None = None + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + patch_trail_digest = trail.patch_trail_digest + + provenance: dict[str, object] = { + "description": { + "source": "audit", + "event_id": canonical.event_id, + "audit_sequence": canonical.audit_sequence, + "duplicate_declaration_count": len(group_records), + "description_conflict": description_conflict, + "declaration_event_ids": list(declaration_event_ids), + }, + "trajectory": { + "selected_trajectory_id": ( + selected_trajectory.id if selected_trajectory else None + ), + "discarded_trajectory_ids": list(discarded_ids), + "selection_rule_version": TRAJECTORY_SELECTION_RULE_VERSION, + }, + "patch_trail": { + "source": "patch_trail", + "digest": patch_trail_digest, + }, + } + + metadata: dict[str, object] = { + "agent_client_raw": None, + "agent_family": 
"unknown", + "outcome": None, + "quality_tier": None, + "finished_at_utc": None, + "scope_expanded": None, + "anomaly_kinds": None, + "scope_check_status": None, + "verification_status": None, + "declared_file_count": None, + "changed_file_count": None, + } + agent_raw: str | None = None + if selected_trajectory is not None: + agent_raw = trajectory_agent_label(selected_trajectory) + metadata["outcome"] = selected_trajectory.outcome + metadata["quality_tier"] = selected_trajectory.quality_tier + metadata["finished_at_utc"] = selected_trajectory.finished_at_utc + metadata["scope_expanded"] = scope_expanded_from_labels( + selected_trajectory.labels + ) + anomalies = detect_trajectory_anomalies( + selected_trajectory, + patch_trail_payload=patch_trail_payload, + ) + metadata["anomaly_kinds"] = sorted({item.kind for item in anomalies}) + elif canonical.agent_label.strip(): + agent_raw = canonical.agent_label.strip() + + metadata["agent_client_raw"] = agent_raw + metadata["agent_family"] = map_agent_family(agent_raw) + + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + metadata["scope_check_status"] = trail.scope_check_status + metadata["verification_status"] = trail.verification_status + metadata["declared_file_count"] = len(trail.declared_files) + metadata["changed_file_count"] = len(trail.changed_files) + + registry_overlay = ( + read_registry_overlay(resolved_registry_db, intent_id=intent_id) + if resolved_registry_db is not None + else None + ) + + rep_input = IntentRepresentationInput( + description=description, + intent_kind=_intent_kind(payload), + declared_path_families=declared_path_families_from_patch_trail( + patch_trail_payload + ), + declared_constraints=declared_constraints_from_audit_payload(payload), + ) + + items.append( + HistoricalIntentSourceItem( + project_id=group_project_id, + intent_id=intent_id, + source_record_key_value=source_record_key( + project_id=group_project_id, + 
intent_id=intent_id, + ), + source_content_digest=source_content_digest( + _raw_representation_inputs( + representation_kind=representation_kind, + payload=rep_input, + ) + ), + provenance=provenance, + metadata=metadata, + registry_overlay=registry_overlay, + representation_input=rep_input, + ) + ) + finally: + if memory_conn is not None: + memory_conn.close() + + return tuple(items) + + +def build_source_digest_items( + items: Sequence[HistoricalIntentSourceItem], + *, + lane: str, + representation_kind: str, +) -> tuple[SourceDigestItem, ...]: + digest_items: list[SourceDigestItem] = [] + for item in items: + provenance_digest = sha256_hex(json_text(item.provenance, sort_keys=True)) + digest_items.append( + SourceDigestItem( + source_record_key=item.source_record_key_value, + source_content_digest=item.source_content_digest, + provenance_digest=provenance_digest, + ) + ) + return tuple(sorted(digest_items, key=lambda entry: entry.source_record_key)) + + +def _raw_representation_inputs( + *, + representation_kind: str, + payload: IntentRepresentationInput, +) -> dict[str, object]: + raw: dict[str, object] = {"description": payload.description} + if representation_kind.endswith("description_with_frame.v1"): + raw.update( + { + "intent_kind": payload.intent_kind, + "declared_path_families": sorted(set(payload.declared_path_families)), + "declared_constraints": sorted(set(payload.declared_constraints)), + } + ) + return raw + + +def compute_source_digest( + *, + items: Sequence[HistoricalIntentSourceItem], + lane: str, + representation_kind: str, + representation_version: str, + source_schema_versions: Mapping[str, str], +) -> str: + digest_items = build_source_digest_items( + items, + lane=lane, + representation_kind=representation_kind, + ) + payload = { + "source_schema_versions": dict(sorted(source_schema_versions.items())), + "lane": lane, + "representation_kind": representation_kind, + "representation_version": representation_version, + "normalizer_version": 
CORPUS_NORMALIZER_VERSION, + "items": [ + { + "source_record_key": entry.source_record_key, + "source_content_digest": entry.source_content_digest, + "provenance_digest": entry.provenance_digest, + } + for entry in digest_items + ], + } + return sha256_hex(json_text(payload, sort_keys=True)) + + +def materialize_corpus_item( + *, + snapshot_id: str, + lane: str, + representation_kind: str, + item: HistoricalIntentSourceItem, +) -> tuple[str, str, str, str, str, str, str, str, str | None, str]: + rep_version = representation_version_for_kind(representation_kind) + source_key = item.source_record_key_value + rep_key = representation_key( + lane=lane, + representation_kind=representation_kind, + representation_version=rep_version, + source_record_key_value=source_key, + ) + snap_item_id = snapshot_item_id( + snapshot_id=snapshot_id, + representation_key_value=rep_key, + ) + normalized = normalize_corpus_text( + build_representation_text( + representation_kind=representation_kind, + payload=item.representation_input, + ) + ) + if not normalized.text: + msg = "normalized representation text is empty" + raise ValueError(msg) + rep_digest = compute_representation_digest( + representation_kind=representation_kind, + normalized_text=normalized.text, + ) + metadata_json = json_text(_materialized_metadata(item), sort_keys=True) + overlay_json = ( + json_text(item.registry_overlay, sort_keys=True) + if item.registry_overlay is not None + else None + ) + return ( + rep_key, + snap_item_id, + source_key, + normalized.text, + normalized.digest, + normalized.normalizer_version, + rep_digest, + metadata_json, + overlay_json, + rep_version, + ) + + +def _materialized_metadata(item: HistoricalIntentSourceItem) -> dict[str, object]: + metadata = dict(item.metadata) + provenance = { + key: dict(value) if isinstance(value, dict) else value + for key, value in item.provenance.items() + } + trajectory = provenance.get("trajectory") + if not isinstance(trajectory, dict): + trajectory = {} 
+ provenance["trajectory"] = trajectory + trajectory["selected"] = trajectory.get("selected_trajectory_id") is not None + patch_trail = provenance.get("patch_trail") + if not isinstance(patch_trail, dict): + patch_trail = {} + provenance["patch_trail"] = patch_trail + patch_trail["present"] = patch_trail.get("digest") is not None + provenance["registry_overlay"] = {"present": item.registry_overlay is not None} + metadata["provenance"] = provenance + return metadata + + +def default_source_schema_versions() -> dict[str, str]: + return { + "audit": AUDIT_SCHEMA_VERSION, + "memory": ENGINEERING_MEMORY_SCHEMA_VERSION, + "patch_trail": PATCH_TRAIL_SCHEMA_VERSION, + } + + +__all__ = [ + "HistoricalIntentSourceItem", + "SourceDigestItem", + "build_source_digest_items", + "compute_source_digest", + "default_source_schema_versions", + "extract_historical_intent_items", + "materialize_corpus_item", +] diff --git a/codeclone/analytics/corpus/keys.py b/codeclone/analytics/corpus/keys.py new file mode 100644 index 00000000..1dbb1b4b --- /dev/null +++ b/codeclone/analytics/corpus/keys.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# Module: codeclone/analytics/corpus/keys.py


def sha256_hex(text: str) -> str:
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
    encoded = text.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()


def source_record_key(*, project_id: str, intent_id: str) -> str:
    """Key one source record by its project and intent identifiers."""
    material = f"{project_id}\n{intent_id}"
    return sha256_hex(material)


def representation_key(
    *,
    lane: str,
    representation_kind: str,
    representation_version: str,
    source_record_key_value: str,
) -> str:
    """Key one representation of a source record.

    The key is the digest of lane, kind, version and source record key,
    joined by newlines in that order.
    """
    head = f"{lane}\n{representation_kind}\n{representation_version}\n"
    return sha256_hex(head + f"{source_record_key_value}")


def snapshot_item_id(*, snapshot_id: str, representation_key_value: str) -> str:
    """Identify a representation inside one specific snapshot."""
    material = f"{snapshot_id}\n{representation_key_value}"
    return sha256_hex(material)


def representation_version_for_kind(representation_kind: str) -> str:
    """Return the representation contract version.

    Every kind currently maps to the same contract version; the argument is
    accepted but not inspected.
    """
    return CORPUS_REPRESENTATION_CONTRACT_VERSION


def membership_digest(snapshot_item_ids: list[str]) -> str:
    """Digest a snapshot's membership independent of input order."""
    return sha256_hex("\n".join(sorted(snapshot_item_ids)))


__all__ = [
    "membership_digest",
    "representation_key",
    "representation_version_for_kind",
    "sha256_hex",
    "snapshot_item_id",
    "source_record_key",
]
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +import unicodedata +from dataclasses import dataclass + +from ...contracts import CORPUS_NORMALIZER_VERSION +from ...utils.json_io import json_text +from .keys import sha256_hex + +_DIGEST_PATTERN = re.compile( + r"\b[a-f0-9]{8,64}\b", + re.IGNORECASE, +) +_UUID_PATTERN = re.compile( + r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", + re.IGNORECASE, +) +_ISO_TIMESTAMP_PATTERN = re.compile( + r"\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z\b" +) +_ABS_PATH_PATTERN = re.compile(r"(?:/[\w./-]+|(?:[A-Za-z]:\\)[\w\\./-]+)") +_TEMPLATE_PREFIXES = ( + "implement ", + "fix ", + "refactor ", + "add ", + "update ", + "validate ", +) + + +@dataclass(frozen=True, slots=True) +class NormalizedText: + text: str + digest: str + normalizer_version: str + + +def normalize_corpus_text(raw: str) -> NormalizedText: + text = unicodedata.normalize("NFC", raw) + text = text.replace("\r\n", "\n").replace("\r", "\n") + text = re.sub(r"[ \t]+", " ", text) + text = re.sub(r"\n{3,}", "\n\n", text) + text = text.strip() + text = _DIGEST_PATTERN.sub("", text) + text = _UUID_PATTERN.sub("", text) + text = _ISO_TIMESTAMP_PATTERN.sub("", text) + text = _ABS_PATH_PATTERN.sub("", text) + lowered = text.lower() + for prefix in _TEMPLATE_PREFIXES: + if lowered.startswith(prefix): + text = text[len(prefix) :].lstrip() + break + digest = sha256_hex(text) + return NormalizedText( + text=text, + digest=digest, + normalizer_version=CORPUS_NORMALIZER_VERSION, + ) + + +def source_content_digest(raw_inputs: object) -> str: + """Hash canonical raw representation inputs before text normalization.""" + return sha256_hex(json_text(raw_inputs, sort_keys=True)) + + +__all__ = ["NormalizedText", "normalize_corpus_text", "source_content_digest"] diff --git a/codeclone/analytics/corpus/registry_overlay.py b/codeclone/analytics/corpus/registry_overlay.py new file mode 
# Module: codeclone/analytics/corpus/registry_overlay.py

_INTENT_REGISTRY_META_TABLE = "intent_registry_meta"
_SUPPORTED_REGISTRY_SCHEMA_VERSIONS = frozenset({"1", "2"})


def _validate_registry_readonly_schema(conn: sqlite3.Connection) -> None:
    """Reject registry databases whose schema version is not supported.

    Raises:
        sqlite3.DatabaseError: If the stored ``schema_version`` meta value is
            not one of the supported versions.
    """
    schema_version = get_meta_value(
        conn,
        meta_table=_INTENT_REGISTRY_META_TABLE,
        key="schema_version",
    )
    if schema_version in _SUPPORTED_REGISTRY_SCHEMA_VERSIONS:
        return
    msg = f"unsupported intent registry schema version: {schema_version!r}"
    raise sqlite3.DatabaseError(msg)


def read_registry_overlay(
    registry_db: Path,
    *,
    intent_id: str,
) -> dict[str, object] | None:
    """Optional live coordination overlay; excluded from corpus digests.

    Reads the most recently declared ``workspace_intents`` row for
    ``intent_id``. Any problem opening or querying the database is treated
    as "no overlay".

    Returns:
        ``None`` when the file is missing, cannot be opened or queried, or
        has no matching row; otherwise a dict with ``present``, ``status``
        (from the row's JSON payload, if a string), ``declared_at_utc`` and
        ``closed_at_utc``.
    """

    if not registry_db.is_file():
        return None
    try:
        # Imported lazily, only once a registry file is known to exist.
        from ...observability.sqlite_access import open_instrumented_sqlite_db_readonly

        connection = open_instrumented_sqlite_db_readonly(
            registry_db,
            validate_schema=_validate_registry_readonly_schema,
        )
    except (OSError, sqlite3.Error):
        return None
    try:
        latest = connection.execute(
            """
            SELECT payload_json, declared_at_utc, closed_at_utc
            FROM workspace_intents
            WHERE intent_id=?
            ORDER BY declared_at_utc DESC, agent_pid DESC, intent_id ASC
            LIMIT 1
            """,
            (intent_id,),
        ).fetchone()
    except sqlite3.Error:
        return None
    finally:
        connection.close()
    if latest is None:
        return None

    raw_payload, declared_at, closed_at = latest[0], latest[1], latest[2]
    status: str | None = None
    if isinstance(raw_payload, str):
        try:
            decoded = json.loads(raw_payload)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            candidate = decoded.get("status")
            if isinstance(candidate, str):
                status = candidate
    return {
        "present": True,
        "status": status,
        "declared_at_utc": declared_at,
        "closed_at_utc": closed_at,
    }


__all__ = ["read_registry_overlay"]
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass + +from ...contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, +) +from ..keys import sha256_hex +from ..normalizer import normalize_corpus_text + + +@dataclass(frozen=True, slots=True) +class IntentRepresentationInput: + description: str + intent_kind: str | None + declared_path_families: Sequence[str] + declared_constraints: Sequence[str] + + +def build_intent_description_v1(description: str) -> str: + normalized = normalize_corpus_text(description) + return normalized.text + + +def build_intent_description_with_frame_v1(payload: IntentRepresentationInput) -> str: + normalized_description = normalize_corpus_text(payload.description) + kind = (payload.intent_kind or "").strip() + families = ", ".join(sorted(set(payload.declared_path_families))) + constraints = "; ".join(sorted(set(payload.declared_constraints))) + parts = [ + "DESCRIPTION:", + normalized_description.text, + "INTENT_KIND:", + kind, + "DECLARED_PATH_FAMILIES:", + families, + "DECLARED_CONSTRAINTS:", + constraints, + ] + return "\n".join(parts) + + +def representation_digest(*, representation_kind: str, normalized_text: str) -> str: + return sha256_hex(f"{representation_kind}\n{normalized_text}") + + +def build_representation_text( + *, + representation_kind: str, + payload: IntentRepresentationInput, +) -> str: + if representation_kind == INTENT_REPRESENTATION_DESCRIPTION: + return build_intent_description_v1(payload.description) + if representation_kind == INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME: + return build_intent_description_with_frame_v1(payload) + msg = f"unsupported representation kind: {representation_kind}" + raise ValueError(msg) + + +def declared_path_families_from_patch_trail( + patch_trail: Mapping[str, object] | None, + *, + limit: int 
= 12, +) -> tuple[str, ...]: + if patch_trail is None: + return () + declared = patch_trail.get("declared_files") + if not isinstance(declared, list): + return () + families: set[str] = set() + for item in declared: + if not isinstance(item, str): + continue + path = item.strip().replace("\\", "/") + while path.startswith("./"): + path = path[2:] + if not path: + continue + top = path.split("/", 1)[0] + if top: + families.add(top) + return tuple(sorted(families)[:limit]) + + +def declared_constraints_from_audit_payload( + payload: Mapping[str, object] | None, +) -> tuple[str, ...]: + if payload is None: + return () + constraints: list[str] = [] + for key in ("verification_profile", "dirty_scope_policy", "on_conflict"): + value = payload.get(key) + if isinstance(value, str) and value.strip(): + constraints.append(f"{key}={value.strip()}") + scope = payload.get("scope") + if isinstance(scope, Mapping): + for scope_key in ("allowed_files", "allowed_related", "forbidden"): + items = scope.get(scope_key) + if isinstance(items, list) and items: + constraints.append(f"scope.{scope_key}_count={len(items)}") + return tuple(sorted(constraints)) + + +__all__ = [ + "IntentRepresentationInput", + "build_intent_description_v1", + "build_intent_description_with_frame_v1", + "build_representation_text", + "declared_constraints_from_audit_payload", + "declared_path_families_from_patch_trail", + "representation_digest", +] diff --git a/codeclone/analytics/corpus/snapshot.py b/codeclone/analytics/corpus/snapshot.py new file mode 100644 index 00000000..4d5eb7ad --- /dev/null +++ b/codeclone/analytics/corpus/snapshot.py @@ -0,0 +1,148 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import uuid +from pathlib import Path + +from ...config.analytics import AnalyticsConfig, resolve_analytics_config +from ...memory.project import compute_project_id, resolve_memory_db_path +from ...report.meta import current_report_timestamp_utc +from ...utils.json_io import json_text +from ..contracts import CorpusItemRecord, CorpusLane, CorpusSnapshotRecord +from ..store.protocols import CorpusStore, SnapshotBuildResult +from ..store.sqlite import SqliteCorpusAnalyticsStore +from .adapters.intent_historical import ( + compute_source_digest, + default_source_schema_versions, + extract_historical_intent_items, + materialize_corpus_item, +) +from .keys import representation_version_for_kind + + +def _manifest_path(root_path: Path, path: Path) -> str: + try: + return path.resolve().relative_to(root_path.resolve()).as_posix() + except ValueError: + return "" + + +def _relative_store_paths( + root_path: Path, + *, + audit_db_path: Path, + memory_db_path: Path, +) -> dict[str, str]: + return { + "audit": _manifest_path(root_path, audit_db_path), + "memory": _manifest_path(root_path, memory_db_path), + } + + +def build_intent_snapshot( + *, + root_path: Path, + representation_kind: str, + config: AnalyticsConfig | None = None, + registry_db_path: Path | None = None, + store: CorpusStore | None = None, +) -> SnapshotBuildResult: + resolved_root = root_path.resolve() + analytics_config = config or resolve_analytics_config(resolved_root) + owned_store = store is None + active_store = store or SqliteCorpusAnalyticsStore.open(analytics_config.db_path) + try: + lane: CorpusLane = "intent" + rep_version = representation_version_for_kind(representation_kind) + memory_db_path = resolve_memory_db_path(resolved_root) + source_items = extract_historical_intent_items( + root_path=resolved_root, + representation_kind=representation_kind, + 
audit_db_path=analytics_config.audit_db_path, + memory_db_path=memory_db_path, + registry_db_path=registry_db_path, + ) + source_digest = compute_source_digest( + items=source_items, + lane=lane, + representation_kind=representation_kind, + representation_version=rep_version, + source_schema_versions=default_source_schema_versions(), + ) + snapshot_id = f"snap-{uuid.uuid4().hex[:16]}" + created_at = current_report_timestamp_utc() + project_id = compute_project_id(resolved_root) + corpus_items: list[CorpusItemRecord] = [] + for source_item in source_items: + ( + rep_key, + snap_item_id, + source_key, + normalized_text, + normalized_digest, + normalizer_version, + rep_digest, + metadata_json, + overlay_json, + _rep_version, + ) = materialize_corpus_item( + snapshot_id=snapshot_id, + lane=lane, + representation_kind=representation_kind, + item=source_item, + ) + corpus_items.append( + CorpusItemRecord( + snapshot_id=snapshot_id, + representation_key=rep_key, + snapshot_item_id=snap_item_id, + source_record_key=source_key, + project_id=project_id, + intent_id=source_item.intent_id, + normalized_text=normalized_text, + normalized_digest=normalized_digest, + normalizer_version=normalizer_version, + representation_digest=rep_digest, + metadata_json=metadata_json, + registry_overlay_json=overlay_json, + ) + ) + snapshot = CorpusSnapshotRecord( + snapshot_id=snapshot_id, + lane=lane, + representation_kind=representation_kind, + representation_version=rep_version, + source_stores_json=json_text( + _relative_store_paths( + resolved_root, + audit_db_path=analytics_config.audit_db_path, + memory_db_path=memory_db_path, + ), + sort_keys=True, + ), + source_schema_versions_json=json_text( + default_source_schema_versions(), + sort_keys=True, + ), + record_count=len(corpus_items), + source_digest=source_digest, + created_at_utc=created_at, + ) + active_store.insert_snapshot(snapshot, corpus_items) + active_store.commit() + return SnapshotBuildResult( + snapshot_id=snapshot_id, + 
source_digest=source_digest, + record_count=len(corpus_items), + ) + finally: + if owned_store: + active_store.close() + + +__all__ = ["build_intent_snapshot"] diff --git a/codeclone/analytics/corpus/trajectory_selection.py b/codeclone/analytics/corpus/trajectory_selection.py new file mode 100644 index 00000000..c58413c4 --- /dev/null +++ b/codeclone/analytics/corpus/trajectory_selection.py @@ -0,0 +1,83 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from ...audit.events import EVENT_INTENT_CLEARED, EVENT_PATCH_VERIFIED +from ...contracts import TRAJECTORY_PROJECTION_VERSION +from ...memory.trajectory.models import Trajectory, TrajectoryLabel + +TRAJECTORY_SELECTION_RULE_VERSION = "1" + + +@dataclass(frozen=True, slots=True) +class TrajectorySelectionResult: + selected: Trajectory | None + discarded_ids: tuple[str, ...] 
+ + +def _has_verified_finish(trajectory: Trajectory) -> bool: + if "verified_finish" in trajectory.labels: + return True + for step in trajectory.steps: + if step.event_type == EVENT_INTENT_CLEARED: + return True + if step.event_type == EVENT_PATCH_VERIFIED and step.status in { + "accepted", + "accepted_with_external_changes", + }: + return True + return False + + +def _terminal_audit_sequence(trajectory: Trajectory) -> int: + if not trajectory.steps: + return -1 + return max(step.audit_sequence for step in trajectory.steps) + + +def select_trajectory_for_intent( + trajectories: Sequence[Trajectory], +) -> TrajectorySelectionResult: + """Deterministic trajectory selection per spec §4.4.""" + candidates = [ + trajectory + for trajectory in trajectories + if trajectory.projection_version == TRAJECTORY_PROJECTION_VERSION + ] + if not candidates: + return TrajectorySelectionResult(selected=None, discarded_ids=()) + + finish_candidates = [item for item in candidates if _has_verified_finish(item)] + pool = finish_candidates if finish_candidates else list(candidates) + selected = max( + pool, + key=lambda item: ( + _terminal_audit_sequence(item), + item.id, + ), + ) + discarded = tuple( + sorted( + trajectory.id for trajectory in candidates if trajectory.id != selected.id + ) + ) + return TrajectorySelectionResult(selected=selected, discarded_ids=discarded) + + +def scope_expanded_from_labels(labels: Sequence[TrajectoryLabel | str]) -> bool: + return "scope_expanded" in labels + + +__all__ = [ + "TRAJECTORY_SELECTION_RULE_VERSION", + "TrajectorySelectionResult", + "scope_expanded_from_labels", + "select_trajectory_for_intent", +] diff --git a/codeclone/analytics/embedding/__init__.py b/codeclone/analytics/embedding/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/embedding/generation.py b/codeclone/analytics/embedding/generation.py new file mode 100644 index 00000000..19518d15 --- /dev/null +++ 
b/codeclone/analytics/embedding/generation.py @@ -0,0 +1,166 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +import uuid +from collections.abc import Sequence +from contextlib import suppress +from dataclasses import dataclass + +from ...config.analytics import AnalyticsConfig +from ...contracts import CORPUS_EMBEDDING_CONTRACT_VERSION +from ...memory.embedding import EmbeddingProvider, embed_documents +from ...memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider +from ...report.meta import current_report_timestamp_utc +from ..contracts import CorpusItemRecord, EmbeddingGenerationRecord, EmbeddingItemRecord +from ..exceptions import AnalyticsCapabilityError, AnalyticsWorkflowError +from ..store.protocols import CorpusStore +from ..store.vectors_lancedb import AnalyticsVectorStore, vector_digest, vector_row_key + + +@dataclass(frozen=True, slots=True) +class EmbeddingBatchResult: + embedding_generation_id: str + item_count: int + + +def _resolve_fastembed_provider(config: AnalyticsConfig) -> FastEmbedEmbeddingProvider: + try: + importlib.import_module("fastembed") + except ImportError as exc: + raise AnalyticsCapabilityError( + "fastembed is required for analytics embeddings; " + "install with: uv sync --extra analytics" + ) from exc + return FastEmbedEmbeddingProvider( + model_name=config.embedding_model, + dimension=config.embedding_dimension, + cache_dir=config.embedding_cache_dir, + allow_model_download=config.allow_model_download, + ) + + +def _provider_package_version(provider_id: str) -> str: + if provider_id == "fastembed": + module = importlib.import_module("fastembed") + return str(getattr(module, "__version__", "unknown")) + return "unknown" + + +def 
generate_embeddings_for_snapshot( + *, + store: CorpusStore, + vector_store: AnalyticsVectorStore, + config: AnalyticsConfig, + snapshot_id: str, + provider: EmbeddingProvider | None = None, +) -> EmbeddingBatchResult: + items = store.list_items(snapshot_id) + if not items: + raise AnalyticsWorkflowError(f"snapshot has no items: {snapshot_id}") + active_provider = provider or _resolve_fastembed_provider(config) + texts = [item.normalized_text for item in items] + try: + vectors = embed_documents(active_provider, texts) + except Exception as exc: + raise AnalyticsWorkflowError(f"analytics embedding failed: {exc}") from exc + generation_id = f"emb-{uuid.uuid4().hex[:16]}" + provider_id = active_provider.model_id.split(":", 1)[0] + if provider_id not in {"fastembed", "diagnostic-hash-v1"}: + provider_id = ( + "fastembed" if "fastembed" in active_provider.model_id else "custom" + ) + if active_provider.model_id.startswith("fastembed:"): + provider_id = "fastembed" + model_id = ( + active_provider.model_id.split(":", 1)[1] + if ":" in active_provider.model_id + else active_provider.model_id + ) + generation = EmbeddingGenerationRecord( + embedding_generation_id=generation_id, + provider_id=provider_id, + provider_package_version=_provider_package_version(provider_id), + model_id=model_id, + model_revision=None, + model_artifact_fingerprint=None, + exact_model_artifact_reproducibility=False, + dimensions=active_provider.dimension, + embedding_contract_version=CORPUS_EMBEDDING_CONTRACT_VERSION, + embedding_similarity_metric="cosine", + vector_preprocessing="l2_normalize", + created_at_utc=current_report_timestamp_utc(), + ) + embedding_items: list[EmbeddingItemRecord] = [] + vector_rows: list[dict[str, object]] = [] + for item, vector in zip(items, vectors, strict=True): + row_key = vector_row_key( + embedding_generation_id=generation_id, + snapshot_item_id=item.snapshot_item_id, + ) + digest = vector_digest(vector) + embedding_items.append( + EmbeddingItemRecord( + 
embedding_generation_id=generation_id, + snapshot_item_id=item.snapshot_item_id, + vector_row_key=row_key, + vector_digest=digest, + dimensions=len(vector), + ) + ) + vector_rows.append( + { + "snapshot_item_id": item.snapshot_item_id, + "vector": vector, + } + ) + try: + store.insert_embedding_generation(generation) + store.insert_embedding_items(embedding_items) + vector_store.write_vectors( + embedding_generation_id=generation_id, + rows=vector_rows, + ) + store.commit() + except Exception: + store.rollback() + with suppress(Exception): + vector_store.delete_generation(generation_id) + raise + return EmbeddingBatchResult( + embedding_generation_id=generation_id, + item_count=len(items), + ) + + +def load_snapshot_vectors( + *, + vector_store: AnalyticsVectorStore, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> list[list[float]]: + item_ids = [item.snapshot_item_id for item in items] + loaded = vector_store.read_vectors( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=item_ids, + ) + vectors: list[list[float]] = [] + for item_id in item_ids: + vector = loaded.get(item_id) + if vector is None: + msg = f"missing vector for snapshot item: {item_id}" + raise ValueError(msg) + vectors.append(vector) + return vectors + + +__all__ = [ + "EmbeddingBatchResult", + "generate_embeddings_for_snapshot", + "load_snapshot_vectors", +] diff --git a/codeclone/analytics/exceptions.py b/codeclone/analytics/exceptions.py new file mode 100644 index 00000000..d13db131 --- /dev/null +++ b/codeclone/analytics/exceptions.py @@ -0,0 +1,31 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# Root of the analytics exception hierarchy: the three classes below all derive
# from it, so ``except AnalyticsError`` handles any of them.
class AnalyticsError(Exception):
    """Base error for corpus analytics."""


# Raised by the embedding generation code when ``fastembed`` cannot be imported.
class AnalyticsCapabilityError(AnalyticsError):
    """Required optional dependency is not installed."""


# NOTE(review): no raise site is visible in this part of the change; presumably
# raised by the SQLite-backed store implementation — confirm.
class AnalyticsStoreError(AnalyticsError):
    """Analytics SQLite store error."""


# Raised by the embedding, export and integrity code for unknown snapshots or
# runs, mismatched ownership, and failed persisted-run invariants.
class AnalyticsWorkflowError(AnalyticsError):
    """Orchestration or input validation error."""
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from collections.abc import Mapping + +from ...contracts import ( + CORPUS_CONTROL_PLANE_CONTRACT_VERSION, + CORPUS_EXPORT_SCHEMA_VERSION, +) +from ...utils.json_io import json_text +from ..contracts import ( + ClusteringRunRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, +) +from ..exceptions import AnalyticsWorkflowError +from ..report.interpret import ( + INTERPRETATION_CONTRACT_VERSION, + build_profile_summary, + build_sweep_comparison_projection, + content_disclosure, + enrich_run_for_export, +) +from ..store.sqlite import SqliteCorpusAnalyticsStore + +_REPRODUCIBILITY_NOTE = ( + "Full vector reproducibility is not guaranteed from model id alone." +) +_MISSING_GENERATION_NOTE = ( + "Embedding generation metadata is unavailable; interpretation is limited " + "to persisted diagnostic facts." +) + + +def export_clustering_json( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + clustering_run_id: str, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> str: + snapshot, run = _owned_context( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=clustering_run_id, + ) + generation = store.get_embedding_generation(run.embedding_generation_id) + projection = enrich_run_for_export( + store=store, + snapshot=snapshot, + run=run, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + payload: dict[str, object] = { + "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, + "control_plane_contract_version": CORPUS_CONTROL_PLANE_CONTRACT_VERSION, + "interpretation_contract_version": INTERPRETATION_CONTRACT_VERSION, + "snapshot": _snapshot_dict(snapshot), + "embedding_generation": _generation_dict(generation), + "embedding_items": _embedding_items( + store=store, + generation=generation, + embedding_generation_id=run.embedding_generation_id, 
+ ), + "clustering_run": projection["run"], + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility + if generation is not None + else None + ), + "reproducibility_statement": _reproducibility_statement(generation), + "sweep_candidates": _single_export_sweep_candidates( + store=store, + snapshot=snapshot, + embedding_generation_id=run.embedding_generation_id, + ), + } + payload.update( + _full_projection_payload( + store=store, + snapshot=snapshot, + projection=projection, + ) + ) + payload["content_disclosure"] = content_disclosure(payload) + return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) + + +def export_sweep_comparison_json( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + embedding_generation_id: str, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> str: + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + generation = store.get_embedding_generation(embedding_generation_id) + candidates, comparison_summary = build_sweep_comparison_projection( + store=store, + snapshot=snapshot, + embedding_generation_id=embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + profile_summary = build_profile_summary( + store=store, + snapshot=snapshot, + embedding_generation_id=embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + payload: dict[str, object] = { + "schema_version": CORPUS_EXPORT_SCHEMA_VERSION, + "control_plane_contract_version": CORPUS_CONTROL_PLANE_CONTRACT_VERSION, + "interpretation_contract_version": INTERPRETATION_CONTRACT_VERSION, + "snapshot": _snapshot_dict(snapshot), + "embedding_generation": _generation_dict(generation), + "embedding_items": _embedding_items( + store=store, + generation=generation, + embedding_generation_id=embedding_generation_id, + ), + "candidates": candidates, + 
"comparison_summary": comparison_summary, + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility + if generation is not None + else None + ), + "reproducibility_statement": _reproducibility_statement(generation), + } + if profile_summary is not None: + payload["profile_summary"] = profile_summary + payload["content_disclosure"] = content_disclosure(payload) + return json_text(payload, sort_keys=True, indent=True, trailing_newline=True) + + +def _owned_context( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + clustering_run_id: str, +) -> tuple[CorpusSnapshotRecord, ClusteringRunRecord]: + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + run = store.get_clustering_run(clustering_run_id) + if run is None: + raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + if run.snapshot_id != snapshot_id: + raise AnalyticsWorkflowError( + f"clustering run {clustering_run_id} belongs to snapshot " + f"{run.snapshot_id}, not {snapshot_id}" + ) + return snapshot, run + + +def _single_export_sweep_candidates( + *, + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + embedding_generation_id: str, +) -> list[dict[str, object]]: + result: list[dict[str, object]] = [] + for candidate in store.list_clustering_runs( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embedding_generation_id, + ): + if candidate.status != "completed": + continue + projection = enrich_run_for_export( + store=store, + snapshot=snapshot, + run=candidate, + ) + run_payload = projection.get("run") + result.append(dict(run_payload) if isinstance(run_payload, Mapping) else {}) + return result + + +def _full_projection_payload( + *, + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + projection: Mapping[str, object], +) -> dict[str, object]: + if "clusters" not in projection: + return {} + return { + 
"clusters": projection["clusters"], + "assignments": projection["assignments"], + "noise_items": projection["noise_items"], + "items": [_item_dict(item) for item in store.list_items(snapshot.snapshot_id)], + } + + +def _snapshot_dict(snapshot: CorpusSnapshotRecord) -> dict[str, object]: + return { + "snapshot_id": snapshot.snapshot_id, + "lane": snapshot.lane, + "representation_kind": snapshot.representation_kind, + "representation_version": snapshot.representation_version, + "source_stores": _json_object_or_none(snapshot.source_stores_json), + "source_schema_versions": _json_object_or_none( + snapshot.source_schema_versions_json + ), + "record_count": snapshot.record_count, + "source_digest": snapshot.source_digest, + "created_at_utc": snapshot.created_at_utc, + } + + +def _item_dict(item: CorpusItemRecord) -> dict[str, object]: + return { + "snapshot_item_id": item.snapshot_item_id, + "intent_id": item.intent_id, + "normalized_digest": item.normalized_digest, + "representation_digest": item.representation_digest, + "metadata": _json_object_or_none(item.metadata_json), + "registry_overlay": ( + _json_object_or_none(item.registry_overlay_json) + if item.registry_overlay_json is not None + else None + ), + } + + +def _generation_dict( + generation: EmbeddingGenerationRecord | None, +) -> dict[str, object] | None: + if generation is None: + return None + return { + "embedding_generation_id": generation.embedding_generation_id, + "provider_id": generation.provider_id, + "provider_package_version": generation.provider_package_version, + "model_id": generation.model_id, + "model_revision": generation.model_revision, + "model_artifact_fingerprint": generation.model_artifact_fingerprint, + "exact_model_artifact_reproducibility": ( + generation.exact_model_artifact_reproducibility + ), + "dimensions": generation.dimensions, + "embedding_contract_version": generation.embedding_contract_version, + "embedding_similarity_metric": generation.embedding_similarity_metric, + 
"vector_preprocessing": generation.vector_preprocessing, + "created_at_utc": generation.created_at_utc, + } + + +def _embedding_items( + *, + store: SqliteCorpusAnalyticsStore, + generation: EmbeddingGenerationRecord | None, + embedding_generation_id: str, +) -> list[dict[str, object]]: + if generation is None: + return [] + return [ + _embedding_item_dict(item) + for item in store.list_embedding_items( + embedding_generation_id=embedding_generation_id + ) + ] + + +def _embedding_item_dict(item: EmbeddingItemRecord) -> dict[str, object]: + return { + "snapshot_item_id": item.snapshot_item_id, + "vector_row_key": item.vector_row_key, + "vector_digest": item.vector_digest, + "dimensions": item.dimensions, + } + + +def _reproducibility_statement( + generation: EmbeddingGenerationRecord | None, +) -> str | None: + if generation is None: + return _MISSING_GENERATION_NOTE + if generation.exact_model_artifact_reproducibility: + return None + return _REPRODUCIBILITY_NOTE + + +def _json_object_or_none(text: str) -> dict[str, object] | None: + try: + payload = json.loads(text) + except (json.JSONDecodeError, TypeError): + return None + return payload if isinstance(payload, dict) else None + + +__all__ = ["export_clustering_json", "export_sweep_comparison_json"] diff --git a/codeclone/analytics/integrity.py b/codeclone/analytics/integrity.py new file mode 100644 index 00000000..10736552 --- /dev/null +++ b/codeclone/analytics/integrity.py @@ -0,0 +1,620 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import math +from collections import Counter, defaultdict +from collections.abc import Collection, Mapping, Sequence +from dataclasses import dataclass +from typing import cast + +from ..contracts import CORPUS_EMBEDDING_CONTRACT_VERSION +from .clustering.models import NOISE_LABEL +from .contracts import ( + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, +) +from .corpus.keys import membership_digest +from .exceptions import AnalyticsWorkflowError +from .store.protocols import CorpusStore, VectorGenerationStore +from .store.vectors_lancedb import vector_digest, vector_row_key + +REQUIRED_ALGORITHM_MANIFEST_PATHS = ( + "python_version", + "numpy_version", + "scipy_version", + "scikit_learn_version", + "hdbscan_version", + "vector_preprocessing", + "pca_solver", + "pca_whiten", + "clustering_input", + "hdbscan_implementation", + "clustering_metric", + "hdbscan_core_dist_n_jobs", +) +_MANIFEST_VERSION_FIELDS = frozenset(REQUIRED_ALGORITHM_MANIFEST_PATHS[:5]) +_MANIFEST_FIXED_FIELDS: dict[str, object] = { + "vector_preprocessing": "l2_normalize", + "pca_solver": "full", + "pca_whiten": False, + "clustering_input": "pca_reduced_coordinates", + "hdbscan_implementation": "hdbscan", + "clustering_metric": "euclidean", + "hdbscan_core_dist_n_jobs": 1, +} + + +@dataclass(frozen=True, slots=True) +class PartitionValidityAssessment: + technically_valid: bool + failed_invariants: tuple[str, ...] 
+ + +@dataclass(frozen=True, slots=True) +class _ValidityJsonContext: + effective_parameters: dict[str, object] + effective_parameters_readable: bool + algorithm_manifest: dict[str, object] | None + diagnostics_by_summary: dict[int, dict[str, object]] + all_shapes_valid: bool + + +# Validity is formal store/partition integrity, never semantic meaningfulness. +# Inspectability remains a set of observable proxies, not one machine verdict. +# Stability-neighbour comparisons are a future versioned contract. +# Maintainer selection remains persisted run provenance, not validity evidence. +def validate_generation_metadata( + *, + store: CorpusStore, + snapshot_id: str, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> tuple[EmbeddingGenerationRecord, tuple[EmbeddingItemRecord, ...]]: + generation = store.get_embedding_generation(embedding_generation_id) + if generation is None: + raise AnalyticsWorkflowError( + f"unknown embedding generation: {embedding_generation_id}" + ) + if generation.embedding_contract_version != CORPUS_EMBEDDING_CONTRACT_VERSION: + raise AnalyticsWorkflowError( + "unsupported analytics embedding contract: " + f"{generation.embedding_contract_version}; " + f"expected {CORPUS_EMBEDDING_CONTRACT_VERSION}. " + "Generate a new analytics embedding batch." 
+ ) + if ( + generation.embedding_similarity_metric != "cosine" + or generation.vector_preprocessing != "l2_normalize" + ): + raise AnalyticsWorkflowError( + "embedding generation does not match the fixed analytics " + "cosine/L2 preprocessing contract" + ) + expected_ids = {item.snapshot_item_id for item in items} + embedding_items = store.list_embedding_items( + embedding_generation_id=embedding_generation_id + ) + actual_ids = {item.snapshot_item_id for item in embedding_items} + if actual_ids != expected_ids: + raise AnalyticsWorkflowError( + "embedding generation does not match snapshot " + f"{snapshot_id}: missing={len(expected_ids - actual_ids)}, " + f"foreign={len(actual_ids - expected_ids)}" + ) + for item in embedding_items: + if item.dimensions != generation.dimensions: + raise AnalyticsWorkflowError( + "embedding dimension mismatch in metadata for " + f"{item.snapshot_item_id}: item={item.dimensions}, " + f"generation={generation.dimensions}" + ) + expected_key = vector_row_key( + embedding_generation_id=embedding_generation_id, + snapshot_item_id=item.snapshot_item_id, + ) + if item.vector_row_key != expected_key: + raise AnalyticsWorkflowError( + f"invalid vector row key for {item.snapshot_item_id}" + ) + return generation, embedding_items + + +def load_validated_snapshot_vectors( + *, + store: CorpusStore, + vector_store: VectorGenerationStore, + snapshot_id: str, + embedding_generation_id: str, + items: Sequence[CorpusItemRecord], +) -> list[list[float]]: + generation, embedding_items = validate_generation_metadata( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + items=items, + ) + metadata_by_id = {item.snapshot_item_id: item for item in embedding_items} + sidecar_ids = set( + vector_store.list_generation_item_ids( + embedding_generation_id=embedding_generation_id, + limit=len(metadata_by_id) + 1, + ) + ) + if sidecar_ids != set(metadata_by_id): + raise AnalyticsWorkflowError( + "analytics vector 
generation does not match embedding metadata: " + f"missing={len(set(metadata_by_id) - sidecar_ids)}, " + f"foreign={len(sidecar_ids - set(metadata_by_id))}" + ) + rows = vector_store.read_vector_rows( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=[item.snapshot_item_id for item in items], + ) + if set(rows) != set(metadata_by_id): + raise AnalyticsWorkflowError( + "analytics vector sidecar does not match embedding metadata: " + f"missing={len(set(metadata_by_id) - set(rows))}, " + f"foreign={len(set(rows) - set(metadata_by_id))}" + ) + vectors: list[list[float]] = [] + for corpus_item in items: + item_id = corpus_item.snapshot_item_id + row = rows[item_id] + metadata = metadata_by_id[item_id] + vector = row["vector"] + if not isinstance(vector, list): + raise AnalyticsWorkflowError(f"invalid vector payload for {item_id}") + typed_vector = [float(value) for value in vector] + if len(typed_vector) != generation.dimensions: + raise AnalyticsWorkflowError( + f"vector dimension mismatch for {item_id}: " + f"actual={len(typed_vector)}, expected={generation.dimensions}" + ) + actual_digest = vector_digest(typed_vector) + if ( + row["vector_digest"] != actual_digest + or metadata.vector_digest != actual_digest + ): + raise AnalyticsWorkflowError(f"vector digest mismatch for {item_id}") + if row["vector_row_key"] != metadata.vector_row_key: + raise AnalyticsWorkflowError(f"vector row key mismatch for {item_id}") + vectors.append(typed_vector) + return vectors + + +def validate_cluster_diagnostic_refs( + *, + cluster_label: int, + diagnostics: Mapping[str, object], + items_by_id: Mapping[str, CorpusItemRecord], + assigned_item_ids: Collection[str], +) -> None: + if cluster_label == NOISE_LABEL: + return + assigned = set(assigned_item_ids) + for field in ("representatives", "boundary_items"): + value = diagnostics.get(field) + if not isinstance(value, list): + raise AnalyticsWorkflowError( + f"cluster diagnostic {field} is not a list for label 
{cluster_label}" + ) + if len(value) != len(set(value)): + raise AnalyticsWorkflowError( + f"cluster diagnostic {field} contains duplicates for label " + f"{cluster_label}" + ) + if any( + not isinstance(item_id, str) + or item_id not in items_by_id + or item_id not in assigned + for item_id in value + ): + raise AnalyticsWorkflowError( + f"cluster diagnostic {field} contains an invalid reference for " + f"label {cluster_label}" + ) + + +def assess_partition_validity( + *, + store: CorpusStore, + snapshot_id: str, + clustering_run_id: str, +) -> PartitionValidityAssessment: + """Assess formal persisted-run invariants without making semantic judgments.""" + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + run = store.get_clustering_run(clustering_run_id) + if run is None: + raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + if run.snapshot_id != snapshot_id: + raise AnalyticsWorkflowError( + f"clustering run {clustering_run_id} belongs to snapshot " + f"{run.snapshot_id}, not {snapshot_id}" + ) + + items = store.list_items(snapshot_id) + assignments = store.list_assignments(clustering_run_id) + summaries = store.list_summaries(clustering_run_id) + context = _decode_validity_json( + snapshot=snapshot, + run=run, + items=items, + summaries=summaries, + ) + failed: set[str] = set() + if not context.all_shapes_valid: + failed.add("V10") + if not _assignment_coverage_is_valid(items, assignments): + failed.add("V1") + if not _summary_links_are_valid(assignments, summaries): + failed.add("V2") + members_by_label = _members_by_label(assignments) + failed.update( + _membership_integrity_failures( + assignments=assignments, + summaries=summaries, + members_by_label=members_by_label, + ) + ) + if not _minimum_cluster_size_is_valid( + effective_parameters=context.effective_parameters, + effective_parameters_readable=context.effective_parameters_readable, + 
summaries=summaries, + ): + failed.add("V5") + if not _persisted_interpretation_numbers_are_finite( + assignments=assignments, + diagnostics_by_summary=context.diagnostics_by_summary, + ): + failed.add("V6a") + if not _run_and_manifest_are_valid( + run=run, + algorithm_manifest=context.algorithm_manifest, + ): + failed.add("V7") + if not _generation_metadata_is_valid( + store=store, + snapshot_id=snapshot_id, + run=run, + items=items, + ): + failed.add("V8") + if not _diagnostic_references_are_valid( + items=items, + summaries=summaries, + diagnostics_by_summary=context.diagnostics_by_summary, + members_by_label=members_by_label, + ): + failed.add("V9") + + failed_invariants = tuple(sorted(failed, key=_invariant_sort_key)) + return PartitionValidityAssessment( + technically_valid=not failed_invariants, + failed_invariants=failed_invariants, + ) + + +def _decode_validity_json( + *, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, + items: Sequence[CorpusItemRecord], + summaries: Sequence[ClusterSummaryRecord], +) -> _ValidityJsonContext: + effective, effective_ok = _json_object(run.effective_parameters_json) + raw_manifest = effective.get("algorithm_manifest") if effective_ok else None + manifest = raw_manifest.copy() if isinstance(raw_manifest, dict) else None + shapes = [ + _json_object(snapshot.source_stores_json)[1], + _json_object(snapshot.source_schema_versions_json)[1], + _json_object(run.requested_parameters_json)[1], + effective_ok, + manifest is not None, + ] + for item in items: + shapes.append(_json_object(item.metadata_json)[1]) + if item.registry_overlay_json is not None: + shapes.append(_json_object(item.registry_overlay_json)[1]) + diagnostics_by_summary: dict[int, dict[str, object]] = {} + for summary in summaries: + diagnostics, valid = _json_object(summary.diagnostics_json) + shapes.append(valid) + if valid: + diagnostics_by_summary[summary.cluster_label] = diagnostics + return _ValidityJsonContext( + effective_parameters=effective, + 
effective_parameters_readable=effective_ok, + algorithm_manifest=manifest, + diagnostics_by_summary=diagnostics_by_summary, + all_shapes_valid=all(shapes), + ) + + +def _assignment_coverage_is_valid( + items: Sequence[CorpusItemRecord], + assignments: Sequence[ClusterAssignmentRecord], +) -> bool: + expected_ids = [item.snapshot_item_id for item in items] + assignment_ids = [item.snapshot_item_id for item in assignments] + return ( + len(assignments) == len(items) + and len(assignment_ids) == len(set(assignment_ids)) + and set(assignment_ids) == set(expected_ids) + ) + + +def _summary_links_are_valid( + assignments: Sequence[ClusterAssignmentRecord], + summaries: Sequence[ClusterSummaryRecord], +) -> bool: + assignment_labels = {item.cluster_label for item in assignments} + summary_labels = [item.cluster_label for item in summaries] + summary_label_counts = Counter(summary_labels) + non_noise_assignments = assignment_labels - {NOISE_LABEL} + non_noise_summaries = set(summary_labels) - {NOISE_LABEL} + noise_summary_count = summary_label_counts[NOISE_LABEL] + return ( + len(summary_labels) == len(set(summary_labels)) + and all(label >= NOISE_LABEL for label in assignment_labels) + and all(label >= NOISE_LABEL for label in summary_label_counts) + and non_noise_assignments == non_noise_summaries + and all(summary_label_counts[label] == 1 for label in non_noise_summaries) + and ( + noise_summary_count == 1 + if NOISE_LABEL in assignment_labels + else noise_summary_count == 0 + ) + ) + + +def _members_by_label( + assignments: Sequence[ClusterAssignmentRecord], +) -> defaultdict[int, list[str]]: + result: defaultdict[int, list[str]] = defaultdict(list) + for assignment in assignments: + result[assignment.cluster_label].append(assignment.snapshot_item_id) + return result + + +def _membership_integrity_failures( + *, + assignments: Sequence[ClusterAssignmentRecord], + summaries: Sequence[ClusterSummaryRecord], + members_by_label: Mapping[int, Sequence[str]], +) -> set[str]: 
+ failed: set[str] = set() + summaries_by_label = {summary.cluster_label: summary for summary in summaries} + for summary in summaries: + label = summary.cluster_label + members = members_by_label.get(label, ()) + if summary.size != len( + members + ) or summary.membership_digest != membership_digest(list(members)): + failed.add("V3") + for assignment in assignments: + assigned_summary = summaries_by_label.get(assignment.cluster_label) + if ( + assigned_summary is not None + and assignment.membership_digest != assigned_summary.membership_digest + ): + failed.add("V4") + return failed + + +def _minimum_cluster_size_is_valid( + *, + effective_parameters: Mapping[str, object], + effective_parameters_readable: bool, + summaries: Sequence[ClusterSummaryRecord], +) -> bool: + if not effective_parameters_readable: + return True + value = effective_parameters.get("min_cluster_size") + if isinstance(value, bool) or not isinstance(value, int) or value <= 0: + return False + return all( + summary.cluster_label == NOISE_LABEL or summary.size >= value + for summary in summaries + ) + + +def _persisted_interpretation_numbers_are_finite( + *, + assignments: Sequence[ClusterAssignmentRecord], + diagnostics_by_summary: Mapping[int, Mapping[str, object]], +) -> bool: + assignment_numbers_are_finite = all( + (value := assignment.membership_strength) is None or math.isfinite(value) + for assignment in assignments + ) + return assignment_numbers_are_finite and all( + _diagnostic_numbers_are_finite(diagnostics) + for diagnostics in diagnostics_by_summary.values() + ) + + +def _run_and_manifest_are_valid( + *, + run: ClusteringRunRecord, + algorithm_manifest: Mapping[str, object] | None, +) -> bool: + if run.status != "completed": + return False + return ( + True if algorithm_manifest is None else _manifest_is_valid(algorithm_manifest) + ) + + +def _generation_metadata_is_valid( + *, + store: CorpusStore, + snapshot_id: str, + run: ClusteringRunRecord, + items: 
Sequence[CorpusItemRecord], +) -> bool: + try: + validate_generation_metadata( + store=store, + snapshot_id=snapshot_id, + embedding_generation_id=run.embedding_generation_id, + items=items, + ) + except AnalyticsWorkflowError: + return False + return True + + +def _diagnostic_references_are_valid( + *, + items: Sequence[CorpusItemRecord], + summaries: Sequence[ClusterSummaryRecord], + diagnostics_by_summary: Mapping[int, Mapping[str, object]], + members_by_label: Mapping[int, Sequence[str]], +) -> bool: + items_by_id = {item.snapshot_item_id: item for item in items} + for summary in summaries: + label = summary.cluster_label + if label == NOISE_LABEL or label not in diagnostics_by_summary: + continue + try: + validate_cluster_diagnostic_refs( + cluster_label=label, + diagnostics=diagnostics_by_summary[label], + items_by_id=items_by_id, + assigned_item_ids=members_by_label.get(label, ()), + ) + except AnalyticsWorkflowError: + return False + return True + + +def validate_persisted_run( + *, + store: CorpusStore, + snapshot_id: str, + clustering_run_id: str, +) -> ClusteringRunRecord: + snapshot = store.get_snapshot(snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {snapshot_id}") + run = store.get_clustering_run(clustering_run_id) + if run is None: + raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}") + if run.snapshot_id != snapshot_id: + raise AnalyticsWorkflowError( + f"clustering run {clustering_run_id} belongs to snapshot " + f"{run.snapshot_id}, not {snapshot_id}" + ) + assessment = assess_partition_validity( + store=store, + snapshot_id=snapshot_id, + clustering_run_id=clustering_run_id, + ) + if assessment.technically_valid: + return run + first = assessment.failed_invariants[0] + v7_message = ( + f"clustering run is not completed: {clustering_run_id} ({run.status})" + if run.status != "completed" + else f"clustering run manifest is invalid: {clustering_run_id}" + ) + messages = { + "V1": 
"clustering assignments do not match snapshot items", + "V2": "cluster summaries do not match assignments", + "V3": "cluster summary integrity mismatch", + "V4": "assignment membership digest mismatch", + "V5": "cluster smaller than effective min_cluster_size", + "V6a": "persisted interpretation numeric is not finite", + "V7": v7_message, + "V8": "embedding generation does not match snapshot items", + "V9": "cluster diagnostic reference integrity mismatch", + "V10": "persisted analytics JSON payload is malformed", + } + raise AnalyticsWorkflowError( + f"{messages[first]} (failed invariants: " + f"{', '.join(assessment.failed_invariants)})" + ) + + +def _json_object(text: str) -> tuple[dict[str, object], bool]: + try: + payload = json.loads(text) + except (json.JSONDecodeError, TypeError): + return {}, False + if not isinstance(payload, dict): + return {}, False + return payload, True + + +def _manifest_is_valid(manifest: Mapping[str, object]) -> bool: + for field in _MANIFEST_VERSION_FIELDS: + value = manifest.get(field) + if not isinstance(value, str) or not value: + return False + return all( + manifest.get(field) == value for field, value in _MANIFEST_FIXED_FIELDS.items() + ) + + +def _diagnostic_numbers_are_finite(diagnostics: Mapping[str, object]) -> bool: + for field in ( + "size", + "size_percent", + "average_membership_strength", + "min_correlation_sample_size", + ): + if not _persisted_number_is_finite(diagnostics.get(field)): + return False + distributions = diagnostics.get("metadata_distributions") + if not isinstance(distributions, Mapping): + return True + for values in _mapping_values(distributions): + for cell in _mapping_values(values): + for field in ("numerator", "denominator", "rate"): + if not _persisted_number_is_finite(cell.get(field)): + return False + return True + + +def _mapping_values(value: Mapping[str, object]) -> tuple[Mapping[str, object], ...]: + return tuple( + cast(Mapping[str, object], item) + for item in value.values() + if 
isinstance(item, Mapping) + ) + + +def _persisted_number_is_finite(value: object) -> bool: + if value is None or isinstance(value, bool) or not isinstance(value, int | float): + return True + return math.isfinite(value) + + +def _invariant_sort_key(code: str) -> tuple[int, str]: + digits = "".join(character for character in code if character.isdigit()) + suffix = code[len(digits) + 1 :] if digits else code + return (int(digits) if digits else 999, suffix) + + +__all__ = [ + "REQUIRED_ALGORITHM_MANIFEST_PATHS", + "PartitionValidityAssessment", + "assess_partition_validity", + "load_validated_snapshot_vectors", + "validate_cluster_diagnostic_refs", + "validate_generation_metadata", + "validate_persisted_run", +] diff --git a/codeclone/analytics/metrics/__init__.py b/codeclone/analytics/metrics/__init__.py new file mode 100644 index 00000000..5432e27b --- /dev/null +++ b/codeclone/analytics/metrics/__init__.py @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .partition_metrics import RunPartitionMetrics, compute_run_partition_metrics + +__all__ = ["RunPartitionMetrics", "compute_run_partition_metrics"] diff --git a/codeclone/analytics/metrics/partition_metrics.py b/codeclone/analytics/metrics/partition_metrics.py new file mode 100644 index 00000000..b9c7dbfd --- /dev/null +++ b/codeclone/analytics/metrics/partition_metrics.py @@ -0,0 +1,90 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def compute_run_partition_metrics(
    assignments: Sequence[ClusterAssignmentRecord],
    summaries: Sequence[ClusterSummaryRecord],
) -> RunPartitionMetrics:
    """Summarise how a clustering run partitions its items.

    Item and noise counts are taken from ``assignments``; cluster count,
    sizes and the dominant cluster are taken from the non-noise entries of
    ``summaries``.  Ratios whose denominator would be zero fall back to
    ``0.0`` (``None`` for ``dominant_assigned_ratio``).
    """
    item_total = len(assignments)
    noise_total = sum(
        1 for record in assignments if record.cluster_label == NOISE_LABEL
    )
    assigned_total = item_total - noise_total

    clusters = [entry for entry in summaries if entry.cluster_label != NOISE_LABEL]
    # Largest cluster first; digest and label break ties deterministically.
    clusters_by_size = sorted(
        clusters,
        key=lambda entry: (
            -entry.size,
            entry.membership_digest,
            entry.cluster_label,
        ),
    )
    size_distribution = tuple(entry.size for entry in clusters_by_size)
    dominant = clusters_by_size[0] if clusters_by_size else None

    dominant_label = None
    dominant_cluster_ratio = 0.0
    dominant_assigned_ratio = None
    if dominant is not None:
        dominant_label = dominant.cluster_label
        if item_total:
            dominant_cluster_ratio = dominant.size / item_total
        if assigned_total:
            dominant_assigned_ratio = dominant.size / assigned_total

    return RunPartitionMetrics(
        total_items=item_total,
        cluster_count=len(clusters),
        noise_count=noise_total,
        non_noise_count=assigned_total,
        noise_ratio=noise_total / item_total if item_total else 0.0,
        dominant_cluster_ratio=dominant_cluster_ratio,
        dominant_assigned_ratio=dominant_assigned_ratio,
        dominant_cluster_label=dominant_label,
        cluster_size_distribution=size_distribution,
        cluster_size_histogram=_cluster_size_histogram(size_distribution),
    )
+def _cluster_size_histogram(sizes: Sequence[int]) -> dict[str, int]: + result = {"1-3": 0, "4-7": 0, "8-15": 0, "16-31": 0, "32-63": 0, "64+": 0} + for size in sizes: + if size <= 3: + result["1-3"] += 1 + elif size <= 7: + result["4-7"] += 1 + elif size <= 15: + result["8-15"] += 1 + elif size <= 31: + result["16-31"] += 1 + elif size <= 63: + result["32-63"] += 1 + else: + result["64+"] += 1 + return result + + +__all__ = ["RunPartitionMetrics", "compute_run_partition_metrics"] diff --git a/codeclone/analytics/profiles/__init__.py b/codeclone/analytics/profiles/__init__.py new file mode 100644 index 00000000..0b099640 --- /dev/null +++ b/codeclone/analytics/profiles/__init__.py @@ -0,0 +1,24 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .models import ( + ClusteringProfileManifest, + ProfileApplicability, + ProfileRankingPolicy, + ProfileSearchSpace, + ProfileSuitabilityRules, +) +from .registry import ProfileRegistry, resolve_profile_registry + +__all__ = [ + "ClusteringProfileManifest", + "ProfileApplicability", + "ProfileRankingPolicy", + "ProfileRegistry", + "ProfileSearchSpace", + "ProfileSuitabilityRules", + "resolve_profile_registry", +] diff --git a/codeclone/analytics/profiles/loader.py b/codeclone/analytics/profiles/loader.py new file mode 100644 index 00000000..6cc89da1 --- /dev/null +++ b/codeclone/analytics/profiles/loader.py @@ -0,0 +1,340 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import math +import re +from dataclasses import asdict +from importlib.resources import files +from pathlib import Path +from typing import Literal, cast + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + ValidationError, + field_validator, + model_validator, +) + +from ...contracts import ( + CORPUS_EMBEDDING_CONTRACT_VERSION, + CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION, +) +from ...utils.json_io import json_text, read_json_object +from ..contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, +) +from ..corpus.keys import sha256_hex +from ..exceptions import AnalyticsWorkflowError +from .models import ( + ClusteringProfileManifest, + ProfileApplicability, + ProfileRankingPolicy, + ProfileSearchSpace, + ProfileSuitabilityRules, +) + +_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9._-]*$") +_SEMVER_RE = re.compile( + r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)" + r"(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$" +) +_CANONICAL_REPRESENTATION_KINDS = frozenset( + { + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, + } +) + + +class _ApplicabilityModel(BaseModel): + model_config = ConfigDict(extra="forbid", frozen=True) + + corpus_size_class: Literal["small_intent"] + min_record_count: int | None = Field(default=None, ge=0) + max_record_count: int | None = Field(default=None, ge=0) + embedding_contract_versions: tuple[str, ...] 
# Validation model for the "primary_space" section of a profile manifest.
# Every axis is stored de-duplicated and sorted ascending, so manifests that
# list the same values in a different order validate to the same model.
class _SearchSpaceModel(BaseModel):
    model_config = ConfigDict(extra="forbid", frozen=True)

    pca_dimensions: tuple[int, ...]
    min_cluster_size: tuple[int, ...]
    min_samples: tuple[int, ...]
    cluster_selection_method: tuple[Literal["eom", "leaf"], ...]

    @field_validator("pca_dimensions", "min_cluster_size", "min_samples")
    @classmethod
    def _positive_axis(cls, value: tuple[int, ...]) -> tuple[int, ...]:
        # An integer axis needs at least one entry, and every entry must be >= 1.
        if len(value) == 0 or min(value) <= 0:
            raise ValueError("profile search axes require positive integers")
        unique_values = set(value)
        return tuple(sorted(unique_values))

    @field_validator("cluster_selection_method")
    @classmethod
    def _method_axis(
        cls,
        value: tuple[Literal["eom", "leaf"], ...],
    ) -> tuple[Literal["eom", "leaf"], ...]:
        if len(value) == 0:
            raise ValueError("cluster_selection_method must not be empty")
        ordered_methods = tuple(sorted(set(value)))
        # sorted() widens the element type to str; cast restores the Literal.
        return cast("tuple[Literal['eom', 'leaf'], ...]", ordered_methods)
# Validation model for the "ranking" section of a profile manifest.
# All five fields are required; the three weights must be finite and >= 0.
class _RankingModel(BaseModel):
    model_config = ConfigDict(extra="forbid", frozen=True)

    base_score_weight: float
    cluster_count_weight: float
    cluster_count_direction: Literal["prefer_higher", "prefer_lower", "neutral"]
    noise_weight: float
    noise_direction: Literal["prefer_lower", "prefer_higher", "neutral"]

    @field_validator(
        "base_score_weight",
        "cluster_count_weight",
        "noise_weight",
    )
    @classmethod
    def _weight(cls, value: float) -> float:
        # NaN and +/-inf fail the isfinite test; negatives fail the comparison.
        if math.isfinite(value) and value >= 0.0:
            return value
        raise ValueError("profile ranking weights must be finite and non-negative")
def load_manifest_file(path: Path) -> ClusteringProfileManifest:
    """Read the JSON object at ``path`` and validate it as a profile manifest.

    Raises:
        AnalyticsWorkflowError: if reading the file fails with ``OSError``,
            ``TypeError`` or ``ValueError``; the original exception is
            chained.  Validation failures are reported by
            ``load_manifest_value``.
    """
    try:
        raw_manifest = read_json_object(path)
    except (OSError, TypeError, ValueError) as error:
        message = f"cannot load analytics profile manifest {path}: {error}"
        raise AnalyticsWorkflowError(message) from error
    else:
        return load_manifest_value(raw_manifest)
def load_bundled_profiles() -> dict[str, ClusteringProfileManifest]:
    """Load every ``*.json`` manifest shipped in the package's ``manifests`` dir.

    Resources are processed in file-name order and returned keyed by
    ``profile_id``.

    Raises:
        AnalyticsWorkflowError: if a resource cannot be read or parsed, its
            top-level JSON value is not an object, it fails manifest
            validation, or two resources declare the same ``profile_id``.
    """
    manifest_dir = files("codeclone.analytics.profiles").joinpath("manifests")
    json_resources = sorted(
        (entry for entry in manifest_dir.iterdir() if entry.name.endswith(".json")),
        key=lambda entry: entry.name,
    )
    loaded: dict[str, ClusteringProfileManifest] = {}
    for entry in json_resources:
        try:
            # Imported lazily, only once there is a manifest to parse.
            import orjson

            raw_manifest = orjson.loads(entry.read_bytes())
        except (OSError, ValueError) as error:
            raise AnalyticsWorkflowError(
                f"cannot load bundled analytics profile {entry.name}: {error}"
            ) from error
        if not isinstance(raw_manifest, dict):
            raise AnalyticsWorkflowError(
                f"bundled analytics profile must be an object: {entry.name}"
            )
        profile = load_manifest_value(raw_manifest)
        profile_key = profile.profile_id
        if profile_key in loaded:
            raise AnalyticsWorkflowError(
                f"conflicting profile manifest for profile_id: {profile_key}"
            )
        loaded[profile_key] = profile
    return loaded
representation_kinds=model.representation_kinds, + label=model.label, + description=model.description, + applicability=ProfileApplicability( + corpus_size_class=applicability.corpus_size_class, + min_record_count=applicability.min_record_count, + max_record_count=applicability.max_record_count, + embedding_contract_versions=applicability.embedding_contract_versions, + ), + primary_space=ProfileSearchSpace( + pca_dimensions=primary.pca_dimensions, + min_cluster_size=primary.min_cluster_size, + min_samples=primary.min_samples, + cluster_selection_method=primary.cluster_selection_method, + ), + suitability=ProfileSuitabilityRules(**suitability.model_dump()), + ranking=ProfileRankingPolicy(**ranking.model_dump()), + ) + + +def _validate_pair( + minimum: int | float | None, + maximum: int | float | None, + label: str, +) -> None: + if minimum is not None and maximum is not None and minimum > maximum: + raise ValueError(f"minimum {label} must be <= maximum") + + +__all__ = [ + "canonical_manifest_json", + "load_bundled_profiles", + "load_manifest_file", + "load_manifest_value", + "manifest_value", + "profile_manifest_digest", +] diff --git a/codeclone/analytics/profiles/manifests/intent-small-balanced-v1.json b/codeclone/analytics/profiles/manifests/intent-small-balanced-v1.json new file mode 100644 index 00000000..b22d374f --- /dev/null +++ b/codeclone/analytics/profiles/manifests/intent-small-balanced-v1.json @@ -0,0 +1,38 @@ +{ + "manifest_schema_version": "1", + "profile_id": "intent-small-balanced-v1", + "profile_version": "1.0.0", + "lane": "intent", + "representation_kinds": [ + "intent.description.v1", + "intent.description_with_frame.v1" + ], + "label": "Small intent corpus — balanced inspection", + "description": "Balanced lens for routine inspection: moderate cluster count, capped dominant share, moderate noise band.", + "applicability": { + "corpus_size_class": "small_intent", + "min_record_count": 50, + "max_record_count": 500, + 
"embedding_contract_versions": ["2"] + }, + "primary_space": { + "pca_dimensions": [32, 64, 128], + "min_cluster_size": [5, 8, 12], + "min_samples": [1, 3, 5], + "cluster_selection_method": ["eom", "leaf"] + }, + "suitability": { + "min_non_noise_cluster_count": 3, + "max_non_noise_cluster_count": 24, + "max_dominant_cluster_ratio": 0.45, + "max_noise_ratio": 0.50, + "min_non_noise_count": 80 + }, + "ranking": { + "base_score_weight": 1.0, + "cluster_count_weight": 0.0, + "cluster_count_direction": "neutral", + "noise_weight": 0.0, + "noise_direction": "prefer_lower" + } +} diff --git a/codeclone/analytics/profiles/manifests/intent-small-discovery-v1.json b/codeclone/analytics/profiles/manifests/intent-small-discovery-v1.json new file mode 100644 index 00000000..eae16ead --- /dev/null +++ b/codeclone/analytics/profiles/manifests/intent-small-discovery-v1.json @@ -0,0 +1,38 @@ +{ + "manifest_schema_version": "1", + "profile_id": "intent-small-discovery-v1", + "profile_version": "1.0.0", + "lane": "intent", + "representation_kinds": [ + "intent.description.v1", + "intent.description_with_frame.v1" + ], + "label": "Small intent corpus — discovery", + "description": "Discovery lens for narrower candidate families. 
Rejects single-cluster megacluster partitions even when technically valid.", + "applicability": { + "corpus_size_class": "small_intent", + "min_record_count": 50, + "max_record_count": 500, + "embedding_contract_versions": ["2"] + }, + "primary_space": { + "pca_dimensions": [16, 32, 64], + "min_cluster_size": [5, 8], + "min_samples": [1, 3], + "cluster_selection_method": ["leaf", "eom"] + }, + "suitability": { + "min_non_noise_cluster_count": 2, + "max_non_noise_cluster_count": 32, + "max_dominant_cluster_ratio": 0.50, + "max_noise_ratio": 0.60, + "min_non_noise_count": 60 + }, + "ranking": { + "base_score_weight": 0.5, + "cluster_count_weight": 0.35, + "cluster_count_direction": "prefer_higher", + "noise_weight": 0.15, + "noise_direction": "prefer_lower" + } +} diff --git a/codeclone/analytics/profiles/manifests/intent-small-outlier-v1.json b/codeclone/analytics/profiles/manifests/intent-small-outlier-v1.json new file mode 100644 index 00000000..975d843c --- /dev/null +++ b/codeclone/analytics/profiles/manifests/intent-small-outlier-v1.json @@ -0,0 +1,38 @@ +{ + "manifest_schema_version": "1", + "profile_id": "intent-small-outlier-v1", + "profile_version": "1.0.0", + "lane": "intent", + "representation_kinds": [ + "intent.description.v1", + "intent.description_with_frame.v1" + ], + "label": "Small intent corpus — outlier-oriented", + "description": "Outlier lens: a few dense cores with substantial noise. 
Rejects too many tiny non-noise clusters.", + "applicability": { + "corpus_size_class": "small_intent", + "min_record_count": 50, + "max_record_count": 500, + "embedding_contract_versions": ["2"] + }, + "primary_space": { + "pca_dimensions": [64, 128], + "min_cluster_size": [12, 15, 20], + "min_samples": [5], + "cluster_selection_method": ["eom"] + }, + "suitability": { + "min_non_noise_cluster_count": 2, + "max_non_noise_cluster_count": 12, + "min_noise_ratio": 0.25, + "max_dominant_cluster_ratio": 0.70, + "min_non_noise_count": 30 + }, + "ranking": { + "base_score_weight": 0.4, + "cluster_count_weight": 0.2, + "cluster_count_direction": "prefer_lower", + "noise_weight": 0.4, + "noise_direction": "prefer_higher" + } +} diff --git a/codeclone/analytics/profiles/manifests/intent-small-stable-v1.json b/codeclone/analytics/profiles/manifests/intent-small-stable-v1.json new file mode 100644 index 00000000..7a1dd3ae --- /dev/null +++ b/codeclone/analytics/profiles/manifests/intent-small-stable-v1.json @@ -0,0 +1,39 @@ +{ + "manifest_schema_version": "1", + "profile_id": "intent-small-stable-v1", + "profile_version": "1.0.0", + "lane": "intent", + "representation_kinds": [ + "intent.description.v1", + "intent.description_with_frame.v1" + ], + "label": "Small intent corpus — stable families", + "description": "Prefer a small number of broad, stable families. 
High dominant-cluster share and moderate noise are acceptable.", + "applicability": { + "corpus_size_class": "small_intent", + "min_record_count": 50, + "max_record_count": 500, + "embedding_contract_versions": ["2"] + }, + "primary_space": { + "pca_dimensions": [64, 128], + "min_cluster_size": [12, 15, 20], + "min_samples": [3, 5], + "cluster_selection_method": ["eom", "leaf"] + }, + "suitability": { + "min_non_noise_cluster_count": 2, + "max_non_noise_cluster_count": 8, + "max_dominant_cluster_ratio": 0.85, + "min_noise_ratio": 0.05, + "max_noise_ratio": 0.55, + "min_non_noise_count": 40 + }, + "ranking": { + "base_score_weight": 0.6, + "cluster_count_weight": 0.2, + "cluster_count_direction": "prefer_lower", + "noise_weight": 0.2, + "noise_direction": "prefer_lower" + } +} diff --git a/codeclone/analytics/profiles/models.py b/codeclone/analytics/profiles/models.py new file mode 100644 index 00000000..005d853b --- /dev/null +++ b/codeclone/analytics/profiles/models.py @@ -0,0 +1,80 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from ...contracts import CORPUS_EMBEDDING_CONTRACT_VERSION + + +@dataclass(frozen=True, slots=True) +class ProfileApplicability: + corpus_size_class: Literal["small_intent"] + min_record_count: int | None = None + max_record_count: int | None = None + embedding_contract_versions: tuple[str, ...] 
@dataclass(frozen=True, slots=True)
class ProfileRankingPolicy:
    """Weights and directions a profile uses to re-score candidate runs.

    The defaults describe a policy that ranks on the base score alone: both
    secondary weights are 0.0.  This dataclass performs no validation of its
    own; the manifest loader's pydantic model is what rejects negative or
    non-finite weights.
    """

    # Multiplier applied to a run's base score.
    base_score_weight: float = 1.0
    # Weight of the cluster-count term; 0.0 disables it.
    cluster_count_weight: float = 0.0
    # Whether more clusters ("prefer_higher") or fewer ("prefer_lower") raise
    # the score; "neutral" contributes nothing.
    cluster_count_direction: Literal[
        "prefer_higher",
        "prefer_lower",
        "neutral",
    ] = "neutral"
    # Weight of the noise-ratio term; 0.0 disables it.
    noise_weight: float = 0.0
    # Whether a lower or a higher noise ratio raises the score.
    noise_direction: Literal[
        "prefer_lower",
        "prefer_higher",
        "neutral",
    ] = "prefer_lower"
def compute_profile_rank_score(
    *,
    policy: ProfileRankingPolicy,
    base_score: float,
    metrics: RunPartitionMetrics,
    batch_max_cluster_count: int,
) -> tuple[float, float, float]:
    """Score one run under a profile's ranking policy.

    The cluster count is normalised by ``batch_max_cluster_count`` (clamped
    to at least 1 so the division is always defined).

    Returns:
        ``(profile_score, cluster_count_term, noise_term)``, where
        ``profile_score`` is the weighted base score plus both terms.
    """
    normalizer = batch_max_cluster_count if batch_max_cluster_count > 1 else 1
    cluster_count_term = _direction_term(
        weight=policy.cluster_count_weight,
        value=metrics.cluster_count / normalizer,
        direction=policy.cluster_count_direction,
    )
    noise_ratio_term = _direction_term(
        weight=policy.noise_weight,
        value=metrics.noise_ratio,
        direction=policy.noise_direction,
    )
    weighted_base = policy.base_score_weight * base_score
    total_score = weighted_base + cluster_count_term + noise_ratio_term
    return total_score, cluster_count_term, noise_ratio_term
candidates: + profile_score, cluster_term, noise_term = compute_profile_rank_score( + policy=profile.ranking, + base_score=candidate.base_score, + metrics=candidate.metrics, + batch_max_cluster_count=batch_max, + ) + scored.append( + ( + ProfileRankedRun( + clustering_run_id=candidate.clustering_run_id, + base_score=candidate.base_score, + profile_score=profile_score, + effective=candidate.effective, + metrics=candidate.metrics, + ), + cluster_term, + noise_term, + ) + ) + winner, cluster_term, noise_term = min( + scored, + key=lambda item: ( + -item[0].profile_score, + item[0].effective.pca_dimensions, + item[0].effective.min_cluster_size, + item[0].effective.min_samples, + item[0].effective.cluster_selection_method, + ), + ) + return winner, ProfileRecommendationRationale( + profile_id=profile.profile_id, + profile_manifest_digest=profile_manifest_digest(profile), + base_score=winner.base_score, + profile_score=winner.profile_score, + cluster_count_term=cluster_term, + noise_term=noise_term, + ranking_policy=profile.ranking, + ) + + +def _direction_term(*, weight: float, value: float, direction: str) -> float: + if direction == "prefer_higher": + return weight * value + if direction == "prefer_lower": + return weight * (1.0 - value) + return 0.0 + + +__all__ = [ + "ProfileRankedRun", + "ProfileRecommendationRationale", + "compute_profile_rank_score", + "rank_profile_recommendations", +] diff --git a/codeclone/analytics/profiles/registry.py b/codeclone/analytics/profiles/registry.py new file mode 100644 index 00000000..1aea165f --- /dev/null +++ b/codeclone/analytics/profiles/registry.py @@ -0,0 +1,108 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
@dataclass(frozen=True, slots=True)
class ProfileRegistry:
    """Resolved set of clustering profiles and where each one came from.

    ``resolve_profile_registry`` builds instances with read-only mappings
    ordered by ``profile_id``.
    """

    # Manifests keyed by profile_id.
    profiles: Mapping[str, ClusteringProfileManifest]
    # Profile requested as the default, or None when none was requested.
    # resolve_profile_registry rejects an id that is not in `profiles`.
    default_profile_id: str | None
    # Origin label per profile_id: "bundled:<file name>" for bundled
    # manifests, otherwise the manifest path as a string.
    sources: Mapping[str, str]
analytics profile: {default_profile_id}") + return ProfileRegistry( + profiles=MappingProxyType(dict(sorted(profiles.items()))), + default_profile_id=default_profile_id, + sources=MappingProxyType(dict(sorted(sources.items()))), + ) + + +def get_profile( + registry: ProfileRegistry, + profile_id: str, +) -> ClusteringProfileManifest: + try: + return registry.profiles[profile_id] + except KeyError as exc: + raise AnalyticsWorkflowError( + f"unknown analytics profile: {profile_id}" + ) from exc + + +def list_profiles(registry: ProfileRegistry) -> tuple[str, ...]: + return tuple(sorted(registry.profiles)) + + +def _bundled_filename(profile_id: str) -> str: + manifests = files("codeclone.analytics.profiles").joinpath("manifests") + expected = f"{profile_id}.json" + for resource in manifests.iterdir(): + if resource.name == expected: + return resource.name + return expected + + +__all__ = [ + "ProfileRegistry", + "get_profile", + "list_profiles", + "resolve_profile_registry", +] diff --git a/codeclone/analytics/profiles/suitability.py b/codeclone/analytics/profiles/suitability.py new file mode 100644 index 00000000..ae197da8 --- /dev/null +++ b/codeclone/analytics/profiles/suitability.py @@ -0,0 +1,167 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
@dataclass(frozen=True, slots=True)
class ProfileObservedMetrics:
    """Partition metrics recorded alongside a profile suitability verdict.

    ``assess_profile_suitability`` copies these values from a
    ``RunPartitionMetrics`` and compares them with the profile's rules.
    """

    # Number of clusters, excluding noise (RunPartitionMetrics.cluster_count).
    non_noise_cluster_count: int
    # Share of all items labelled as noise.
    noise_ratio: float
    # Largest cluster's size as a share of all items.
    dominant_cluster_ratio: float
    # Largest cluster's size as a share of non-noise items; None when that
    # share is undefined.  Recorded only: assess_profile_suitability checks no
    # rule against it.
    dominant_assigned_ratio: float | None
    # Number of items assigned to a non-noise cluster.
    non_noise_count: int
+ _append_above( + reasons, + observed.non_noise_cluster_count, + rules.max_non_noise_cluster_count, + "too_many_clusters", + ) + _append_above( + reasons, + observed.dominant_cluster_ratio, + rules.max_dominant_cluster_ratio, + "dominant_ratio_above_max", + ) + _append_below( + reasons, + observed.dominant_cluster_ratio, + rules.min_dominant_cluster_ratio, + "dominant_ratio_below_min", + ) + _append_above( + reasons, + observed.noise_ratio, + rules.max_noise_ratio, + "noise_ratio_above_max", + ) + _append_below( + reasons, + observed.noise_ratio, + rules.min_noise_ratio, + "noise_ratio_below_min", + ) + _append_below( + reasons, + observed.non_noise_count, + rules.min_non_noise_count, + "insufficient_assigned_mass", + ) + ordered = tuple(sorted(reasons)) + return ProfileSuitabilityAssessment( + profile_id=profile.profile_id, + profile_version=profile.profile_version, + profile_manifest_digest=digest, + suitable_for_profile=not ordered, + rejection_reasons=ordered, + observed=observed, + ) + + +def profile_assessment_digest( + *, + profile_batch_id: str, + clustering_run_id: str, + run_digest: str, + profile_manifest_digest: str, + assessment: ProfileSuitabilityAssessment, +) -> str: + payload = { + "profile_batch_id": profile_batch_id, + "clustering_run_id": clustering_run_id, + "run_digest": run_digest, + "profile_id": assessment.profile_id, + "profile_version": assessment.profile_version, + "profile_manifest_digest": profile_manifest_digest, + "suitable_for_profile": assessment.suitable_for_profile, + "rejection_reasons": list(assessment.rejection_reasons), + "observed": ( + asdict(assessment.observed) if assessment.observed is not None else None + ), + } + return sha256_hex(json_text(payload, sort_keys=True)) + + +def _append_below( + reasons: list[str], + actual: int | float, + minimum: int | float | None, + code: str, +) -> None: + if minimum is not None and actual < minimum: + reasons.append(code) + + +def _append_above( + reasons: list[str], + actual: int | 
float, + maximum: int | float | None, + code: str, +) -> None: + if maximum is not None and actual > maximum: + reasons.append(code) + + +__all__ = [ + "ProfileObservedMetrics", + "ProfileSuitabilityAssessment", + "assess_profile_suitability", + "profile_assessment_digest", +] diff --git a/codeclone/analytics/report/__init__.py b/codeclone/analytics/report/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/report/html.py b/codeclone/analytics/report/html.py new file mode 100644 index 00000000..dac4e5ce --- /dev/null +++ b/codeclone/analytics/report/html.py @@ -0,0 +1,568 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import html +import json +from collections.abc import Mapping, Sequence + +from ..clustering.models import NOISE_LABEL +from ..contracts import ClusteringRunRecord, CorpusSnapshotRecord +from ..exceptions import AnalyticsWorkflowError +from ..store.sqlite import SqliteCorpusAnalyticsStore +from .interpret import ( + build_profile_summary, + build_sweep_comparison_projection, + enrich_run_for_export, +) +from .messages.profiles import profile_rejection_message + + +def render_analytics_html( + *, + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, + comparison_only: bool = False, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> str: + if run.snapshot_id != snapshot.snapshot_id: + raise AnalyticsWorkflowError( + f"run {run.clustering_run_id} does not belong to {snapshot.snapshot_id}" + ) + generation = store.get_embedding_generation(run.embedding_generation_id) + if comparison_only: + body = _render_comparison_table( + store, + snapshot, + run, + profile_id=profile_id, + 
profile_batch_id=profile_batch_id, + ) + title = "Corpus Analytics Sweep Comparison" + run_line = "" + banner = "" + else: + projection = enrich_run_for_export( + store=store, + snapshot=snapshot, + run=run, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + run_payload = _mapping(projection["run"]) + presentation = _mapping(run_payload.get("presentation")) + body = _render_profile_context(run_payload) + _render_detail_view(projection) + title = "Corpus Analytics Cluster Report" + run_line = f"

Run: {html.escape(run.clustering_run_id)}

" + banner = _render_run_banner(presentation) + reproducibility_note = "" + if generation is None: + reproducibility_note = ( + '

Reproducibility: Embedding ' + "generation metadata is unavailable. This report is limited to " + "persisted diagnostic facts.

" + ) + elif not generation.exact_model_artifact_reproducibility: + reproducibility_note = ( + '

Reproducibility: Full vector ' + "reproducibility is not guaranteed from model id alone.

" + ) + return f""" + + + + +{html.escape(title)} + + + +

{html.escape(title)}

+

Snapshot: {html.escape(snapshot.snapshot_id)}

+{run_line} +{banner} +{reproducibility_note} +{body} + + +""" + + +def _render_run_banner(status: Mapping[str, object]) -> str: + kind = str(status.get("banner_kind", "technically_invalid")) + css = { + "maintainer_selected": "success", + "profile_recommended": "success", + "heuristic_recommended": "success", + "valid_but_profile_rejected": "warning", + "no_profile_suitable_candidate": "warning", + "candidate_only": "warning", + "technically_invalid": "error", + }.get(kind, "warning") + message = str(status.get("banner_message", "Run presentation is unavailable.")) + return ( + f'

' + f"{html.escape(kind.replace('_', ' ').title())}: " + f"{html.escape(message)}

" + ) + + +def _render_comparison_table( + store: SqliteCorpusAnalyticsStore, + snapshot: CorpusSnapshotRecord, + current_run: ClusteringRunRecord, + *, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> str: + candidates, summary = build_sweep_comparison_projection( + store=store, + snapshot=snapshot, + embedding_generation_id=current_run.embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + profile_summary = build_profile_summary( + store=store, + snapshot=snapshot, + embedding_generation_id=current_run.embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + rows: list[str] = [] + for projection in candidates: + run_payload = _mapping(projection.get("run")) + comparison = _mapping(projection.get("comparison")) + presentation = _mapping(run_payload.get("presentation")) + validity = _mapping(run_payload.get("validity")) + rows.append( + "" + f"{_escaped(run_payload.get('clustering_run_id'))}" + f"{_escaped_json(run_payload.get('requested_parameters'))}" + f"{_escaped_json(run_payload.get('effective_parameters'))}" + f"{_available(comparison.get('largest_cluster_size'))}" + f"{_ratio(comparison.get('dominant_cluster_ratio'))}" + f"{_ratio(comparison.get('dominant_assigned_ratio'))}" + f"{_number(comparison.get('score'))}" + f"{_available(comparison.get('rank'))}" + f"{_escaped(validity.get('technically_valid'))}" + f"{_escaped(presentation.get('banner_kind'))}" + f"{_escaped(presentation.get('projection_mode'))}" + f"{_escaped(comparison.get('recommended_by_heuristic'))}" + f"{_available(comparison.get('profile_suitable'))}" + f"{_escaped(comparison.get('is_profile_recommended'))}" + f"{_escaped(presentation.get('selected_by_maintainer'))}" + "" + ) + profile_header = ( + _render_profile_summary(profile_summary) if profile_summary is not None else "" + ) + return profile_header + ( + "

Candidate runs

" + '

Recommendation is heuristic evidence; maintainer ' + "selection remains an explicit separate decision.

" + f"

Candidates: {summary['candidate_count']}; technically valid: " + f"{summary['technically_valid_count']}; technically invalid: " + f"{summary['technically_invalid_count']}.

" + "" + "" + "" + "" + "" + "" + "" + "".join(rows) + "
RunRequestedEffectiveLargest clusterDominant / corpusDominant / assignedScoreRankTechnically validBannerProjection modeRecommendedProfile suitableProfile recommendedMaintainer selected
" + ) + + +def _render_profile_summary(summary: Mapping[str, object]) -> str: + presentation = _mapping(summary.get("presentation")) + banner = _render_run_banner(presentation) if presentation else "" + return ( + '

Profile lens

' + f"

{_escaped(summary.get('label'))}

" + f"

{_escaped(summary.get('description'))}

" + f"

Profile: {_escaped(summary.get('profile_id'))}; " + f"batch: {_escaped(summary.get('profile_batch_id'))}; " + f"suitable candidates: {_escaped(summary.get('profile_suitable_count'))}." + f"

{banner}
" + ) + + +def _render_profile_context(run_payload: Mapping[str, object]) -> str: + context = _mapping(run_payload.get("profile_context")) + if not context: + return "" + suitability = _mapping(context.get("suitability")) + raw_reasons = suitability.get("rejection_reasons") + reasons = ( + [profile_rejection_message(str(code)) for code in raw_reasons] + if isinstance(raw_reasons, list) + else [] + ) + reason_html = ( + "
    " + + "".join(f"
  • {html.escape(reason)}
  • " for reason in reasons) + + "
" + if reasons + else "

Profile suitability checks passed.

" + ) + return ( + '

Profile lens

' + f"

{_escaped(context.get('label'))}

" + f"

{_escaped(context.get('description'))}

" + f"

Suitable: {_escaped(suitability.get('suitable_for_profile'))}; " + f"profile recommended: {_escaped(context.get('is_profile_recommended'))}." + f"

{reason_html}
" + ) + + +def _render_detail_view(projection: Mapping[str, object]) -> str: + run_payload = _mapping(projection.get("run")) + presentation = _mapping(run_payload.get("presentation")) + if presentation.get("projection_mode") != "full_interpretation": + return _render_limited_overview(run_payload) + clusters = _mapping_list(projection.get("clusters")) + sections = [ + _render_full_overview(run_payload), + _render_cluster_index_projection(clusters), + ] + sections.extend(_render_cluster_panel_projection(cluster) for cluster in clusters) + return "\n".join(sections) + + +def _render_full_overview(run_payload: Mapping[str, object]) -> str: + metrics = _mapping(run_payload.get("partition_metrics")) + validity = _mapping(run_payload.get("validity")) + return ( + "

Overview

" + f"" + f"" + f"" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "
Corpus items{_escaped(metrics.get('total_items'))}
Clusters{_escaped(metrics.get('cluster_count'))}
Noise items{_escaped(metrics.get('noise_count'))}
Noise ratio{_ratio(metrics.get('noise_ratio'))}
Dominant cluster / corpus{_ratio(metrics.get('dominant_cluster_ratio'))}
Dominant cluster / assigned{_ratio(metrics.get('dominant_assigned_ratio'))}
Cluster-size histogram{_escaped_json(metrics.get('cluster_size_histogram'))}
Technically valid{_escaped(validity.get('technically_valid'))}
Requested parameters{_escaped_json(run_payload.get('requested_parameters'))}
Effective parameters{_escaped_json(run_payload.get('effective_parameters'))}
" + ) + + +def _render_limited_overview(run_payload: Mapping[str, object]) -> str: + validity = _mapping(run_payload.get("validity")) + facts = _mapping(run_payload.get("diagnostic_facts")) + failed = validity.get("failed_invariants") + return ( + "

Limited diagnostic overview

" + '

Partition-derived metrics and item interpretation are ' + "withheld because formal validity checks failed.

" + "" + f"" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "
Failed invariants{_escaped_json(failed)}
Run status{_escaped(facts.get('run_status'))}
Completed status{_escaped(facts.get('completed_status'))}
Snapshot item count{_available(facts.get('snapshot_item_count'))}
Assignment count{_available(facts.get('assignment_count'))}
Summary count{_available(facts.get('summary_count'))}
" + ) + + +def _render_cluster_index_projection( + clusters: Sequence[Mapping[str, object]], +) -> str: + rows = [] + for cluster in clusters: + diagnostics = _mapping(cluster.get("diagnostics")) + rows.append( + "" + f"{_escaped(_cluster_label(cluster))}" + f"{_escaped(cluster.get('size'))}" + f"{_number(diagnostics.get('size_percent'), suffix='%')}" + f"{_number(diagnostics.get('average_membership_strength'))}" + "" + f"{_escaped(diagnostics.get('medoid_snapshot_item_id', ''))}" + "" + ) + return ( + "

Cluster index

" + "" + "" + "" + "".join(rows) + "
ClusterSizeCorpus %Average membershipMedoid
" + ) + + +def _render_cluster_panel_projection(cluster: Mapping[str, object]) -> str: + label = _cluster_label(cluster) + is_noise = cluster.get("cluster_label") == NOISE_LABEL + css = "cluster noise" if is_noise else "cluster" + diagnostics = _mapping(cluster.get("diagnostics")) + interpretation = _mapping(cluster.get("interpretation")) + parts = [ + f'

Cluster {_escaped(label)}

', + f"

Size: {_escaped(cluster.get('size'))}; membership digest: " + f"{_escaped(cluster.get('membership_digest'))}

", + _render_id_group("Nearest clusters", diagnostics.get("nearest_clusters")), + ] + if is_noise: + parts.append( + _render_projected_noise( + _mapping_list(interpretation.get("noise_item_previews")) + ) + ) + else: + parts.append( + _render_item_preview_table( + "Representative items", + _mapping_list(interpretation.get("representative_previews")), + ) + ) + parts.append( + _render_item_preview_table( + "Boundary items", + _mapping_list(interpretation.get("boundary_previews")), + ) + ) + parts.append( + _render_projected_correlations( + _mapping(interpretation.get("categorical_correlations")) + ) + ) + parts.append( + _render_numeric_summaries(_mapping(interpretation.get("numeric_summaries"))) + ) + provenance = _mapping(interpretation.get("provenance_completeness")) + if provenance: + parts.append(_render_provenance(provenance)) + parts.append( + "

Machine-inspectability signals

"
+            f"{_escaped_json(interpretation.get('machine_inspectability_signals'))}"
+            "
" + ) + parts.append("
") + return "\n".join(parts) + + +def _render_item_preview_table( + title: str, + previews: Sequence[Mapping[str, object]], +) -> str: + if not previews: + return f'

{_escaped(title)}

None

' + rows = [] + for preview in previews: + metadata = ", ".join( + f"{field}={_display_metadata_value(preview.get(field))}" + for field in ( + "agent_family", + "outcome", + "quality_tier", + "scope_check_status", + "verification_status", + ) + ) + rows.append( + "" + f"{_escaped(preview.get('snapshot_item_id'))}" + f"{_escaped(preview.get('source_record_id'))}" + f'' + f"{_escaped(preview.get('normalized_text_preview'))}" + f"{_number(preview.get('membership_strength'))}" + f"{_escaped(metadata)}" + "" + ) + return ( + f"

{_escaped(title)}

" + "" + "" + "" + "".join(rows) + "
ItemSource recordNormalized text previewMembershipMetadata
" + ) + + +def _render_projected_correlations( + correlations: Mapping[str, object], +) -> str: + rows: list[str] = [] + for field, raw_cells in sorted(correlations.items()): + for raw_cell in _mapping_list(raw_cells): + display = _display_metadata_value(raw_cell.get("value")) + insufficient = bool(raw_cell.get("insufficient_sample")) + css = ' class="insufficient"' if insufficient else "" + rate = "n/a" if insufficient else str(raw_cell.get("rate")) + rows.append( + f"{_escaped(field)}" + f"{_escaped(display)}" + f"{_escaped(raw_cell.get('numerator'))}" + f"{_escaped(raw_cell.get('denominator'))}" + f"{_escaped(rate)}" + ) + if not rows: + return '

Categorical correlations

None

' + return ( + "

Categorical correlations

" + "" + "" + + "".join(rows) + + "
FieldValueNumeratorDenominatorRate
" + ) + + +def _render_numeric_summaries(summaries: Mapping[str, object]) -> str: + rows: list[str] = [] + for field, raw_summary in sorted(summaries.items()): + summary = _mapping(raw_summary) + rows.append( + "" + f"{_escaped(field)}" + f"{_escaped(summary.get('known_count'))}" + f"{_escaped(summary.get('unknown_count'))}" + f"{_available(summary.get('min'))}" + f"{_available(summary.get('p25'))}" + f"{_available(summary.get('median'))}" + f"{_available(summary.get('p75'))}" + f"{_available(summary.get('max'))}" + f"{_available(summary.get('mean'))}" + f"{_escaped_json(summary.get('buckets'))}" + "" + ) + return ( + "

Numeric summaries

" + "" + "" + "" + + "".join(rows) + + "
FieldKnownUnknownMinP25MedianP75MaxMeanBuckets
" + ) + + +def _render_provenance(provenance: Mapping[str, object]) -> str: + return ( + "

Provenance completeness

" + f"" + "" + f"" + "" + f"" + "" + f"" + "" + f"" + "
Items{_escaped(provenance.get('item_count'))}
Trajectory selected{_escaped(provenance.get('trajectory_selected_count'))}
Patch Trail present{_escaped(provenance.get('patch_trail_present_count'))}
Registry overlay present{_escaped(provenance.get('registry_overlay_present_count'))}
Unknown rates{_escaped_json(provenance.get('fields_unknown_rate'))}
" + ) + + +def _render_projected_noise( + entries: Sequence[Mapping[str, object]], +) -> str: + if not entries: + return '

Noise explorer

No noise items.

' + rows: list[str] = [] + for entry in entries: + preview = _mapping(entry.get("preview")) + flags = _mapping(entry.get("flags")) + active_flags = sorted(key for key, enabled in flags.items() if enabled) + rows.append( + "" + f"{_escaped(preview.get('snapshot_item_id'))}" + f"{_escaped(', '.join(active_flags) or 'none')}" + f'' + f"{_escaped(preview.get('normalized_text_preview'))}" + f"{_number(preview.get('membership_strength'))}" + "" + ) + return ( + "

Noise explorer

" + "" + "" + "" + "".join(rows) + "
ItemObservable flags (not semantic classification)Normalized text previewMembership
" + ) + + +def _render_id_group(title: str, value: object) -> str: + if not isinstance(value, list) or not value: + return f'

{html.escape(title)}

None

' + pills = "".join( + f'{html.escape(str(item))}' for item in value + ) + return f"

{html.escape(title)}

{pills}

" + + +def _cluster_label(cluster: Mapping[str, object]) -> str: + display = cluster.get("display_cluster_id") + return "noise" if display is None else str(display) + + +def _display_metadata_value(value: object) -> str: + if not isinstance(value, Mapping): + return "unknown" + return str(value.get("display", "unknown")) + + +def _mapping(value: object) -> dict[str, object]: + return dict(value) if isinstance(value, Mapping) else {} + + +def _mapping_list(value: object) -> list[dict[str, object]]: + if not isinstance(value, list): + return [] + return [dict(item) for item in value if isinstance(item, Mapping)] + + +def _escaped(value: object) -> str: + return html.escape(str(value)) + + +def _escaped_json(value: object) -> str: + return html.escape(json.dumps(value, sort_keys=True, ensure_ascii=False)) + + +def _available(value: object) -> str: + return "unavailable" if value is None else _escaped(value) + + +def _number(value: object, *, suffix: str = "") -> str: + if isinstance(value, bool) or not isinstance(value, int | float): + return "unavailable" + return f"{float(value):.3f}{suffix}" + + +def _ratio(value: object) -> str: + if isinstance(value, bool) or not isinstance(value, int | float): + return "unavailable" + return f"{float(value):.1%}" + + +__all__ = ["render_analytics_html"] diff --git a/codeclone/analytics/report/interpret.py b/codeclone/analytics/report/interpret.py new file mode 100644 index 00000000..c78fa7ea --- /dev/null +++ b/codeclone/analytics/report/interpret.py @@ -0,0 +1,1208 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import math +from collections import defaultdict +from collections.abc import Mapping, Sequence +from dataclasses import asdict, dataclass +from typing import Literal + +from ..clustering.diagnostics import ( + EMPTY_MEANS_CONFIRMED_NONE_FIELDS, + MAX_PREVIEW_CHARACTERS, + ItemPreview, + MetadataDisplayValue, + build_item_preview, + metadata_display_value, + numeric_field_summary, + preview_digest, +) +from ..clustering.models import NOISE_LABEL +from ..clustering.sweep import score_clustering_result +from ..contracts import ( + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + ProfileAssessmentRecord, + ProfileBatchRecord, + RunSelectionRecord, +) +from ..exceptions import AnalyticsWorkflowError +from ..integrity import PartitionValidityAssessment, assess_partition_validity +from ..metrics.partition_metrics import ( + RunPartitionMetrics, + compute_run_partition_metrics, +) +from ..metrics.partition_metrics import ( + _cluster_size_histogram as _partition_cluster_size_histogram, +) +from ..store.protocols import CorpusStore +from .messages.profiles import profile_banner_message + +INTERPRETATION_CONTRACT_VERSION = "1.1" +SMALL_CLUSTER_PROVENANCE_THRESHOLD = 15 +INSPECTABILITY_TRACKED_METADATA_FIELDS = ( + "agent_family", + "outcome", + "quality_tier", + "scope_check_status", + "verification_status", + "anomaly_kinds", +) +PREVIEW_SCOPES = ( + "cluster_representatives", + "cluster_boundaries", + "noise_items", +) +_NUMERIC_FIELDS = ( + "declared_file_count", + "changed_file_count", + "description_length", +) + + +@dataclass(frozen=True, slots=True) +class DiagnosticRunFacts: + snapshot_item_count: int | None + assignment_count: int | None + summary_count: int | None + completed_status: bool + run_status: str | None + clustering_run_id: str + snapshot_id: str + + 
+@dataclass(frozen=True, slots=True) +class RunPresentationStatus: + technically_valid: bool + failed_invariants: tuple[str, ...] + recommended_by_heuristic: bool + selected_by_maintainer: bool + is_candidate_only: bool + projection_mode: Literal["full_interpretation", "limited_diagnostic"] + banner_kind: Literal[ + "maintainer_selected", + "profile_recommended", + "heuristic_recommended", + "valid_but_profile_rejected", + "no_profile_suitable_candidate", + "candidate_only", + "technically_invalid", + ] + banner_message: str + + +@dataclass(frozen=True, slots=True) +class ProvenanceCompletenessSummary: + item_count: int + trajectory_selected_count: int + patch_trail_present_count: int + registry_overlay_present_count: int + agent_family_known_count: int + outcome_known_count: int + anomaly_metadata_known_count: int + fields_unknown_rate: dict[str, float] + + +def enrich_run_for_export( + *, + store: CorpusStore, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> dict[str, object]: + if run.snapshot_id != snapshot.snapshot_id: + raise AnalyticsWorkflowError( + f"run {run.clustering_run_id} does not belong to {snapshot.snapshot_id}" + ) + items = store.list_items(snapshot.snapshot_id) + assignments = store.list_assignments(run.clustering_run_id) + summaries = store.list_summaries(run.clustering_run_id) + assessment = assess_partition_validity( + store=store, + snapshot_id=snapshot.snapshot_id, + clustering_run_id=run.clustering_run_id, + ) + batch = _resolve_profile_batch( + store=store, + snapshot=snapshot, + run=run, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + profile_assessment = ( + store.get_profile_assessment( + profile_batch_id=batch.profile_batch_id, + clustering_run_id=run.clustering_run_id, + ) + if batch is not None + else None + ) + active_selection = _active_selection( + store=store, + snapshot_id=snapshot.snapshot_id, + 
embedding_generation_id=run.embedding_generation_id, + profile_batch_id=batch.profile_batch_id if batch is not None else None, + ) + manifest = ( + store.get_profile_manifest_snapshot(batch.profile_manifest_digest) + if batch is not None + else None + ) + presentation = derive_presentation_status( + run=run, + assessment=assessment, + profile_assessment=profile_assessment, + profile_batch_active=batch is not None, + profile_recommended_run_id=( + batch.recommended_clustering_run_id if batch is not None else None + ), + active_maintainer_selection=active_selection, + profile_label=manifest.label if manifest is not None else None, + ) + run_payload = _run_payload(run) + run_payload["validity"] = asdict(assessment) + run_payload["presentation"] = asdict(presentation) + if batch is not None and profile_assessment is not None and manifest is not None: + run_payload["profile_context"] = _profile_context_payload( + batch=batch, + assessment=profile_assessment, + label=manifest.label, + description=manifest.description, + clustering_run_id=run.clustering_run_id, + ) + if active_selection is not None: + run_payload["selection"] = _selection_payload( + selection=active_selection, + run=run, + ) + projection: dict[str, object] = {"run": run_payload} + if not assessment.technically_valid: + run_payload["score"] = None + run_payload["diagnostic_facts"] = asdict( + _diagnostic_run_facts( + run=run, + assessment=assessment, + item_count=len(items), + assignment_count=len(assignments), + summary_count=len(summaries), + ) + ) + return projection + + metrics = compute_run_partition_metrics(assignments, summaries) + run_payload["partition_metrics"] = asdict(metrics) + run_payload["score"] = score_clustering_result( + cluster_count=metrics.cluster_count, + noise_fraction=metrics.noise_ratio, + n_samples=metrics.total_items, + ) + run_payload["cluster_count"] = metrics.cluster_count + run_payload["noise_count"] = metrics.noise_count + run_payload["noise_fraction"] = metrics.noise_ratio 
+ assignment_by_id = { + assignment.snapshot_item_id: assignment for assignment in assignments + } + items_by_id = {item.snapshot_item_id: item for item in items} + members_by_label: defaultdict[int, list[CorpusItemRecord]] = defaultdict(list) + for assignment in assignments: + item = items_by_id[assignment.snapshot_item_id] + members_by_label[assignment.cluster_label].append(item) + projection["clusters"] = [ + _cluster_projection( + summary=summary, + member_items=members_by_label[summary.cluster_label], + items_by_id=items_by_id, + assignment_by_id=assignment_by_id, + ) + for summary in summaries + ] + projection["assignments"] = [ + _assignment_payload(assignment) for assignment in assignments + ] + projection["noise_items"] = [ + assignment.snapshot_item_id + for assignment in assignments + if assignment.cluster_label == NOISE_LABEL + ] + return projection + + +def derive_presentation_status( + *, + run: ClusteringRunRecord, + assessment: PartitionValidityAssessment, + profile_assessment: ProfileAssessmentRecord | None = None, + profile_batch_active: bool = False, + profile_recommended_run_id: str | None = None, + active_maintainer_selection: RunSelectionRecord | None = None, + profile_label: str | None = None, +) -> RunPresentationStatus: + banner_kind: Literal[ + "maintainer_selected", + "profile_recommended", + "heuristic_recommended", + "valid_but_profile_rejected", + "candidate_only", + "technically_invalid", + ] + if not assessment.technically_valid: + banner_kind = "technically_invalid" + elif ( + active_maintainer_selection is not None + and active_maintainer_selection.selected_run_id == run.clustering_run_id + ): + banner_kind = "maintainer_selected" + elif profile_batch_active and profile_recommended_run_id == run.clustering_run_id: + banner_kind = "profile_recommended" + elif ( + profile_batch_active + and profile_assessment is not None + and not profile_assessment.suitable_for_profile + ): + banner_kind = "valid_but_profile_rejected" + elif 
run.recommended_by_heuristic: + banner_kind = "heuristic_recommended" + else: + banner_kind = "candidate_only" + banner_message = profile_banner_message( + banner_kind, + failed_invariants=assessment.failed_invariants, + profile_label=profile_label, + ) + selected = ( + active_maintainer_selection is not None + and active_maintainer_selection.selected_run_id == run.clustering_run_id + ) + return RunPresentationStatus( + technically_valid=assessment.technically_valid, + failed_invariants=assessment.failed_invariants, + recommended_by_heuristic=run.recommended_by_heuristic, + selected_by_maintainer=selected, + is_candidate_only=( + assessment.technically_valid + and not run.recommended_by_heuristic + and not selected + and not ( + profile_batch_active + and profile_recommended_run_id == run.clustering_run_id + ) + and not ( + profile_batch_active + and profile_assessment is not None + and not profile_assessment.suitable_for_profile + ) + ), + projection_mode=( + "full_interpretation" + if assessment.technically_valid + else "limited_diagnostic" + ), + banner_kind=banner_kind, + banner_message=banner_message, + ) + + +def build_sweep_comparison_projection( + *, + store: CorpusStore, + snapshot: CorpusSnapshotRecord, + embedding_generation_id: str, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> tuple[list[dict[str, object]], dict[str, object]]: + batch = _resolve_comparison_batch( + store=store, + snapshot=snapshot, + embedding_generation_id=embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + rows: list[tuple[ClusteringRunRecord, dict[str, object], dict[str, object]]] = [] + runs = ( + store.list_clustering_runs_for_batch(profile_batch_id=batch.profile_batch_id) + if batch is not None + else store.list_clustering_runs( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embedding_generation_id, + ) + ) + for run in runs: + projection = enrich_run_for_export( + store=store, + 
snapshot=snapshot, + run=run, + profile_batch_id=batch.profile_batch_id if batch is not None else None, + ) + run_payload = _mapping(projection["run"]) + validity = _mapping(run_payload["validity"]) + valid = bool(validity.get("technically_valid")) + metrics = _mapping(run_payload.get("partition_metrics")) + comparison: dict[str, object] = { + "score": None, + "rank": None, + "recommended_by_heuristic": valid and run.recommended_by_heuristic, + "is_profile_recommended": ( + batch is not None + and batch.recommended_clustering_run_id == run.clustering_run_id + ), + "profile_suitable": _profile_suitable(run_payload), + "dominant_cluster_ratio": metrics.get("dominant_cluster_ratio"), + "dominant_assigned_ratio": metrics.get("dominant_assigned_ratio"), + "largest_cluster_size": _largest_cluster_size(metrics), + } + if valid: + comparison["score"] = score_clustering_result( + cluster_count=_required_integer(metrics, "cluster_count"), + noise_fraction=_required_number(metrics, "noise_ratio"), + n_samples=_required_integer(metrics, "total_items"), + ) + projection["comparison"] = comparison + rows.append((run, projection, comparison)) + + ranked = sorted( + (row for row in rows if row[2]["score"] is not None), + key=_comparison_sort_key, + ) + for rank, (_run, _projection, comparison) in enumerate(ranked, start=1): + comparison["rank"] = rank + + recommendations = [ + run.clustering_run_id + for run, _projection, comparison in rows + if comparison["recommended_by_heuristic"] + ] + if len(recommendations) > 1: + raise AnalyticsWorkflowError("multiple valid heuristic recommendations") + active_selection = _active_selection( + store=store, + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_batch_id=batch.profile_batch_id if batch is not None else None, + ) + summary: dict[str, object] = { + "candidate_count": len(rows), + "technically_valid_count": len(ranked), + "technically_invalid_count": len(rows) - len(ranked), + 
"recommended_run_id": recommendations[0] if recommendations else None, + "selected_run_id": ( + active_selection.selected_run_id if active_selection is not None else None + ), + } + return [projection for _run, projection, _comparison in rows], summary + + +def build_profile_summary( + *, + store: CorpusStore, + snapshot: CorpusSnapshotRecord, + embedding_generation_id: str, + profile_id: str | None = None, + profile_batch_id: str | None = None, +) -> dict[str, object] | None: + batch = _resolve_comparison_batch( + store=store, + snapshot=snapshot, + embedding_generation_id=embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + if batch is None: + return None + manifest = store.get_profile_manifest_snapshot(batch.profile_manifest_digest) + if manifest is None: + raise AnalyticsWorkflowError( + f"profile manifest snapshot missing: {batch.profile_manifest_digest}" + ) + runs = store.list_clustering_runs_for_batch(profile_batch_id=batch.profile_batch_id) + assessments = store.list_profile_assessments( + profile_batch_id=batch.profile_batch_id + ) + active_selection = _active_selection( + store=store, + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_batch_id=batch.profile_batch_id, + ) + technically_valid_count = sum( + assess_partition_validity( + store=store, + snapshot_id=snapshot.snapshot_id, + clustering_run_id=run.clustering_run_id, + ).technically_valid + for run in runs + ) + heuristic = [run.clustering_run_id for run in runs if run.recommended_by_heuristic] + if len(heuristic) > 1: + raise AnalyticsWorkflowError("multiple valid heuristic recommendations") + summary: dict[str, object] = { + "profile_batch_id": batch.profile_batch_id, + "profile_id": batch.profile_id, + "profile_version": manifest.profile_version, + "profile_manifest_digest": batch.profile_manifest_digest, + "label": manifest.label, + "description": manifest.description, + "candidate_count": 
batch.candidate_count_planned, + "candidate_count_failed": batch.candidate_count_failed, + "technically_valid_count": technically_valid_count, + "profile_suitable_count": sum( + assessment.suitable_for_profile for assessment in assessments + ), + "batch_status": batch.status, + "recommended_for_profile_run_id": (batch.recommended_clustering_run_id), + "recommended_by_heuristic_run_id": (heuristic[0] if heuristic else None), + "active_selected_run_id": ( + active_selection.selected_run_id if active_selection is not None else None + ), + "recommendation_rationale": ( + _json_mapping_or_none(batch.recommendation_rationale_json) + if batch.recommendation_rationale_json is not None + else None + ), + } + if not summary["profile_suitable_count"]: + summary["presentation"] = asdict( + derive_sweep_comparison_presentation(profile_summary=summary) + ) + return summary + + +def derive_sweep_comparison_presentation( + *, + profile_summary: Mapping[str, object], +) -> RunPresentationStatus: + profile_label = ( + str(profile_summary["label"]) + if isinstance(profile_summary.get("label"), str) + else None + ) + return RunPresentationStatus( + technically_valid=bool(profile_summary.get("technically_valid_count")), + failed_invariants=(), + recommended_by_heuristic=False, + selected_by_maintainer=False, + is_candidate_only=False, + projection_mode="full_interpretation", + banner_kind="no_profile_suitable_candidate", + banner_message=profile_banner_message( + "no_profile_suitable_candidate", + profile_label=profile_label, + ), + ) + + +def content_disclosure(payload: Mapping[str, object]) -> dict[str, object]: + counts = dict.fromkeys(PREVIEW_SCOPES, 0) + _count_previews(payload, counts) + return { + "contains_normalized_text_previews": sum(counts.values()) > 0, + "preview_scope": [scope for scope in PREVIEW_SCOPES if counts[scope] > 0], + "max_preview_characters": MAX_PREVIEW_CHARACTERS, + } + + +def _cluster_projection( + *, + summary: ClusterSummaryRecord, + member_items: 
Sequence[CorpusItemRecord], + items_by_id: Mapping[str, CorpusItemRecord], + assignment_by_id: Mapping[str, ClusterAssignmentRecord], +) -> dict[str, object]: + diagnostics = _json_mapping(summary.diagnostics_json) + payload: dict[str, object] = { + "cluster_label": summary.cluster_label, + "display_cluster_id": summary.display_cluster_id, + "membership_digest": summary.membership_digest, + "size": summary.size, + "diagnostics": diagnostics, + } + if summary.cluster_label == NOISE_LABEL: + payload["interpretation"] = { + "noise_item_previews": _noise_item_previews( + diagnostics=diagnostics, + items_by_id=items_by_id, + assignment_by_id=assignment_by_id, + ) + } + return payload + + representative_ids = _string_list(diagnostics.get("representatives")) + boundary_ids = _string_list(diagnostics.get("boundary_items")) + representative_previews = [ + _preview_payload( + build_item_preview( + items_by_id[item_id], + assignment_by_id.get(item_id), + source_kind="intent_historical", + source_record_id=items_by_id[item_id].intent_id, + ) + ) + for item_id in representative_ids + ] + boundary_previews = [ + _preview_payload( + build_item_preview( + items_by_id[item_id], + assignment_by_id.get(item_id), + source_kind="intent_historical", + source_record_id=items_by_id[item_id].intent_id, + ) + ) + for item_id in boundary_ids + ] + numeric_summaries = { + field: asdict(numeric_field_summary(member_items, field=field)) + for field in _NUMERIC_FIELDS + } + interpretation: dict[str, object] = { + "representative_previews": representative_previews, + "boundary_previews": boundary_previews, + "categorical_correlations": _categorical_correlations(diagnostics), + "numeric_summaries": numeric_summaries, + "machine_inspectability_signals": _machine_inspectability_signals( + member_items=member_items, + representative_previews=representative_previews, + assignment_by_id=assignment_by_id, + description_length_summary=_mapping( + numeric_summaries["description_length"] + ), + ), + } + if 
summary.size <= SMALL_CLUSTER_PROVENANCE_THRESHOLD: + interpretation["provenance_completeness"] = asdict( + _provenance_completeness(member_items) + ) + payload["interpretation"] = interpretation + return payload + + +def _noise_item_previews( + *, + diagnostics: Mapping[str, object], + items_by_id: Mapping[str, CorpusItemRecord], + assignment_by_id: Mapping[str, ClusterAssignmentRecord], +) -> list[dict[str, object]]: + result: list[dict[str, object]] = [] + for entry in _mapping_list(diagnostics.get("noise_items")): + item_id = entry.get("snapshot_item_id") + if not isinstance(item_id, str) or item_id not in items_by_id: + continue + item = items_by_id[item_id] + result.append( + { + "preview": _preview_payload( + build_item_preview( + item, + assignment_by_id.get(item_id), + source_kind="intent_historical", + source_record_id=item.intent_id, + ) + ), + "flags": _mapping(entry.get("flags")), + } + ) + return result + + +def _categorical_correlations( + diagnostics: Mapping[str, object], +) -> dict[str, list[dict[str, object]]]: + distributions = diagnostics.get("metadata_distributions") + if not isinstance(distributions, Mapping): + return {} + result: dict[str, list[dict[str, object]]] = {} + for field, values in sorted(distributions.items()): + if field in _NUMERIC_FIELDS: + continue + rows: list[dict[str, object]] = [] + for key, cell in _mapping_cells(values): + rows.append( + { + "value": asdict(_distribution_display_value(str(field), str(key))), + "numerator": cell.get("numerator"), + "denominator": cell.get("denominator"), + "rate": cell.get("rate"), + "insufficient_sample": bool(cell.get("insufficient_sample")), + } + ) + result[str(field)] = rows + return result + + +def _distribution_display_value(field: str, key: str) -> MetadataDisplayValue: + if key == "null": + return MetadataDisplayValue(kind="unknown", display="unknown") + if key == "none": + if field in EMPTY_MEANS_CONFIRMED_NONE_FIELDS: + return MetadataDisplayValue( + kind="confirmed_none", + 
display="none (confirmed empty)", + ) + return MetadataDisplayValue( + kind="empty_collection", + display="empty collection", + ) + return MetadataDisplayValue(kind="value", display=key) + + +def _provenance_completeness( + items: Sequence[CorpusItemRecord], +) -> ProvenanceCompletenessSummary: + trajectory_selected_count = 0 + patch_trail_present_count = 0 + registry_overlay_present_count = 0 + known_counts = dict.fromkeys(INSPECTABILITY_TRACKED_METADATA_FIELDS, 0) + for item in items: + metadata = _json_mapping(item.metadata_json) + provenance = metadata.get("provenance") + provenance_map = provenance if isinstance(provenance, Mapping) else {} + if ( + _provenance_presence( + provenance_map, + section="trajectory", + explicit_key="selected", + positive_key="selected_trajectory_id", + ) + is True + ): + trajectory_selected_count += 1 + if ( + _provenance_presence( + provenance_map, + section="patch_trail", + explicit_key="present", + positive_key="digest", + ) + is True + ): + patch_trail_present_count += 1 + if _registry_overlay_presence(item, provenance_map) is True: + registry_overlay_present_count += 1 + for field in INSPECTABILITY_TRACKED_METADATA_FIELDS: + if metadata_display_value(metadata, field).kind != "unknown": + known_counts[field] += 1 + item_count = len(items) + return ProvenanceCompletenessSummary( + item_count=item_count, + trajectory_selected_count=trajectory_selected_count, + patch_trail_present_count=patch_trail_present_count, + registry_overlay_present_count=registry_overlay_present_count, + agent_family_known_count=known_counts["agent_family"], + outcome_known_count=known_counts["outcome"], + anomaly_metadata_known_count=known_counts["anomaly_kinds"], + fields_unknown_rate={ + field: ( + (item_count - known_counts[field]) / item_count if item_count else 0.0 + ) + for field in INSPECTABILITY_TRACKED_METADATA_FIELDS + }, + ) + + +def _provenance_presence( + provenance: Mapping[str, object], + *, + section: str, + explicit_key: str, + 
positive_key: str, +) -> bool | None: + # Slice 1 compatibility: positive evidence is present, explicit absence is + # absent, and a null/missing legacy field remains unknown. + value = provenance.get(section) + if not isinstance(value, Mapping): + return None + explicit = value.get(explicit_key) + if isinstance(explicit, bool): + return explicit + if value.get(positive_key) is not None: + return True + if value.get("available") is False: + return False + return None + + +def _registry_overlay_presence( + item: CorpusItemRecord, + provenance: Mapping[str, object], +) -> bool | None: + # Slice 1 only established non-null overlay content as positive evidence. + value = provenance.get("registry_overlay") + if isinstance(value, Mapping) and isinstance(value.get("present"), bool): + return bool(value["present"]) + if item.registry_overlay_json is None: + return None + overlay = _json_mapping_or_none(item.registry_overlay_json) + return True if overlay is not None else None + + +def _machine_inspectability_signals( + *, + member_items: Sequence[CorpusItemRecord], + representative_previews: Sequence[Mapping[str, object]], + assignment_by_id: Mapping[str, ClusterAssignmentRecord], + description_length_summary: Mapping[str, object], +) -> dict[str, object]: + preview_texts = [ + str(preview.get("normalized_text_preview", "")) + for preview in representative_previews + ] + strengths = sorted( + assignment.membership_strength + for item in member_items + if (assignment := assignment_by_id.get(item.snapshot_item_id)) is not None + and assignment.membership_strength is not None + and math.isfinite(assignment.membership_strength) + ) + known = 0 + for item in member_items: + metadata = _json_mapping(item.metadata_json) + known += sum( + metadata_display_value(metadata, field).kind != "unknown" + for field in INSPECTABILITY_TRACKED_METADATA_FIELDS + ) + denominator = len(INSPECTABILITY_TRACKED_METADATA_FIELDS) * len(member_items) + return { + "representative_text_present": 
bool(preview_texts) and all(preview_texts), + "representative_text_unique": bool(preview_texts) + and len({preview_digest(text) for text in preview_texts}) == len(preview_texts), + "membership_strength_spread": ( + strengths[-1] - strengths[0] if len(strengths) >= 2 else None + ), + "metadata_known_fraction": known / denominator if denominator else 0.0, + "cluster_size": len(member_items), + "description_length_median": description_length_summary.get("median"), + } + + +def _diagnostic_run_facts( + *, + run: ClusteringRunRecord, + assessment: PartitionValidityAssessment, + item_count: int, + assignment_count: int, + summary_count: int, +) -> DiagnosticRunFacts: + allowed_by_invariant = { + "V1": {"snapshot", "assignment"}, + "V2": {"snapshot", "assignment"}, + "V3": {"assignment", "summary"}, + "V4": {"assignment", "summary"}, + "V5": {"assignment", "summary"}, + "V6a": set(), + "V7": set(), + "V8": set(), + "V9": {"snapshot", "assignment", "summary"}, + "V10": {"snapshot", "assignment", "summary"}, + } + allowed = {"snapshot", "assignment", "summary"} + for invariant in assessment.failed_invariants: + allowed &= allowed_by_invariant[invariant] + return DiagnosticRunFacts( + snapshot_item_count=item_count if "snapshot" in allowed else None, + assignment_count=assignment_count if "assignment" in allowed else None, + summary_count=summary_count if "summary" in allowed else None, + completed_status=run.status == "completed", + run_status=run.status, + clustering_run_id=run.clustering_run_id, + snapshot_id=run.snapshot_id, + ) + + +def _run_payload(run: ClusteringRunRecord) -> dict[str, object]: + requested = _json_mapping_or_none(run.requested_parameters_json) + effective = _json_mapping_or_none(run.effective_parameters_json) + manifest = ( + effective.get("algorithm_manifest") + if isinstance(effective, Mapping) + and isinstance(effective.get("algorithm_manifest"), Mapping) + else None + ) + return { + "clustering_run_id": run.clustering_run_id, + "snapshot_id": 
run.snapshot_id, + "embedding_generation_id": run.embedding_generation_id, + "requested_parameters": requested, + "effective_parameters": effective, + "algorithm_manifest": dict(manifest) if isinstance(manifest, Mapping) else None, + "random_seed": run.random_seed, + "run_digest": run.run_digest, + "recommended_by_heuristic": run.recommended_by_heuristic, + "selected_by_maintainer": run.selected_by_maintainer, + "status": run.status, + "created_at_utc": run.created_at_utc, + "finished_at_utc": run.finished_at_utc, + "error_message": run.error_message, + } + + +def _resolve_profile_batch( + *, + store: CorpusStore, + snapshot: CorpusSnapshotRecord, + run: ClusteringRunRecord, + profile_id: str | None, + profile_batch_id: str | None, +) -> ProfileBatchRecord | None: + batch = _resolve_comparison_batch( + store=store, + snapshot=snapshot, + embedding_generation_id=run.embedding_generation_id, + profile_id=profile_id, + profile_batch_id=profile_batch_id, + ) + if batch is None and profile_id is None and profile_batch_id is None: + if not hasattr(store, "list_profile_batch_ids_for_run"): + return None + candidates = [ + store.get_profile_batch(batch_id) + for batch_id in store.list_profile_batch_ids_for_run( + clustering_run_id=run.clustering_run_id + ) + ] + matching = [candidate for candidate in candidates if candidate is not None] + batch = ( + max( + matching, + key=lambda candidate: ( + candidate.started_at_utc, + candidate.profile_batch_id, + ), + ) + if matching + else None + ) + if batch is None: + return None + return batch if _batch_contains_run(store, batch, run.clustering_run_id) else None + + +def _resolve_comparison_batch( + *, + store: CorpusStore, + snapshot: CorpusSnapshotRecord, + embedding_generation_id: str, + profile_id: str | None, + profile_batch_id: str | None, +) -> ProfileBatchRecord | None: + if profile_batch_id is not None: + if not hasattr(store, "get_profile_batch"): + return None + batch = store.get_profile_batch(profile_batch_id) + if 
batch is None: + raise AnalyticsWorkflowError(f"unknown profile batch: {profile_batch_id}") + elif profile_id is not None: + if not hasattr(store, "get_latest_profile_batch"): + return None + batch = store.get_latest_profile_batch( + snapshot_id=snapshot.snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_id=profile_id, + ) + if batch is None: + return None + else: + return None + if ( + batch.snapshot_id != snapshot.snapshot_id + or batch.embedding_generation_id != embedding_generation_id + ): + raise AnalyticsWorkflowError( + "profile batch does not belong to requested corpus: " + f"{batch.profile_batch_id}" + ) + if profile_id is not None and batch.profile_id != profile_id: + raise AnalyticsWorkflowError( + f"profile batch does not match profile: {profile_id}" + ) + return batch + + +def _batch_contains_run( + store: CorpusStore, + batch: ProfileBatchRecord, + clustering_run_id: str, +) -> bool: + return any( + member.clustering_run_id == clustering_run_id + for member in store.list_profile_batch_run_records( + profile_batch_id=batch.profile_batch_id + ) + ) + + +def _active_selection( + *, + store: CorpusStore, + snapshot_id: str, + embedding_generation_id: str, + profile_batch_id: str | None, +) -> RunSelectionRecord | None: + if not hasattr(store, "get_active_run_selection"): + return None + result = store.get_active_run_selection( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_batch_id=profile_batch_id, + ) + if result.ambiguous: + raise AnalyticsWorkflowError( + "selection chain ambiguous: multiple active selections" + ) + return result.record + + +def _profile_context_payload( + *, + batch: ProfileBatchRecord, + assessment: ProfileAssessmentRecord, + label: str, + description: str, + clustering_run_id: str, +) -> dict[str, object]: + return { + "profile_id": assessment.profile_id, + "profile_version": assessment.profile_version, + "profile_manifest_digest": assessment.profile_manifest_digest, + 
"label": label, + "description": description, + "profile_batch_id": batch.profile_batch_id, + "suitability": { + "suitable_for_profile": assessment.suitable_for_profile, + "rejection_reasons": _json_string_list(assessment.rejection_reasons_json), + "observed": ( + _json_mapping_or_none(assessment.observed_metrics_json) + if assessment.observed_metrics_json is not None + else None + ), + }, + "is_profile_recommended": ( + batch.recommended_clustering_run_id == clustering_run_id + ), + } + + +def _selection_payload( + *, + selection: RunSelectionRecord, + run: ClusteringRunRecord, +) -> dict[str, object]: + return { + "selection_id": selection.selection_id, + "profile_batch_id": selection.profile_batch_id, + "profile_id": selection.profile_id, + "profile_manifest_digest": selection.profile_manifest_digest, + "selected_by": selection.selected_by, + "selected_at_utc": selection.selected_at_utc, + "rationale": selection.rationale, + "is_active": selection.selected_run_id == run.clustering_run_id, + "legacy_bool_mirror": run.selected_by_maintainer, + } + + +def _profile_suitable(run_payload: Mapping[str, object]) -> bool | None: + context = run_payload.get("profile_context") + if not isinstance(context, Mapping): + return None + suitability = context.get("suitability") + if not isinstance(suitability, Mapping): + return None + value = suitability.get("suitable_for_profile") + return bool(value) if isinstance(value, bool) else None + + +def _assignment_payload(assignment: ClusterAssignmentRecord) -> dict[str, object]: + return { + "snapshot_item_id": assignment.snapshot_item_id, + "cluster_label": assignment.cluster_label, + "membership_strength": assignment.membership_strength, + "membership_digest": assignment.membership_digest, + } + + +def _cluster_size_histogram(sizes: Sequence[int]) -> dict[str, int]: + """Compatibility alias for the neutral partition-metrics helper.""" + + return _partition_cluster_size_histogram(sizes) + + +def _preview_payload(preview: 
ItemPreview) -> dict[str, object]: + return { + "snapshot_item_id": preview.snapshot_item_id, + "source_record_id": preview.source_record_id, + "source_kind": preview.source_kind, + "intent_id": preview.intent_id, + "normalized_text_preview": preview.normalized_text_preview, + "membership_strength": preview.membership_strength, + "agent_family": asdict(preview.agent_family), + "outcome": asdict(preview.outcome), + "quality_tier": asdict(preview.quality_tier), + "scope_check_status": asdict(preview.scope_check_status), + "verification_status": asdict(preview.verification_status), + } + + +def _largest_cluster_size(metrics: Mapping[str, object]) -> int | None: + sizes = metrics.get("cluster_size_distribution") + if not isinstance(sizes, list | tuple) or not sizes: + return None + value = sizes[0] + return int(value) if isinstance(value, int) else None + + +def _comparison_sort_key( + row: tuple[ClusteringRunRecord, dict[str, object], dict[str, object]], +) -> tuple[float, int, int, int, str]: + _run, projection, comparison = row + score = comparison["score"] + run_payload = _mapping(projection["run"]) + effective = _mapping(run_payload["effective_parameters"]) + return ( + -_finite_float(score), + _integer_parameter(effective, "pca_dimensions"), + _integer_parameter(effective, "min_cluster_size"), + _integer_parameter(effective, "min_samples"), + _string_parameter(effective, "cluster_selection_method"), + ) + + +def _count_previews(value: object, counts: dict[str, int]) -> None: + if isinstance(value, Mapping): + for key, nested in value.items(): + scope = { + "representative_previews": "cluster_representatives", + "boundary_previews": "cluster_boundaries", + "noise_item_previews": "noise_items", + }.get(str(key)) + if scope is not None and isinstance(nested, list): + counts[scope] += len(nested) + _count_previews(nested, counts) + elif isinstance(value, list): + for nested in value: + _count_previews(nested, counts) + + +def _json_mapping(text: str) -> dict[str, 
object]: + value = _json_mapping_or_none(text) + return value if value is not None else {} + + +def _json_mapping_or_none(text: str) -> dict[str, object] | None: + try: + value = json.loads(text) + except (json.JSONDecodeError, TypeError): + return None + return value if isinstance(value, dict) else None + + +def _json_string_list(text: str) -> list[str]: + try: + value = json.loads(text) + except (json.JSONDecodeError, TypeError): + return [] + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, str)] + + +def _mapping(value: object) -> dict[str, object]: + return dict(value) if isinstance(value, Mapping) else {} + + +def _mapping_list(value: object) -> list[dict[str, object]]: + if not isinstance(value, list): + return [] + return [dict(item) for item in value if isinstance(item, Mapping)] + + +def _mapping_cells(value: object) -> list[tuple[str, dict[str, object]]]: + if not isinstance(value, Mapping): + return [] + return [ + (str(key), dict(cell)) + for key, cell in sorted(value.items()) + if isinstance(cell, Mapping) + ] + + +def _finite_float(value: object) -> float: + if isinstance(value, bool) or not isinstance(value, int | float): + raise AnalyticsWorkflowError("valid comparison candidate has no finite score") + number = float(value) + if not math.isfinite(number): + raise AnalyticsWorkflowError("valid comparison candidate has no finite score") + return number + + +def _integer_parameter(parameters: Mapping[str, object], field: str) -> int: + value = parameters.get(field) + if isinstance(value, bool) or not isinstance(value, int): + raise AnalyticsWorkflowError( + f"valid comparison candidate is missing integer parameter {field}" + ) + return value + + +def _required_integer(parameters: Mapping[str, object], field: str) -> int: + value = parameters.get(field) + if isinstance(value, bool) or not isinstance(value, int): + raise AnalyticsWorkflowError( + f"valid projection is missing integer field {field}" + ) + 
return value + + +def _required_number(parameters: Mapping[str, object], field: str) -> float: + value = parameters.get(field) + if isinstance(value, bool) or not isinstance(value, int | float): + raise AnalyticsWorkflowError( + f"valid projection is missing numeric field {field}" + ) + number = float(value) + if not math.isfinite(number): + raise AnalyticsWorkflowError( + f"valid projection has non-finite numeric field {field}" + ) + return number + + +def _string_parameter(parameters: Mapping[str, object], field: str) -> str: + value = parameters.get(field) + if not isinstance(value, str) or not value: + raise AnalyticsWorkflowError( + f"valid comparison candidate is missing string parameter {field}" + ) + return value + + +def _string_list(value: object) -> list[str]: + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, str)] + + +__all__ = [ + "INSPECTABILITY_TRACKED_METADATA_FIELDS", + "INTERPRETATION_CONTRACT_VERSION", + "PREVIEW_SCOPES", + "SMALL_CLUSTER_PROVENANCE_THRESHOLD", + "DiagnosticRunFacts", + "ProvenanceCompletenessSummary", + "RunPartitionMetrics", + "RunPresentationStatus", + "build_profile_summary", + "build_sweep_comparison_projection", + "compute_run_partition_metrics", + "content_disclosure", + "derive_presentation_status", + "derive_sweep_comparison_presentation", + "enrich_run_for_export", +] diff --git a/codeclone/analytics/report/messages/__init__.py b/codeclone/analytics/report/messages/__init__.py new file mode 100644 index 00000000..60ce520c --- /dev/null +++ b/codeclone/analytics/report/messages/__init__.py @@ -0,0 +1,17 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .profiles import ( + PROFILE_REJECTION_MESSAGES, + profile_banner_message, + profile_rejection_message, +) + +__all__ = [ + "PROFILE_REJECTION_MESSAGES", + "profile_banner_message", + "profile_rejection_message", +] diff --git a/codeclone/analytics/report/messages/profiles.py b/codeclone/analytics/report/messages/profiles.py new file mode 100644 index 00000000..e1c34ecf --- /dev/null +++ b/codeclone/analytics/report/messages/profiles.py @@ -0,0 +1,72 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +PROFILE_REJECTION_MESSAGES = { + "technically_invalid": "The partition failed technical validity checks.", + "too_few_clusters": ( + "The partition has fewer non-noise clusters than this lens allows." + ), + "too_many_clusters": ( + "The partition has more non-noise clusters than this lens allows." + ), + "dominant_ratio_above_max": "The dominant cluster exceeds this lens maximum.", + "dominant_ratio_below_min": "The dominant cluster is below this lens minimum.", + "noise_ratio_above_max": "The noise ratio exceeds this lens maximum.", + "noise_ratio_below_min": "The noise ratio is below this lens minimum.", + "insufficient_assigned_mass": ( + "Too few corpus items are assigned to non-noise clusters." + ), +} + + +def profile_rejection_message(code: str) -> str: + return PROFILE_REJECTION_MESSAGES.get(code, code.replace("_", " ")) + + +def profile_banner_message( + kind: str, + *, + failed_invariants: Sequence[str] = (), + profile_label: str | None = None, +) -> str: + label = profile_label or "selected profile" + messages = { + "maintainer_selected": ( + "Maintainer-selected run. 
Selection is review evidence, not taxonomy truth." + ), + "profile_recommended": ( + f"Technically valid run recommended by the {label} lens." + ), + "heuristic_recommended": ( + "Heuristically recommended run. Recommendation is not a semantic verdict." + ), + "valid_but_profile_rejected": ( + f"Technically valid partition rejected by the {label} lens." + ), + "no_profile_suitable_candidate": ( + f"No technically valid candidate satisfied the {label} lens." + ), + "candidate_only": ( + "Candidate run - not recommended or maintainer-selected. " + "Inspect it as one clustering output, not as corpus taxonomy." + ), + } + if kind == "technically_invalid": + return "Technically invalid clustering run. Failed invariants: " + ", ".join( + failed_invariants + ) + return messages.get(kind, "Run presentation is unavailable.") + + +__all__ = [ + "PROFILE_REJECTION_MESSAGES", + "profile_banner_message", + "profile_rejection_message", +] diff --git a/codeclone/analytics/schema.py b/codeclone/analytics/schema.py new file mode 100644 index 00000000..7aa1f729 --- /dev/null +++ b/codeclone/analytics/schema.py @@ -0,0 +1,973 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +import sqlite3 +from pathlib import Path + +from ..contracts import CORPUS_ANALYTICS_STORE_SCHEMA_VERSION +from ..report.meta import current_report_timestamp_utc +from ..utils.sqlite_store import ( + get_meta_value, + initialize_schema_v1, +) +from .exceptions import AnalyticsStoreError + +_ANALYTICS_META_TABLE = "analytics_meta" + +_CONTROL_PLANE_DDL = ( + """ + CREATE TABLE IF NOT EXISTS profile_manifest_snapshots ( + profile_manifest_digest TEXT PRIMARY KEY, + profile_id TEXT NOT NULL, + profile_version TEXT NOT NULL, + manifest_schema_version TEXT NOT NULL, + canonical_manifest_json TEXT NOT NULL, + label TEXT NOT NULL, + description TEXT NOT NULL, + created_at_utc TEXT NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS profile_batches ( + profile_batch_id TEXT PRIMARY KEY, + snapshot_id TEXT NOT NULL, + embedding_generation_id TEXT NOT NULL, + profile_id TEXT NOT NULL, + profile_manifest_digest TEXT NOT NULL, + candidate_space_digest TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + finished_at_utc TEXT, + status TEXT NOT NULL, + candidate_count_planned INTEGER NOT NULL, + candidate_count_succeeded INTEGER NOT NULL DEFAULT 0, + candidate_count_failed INTEGER NOT NULL DEFAULT 0, + recommended_clustering_run_id TEXT, + recommendation_rationale_json TEXT, + batch_max_cluster_count INTEGER, + created_at_utc TEXT NOT NULL, + FOREIGN KEY (snapshot_id) + REFERENCES corpus_snapshots(snapshot_id), + FOREIGN KEY (embedding_generation_id) + REFERENCES embedding_generations(embedding_generation_id), + FOREIGN KEY (profile_manifest_digest) + REFERENCES profile_manifest_snapshots(profile_manifest_digest), + FOREIGN KEY (recommended_clustering_run_id) + REFERENCES clustering_runs(clustering_run_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS profile_batch_runs ( + profile_batch_id TEXT NOT NULL, + clustering_run_id TEXT NOT 
NULL, + candidate_ordinal INTEGER NOT NULL, + candidate_dedupe_key TEXT NOT NULL, + PRIMARY KEY (profile_batch_id, clustering_run_id), + UNIQUE (profile_batch_id, candidate_dedupe_key), + FOREIGN KEY (profile_batch_id) + REFERENCES profile_batches(profile_batch_id), + FOREIGN KEY (clustering_run_id) + REFERENCES clustering_runs(clustering_run_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS profile_assessments ( + profile_batch_id TEXT NOT NULL, + clustering_run_id TEXT NOT NULL, + profile_id TEXT NOT NULL, + profile_version TEXT NOT NULL, + profile_manifest_digest TEXT NOT NULL, + suitable_for_profile INTEGER NOT NULL, + rejection_reasons_json TEXT NOT NULL, + observed_metrics_json TEXT, + assessed_digest TEXT NOT NULL, + PRIMARY KEY (profile_batch_id, clustering_run_id), + FOREIGN KEY (profile_batch_id) + REFERENCES profile_batches(profile_batch_id), + FOREIGN KEY (clustering_run_id) + REFERENCES clustering_runs(clustering_run_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS run_selections ( + selection_id TEXT PRIMARY KEY, + snapshot_id TEXT NOT NULL, + embedding_generation_id TEXT NOT NULL, + profile_batch_id TEXT, + profile_id TEXT, + profile_manifest_digest TEXT, + selected_run_id TEXT NOT NULL, + selected_at_utc TEXT NOT NULL, + selected_by TEXT NOT NULL, + rationale TEXT, + supersedes_selection_id TEXT, + FOREIGN KEY (selected_run_id) + REFERENCES clustering_runs(clustering_run_id), + FOREIGN KEY (supersedes_selection_id) + REFERENCES run_selections(selection_id), + FOREIGN KEY (profile_batch_id) + REFERENCES profile_batches(profile_batch_id) + ) + """, +) + +_DDL = ( + """ + CREATE TABLE IF NOT EXISTS corpus_snapshots ( + snapshot_id TEXT PRIMARY KEY, + lane TEXT NOT NULL, + representation_kind TEXT NOT NULL, + representation_version TEXT NOT NULL, + source_stores_json TEXT NOT NULL, + source_schema_versions_json TEXT NOT NULL, + record_count INTEGER NOT NULL, + source_digest TEXT NOT NULL, + created_at_utc TEXT NOT NULL + ) + """, + """ + CREATE TABLE 
IF NOT EXISTS corpus_items ( + snapshot_id TEXT NOT NULL, + representation_key TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + source_record_key TEXT NOT NULL, + project_id TEXT NOT NULL, + intent_id TEXT NOT NULL, + normalized_text TEXT NOT NULL, + normalized_digest TEXT NOT NULL, + normalizer_version TEXT NOT NULL, + representation_digest TEXT NOT NULL, + metadata_json TEXT NOT NULL, + registry_overlay_json TEXT, + PRIMARY KEY (snapshot_id, representation_key) + ) + """, + """ + CREATE TABLE IF NOT EXISTS embedding_generations ( + embedding_generation_id TEXT PRIMARY KEY, + provider_id TEXT NOT NULL, + provider_package_version TEXT NOT NULL, + model_id TEXT NOT NULL, + model_revision TEXT, + model_artifact_fingerprint TEXT, + exact_model_artifact_reproducibility INTEGER NOT NULL, + dimensions INTEGER NOT NULL, + embedding_contract_version TEXT NOT NULL, + embedding_similarity_metric TEXT NOT NULL, + vector_preprocessing TEXT NOT NULL, + created_at_utc TEXT NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS embedding_items ( + embedding_generation_id TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + vector_row_key TEXT NOT NULL, + vector_digest TEXT NOT NULL, + dimensions INTEGER NOT NULL, + PRIMARY KEY (embedding_generation_id, snapshot_item_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS clustering_runs ( + clustering_run_id TEXT PRIMARY KEY, + snapshot_id TEXT NOT NULL, + embedding_generation_id TEXT NOT NULL, + requested_parameters_json TEXT NOT NULL, + effective_parameters_json TEXT NOT NULL, + random_seed INTEGER NOT NULL, + run_digest TEXT NOT NULL, + recommended_by_heuristic INTEGER NOT NULL DEFAULT 0, + selected_by_maintainer INTEGER NOT NULL DEFAULT 0, + status TEXT NOT NULL, + created_at_utc TEXT NOT NULL, + finished_at_utc TEXT, + error_message TEXT + ) + """, + """ + CREATE TABLE IF NOT EXISTS cluster_assignments ( + clustering_run_id TEXT NOT NULL, + snapshot_item_id TEXT NOT NULL, + cluster_label INTEGER NOT NULL, + membership_strength 
REAL, + membership_digest TEXT NOT NULL, + PRIMARY KEY (clustering_run_id, snapshot_item_id) + ) + """, + """ + CREATE TABLE IF NOT EXISTS cluster_summaries ( + clustering_run_id TEXT NOT NULL, + cluster_label INTEGER NOT NULL, + display_cluster_id INTEGER, + membership_digest TEXT NOT NULL, + size INTEGER NOT NULL, + diagnostics_json TEXT NOT NULL, + PRIMARY KEY (clustering_run_id, cluster_label) + ) + """, + *_CONTROL_PLANE_DDL, + f""" + CREATE TABLE IF NOT EXISTS {_ANALYTICS_META_TABLE} ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ) + """, +) + +_INDEXES = ( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_corpus_items_snapshot_item " + "ON corpus_items(snapshot_id, snapshot_item_id)", + "CREATE INDEX IF NOT EXISTS idx_corpus_items_intent " + "ON corpus_items(project_id, intent_id)", + "CREATE INDEX IF NOT EXISTS idx_clustering_runs_snapshot " + "ON clustering_runs(snapshot_id, embedding_generation_id)", + "CREATE INDEX IF NOT EXISTS idx_cluster_assignments_run " + "ON cluster_assignments(clustering_run_id, cluster_label)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_items_vector_row_key " + "ON embedding_items(vector_row_key)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_cluster_summaries_display " + "ON cluster_summaries(clustering_run_id, display_cluster_id) " + "WHERE display_cluster_id IS NOT NULL", + "CREATE INDEX IF NOT EXISTS idx_profile_batches_lens " + "ON profile_batches(" + "snapshot_id, embedding_generation_id, profile_id, started_at_utc" + ")", + "CREATE INDEX IF NOT EXISTS idx_run_selections_scope " + "ON run_selections(" + "snapshot_id, embedding_generation_id, profile_batch_id, selected_at_utc" + ")", +) + +_CONTROL_PLANE_INDEX_MARKERS = ( + "idx_profile_batches_lens", + "idx_run_selections_scope", +) + +_INTEGRITY_TRIGGERS = ( + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_snapshot_guard + BEFORE INSERT ON corpus_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) + BEGIN + SELECT 
RAISE(ABORT, 'unknown corpus snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_snapshot_update_guard + BEFORE UPDATE OF snapshot_id ON corpus_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown corpus snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_snapshot_delete_guard + BEFORE DELETE ON corpus_snapshots + WHEN EXISTS ( + SELECT 1 FROM corpus_items WHERE snapshot_id=OLD.snapshot_id + ) OR EXISTS ( + SELECT 1 FROM clustering_runs WHERE snapshot_id=OLD.snapshot_id + ) OR EXISTS ( + SELECT 1 FROM profile_batches WHERE snapshot_id=OLD.snapshot_id + ) OR EXISTS ( + SELECT 1 FROM run_selections WHERE snapshot_id=OLD.snapshot_id + ) + BEGIN + SELECT RAISE(ABORT, 'corpus snapshot is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_corpus_item_delete_guard + BEFORE DELETE ON corpus_items + WHEN EXISTS ( + SELECT 1 FROM embedding_items + WHERE snapshot_item_id=OLD.snapshot_item_id + ) OR EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE snapshot_item_id=OLD.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'corpus item is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_generation_guard + BEFORE INSERT ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown embedding generation'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_update_guard + BEFORE UPDATE OF embedding_generation_id, snapshot_item_id + ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NOT EXISTS ( + SELECT 1 FROM corpus_items + WHERE snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'invalid embedding item reference'); + END + 
""", + """ + CREATE TRIGGER IF NOT EXISTS analytics_generation_delete_guard + BEFORE DELETE ON embedding_generations + WHEN EXISTS ( + SELECT 1 FROM embedding_items + WHERE embedding_generation_id=OLD.embedding_generation_id + ) OR EXISTS ( + SELECT 1 FROM clustering_runs + WHERE embedding_generation_id=OLD.embedding_generation_id + ) OR EXISTS ( + SELECT 1 FROM profile_batches + WHERE embedding_generation_id=OLD.embedding_generation_id + ) OR EXISTS ( + SELECT 1 FROM run_selections + WHERE embedding_generation_id=OLD.embedding_generation_id + ) + BEGIN + SELECT RAISE(ABORT, 'embedding generation is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_embedding_item_snapshot_guard + BEFORE INSERT ON embedding_items + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_items + WHERE snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'unknown snapshot item'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_guard + BEFORE INSERT ON clustering_runs + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) OR NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NEW.status NOT IN ('pending', 'running', 'completed', 'failed') + BEGIN + SELECT RAISE(ABORT, 'invalid clustering run reference or status'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_update_guard + BEFORE UPDATE OF snapshot_id, embedding_generation_id, status + ON clustering_runs + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) OR NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NEW.status NOT IN ('pending', 'running', 'completed', 'failed') + BEGIN + SELECT RAISE(ABORT, 'invalid clustering run reference or status'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_clustering_run_delete_guard + BEFORE 
DELETE ON clustering_runs + WHEN EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM cluster_summaries + WHERE clustering_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM profile_batch_runs + WHERE clustering_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM profile_assessments + WHERE clustering_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM run_selections + WHERE selected_run_id=OLD.clustering_run_id + ) OR EXISTS ( + SELECT 1 FROM profile_batches + WHERE recommended_clustering_run_id=OLD.clustering_run_id + ) + BEGIN + SELECT RAISE(ABORT, 'clustering run is still referenced'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_assignment_guard + BEFORE INSERT ON cluster_assignments + WHEN NOT EXISTS ( + SELECT 1 + FROM clustering_runs AS run + JOIN corpus_items AS item ON item.snapshot_id=run.snapshot_id + WHERE run.clustering_run_id=NEW.clustering_run_id + AND item.snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'assignment does not belong to run snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_assignment_update_guard + BEFORE UPDATE OF clustering_run_id, snapshot_item_id + ON cluster_assignments + WHEN NOT EXISTS ( + SELECT 1 + FROM clustering_runs AS run + JOIN corpus_items AS item ON item.snapshot_id=run.snapshot_id + WHERE run.clustering_run_id=NEW.clustering_run_id + AND item.snapshot_item_id=NEW.snapshot_item_id + ) + BEGIN + SELECT RAISE(ABORT, 'assignment does not belong to run snapshot'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_summary_guard + BEFORE INSERT ON cluster_summaries + WHEN NOT EXISTS ( + SELECT 1 FROM clustering_runs + WHERE clustering_run_id=NEW.clustering_run_id + ) OR NOT EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=NEW.clustering_run_id + AND cluster_label=NEW.cluster_label + ) + BEGIN + SELECT RAISE(ABORT, 'summary has no 
matching run assignments'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_summary_update_guard + BEFORE UPDATE OF clustering_run_id, cluster_label + ON cluster_summaries + WHEN NOT EXISTS ( + SELECT 1 FROM clustering_runs + WHERE clustering_run_id=NEW.clustering_run_id + ) OR NOT EXISTS ( + SELECT 1 FROM cluster_assignments + WHERE clustering_run_id=NEW.clustering_run_id + AND cluster_label=NEW.cluster_label + ) + BEGIN + SELECT RAISE(ABORT, 'summary has no matching run assignments'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_guard + BEFORE INSERT ON profile_batches + WHEN NOT EXISTS ( + SELECT 1 FROM corpus_snapshots WHERE snapshot_id=NEW.snapshot_id + ) OR NOT EXISTS ( + SELECT 1 FROM embedding_generations + WHERE embedding_generation_id=NEW.embedding_generation_id + ) OR NEW.status != 'running' + OR NEW.candidate_count_planned <= 0 + OR NEW.candidate_count_succeeded != 0 + OR NEW.candidate_count_failed != 0 + OR NEW.finished_at_utc IS NOT NULL + OR NEW.recommended_clustering_run_id IS NOT NULL + OR NEW.recommendation_rationale_json IS NOT NULL + OR NEW.batch_max_cluster_count IS NOT NULL + BEGIN + SELECT RAISE(ABORT, 'invalid profile batch'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_update_guard + BEFORE UPDATE ON profile_batches + WHEN NEW.profile_batch_id != OLD.profile_batch_id + OR NEW.snapshot_id != OLD.snapshot_id + OR NEW.embedding_generation_id != OLD.embedding_generation_id + OR NEW.profile_id != OLD.profile_id + OR NEW.profile_manifest_digest != OLD.profile_manifest_digest + OR NEW.candidate_space_digest != OLD.candidate_space_digest + OR NEW.started_at_utc != OLD.started_at_utc + OR NEW.created_at_utc != OLD.created_at_utc + OR OLD.status != 'running' + OR NEW.status NOT IN ('completed', 'completed_partial', 'failed') + OR NEW.candidate_count_planned <= 0 + OR NEW.candidate_count_succeeded < 0 + OR NEW.candidate_count_failed < 0 + OR NEW.finished_at_utc IS NULL + OR 
NEW.candidate_count_succeeded + NEW.candidate_count_failed + != NEW.candidate_count_planned + OR ( + NEW.status = 'completed' + AND NEW.candidate_count_failed != 0 + ) + OR ( + NEW.status = 'completed_partial' + AND ( + NEW.candidate_count_succeeded = 0 + OR NEW.candidate_count_failed = 0 + ) + ) + OR ( + NEW.status = 'failed' + AND NEW.candidate_count_succeeded != 0 + ) + OR ( + (NEW.recommended_clustering_run_id IS NULL) + != (NEW.recommendation_rationale_json IS NULL) + ) + OR ( + NEW.recommended_clustering_run_id IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM profile_batch_runs + WHERE profile_batch_id=NEW.profile_batch_id + AND clustering_run_id=NEW.recommended_clustering_run_id + ) + ) + BEGIN + SELECT RAISE(ABORT, 'immutable profile batch identity changed'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_run_guard + BEFORE INSERT ON profile_batch_runs + WHEN NEW.candidate_ordinal < 0 + OR NOT EXISTS ( + SELECT 1 + FROM profile_batches AS batch + JOIN clustering_runs AS run + ON run.snapshot_id=batch.snapshot_id + AND run.embedding_generation_id=batch.embedding_generation_id + WHERE batch.profile_batch_id=NEW.profile_batch_id + AND run.clustering_run_id=NEW.clustering_run_id + AND batch.status='running' + AND run.status='completed' + ) + BEGIN + SELECT RAISE(ABORT, 'profile batch run scope mismatch'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_assessment_guard + BEFORE INSERT ON profile_assessments + WHEN NEW.suitable_for_profile NOT IN (0, 1) + OR NOT EXISTS ( + SELECT 1 + FROM profile_batch_runs AS member + JOIN profile_batches AS batch + ON batch.profile_batch_id=member.profile_batch_id + JOIN profile_manifest_snapshots AS manifest + ON manifest.profile_manifest_digest=batch.profile_manifest_digest + WHERE member.profile_batch_id=NEW.profile_batch_id + AND member.clustering_run_id=NEW.clustering_run_id + AND batch.profile_id=NEW.profile_id + AND batch.profile_manifest_digest=NEW.profile_manifest_digest + 
AND batch.status='running' + AND manifest.profile_version=NEW.profile_version + ) + BEGIN + SELECT RAISE(ABORT, 'profile assessment scope mismatch'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_run_selection_guard + BEFORE INSERT ON run_selections + WHEN NOT EXISTS ( + SELECT 1 FROM clustering_runs AS run + WHERE run.clustering_run_id=NEW.selected_run_id + AND run.snapshot_id=NEW.snapshot_id + AND run.embedding_generation_id=NEW.embedding_generation_id + ) OR ( + NEW.profile_batch_id IS NOT NULL + AND NOT EXISTS ( + SELECT 1 + FROM profile_batch_runs AS member + JOIN profile_batches AS batch + ON batch.profile_batch_id=member.profile_batch_id + WHERE member.profile_batch_id=NEW.profile_batch_id + AND member.clustering_run_id=NEW.selected_run_id + AND batch.profile_id=NEW.profile_id + AND batch.profile_manifest_digest=NEW.profile_manifest_digest + ) + ) OR ( + NEW.profile_batch_id IS NULL + AND ( + NEW.profile_id IS NOT NULL + OR NEW.profile_manifest_digest IS NOT NULL + ) + ) + BEGIN + SELECT RAISE(ABORT, 'selection scope mismatch'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_manifest_update_guard + BEFORE UPDATE ON profile_manifest_snapshots + BEGIN + SELECT RAISE(ABORT, 'profile manifest snapshot is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_manifest_delete_guard + BEFORE DELETE ON profile_manifest_snapshots + BEGIN + SELECT RAISE(ABORT, 'profile manifest snapshot is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_delete_guard + BEFORE DELETE ON profile_batches + BEGIN + SELECT RAISE(ABORT, 'profile batch is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_run_update_guard + BEFORE UPDATE ON profile_batch_runs + BEGIN + SELECT RAISE(ABORT, 'profile batch membership is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_batch_run_delete_guard + BEFORE DELETE ON profile_batch_runs + 
BEGIN + SELECT RAISE(ABORT, 'profile batch membership is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_assessment_update_guard + BEFORE UPDATE ON profile_assessments + BEGIN + SELECT RAISE(ABORT, 'profile assessment is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_profile_assessment_delete_guard + BEFORE DELETE ON profile_assessments + BEGIN + SELECT RAISE(ABORT, 'profile assessment is immutable'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_run_selection_update_guard + BEFORE UPDATE ON run_selections + BEGIN + SELECT RAISE(ABORT, 'run selection is append-only'); + END + """, + """ + CREATE TRIGGER IF NOT EXISTS analytics_run_selection_delete_guard + BEFORE DELETE ON run_selections + BEGIN + SELECT RAISE(ABORT, 'run selection is append-only'); + END + """, +) + +_CONTROL_PLANE_TRIGGER_MARKERS = ( + "analytics_profile_batch_guard", + "analytics_profile_batch_update_guard", + "analytics_profile_batch_run_guard", + "analytics_profile_assessment_guard", + "analytics_run_selection_guard", + "analytics_profile_manifest_update_guard", + "analytics_profile_manifest_delete_guard", + "analytics_profile_batch_delete_guard", + "analytics_profile_batch_run_update_guard", + "analytics_profile_batch_run_delete_guard", + "analytics_profile_assessment_update_guard", + "analytics_profile_assessment_delete_guard", + "analytics_run_selection_update_guard", + "analytics_run_selection_delete_guard", +) + + +def _install_indexes( + conn: sqlite3.Connection, + *, + include_control_plane: bool, +) -> None: + for statement in _INDEXES: + if not include_control_plane and any( + marker in statement for marker in _CONTROL_PLANE_INDEX_MARKERS + ): + continue + conn.execute(statement) + + +def _install_integrity_triggers( + conn: sqlite3.Connection, + *, + include_control_plane: bool = True, +) -> None: + for statement in _INTEGRITY_TRIGGERS: + if not include_control_plane and ( + any(marker in statement for marker in 
_CONTROL_PLANE_TRIGGER_MARKERS) + or "profile_" in statement + or "run_selections" in statement + ): + continue + conn.execute(statement) + + +def _migrate_1_0_to_1_1(conn: sqlite3.Connection) -> None: + orphan_checks = ( + ( + "corpus_items", + "SELECT COUNT(*) FROM corpus_items AS item " + "LEFT JOIN corpus_snapshots AS snap " + "ON snap.snapshot_id=item.snapshot_id " + "WHERE snap.snapshot_id IS NULL", + ), + ( + "embedding_items", + "SELECT COUNT(*) FROM embedding_items AS item " + "LEFT JOIN embedding_generations AS generation " + "ON generation.embedding_generation_id=item.embedding_generation_id " + "LEFT JOIN corpus_items AS corpus " + "ON corpus.snapshot_item_id=item.snapshot_item_id " + "WHERE generation.embedding_generation_id IS NULL " + "OR corpus.snapshot_item_id IS NULL", + ), + ( + "clustering_runs", + "SELECT COUNT(*) FROM clustering_runs AS run " + "LEFT JOIN corpus_snapshots AS snap " + "ON snap.snapshot_id=run.snapshot_id " + "LEFT JOIN embedding_generations AS generation " + "ON generation.embedding_generation_id=run.embedding_generation_id " + "WHERE snap.snapshot_id IS NULL " + "OR generation.embedding_generation_id IS NULL " + "OR run.status NOT IN ('pending','running','completed','failed')", + ), + ( + "cluster_assignments", + "SELECT COUNT(*) FROM cluster_assignments AS assignment " + "LEFT JOIN clustering_runs AS run " + "ON run.clustering_run_id=assignment.clustering_run_id " + "LEFT JOIN corpus_items AS item " + "ON item.snapshot_id=run.snapshot_id " + "AND item.snapshot_item_id=assignment.snapshot_item_id " + "WHERE run.clustering_run_id IS NULL " + "OR item.snapshot_item_id IS NULL", + ), + ( + "cluster_summaries", + "SELECT COUNT(*) FROM cluster_summaries AS summary " + "LEFT JOIN clustering_runs AS run " + "ON run.clustering_run_id=summary.clustering_run_id " + "LEFT JOIN cluster_assignments AS assignment " + "ON assignment.clustering_run_id=summary.clustering_run_id " + "AND assignment.cluster_label=summary.cluster_label " + "WHERE 
run.clustering_run_id IS NULL " + "OR assignment.snapshot_item_id IS NULL", + ), + ( + "embedding_items.vector_row_key", + "SELECT COUNT(*) FROM (" + "SELECT vector_row_key FROM embedding_items " + "GROUP BY vector_row_key HAVING COUNT(*) > 1" + ")", + ), + ( + "cluster_summaries.display_cluster_id", + "SELECT COUNT(*) FROM (" + "SELECT clustering_run_id, display_cluster_id " + "FROM cluster_summaries " + "WHERE display_cluster_id IS NOT NULL " + "GROUP BY clustering_run_id, display_cluster_id " + "HAVING COUNT(*) > 1" + ")", + ), + ) + for table, query in orphan_checks: + count = int(conn.execute(query).fetchone()[0]) + if count: + raise AnalyticsStoreError( + f"cannot migrate analytics schema: {table} has {count} " + "invalid reference(s)" + ) + _install_indexes(conn, include_control_plane=False) + _install_integrity_triggers(conn, include_control_plane=False) + conn.execute( + f"UPDATE {_ANALYTICS_META_TABLE} SET value=? WHERE key='schema_version'", + ("1.1",), + ) + conn.commit() + + +def _migrate_1_1_to_1_2(conn: sqlite3.Connection) -> None: + for statement in _CONTROL_PLANE_DDL: + conn.execute(statement) + for trigger in ( + "analytics_snapshot_delete_guard", + "analytics_generation_delete_guard", + "analytics_clustering_run_delete_guard", + ): + conn.execute(f"DROP TRIGGER IF EXISTS {trigger}") + _install_indexes(conn, include_control_plane=True) + _install_integrity_triggers(conn) + _backfill_legacy_selections(conn) + conn.execute( + f"UPDATE {_ANALYTICS_META_TABLE} SET value=? 
WHERE key='schema_version'", + ("1.2",), + ) + conn.commit() + + +def _backfill_legacy_selections(conn: sqlite3.Connection) -> None: + scopes = conn.execute( + """ + SELECT snapshot_id, embedding_generation_id, COUNT(*) AS selected_count + FROM clustering_runs + WHERE selected_by_maintainer=1 + GROUP BY snapshot_id, embedding_generation_id + ORDER BY snapshot_id, embedding_generation_id + """ + ).fetchall() + for snapshot_id, embedding_generation_id, selected_count in scopes: + existing = conn.execute( + """ + SELECT 1 FROM run_selections + WHERE snapshot_id=? AND embedding_generation_id=? + AND profile_batch_id IS NULL + LIMIT 1 + """, + (snapshot_id, embedding_generation_id), + ).fetchone() + if existing is not None: + continue + if int(selected_count) > 1: + scope = f"{snapshot_id}|{embedding_generation_id}" + suffix = hashlib.sha256(scope.encode("utf-8")).hexdigest()[:16] + conn.execute( + f"INSERT OR REPLACE INTO {_ANALYTICS_META_TABLE}(key, value) " + "VALUES (?, ?)", + ( + f"diagnostic.LEGACY_SELECTION_AMBIGUOUS.{suffix}", + json.dumps( + { + "code": "LEGACY_SELECTION_AMBIGUOUS", + "snapshot_id": str(snapshot_id), + "embedding_generation_id": str(embedding_generation_id), + "selected_count": int(selected_count), + }, + sort_keys=True, + separators=(",", ":"), + ), + ), + ) + continue + run = conn.execute( + """ + SELECT clustering_run_id, finished_at_utc, created_at_utc + FROM clustering_runs + WHERE snapshot_id=? AND embedding_generation_id=? 
+ AND selected_by_maintainer=1 + """, + (snapshot_id, embedding_generation_id), + ).fetchone() + if run is None: + continue + run_id = str(run[0]) + identity = f"{snapshot_id}|{embedding_generation_id}|{run_id}" + selection_id = ( + "sel-legacy-" + hashlib.sha256(identity.encode("utf-8")).hexdigest()[:16] + ) + conn.execute( + """ + INSERT OR IGNORE INTO run_selections ( + selection_id, snapshot_id, embedding_generation_id, + profile_batch_id, profile_id, profile_manifest_digest, + selected_run_id, selected_at_utc, selected_by, rationale, + supersedes_selection_id + ) VALUES (?, ?, ?, NULL, NULL, NULL, ?, ?, ?, NULL, NULL) + """, + ( + selection_id, + snapshot_id, + embedding_generation_id, + run_id, + str(run[1] or run[2]), + "legacy-migration", + ), + ) + + +def ensure_analytics_schema(conn: sqlite3.Connection) -> None: + current = get_meta_value( + conn, meta_table=_ANALYTICS_META_TABLE, key="schema_version" + ) + if current == "1.0": + _migrate_1_0_to_1_1(conn) + current = "1.1" + if current == "1.1": + _migrate_1_1_to_1_2(conn) + return + if current is not None and current != CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: + raise AnalyticsStoreError(f"unsupported analytics schema version: {current}") + if current is None: + initialize_schema_v1( + conn, + ddl_statements=_DDL, + index_statements=_INDEXES, + meta_table=_ANALYTICS_META_TABLE, + seed_meta={ + "schema_version": CORPUS_ANALYTICS_STORE_SCHEMA_VERSION, + "created_at_utc": current_report_timestamp_utc(), + }, + ) + _install_integrity_triggers(conn) + conn.commit() + + +def validate_analytics_schema(conn: sqlite3.Connection) -> None: + current = get_meta_value( + conn, meta_table=_ANALYTICS_META_TABLE, key="schema_version" + ) + if current != CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: + raise AnalyticsStoreError( + "analytics store requires writable migration to schema " + f"{CORPUS_ANALYTICS_STORE_SCHEMA_VERSION}; found {current or 'missing'}" + ) + + +def open_analytics_db(path: Path) -> sqlite3.Connection: + 
from ..observability.sqlite_access import open_instrumented_sqlite_db + + return open_instrumented_sqlite_db( + path, + ensure_schema=ensure_analytics_schema, + foreign_keys=True, + ) + + +def open_analytics_db_readonly(path: Path) -> sqlite3.Connection: + from ..observability.sqlite_access import open_instrumented_sqlite_db_readonly + + return open_instrumented_sqlite_db_readonly( + path, + validate_schema=validate_analytics_schema, + ) + + +__all__ = [ + "ensure_analytics_schema", + "open_analytics_db", + "open_analytics_db_readonly", + "validate_analytics_schema", +] diff --git a/codeclone/analytics/store/__init__.py b/codeclone/analytics/store/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclone/analytics/store/protocols.py b/codeclone/analytics/store/protocols.py new file mode 100644 index 00000000..ac0a68f3 --- /dev/null +++ b/codeclone/analytics/store/protocols.py @@ -0,0 +1,243 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Protocol + +from ..contracts import ( + ActiveSelectionResult, + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, + ProfileAssessmentRecord, + ProfileBatchRecord, + ProfileBatchRunRecord, + ProfileManifestSnapshotRecord, + RunSelectionRecord, +) + + +class CorpusStore(Protocol): + def insert_snapshot( + self, + snapshot: CorpusSnapshotRecord, + items: Sequence[CorpusItemRecord], + ) -> None: ... + + def get_snapshot(self, snapshot_id: str) -> CorpusSnapshotRecord | None: ... 
+ + def list_snapshots(self) -> tuple[CorpusSnapshotRecord, ...]: ... + + def list_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]: ... + + def insert_embedding_generation( + self, + generation: EmbeddingGenerationRecord, + ) -> None: ... + + def insert_embedding_items( + self, + items: Sequence[EmbeddingItemRecord], + ) -> None: ... + + def get_embedding_generation( + self, + embedding_generation_id: str, + ) -> EmbeddingGenerationRecord | None: ... + + def list_embedding_items( + self, + *, + embedding_generation_id: str, + ) -> tuple[EmbeddingItemRecord, ...]: ... + + def insert_clustering_run(self, run: ClusteringRunRecord) -> None: ... + + def update_clustering_run(self, run: ClusteringRunRecord) -> None: ... + + def get_clustering_run( + self, + clustering_run_id: str, + ) -> ClusteringRunRecord | None: ... + + def list_clustering_runs( + self, + *, + snapshot_id: str, + embedding_generation_id: str | None = None, + ) -> tuple[ClusteringRunRecord, ...]: ... + + def set_recommended_run( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + clustering_run_id: str, + ) -> None: ... + + def insert_profile_manifest_snapshot( + self, + record: ProfileManifestSnapshotRecord, + ) -> None: ... + + def get_profile_manifest_snapshot( + self, + profile_manifest_digest: str, + ) -> ProfileManifestSnapshotRecord | None: ... + + def insert_profile_batch(self, record: ProfileBatchRecord) -> None: ... + + def finalize_profile_batch(self, record: ProfileBatchRecord) -> None: ... + + def get_profile_batch( + self, + profile_batch_id: str, + ) -> ProfileBatchRecord | None: ... + + def get_latest_profile_batch( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + profile_id: str, + ) -> ProfileBatchRecord | None: ... + + def insert_profile_batch_run(self, record: ProfileBatchRunRecord) -> None: ... + + def list_profile_batch_run_records( + self, + *, + profile_batch_id: str, + ) -> tuple[ProfileBatchRunRecord, ...]: ... 
+ + def list_clustering_runs_for_batch( + self, + *, + profile_batch_id: str, + ) -> tuple[ClusteringRunRecord, ...]: ... + + def list_profile_batch_ids_for_run( + self, + *, + clustering_run_id: str, + ) -> tuple[str, ...]: ... + + def insert_profile_assessment( + self, + record: ProfileAssessmentRecord, + ) -> None: ... + + def get_profile_assessment( + self, + *, + profile_batch_id: str, + clustering_run_id: str, + ) -> ProfileAssessmentRecord | None: ... + + def list_profile_assessments( + self, + *, + profile_batch_id: str, + ) -> tuple[ProfileAssessmentRecord, ...]: ... + + def get_active_run_selection( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + profile_batch_id: str | None, + ) -> ActiveSelectionResult: ... + + def record_run_selection_atomic( + self, + record: RunSelectionRecord, + ) -> RunSelectionRecord: ... + + def insert_cluster_assignments( + self, + assignments: Sequence[ClusterAssignmentRecord], + ) -> None: ... + + def insert_cluster_summaries( + self, + summaries: Sequence[ClusterSummaryRecord], + ) -> None: ... + + def list_assignments( + self, + clustering_run_id: str, + ) -> tuple[ClusterAssignmentRecord, ...]: ... + + def list_summaries( + self, + clustering_run_id: str, + ) -> tuple[ClusterSummaryRecord, ...]: ... + + def commit(self) -> None: ... + + def rollback(self) -> None: ... + + def close(self) -> None: ... + + +class VectorGenerationStore(Protocol): + def write_vectors( + self, + *, + embedding_generation_id: str, + rows: Sequence[Mapping[str, object]], + ) -> None: ... + + def read_vectors( + self, + *, + embedding_generation_id: str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, list[float]]: ... + + def read_vector_rows( + self, + *, + embedding_generation_id: str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: ... + + def list_generation_item_ids( + self, + *, + embedding_generation_id: str, + limit: int, + ) -> tuple[str, ...]: ... 
+ + def delete_generation(self, embedding_generation_id: str) -> None: ... + + def close(self) -> None: ... + + +class CorpusSnapshotReader(Protocol): + def read_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]: ... + + +@dataclass(frozen=True, slots=True) +class SnapshotBuildResult: + snapshot_id: str + source_digest: str + record_count: int + + +__all__ = [ + "CorpusSnapshotReader", + "CorpusStore", + "SnapshotBuildResult", + "VectorGenerationStore", +] diff --git a/codeclone/analytics/store/sqlite.py b/codeclone/analytics/store/sqlite.py new file mode 100644 index 00000000..314d6c79 --- /dev/null +++ b/codeclone/analytics/store/sqlite.py @@ -0,0 +1,993 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sqlite3 +from collections.abc import Sequence +from dataclasses import replace +from pathlib import Path + +from ..contracts import ( + ActiveSelectionResult, + ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + CorpusSnapshotRecord, + EmbeddingGenerationRecord, + EmbeddingItemRecord, + ProfileAssessmentRecord, + ProfileBatchRecord, + ProfileBatchRunRecord, + ProfileManifestSnapshotRecord, + RunSelectionRecord, +) +from ..exceptions import AnalyticsStoreError +from ..schema import open_analytics_db, open_analytics_db_readonly + + +class SqliteCorpusAnalyticsStore: + """SQLite implementation of CorpusStore.""" + + def __init__(self, conn: sqlite3.Connection) -> None: + self._conn = conn + self._conn.row_factory = sqlite3.Row + + @classmethod + def open(cls, path: Path) -> SqliteCorpusAnalyticsStore: + return cls(open_analytics_db(path)) + + @classmethod + def open_readonly(cls, path: Path) -> SqliteCorpusAnalyticsStore: + 
return cls(open_analytics_db_readonly(path)) + + def insert_snapshot( + self, + snapshot: CorpusSnapshotRecord, + items: Sequence[CorpusItemRecord], + ) -> None: + self._conn.execute( + """ + INSERT INTO corpus_snapshots ( + snapshot_id, lane, representation_kind, representation_version, + source_stores_json, source_schema_versions_json, + record_count, source_digest, created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + snapshot.snapshot_id, + snapshot.lane, + snapshot.representation_kind, + snapshot.representation_version, + snapshot.source_stores_json, + snapshot.source_schema_versions_json, + snapshot.record_count, + snapshot.source_digest, + snapshot.created_at_utc, + ), + ) + self._conn.executemany( + """ + INSERT INTO corpus_items ( + snapshot_id, representation_key, snapshot_item_id, + source_record_key, project_id, intent_id, + normalized_text, normalized_digest, normalizer_version, + representation_digest, metadata_json, registry_overlay_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + [ + ( + item.snapshot_id, + item.representation_key, + item.snapshot_item_id, + item.source_record_key, + item.project_id, + item.intent_id, + item.normalized_text, + item.normalized_digest, + item.normalizer_version, + item.representation_digest, + item.metadata_json, + item.registry_overlay_json, + ) + for item in items + ], + ) + + def get_snapshot(self, snapshot_id: str) -> CorpusSnapshotRecord | None: + row = self._conn.execute( + "SELECT * FROM corpus_snapshots WHERE snapshot_id=?", + (snapshot_id,), + ).fetchone() + return _snapshot_from_row(row) if row is not None else None + + def list_snapshots(self) -> tuple[CorpusSnapshotRecord, ...]: + rows = self._conn.execute( + "SELECT * FROM corpus_snapshots " + "ORDER BY created_at_utc DESC, snapshot_id ASC" + ).fetchall() + return tuple(_snapshot_from_row(row) for row in rows) + + def list_items(self, snapshot_id: str) -> tuple[CorpusItemRecord, ...]: + rows = self._conn.execute( + "SELECT * FROM corpus_items WHERE snapshot_id=? " + "ORDER BY source_record_key ASC, representation_key ASC", + (snapshot_id,), + ).fetchall() + return tuple(_item_from_row(row) for row in rows) + + def insert_embedding_generation( + self, + generation: EmbeddingGenerationRecord, + ) -> None: + self._conn.execute( + """ + INSERT INTO embedding_generations ( + embedding_generation_id, provider_id, provider_package_version, + model_id, model_revision, model_artifact_fingerprint, + exact_model_artifact_reproducibility, dimensions, + embedding_contract_version, embedding_similarity_metric, + vector_preprocessing, created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + generation.embedding_generation_id, + generation.provider_id, + generation.provider_package_version, + generation.model_id, + generation.model_revision, + generation.model_artifact_fingerprint, + int(generation.exact_model_artifact_reproducibility), + generation.dimensions, + generation.embedding_contract_version, + generation.embedding_similarity_metric, + generation.vector_preprocessing, + generation.created_at_utc, + ), + ) + + def insert_embedding_items( + self, + items: Sequence[EmbeddingItemRecord], + ) -> None: + self._conn.executemany( + """ + INSERT INTO embedding_items ( + embedding_generation_id, snapshot_item_id, + vector_row_key, vector_digest, dimensions + ) VALUES (?, ?, ?, ?, ?) + """, + [ + ( + item.embedding_generation_id, + item.snapshot_item_id, + item.vector_row_key, + item.vector_digest, + item.dimensions, + ) + for item in items + ], + ) + + def get_embedding_generation( + self, + embedding_generation_id: str, + ) -> EmbeddingGenerationRecord | None: + row = self._conn.execute( + "SELECT * FROM embedding_generations WHERE embedding_generation_id=?", + (embedding_generation_id,), + ).fetchone() + return _generation_from_row(row) if row is not None else None + + def list_embedding_items( + self, + *, + embedding_generation_id: str, + ) -> tuple[EmbeddingItemRecord, ...]: + rows = self._conn.execute( + "SELECT * FROM embedding_items WHERE embedding_generation_id=? " + "ORDER BY snapshot_item_id ASC", + (embedding_generation_id,), + ).fetchall() + return tuple(_embedding_item_from_row(row) for row in rows) + + def insert_clustering_run(self, run: ClusteringRunRecord) -> None: + self._conn.execute( + """ + INSERT INTO clustering_runs ( + clustering_run_id, snapshot_id, embedding_generation_id, + requested_parameters_json, effective_parameters_json, + random_seed, run_digest, recommended_by_heuristic, + selected_by_maintainer, status, created_at_utc, + finished_at_utc, error_message + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + run.clustering_run_id, + run.snapshot_id, + run.embedding_generation_id, + run.requested_parameters_json, + run.effective_parameters_json, + run.random_seed, + run.run_digest, + int(run.recommended_by_heuristic), + int(run.selected_by_maintainer), + run.status, + run.created_at_utc, + run.finished_at_utc, + run.error_message, + ), + ) + + def update_clustering_run(self, run: ClusteringRunRecord) -> None: + self._conn.execute( + """ + UPDATE clustering_runs SET + requested_parameters_json=?, + effective_parameters_json=?, + random_seed=?, + run_digest=?, + recommended_by_heuristic=?, + selected_by_maintainer=?, + status=?, + finished_at_utc=?, + error_message=? + WHERE clustering_run_id=? + """, + ( + run.requested_parameters_json, + run.effective_parameters_json, + run.random_seed, + run.run_digest, + int(run.recommended_by_heuristic), + int(run.selected_by_maintainer), + run.status, + run.finished_at_utc, + run.error_message, + run.clustering_run_id, + ), + ) + + def get_clustering_run( + self, + clustering_run_id: str, + ) -> ClusteringRunRecord | None: + row = self._conn.execute( + "SELECT * FROM clustering_runs WHERE clustering_run_id=?", + (clustering_run_id,), + ).fetchone() + return _run_from_row(row) if row is not None else None + + def list_clustering_runs( + self, + *, + snapshot_id: str, + embedding_generation_id: str | None = None, + ) -> tuple[ClusteringRunRecord, ...]: + if embedding_generation_id is None: + rows = self._conn.execute( + "SELECT * FROM clustering_runs WHERE snapshot_id=? " + "ORDER BY created_at_utc ASC, clustering_run_id ASC", + (snapshot_id,), + ).fetchall() + else: + rows = self._conn.execute( + "SELECT * FROM clustering_runs WHERE snapshot_id=? " + "AND embedding_generation_id=? 
" + "ORDER BY created_at_utc ASC, clustering_run_id ASC", + (snapshot_id, embedding_generation_id), + ).fetchall() + return tuple(_run_from_row(row) for row in rows) + + def set_recommended_run( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + clustering_run_id: str, + ) -> None: + for run in self.list_clustering_runs( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + ): + self.update_clustering_run( + replace( + run, + recommended_by_heuristic=( + run.clustering_run_id == clustering_run_id + ), + ) + ) + + def insert_profile_manifest_snapshot( + self, + record: ProfileManifestSnapshotRecord, + ) -> None: + self._conn.execute( + """ + INSERT OR IGNORE INTO profile_manifest_snapshots ( + profile_manifest_digest, profile_id, profile_version, + manifest_schema_version, canonical_manifest_json, + label, description, created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + record.profile_manifest_digest, + record.profile_id, + record.profile_version, + record.manifest_schema_version, + record.canonical_manifest_json, + record.label, + record.description, + record.created_at_utc, + ), + ) + existing = self.get_profile_manifest_snapshot(record.profile_manifest_digest) + normalized_existing = ( + replace(existing, created_at_utc=record.created_at_utc) + if existing is not None + else None + ) + if normalized_existing != record: + raise AnalyticsStoreError( + "profile manifest digest collision or snapshot mismatch" + ) + + def get_profile_manifest_snapshot( + self, + profile_manifest_digest: str, + ) -> ProfileManifestSnapshotRecord | None: + row = self._conn.execute( + """ + SELECT * FROM profile_manifest_snapshots + WHERE profile_manifest_digest=? 
+ """, + (profile_manifest_digest,), + ).fetchone() + return _profile_manifest_snapshot_from_row(row) if row is not None else None + + def insert_profile_batch(self, record: ProfileBatchRecord) -> None: + self._conn.execute( + """ + INSERT INTO profile_batches ( + profile_batch_id, snapshot_id, embedding_generation_id, + profile_id, profile_manifest_digest, candidate_space_digest, + started_at_utc, finished_at_utc, status, + candidate_count_planned, candidate_count_succeeded, + candidate_count_failed, recommended_clustering_run_id, + recommendation_rationale_json, batch_max_cluster_count, + created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + _profile_batch_values(record), + ) + + def finalize_profile_batch(self, record: ProfileBatchRecord) -> None: + cursor = self._conn.execute( + """ + UPDATE profile_batches SET + finished_at_utc=?, + status=?, + candidate_count_succeeded=?, + candidate_count_failed=?, + recommended_clustering_run_id=?, + recommendation_rationale_json=?, + batch_max_cluster_count=? + WHERE profile_batch_id=? + """, + ( + record.finished_at_utc, + record.status, + record.candidate_count_succeeded, + record.candidate_count_failed, + record.recommended_clustering_run_id, + record.recommendation_rationale_json, + record.batch_max_cluster_count, + record.profile_batch_id, + ), + ) + if cursor.rowcount != 1: + raise AnalyticsStoreError( + f"unknown profile batch: {record.profile_batch_id}" + ) + + def get_profile_batch( + self, + profile_batch_id: str, + ) -> ProfileBatchRecord | None: + row = self._conn.execute( + "SELECT * FROM profile_batches WHERE profile_batch_id=?", + (profile_batch_id,), + ).fetchone() + return _profile_batch_from_row(row) if row is not None else None + + def get_latest_profile_batch( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + profile_id: str, + ) -> ProfileBatchRecord | None: + row = self._conn.execute( + """ + SELECT * FROM profile_batches + WHERE snapshot_id=? 
AND embedding_generation_id=? AND profile_id=? + ORDER BY started_at_utc DESC, profile_batch_id ASC + LIMIT 1 + """, + (snapshot_id, embedding_generation_id, profile_id), + ).fetchone() + return _profile_batch_from_row(row) if row is not None else None + + def insert_profile_batch_run(self, record: ProfileBatchRunRecord) -> None: + self._conn.execute( + """ + INSERT INTO profile_batch_runs ( + profile_batch_id, clustering_run_id, + candidate_ordinal, candidate_dedupe_key + ) VALUES (?, ?, ?, ?) + """, + ( + record.profile_batch_id, + record.clustering_run_id, + record.candidate_ordinal, + record.candidate_dedupe_key, + ), + ) + + def list_profile_batch_run_records( + self, + *, + profile_batch_id: str, + ) -> tuple[ProfileBatchRunRecord, ...]: + rows = self._conn.execute( + """ + SELECT * FROM profile_batch_runs + WHERE profile_batch_id=? + ORDER BY candidate_ordinal ASC, candidate_dedupe_key ASC + """, + (profile_batch_id,), + ).fetchall() + return tuple(_profile_batch_run_from_row(row) for row in rows) + + def list_clustering_runs_for_batch( + self, + *, + profile_batch_id: str, + ) -> tuple[ClusteringRunRecord, ...]: + rows = self._conn.execute( + """ + SELECT run.* + FROM profile_batch_runs AS member + JOIN clustering_runs AS run + ON run.clustering_run_id=member.clustering_run_id + WHERE member.profile_batch_id=? + ORDER BY member.candidate_ordinal ASC, member.candidate_dedupe_key ASC + """, + (profile_batch_id,), + ).fetchall() + return tuple(_run_from_row(row) for row in rows) + + def list_profile_batch_ids_for_run( + self, + *, + clustering_run_id: str, + ) -> tuple[str, ...]: + rows = self._conn.execute( + """ + SELECT profile_batch_id FROM profile_batch_runs + WHERE clustering_run_id=? 
+ ORDER BY profile_batch_id ASC + """, + (clustering_run_id,), + ).fetchall() + return tuple(str(row[0]) for row in rows) + + def insert_profile_assessment( + self, + record: ProfileAssessmentRecord, + ) -> None: + self._conn.execute( + """ + INSERT INTO profile_assessments ( + profile_batch_id, clustering_run_id, profile_id, + profile_version, profile_manifest_digest, + suitable_for_profile, rejection_reasons_json, + observed_metrics_json, assessed_digest + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + record.profile_batch_id, + record.clustering_run_id, + record.profile_id, + record.profile_version, + record.profile_manifest_digest, + int(record.suitable_for_profile), + record.rejection_reasons_json, + record.observed_metrics_json, + record.assessed_digest, + ), + ) + + def get_profile_assessment( + self, + *, + profile_batch_id: str, + clustering_run_id: str, + ) -> ProfileAssessmentRecord | None: + row = self._conn.execute( + """ + SELECT * FROM profile_assessments + WHERE profile_batch_id=? AND clustering_run_id=? + """, + (profile_batch_id, clustering_run_id), + ).fetchone() + return _profile_assessment_from_row(row) if row is not None else None + + def list_profile_assessments( + self, + *, + profile_batch_id: str, + ) -> tuple[ProfileAssessmentRecord, ...]: + rows = self._conn.execute( + """ + SELECT assessment.* + FROM profile_assessments AS assessment + JOIN profile_batch_runs AS member + ON member.profile_batch_id=assessment.profile_batch_id + AND member.clustering_run_id=assessment.clustering_run_id + WHERE assessment.profile_batch_id=? 
+ ORDER BY member.candidate_ordinal ASC + """, + (profile_batch_id,), + ).fetchall() + return tuple(_profile_assessment_from_row(row) for row in rows) + + def get_active_run_selection( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + profile_batch_id: str | None, + ) -> ActiveSelectionResult: + rows = self._active_selection_rows( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_batch_id=profile_batch_id, + ) + return ActiveSelectionResult( + _run_selection_from_row(rows[0]) if rows else None, + len(rows) > 1, + ) + + def record_run_selection_atomic( + self, + record: RunSelectionRecord, + ) -> RunSelectionRecord: + if self._conn.in_transaction: + raise AnalyticsStoreError( + "atomic selection recording requires a clean transaction" + ) + try: + self._conn.execute("BEGIN IMMEDIATE") + rows = self._active_selection_rows( + snapshot_id=record.snapshot_id, + embedding_generation_id=record.embedding_generation_id, + profile_batch_id=record.profile_batch_id, + ) + if len(rows) > 1: + raise AnalyticsStoreError( + "selection chain ambiguous: multiple active selections" + ) + previous = _run_selection_from_row(rows[0]) if rows else None + persisted = replace( + record, + supersedes_selection_id=( + previous.selection_id if previous is not None else None + ), + ) + batch_mismatch = ( + persisted.profile_batch_id is not None + and not self._run_in_profile_batch( + profile_batch_id=persisted.profile_batch_id, + clustering_run_id=persisted.selected_run_id, + ) + ) + if batch_mismatch: + raise AnalyticsStoreError( + "selected run is not a member of profile batch: " + f"{persisted.profile_batch_id}" + ) + self._insert_run_selection(persisted) + if persisted.profile_batch_id is None: + self._conn.execute( + """ + UPDATE clustering_runs + SET selected_by_maintainer=( + clustering_run_id=? + ) + WHERE snapshot_id=? AND embedding_generation_id=? 
+ """, + ( + persisted.selected_run_id, + persisted.snapshot_id, + persisted.embedding_generation_id, + ), + ) + self._conn.commit() + return persisted + except BaseException: + self._conn.rollback() + raise + + def _active_selection_rows( + self, + *, + snapshot_id: str, + embedding_generation_id: str, + profile_batch_id: str | None, + ) -> list[sqlite3.Row]: + return list( + self._conn.execute( + """ + SELECT selection.* + FROM run_selections AS selection + WHERE selection.snapshot_id=? + AND selection.embedding_generation_id=? + AND selection.profile_batch_id IS ? + AND NOT EXISTS ( + SELECT 1 FROM run_selections AS successor + WHERE successor.supersedes_selection_id= + selection.selection_id + ) + ORDER BY selection.selected_at_utc DESC, + selection.selection_id ASC + """, + (snapshot_id, embedding_generation_id, profile_batch_id), + ).fetchall() + ) + + def _run_in_profile_batch( + self, + *, + profile_batch_id: str, + clustering_run_id: str, + ) -> bool: + return ( + self._conn.execute( + """ + SELECT 1 FROM profile_batch_runs + WHERE profile_batch_id=? AND clustering_run_id=? + """, + (profile_batch_id, clustering_run_id), + ).fetchone() + is not None + ) + + def _insert_run_selection(self, record: RunSelectionRecord) -> None: + self._conn.execute( + """ + INSERT INTO run_selections ( + selection_id, snapshot_id, embedding_generation_id, + profile_batch_id, profile_id, profile_manifest_digest, + selected_run_id, selected_at_utc, selected_by, rationale, + supersedes_selection_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + record.selection_id, + record.snapshot_id, + record.embedding_generation_id, + record.profile_batch_id, + record.profile_id, + record.profile_manifest_digest, + record.selected_run_id, + record.selected_at_utc, + record.selected_by, + record.rationale, + record.supersedes_selection_id, + ), + ) + + def insert_cluster_assignments( + self, + assignments: Sequence[ClusterAssignmentRecord], + ) -> None: + self._conn.executemany( + """ + INSERT INTO cluster_assignments ( + clustering_run_id, snapshot_item_id, cluster_label, + membership_strength, membership_digest + ) VALUES (?, ?, ?, ?, ?) + """, + [ + ( + item.clustering_run_id, + item.snapshot_item_id, + item.cluster_label, + item.membership_strength, + item.membership_digest, + ) + for item in assignments + ], + ) + + def insert_cluster_summaries( + self, + summaries: Sequence[ClusterSummaryRecord], + ) -> None: + self._conn.executemany( + """ + INSERT INTO cluster_summaries ( + clustering_run_id, cluster_label, display_cluster_id, + membership_digest, size, diagnostics_json + ) VALUES (?, ?, ?, ?, ?, ?) + """, + [ + ( + item.clustering_run_id, + item.cluster_label, + item.display_cluster_id, + item.membership_digest, + item.size, + item.diagnostics_json, + ) + for item in summaries + ], + ) + + def list_assignments( + self, + clustering_run_id: str, + ) -> tuple[ClusterAssignmentRecord, ...]: + rows = self._conn.execute( + "SELECT * FROM cluster_assignments WHERE clustering_run_id=? " + "ORDER BY snapshot_item_id ASC", + (clustering_run_id,), + ).fetchall() + return tuple(_assignment_from_row(row) for row in rows) + + def list_summaries( + self, + clustering_run_id: str, + ) -> tuple[ClusterSummaryRecord, ...]: + rows = self._conn.execute( + "SELECT * FROM cluster_summaries WHERE clustering_run_id=? 
" + "ORDER BY display_cluster_id ASC NULLS LAST, cluster_label ASC", + (clustering_run_id,), + ).fetchall() + return tuple(_summary_from_row(row) for row in rows) + + def commit(self) -> None: + self._conn.commit() + + def rollback(self) -> None: + self._conn.rollback() + + def close(self) -> None: + self._conn.close() + + +def _snapshot_from_row(row: sqlite3.Row) -> CorpusSnapshotRecord: + return CorpusSnapshotRecord( + snapshot_id=str(row["snapshot_id"]), + lane=str(row["lane"]), # type: ignore[arg-type] + representation_kind=str(row["representation_kind"]), + representation_version=str(row["representation_version"]), + source_stores_json=str(row["source_stores_json"]), + source_schema_versions_json=str(row["source_schema_versions_json"]), + record_count=int(row["record_count"]), + source_digest=str(row["source_digest"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _item_from_row(row: sqlite3.Row) -> CorpusItemRecord: + overlay = row["registry_overlay_json"] + return CorpusItemRecord( + snapshot_id=str(row["snapshot_id"]), + representation_key=str(row["representation_key"]), + snapshot_item_id=str(row["snapshot_item_id"]), + source_record_key=str(row["source_record_key"]), + project_id=str(row["project_id"]), + intent_id=str(row["intent_id"]), + normalized_text=str(row["normalized_text"]), + normalized_digest=str(row["normalized_digest"]), + normalizer_version=str(row["normalizer_version"]), + representation_digest=str(row["representation_digest"]), + metadata_json=str(row["metadata_json"]), + registry_overlay_json=str(overlay) if overlay is not None else None, + ) + + +def _generation_from_row(row: sqlite3.Row) -> EmbeddingGenerationRecord: + return EmbeddingGenerationRecord( + embedding_generation_id=str(row["embedding_generation_id"]), + provider_id=str(row["provider_id"]), + provider_package_version=str(row["provider_package_version"]), + model_id=str(row["model_id"]), + model_revision=_optional_str(row["model_revision"]), + 
model_artifact_fingerprint=_optional_str(row["model_artifact_fingerprint"]), + exact_model_artifact_reproducibility=bool( + int(row["exact_model_artifact_reproducibility"]) + ), + dimensions=int(row["dimensions"]), + embedding_contract_version=str(row["embedding_contract_version"]), + embedding_similarity_metric=str(row["embedding_similarity_metric"]), + vector_preprocessing=str(row["vector_preprocessing"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _embedding_item_from_row(row: sqlite3.Row) -> EmbeddingItemRecord: + return EmbeddingItemRecord( + embedding_generation_id=str(row["embedding_generation_id"]), + snapshot_item_id=str(row["snapshot_item_id"]), + vector_row_key=str(row["vector_row_key"]), + vector_digest=str(row["vector_digest"]), + dimensions=int(row["dimensions"]), + ) + + +def _run_from_row(row: sqlite3.Row) -> ClusteringRunRecord: + return ClusteringRunRecord( + clustering_run_id=str(row["clustering_run_id"]), + snapshot_id=str(row["snapshot_id"]), + embedding_generation_id=str(row["embedding_generation_id"]), + requested_parameters_json=str(row["requested_parameters_json"]), + effective_parameters_json=str(row["effective_parameters_json"]), + random_seed=int(row["random_seed"]), + run_digest=str(row["run_digest"]), + recommended_by_heuristic=bool(int(row["recommended_by_heuristic"])), + selected_by_maintainer=bool(int(row["selected_by_maintainer"])), + status=str(row["status"]), # type: ignore[arg-type] + created_at_utc=str(row["created_at_utc"]), + finished_at_utc=_optional_str(row["finished_at_utc"]), + error_message=_optional_str(row["error_message"]), + ) + + +def _assignment_from_row(row: sqlite3.Row) -> ClusterAssignmentRecord: + strength = row["membership_strength"] + return ClusterAssignmentRecord( + clustering_run_id=str(row["clustering_run_id"]), + snapshot_item_id=str(row["snapshot_item_id"]), + cluster_label=int(row["cluster_label"]), + membership_strength=float(strength) if strength is not None else None, + 
membership_digest=str(row["membership_digest"]), + ) + + +def _summary_from_row(row: sqlite3.Row) -> ClusterSummaryRecord: + display = row["display_cluster_id"] + return ClusterSummaryRecord( + clustering_run_id=str(row["clustering_run_id"]), + cluster_label=int(row["cluster_label"]), + display_cluster_id=int(display) if display is not None else None, + membership_digest=str(row["membership_digest"]), + size=int(row["size"]), + diagnostics_json=str(row["diagnostics_json"]), + ) + + +def _profile_manifest_snapshot_from_row( + row: sqlite3.Row, +) -> ProfileManifestSnapshotRecord: + return ProfileManifestSnapshotRecord( + profile_manifest_digest=str(row["profile_manifest_digest"]), + profile_id=str(row["profile_id"]), + profile_version=str(row["profile_version"]), + manifest_schema_version=str(row["manifest_schema_version"]), + canonical_manifest_json=str(row["canonical_manifest_json"]), + label=str(row["label"]), + description=str(row["description"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _profile_batch_values(record: ProfileBatchRecord) -> tuple[object, ...]: + return ( + record.profile_batch_id, + record.snapshot_id, + record.embedding_generation_id, + record.profile_id, + record.profile_manifest_digest, + record.candidate_space_digest, + record.started_at_utc, + record.finished_at_utc, + record.status, + record.candidate_count_planned, + record.candidate_count_succeeded, + record.candidate_count_failed, + record.recommended_clustering_run_id, + record.recommendation_rationale_json, + record.batch_max_cluster_count, + record.created_at_utc, + ) + + +def _profile_batch_from_row(row: sqlite3.Row) -> ProfileBatchRecord: + return ProfileBatchRecord( + profile_batch_id=str(row["profile_batch_id"]), + snapshot_id=str(row["snapshot_id"]), + embedding_generation_id=str(row["embedding_generation_id"]), + profile_id=str(row["profile_id"]), + profile_manifest_digest=str(row["profile_manifest_digest"]), + 
candidate_space_digest=str(row["candidate_space_digest"]), + started_at_utc=str(row["started_at_utc"]), + finished_at_utc=_optional_str(row["finished_at_utc"]), + status=str(row["status"]), # type: ignore[arg-type] + candidate_count_planned=int(row["candidate_count_planned"]), + candidate_count_succeeded=int(row["candidate_count_succeeded"]), + candidate_count_failed=int(row["candidate_count_failed"]), + recommended_clustering_run_id=_optional_str( + row["recommended_clustering_run_id"] + ), + recommendation_rationale_json=_optional_str( + row["recommendation_rationale_json"] + ), + batch_max_cluster_count=( + int(row["batch_max_cluster_count"]) + if row["batch_max_cluster_count"] is not None + else None + ), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _profile_batch_run_from_row(row: sqlite3.Row) -> ProfileBatchRunRecord: + return ProfileBatchRunRecord( + profile_batch_id=str(row["profile_batch_id"]), + clustering_run_id=str(row["clustering_run_id"]), + candidate_ordinal=int(row["candidate_ordinal"]), + candidate_dedupe_key=str(row["candidate_dedupe_key"]), + ) + + +def _profile_assessment_from_row(row: sqlite3.Row) -> ProfileAssessmentRecord: + return ProfileAssessmentRecord( + profile_batch_id=str(row["profile_batch_id"]), + clustering_run_id=str(row["clustering_run_id"]), + profile_id=str(row["profile_id"]), + profile_version=str(row["profile_version"]), + profile_manifest_digest=str(row["profile_manifest_digest"]), + suitable_for_profile=bool(int(row["suitable_for_profile"])), + rejection_reasons_json=str(row["rejection_reasons_json"]), + observed_metrics_json=_optional_str(row["observed_metrics_json"]), + assessed_digest=str(row["assessed_digest"]), + ) + + +def _run_selection_from_row(row: sqlite3.Row) -> RunSelectionRecord: + return RunSelectionRecord( + selection_id=str(row["selection_id"]), + snapshot_id=str(row["snapshot_id"]), + embedding_generation_id=str(row["embedding_generation_id"]), + 
profile_batch_id=_optional_str(row["profile_batch_id"]), + profile_id=_optional_str(row["profile_id"]), + profile_manifest_digest=_optional_str(row["profile_manifest_digest"]), + selected_run_id=str(row["selected_run_id"]), + selected_at_utc=str(row["selected_at_utc"]), + selected_by=str(row["selected_by"]), + rationale=_optional_str(row["rationale"]), + supersedes_selection_id=_optional_str(row["supersedes_selection_id"]), + ) + + +def _optional_str(value: object) -> str | None: + return str(value) if isinstance(value, str) else None + + +def parse_json_object(text: str) -> dict[str, object]: + parsed = json.loads(text) + if not isinstance(parsed, dict): + raise AnalyticsStoreError("expected JSON object") + return parsed + + +__all__ = ["SqliteCorpusAnalyticsStore", "parse_json_object"] diff --git a/codeclone/analytics/store/vectors_lancedb.py b/codeclone/analytics/store/vectors_lancedb.py new file mode 100644 index 00000000..509d3ceb --- /dev/null +++ b/codeclone/analytics/store/vectors_lancedb.py @@ -0,0 +1,275 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import importlib +import math +import struct +from collections.abc import Mapping, Sequence +from pathlib import Path +from types import ModuleType +from typing import Protocol, cast + +from ..corpus.keys import sha256_hex +from ..exceptions import AnalyticsCapabilityError, AnalyticsStoreError + +_TABLE_NAME = "corpus_vectors" +_ID_QUERY_BATCH = 500 + + +class _LanceSearchQuery(Protocol): + def select(self, columns: list[str]) -> _LanceSearchQuery: ... + + def where(self, predicate: str) -> _LanceSearchQuery: ... + + def limit(self, k: int) -> _LanceSearchQuery: ... + + def to_list(self) -> list[dict[str, object]]: ... 
+ + +class _LanceMergeInsert(Protocol): + def when_matched_update_all(self) -> _LanceMergeInsert: ... + + def when_not_matched_insert_all(self) -> _LanceMergeInsert: ... + + def execute(self, records: list[dict[str, object]]) -> None: ... + + +class _ArrowType(Protocol): + @property + def list_size(self) -> int: ... + + +class _ArrowField(Protocol): + @property + def type(self) -> _ArrowType: ... + + +class _ArrowSchema(Protocol): + def field(self, name: str) -> _ArrowField: ... + + +class _LanceTable(Protocol): + @property + def schema(self) -> _ArrowSchema: ... + + def search(self, vector: list[float] | None = None) -> _LanceSearchQuery: ... + + def merge_insert(self, key: str) -> _LanceMergeInsert: ... + + def delete(self, predicate: str) -> None: ... + + +class _LanceConnection(Protocol): + def open_table(self, name: str) -> _LanceTable: ... + + def create_table( + self, name: str, schema: object, *, exist_ok: bool = False + ) -> _LanceTable: ... + + +def _load_lancedb() -> ModuleType: + try: + return importlib.import_module("lancedb") + except ImportError as exc: + raise AnalyticsCapabilityError( + "lancedb is required for analytics embeddings; " + "install with: uv sync --extra analytics" + ) from exc + + +def _schema(pa: ModuleType, dimension: int) -> object: + return pa.schema( + [ + pa.field("vector_row_key", pa.string()), + pa.field("embedding_generation_id", pa.string()), + pa.field("snapshot_item_id", pa.string()), + pa.field("vector_digest", pa.string()), + pa.field("vector", pa.list_(pa.float32(), dimension)), + ] + ) + + +def vector_row_key(*, embedding_generation_id: str, snapshot_item_id: str) -> str: + return sha256_hex(f"{embedding_generation_id}\n{snapshot_item_id}") + + +def vector_digest(vector: Sequence[float]) -> str: + payload = b"".join(struct.pack(" None: + lancedb = _load_lancedb() + pyarrow = importlib.import_module("pyarrow") + self._dimension = dimension + path.mkdir(parents=True, exist_ok=True) + self._conn = cast(_LanceConnection, 
lancedb.connect(str(path))) + self._table = self._open_or_create_table(pyarrow) + + def _open_or_create_table(self, pyarrow: ModuleType) -> _LanceTable: + try: + table = self._conn.open_table(_TABLE_NAME) + except ValueError as exc: + if f"Table '{_TABLE_NAME}' was not found" not in str(exc): + raise + return self._conn.create_table( + _TABLE_NAME, + schema=_schema(pyarrow, self._dimension), + exist_ok=True, + ) + field = table.schema.field("vector") + actual_dimension = getattr(field.type, "list_size", None) + if actual_dimension != self._dimension: + raise AnalyticsStoreError( + "analytics vector store dimension mismatch: " + f"existing={actual_dimension}, configured={self._dimension}" + ) + return table + + def write_vectors( + self, + *, + embedding_generation_id: str, + rows: Sequence[Mapping[str, object]], + ) -> None: + records: list[dict[str, object]] = [] + for row in rows: + snapshot_item_id = str(row["snapshot_item_id"]) + vector = row["vector"] + if not isinstance(vector, list): + msg = "vector must be a list of floats" + raise TypeError(msg) + float_vector = [float(value) for value in vector] + if len(float_vector) != self._dimension: + raise AnalyticsStoreError( + f"vector dimension mismatch: actual={len(float_vector)}, " + f"expected={self._dimension}" + ) + if not all(math.isfinite(value) for value in float_vector): + raise AnalyticsStoreError("vectors must contain only finite values") + row_key = vector_row_key( + embedding_generation_id=embedding_generation_id, + snapshot_item_id=snapshot_item_id, + ) + records.append( + { + "vector_row_key": row_key, + "embedding_generation_id": embedding_generation_id, + "snapshot_item_id": snapshot_item_id, + "vector_digest": vector_digest(float_vector), + "vector": float_vector, + } + ) + if not records: + return + ( + self._table.merge_insert("vector_row_key") + .when_matched_update_all() + .when_not_matched_insert_all() + .execute(records) + ) + + def read_vectors( + self, + *, + embedding_generation_id: 
str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, list[float]]: + loaded: dict[str, list[float]] = {} + for item_id, row in self.read_vector_rows( + embedding_generation_id=embedding_generation_id, + snapshot_item_ids=snapshot_item_ids, + ).items(): + vector = row.get("vector") + if isinstance(vector, list): + loaded[item_id] = [float(value) for value in vector] + return loaded + + def read_vector_rows( + self, + *, + embedding_generation_id: str, + snapshot_item_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: + if not snapshot_item_ids: + return {} + loaded: dict[str, dict[str, object]] = {} + ordered = sorted(set(snapshot_item_ids)) + for start in range(0, len(ordered), _ID_QUERY_BATCH): + batch = ordered[start : start + _ID_QUERY_BATCH] + quoted = ", ".join(_sql_literal(item) for item in batch) + rows = ( + self._table.search(None) + .select( + [ + "vector_row_key", + "snapshot_item_id", + "vector_digest", + "vector", + ] + ) + .where( + "embedding_generation_id = " + f"{_sql_literal(embedding_generation_id)} " + f"AND snapshot_item_id IN ({quoted})" + ) + .limit(len(batch)) + .to_list() + ) + for row in rows: + item_id = row.get("snapshot_item_id") + vector = row.get("vector") + if isinstance(item_id, str) and isinstance(vector, list): + loaded[item_id] = { + "vector_row_key": str(row.get("vector_row_key", "")), + "vector_digest": str(row.get("vector_digest", "")), + "vector": [float(value) for value in vector], + } + return loaded + + def delete_generation(self, embedding_generation_id: str) -> None: + self._table.delete( + f"embedding_generation_id = {_sql_literal(embedding_generation_id)}" + ) + + def list_generation_item_ids( + self, + *, + embedding_generation_id: str, + limit: int, + ) -> tuple[str, ...]: + if limit <= 0: + return () + rows = ( + self._table.search(None) + .select(["snapshot_item_id"]) + .where(f"embedding_generation_id = {_sql_literal(embedding_generation_id)}") + .limit(limit) + .to_list() + ) + return tuple( + sorted( 
+ str(item_id) + for row in rows + if isinstance((item_id := row.get("snapshot_item_id")), str) + ) + ) + + def close(self) -> None: + return None + + +def _sql_literal(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + +__all__ = ["AnalyticsVectorStore", "vector_digest", "vector_row_key"] diff --git a/codeclone/analytics/workflow.py b/codeclone/analytics/workflow.py new file mode 100644 index 00000000..6306b465 --- /dev/null +++ b/codeclone/analytics/workflow.py @@ -0,0 +1,1072 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import uuid +from collections.abc import Sequence +from dataclasses import asdict, dataclass, replace +from datetime import datetime, timezone +from pathlib import Path +from typing import Literal + +from ..config.analytics import AnalyticsConfig, resolve_analytics_config +from ..observability import span +from ..report.meta import current_report_timestamp_utc +from ..utils.json_io import json_text +from .clustering.canonicalize import ( + canonicalize_partitions, + display_cluster_id_map, + partition_membership_map, +) +from .clustering.diagnostics import ( + build_cluster_diagnostics, + compute_centroids, + nearest_cluster_ids, +) +from .clustering.models import ( + NOISE_LABEL, + ClusteringParameters, + ClusteringPipelineResult, + ClusterPartition, + EffectiveClusteringParameters, +) +from .clustering.pipeline import resolve_effective_parameters, run_clustering_pipeline +from .clustering.sweep import ( + SweepCandidate, + SweepCandidateResult, + candidate_space_digest, + clustering_algorithm_manifest, + iter_profile_candidates, + iter_sweep_candidates, + rank_sweep_results, + run_digest, + score_clustering_result, +) +from .contracts import ( + 
ClusterAssignmentRecord, + ClusteringRunRecord, + ClusterSummaryRecord, + CorpusItemRecord, + ProfileAssessmentRecord, + ProfileBatchRecord, + ProfileBatchRunRecord, + ProfileManifestSnapshotRecord, + RunSelectionRecord, +) +from .corpus.keys import sha256_hex +from .corpus.snapshot import build_intent_snapshot +from .embedding.generation import ( + EmbeddingBatchResult, + generate_embeddings_for_snapshot, +) +from .exceptions import AnalyticsStoreError, AnalyticsWorkflowError +from .integrity import ( + assess_partition_validity, + load_validated_snapshot_vectors, + validate_persisted_run, +) +from .metrics.partition_metrics import compute_run_partition_metrics +from .profiles.loader import canonical_manifest_json, profile_manifest_digest +from .profiles.models import ClusteringProfileManifest, ProfileSearchSpace +from .profiles.ranking import ProfileRankedRun, rank_profile_recommendations +from .profiles.registry import get_profile, resolve_profile_registry +from .profiles.suitability import ( + assess_profile_suitability, + profile_assessment_digest, +) +from .store.protocols import CorpusStore, SnapshotBuildResult +from .store.sqlite import SqliteCorpusAnalyticsStore, parse_json_object +from .store.vectors_lancedb import AnalyticsVectorStore + + +@dataclass(frozen=True, slots=True) +class ClusterRunResult: + clustering_run_id: str + cluster_count: int + noise_count: int + + +@dataclass(frozen=True, slots=True) +class BuildResult: + snapshot_id: str + embedding_generation_id: str + clustering_run_ids: tuple[str, ...] + recommended_run_id: str | None + profile_id: str | None = None + profile_batch_id: str | None = None + recommended_for_profile_run_id: str | None = None + + +@dataclass(frozen=True, slots=True) +class ProfileSweepResult: + profile_batch_id: str + profile_id: str + clustering_run_ids: tuple[str, ...] 
def run_embed(
    *,
    root_path: Path,
    snapshot_id: str,
    config: AnalyticsConfig | None = None,
) -> EmbeddingBatchResult:
    """Generate embeddings for one existing snapshot.

    Args:
        root_path: Project root; used to resolve the analytics configuration
            when ``config`` is not supplied.
        snapshot_id: Identifier of the snapshot to embed.
        config: Optional pre-resolved analytics configuration.

    Returns:
        The result of ``generate_embeddings_for_snapshot``.

    Raises:
        AnalyticsWorkflowError: If ``snapshot_id`` is not present in the
            store. The message lists up to five known snapshot ids as a hint.
    """
    resolved_config = config or resolve_analytics_config(root_path)
    store = SqliteCorpusAnalyticsStore.open(resolved_config.db_path)
    try:
        # Build the vector store inside the guard: if its constructor raises,
        # the already-open SQLite store must still be closed.
        vector_store = AnalyticsVectorStore(
            path=resolved_config.vectors_path,
            dimension=resolved_config.embedding_dimension,
        )
        try:
            if store.get_snapshot(snapshot_id) is None:
                known = ", ".join(
                    item.snapshot_id for item in store.list_snapshots()[:5]
                )
                msg = f"unknown snapshot: {snapshot_id}"
                if known:
                    msg = f"{msg}; known snapshots: {known}"
                raise AnalyticsWorkflowError(msg)
            with span(name="analytics.embed"):
                return generate_embeddings_for_snapshot(
                    store=store,
                    vector_store=vector_store,
                    config=resolved_config,
                    snapshot_id=snapshot_id,
                )
        finally:
            # Separate finally blocks so a failure closing one resource
            # cannot prevent the other from being closed.
            vector_store.close()
    finally:
        store.close()
def select_cluster_run(
    *,
    root_path: Path,
    clustering_run_id: str,
    profile_batch_id: str | None = None,
    selection_profile_id: str | None = None,
    selected_by: str = "local-maintainer",
    rationale: str | None = None,
    config: AnalyticsConfig | None = None,
) -> RunSelectionRecord:
    """Persist a selection of ``clustering_run_id`` for its snapshot.

    The run must exist and pass ``validate_persisted_run``. The selection may
    be scoped to a profile batch either directly (``profile_batch_id``) or via
    the latest batch of a profile (``selection_profile_id``), never both.

    Raises:
        AnalyticsWorkflowError: For an unknown run, for both scope arguments
            given at once, or when no batch exists for the requested profile.
    """
    analytics_config = config or resolve_analytics_config(root_path)
    store = SqliteCorpusAnalyticsStore.open(analytics_config.db_path)
    try:
        chosen_run = store.get_clustering_run(clustering_run_id)
        if chosen_run is None:
            raise AnalyticsWorkflowError(f"unknown clustering run: {clustering_run_id}")
        validate_persisted_run(
            store=store,
            snapshot_id=chosen_run.snapshot_id,
            clustering_run_id=clustering_run_id,
        )

        scoped_by_batch = profile_batch_id is not None
        scoped_by_profile = selection_profile_id is not None
        if scoped_by_batch and scoped_by_profile:
            raise AnalyticsWorkflowError(
                "selection profile scope must use a batch id or profile id, not both"
            )

        if scoped_by_profile:
            # Resolve the profile id to the most recent batch for this
            # snapshot / embedding generation pair.
            latest_batch = store.get_latest_profile_batch(
                snapshot_id=chosen_run.snapshot_id,
                embedding_generation_id=chosen_run.embedding_generation_id,
                profile_id=selection_profile_id,
            )
            if latest_batch is None:
                raise AnalyticsWorkflowError(
                    f"unknown analytics profile batch for: {selection_profile_id}"
                )
            batch_scope_id = latest_batch.profile_batch_id
        else:
            batch_scope_id = profile_batch_id

        return record_run_selection(
            store=store,
            snapshot_id=chosen_run.snapshot_id,
            embedding_generation_id=chosen_run.embedding_generation_id,
            selected_run_id=clustering_run_id,
            profile_batch_id=batch_scope_id,
            selected_by=selected_by,
            rationale=rationale,
        )
    finally:
        store.close()
def _run_sweep(
    *,
    store: SqliteCorpusAnalyticsStore,
    snapshot_id: str,
    embedding_generation_id: str,
    item_ids: list[str],
    items: Sequence[CorpusItemRecord],
    vectors: list[list[float]],
    config: AnalyticsConfig,
    sweep_grid: ProfileSearchSpace | None = None,
) -> tuple[str, ...]:
    """Execute every sweep candidate and flag the best-scoring run.

    Args:
        store: Open analytics store; committed before returning.
        snapshot_id: Snapshot being clustered.
        embedding_generation_id: Embedding generation the vectors belong to.
        item_ids: Snapshot item ids, in the same order as ``vectors``.
        items: Corpus item records for the snapshot.
        vectors: One embedding per item.
        config: Analytics configuration (also the source of the default grid).
        sweep_grid: Explicit search space; falls back to the configured grid.

    Returns:
        The ids of all executed runs, in candidate order.

    Raises:
        AnalyticsWorkflowError: If the grid yields no candidate for this corpus.
    """
    selected_grid = sweep_grid or _config_sweep_grid(config)
    candidates = iter_sweep_candidates(
        n_samples=len(item_ids),
        n_features=len(vectors[0]) if vectors else 0,
        grid=selected_grid,
    )
    if not candidates:
        raise AnalyticsWorkflowError(
            "corpus is too small for the configured clustering sweep"
        )
    run_ids: list[str] = []
    # ``scored`` and ``scored_run_ids`` are appended together so that an index
    # into one is always valid for the other. ``run_ids`` may be longer when a
    # run record cannot be read back, so it must not be indexed by score
    # position.
    scored: list[SweepCandidateResult] = []
    scored_run_ids: list[str] = []
    for candidate in candidates:
        run_id = _execute_single_run(
            store=store,
            snapshot_id=snapshot_id,
            embedding_generation_id=embedding_generation_id,
            item_ids=item_ids,
            items=items,
            vectors=vectors,
            requested=candidate.requested,
            config=config,
            recommended_by_heuristic=False,
        )
        run_ids.append(run_id)
        if store.get_clustering_run(run_id) is None:
            continue
        scored_run_ids.append(run_id)
        # Same scoring path as the profile sweep, so both rank identically.
        scored.append(
            _score_completed_run(
                store=store,
                clustering_run_id=run_id,
                candidate=candidate,
            )
        )
    best = rank_sweep_results(scored)
    if best is not None:
        best_run_id = scored_run_ids[scored.index(best)]
        store.set_recommended_run(
            snapshot_id=snapshot_id,
            embedding_generation_id=embedding_generation_id,
            clustering_run_id=best_run_id,
        )
    store.commit()
    return tuple(run_ids)
snapshot: {snapshot_id}") + if generation is None: + raise AnalyticsWorkflowError( + f"unknown embedding generation: {embedding_generation_id}" + ) + _validate_profile_applicability( + profile=profile, + representation_kind=snapshot.representation_kind, + record_count=snapshot.record_count, + embedding_contract_version=generation.embedding_contract_version, + ) + candidates = iter_profile_candidates( + profile=profile, + n_samples=len(item_ids), + n_features=len(vectors[0]) if vectors else 0, + ) + if not candidates: + raise AnalyticsWorkflowError( + "profile incompatible with corpus: no effective clustering candidates" + ) + manifest_digest = profile_manifest_digest(profile) + space_digest = candidate_space_digest( + candidates, + fixed_parameters={"random_seed": config.cluster_random_seed}, + ) + started_at = _execution_timestamp_utc() + profile_batch_id = new_profile_batch_id( + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_manifest_digest=manifest_digest, + candidate_space_digest=space_digest, + started_at_utc=started_at, + ) + store.insert_profile_manifest_snapshot( + ProfileManifestSnapshotRecord( + profile_manifest_digest=manifest_digest, + profile_id=profile.profile_id, + profile_version=profile.profile_version, + manifest_schema_version=profile.manifest_schema_version, + canonical_manifest_json=canonical_manifest_json(profile), + label=profile.label, + description=profile.description, + created_at_utc=started_at, + ) + ) + store.insert_profile_batch( + ProfileBatchRecord( + profile_batch_id=profile_batch_id, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + profile_id=profile.profile_id, + profile_manifest_digest=manifest_digest, + candidate_space_digest=space_digest, + started_at_utc=started_at, + finished_at_utc=None, + status="running", + candidate_count_planned=len(candidates), + candidate_count_succeeded=0, + candidate_count_failed=0, + recommended_clustering_run_id=None, + 
def assess_and_persist_profile_batch(
    *,
    store: CorpusStore,
    profile_batch_id: str,
    profile: ClusteringProfileManifest,
    profile_manifest_digest: str,
    clustering_run_ids: Sequence[str],
) -> ProfileSweepResult:
    """Assess each run of a profile batch, persist the verdicts, finalize the batch.

    For every completed run this stores one ``ProfileAssessmentRecord``; runs
    that are suitable for the profile are then ranked to pick the batch's
    recommended run. The batch row is finalized with counts, status, the
    recommendation and its rationale. This function does not commit.

    Args:
        store: Store holding the batch, its runs and their artifacts.
        profile_batch_id: Batch to assess; must already exist.
        profile: Profile manifest the runs are judged against.
        profile_manifest_digest: Digest recorded on every assessment row.
        clustering_run_ids: Runs that belong to the batch.

    Returns:
        A summary of the finalized batch.

    Raises:
        AnalyticsWorkflowError: If the batch does not exist.
    """
    batch = store.get_profile_batch(profile_batch_id)
    if batch is None:
        raise AnalyticsWorkflowError(f"unknown profile batch: {profile_batch_id}")
    ranked: list[ProfileRankedRun] = []
    technically_valid_count = 0
    all_cluster_counts: list[int] = []
    for run_id in clustering_run_ids:
        run = store.get_clustering_run(run_id)
        # Missing or unfinished runs get no assessment row at all.
        if run is None or run.status != "completed":
            continue
        validity = assess_partition_validity(
            store=store,
            snapshot_id=run.snapshot_id,
            clustering_run_id=run_id,
        )
        # Metrics are only computed for technically valid partitions; the
        # suitability check receives None otherwise.
        metrics = None
        if validity.technically_valid:
            technically_valid_count += 1
            metrics = compute_run_partition_metrics(
                store.list_assignments(run_id),
                store.list_summaries(run_id),
            )
            all_cluster_counts.append(metrics.cluster_count)
        assessment = assess_profile_suitability(
            profile=profile,
            validity=validity,
            metrics=metrics,
        )
        store.insert_profile_assessment(
            ProfileAssessmentRecord(
                profile_batch_id=profile_batch_id,
                clustering_run_id=run_id,
                profile_id=profile.profile_id,
                profile_version=profile.profile_version,
                profile_manifest_digest=profile_manifest_digest,
                suitable_for_profile=assessment.suitable_for_profile,
                rejection_reasons_json=json_text(
                    list(assessment.rejection_reasons),
                    sort_keys=True,
                ),
                observed_metrics_json=(
                    json_text(asdict(assessment.observed), sort_keys=True)
                    if assessment.observed is not None
                    else None
                ),
                assessed_digest=profile_assessment_digest(
                    profile_batch_id=profile_batch_id,
                    clustering_run_id=run_id,
                    run_digest=run.run_digest,
                    profile_manifest_digest=profile_manifest_digest,
                    assessment=assessment,
                ),
            )
        )
        # Only suitable runs with metrics compete for the recommendation.
        if assessment.suitable_for_profile and metrics is not None:
            ranked.append(
                ProfileRankedRun(
                    clustering_run_id=run_id,
                    base_score=score_clustering_result(
                        cluster_count=metrics.cluster_count,
                        noise_fraction=metrics.noise_ratio,
                        n_samples=metrics.total_items,
                    ),
                    # Placeholder; presumably filled in by the ranking step.
                    # TODO confirm against rank_profile_recommendations.
                    profile_score=0.0,
                    effective=_effective_parameters_from_run(run),
                    metrics=metrics,
                )
            )
    winner, rationale = rank_profile_recommendations(
        profile=profile,
        candidates=ranked,
    )
    # NOTE(review): "succeeded" counts every id passed in, including ids the
    # loop above skipped as missing or not completed; this is only accurate if
    # callers pass exclusively successfully executed runs. Confirm.
    succeeded = len(clustering_run_ids)
    failed = batch.candidate_count_planned - succeeded
    status: Literal["completed", "completed_partial", "failed"]
    if succeeded == 0:
        status = "failed"
    elif failed:
        status = "completed_partial"
    else:
        status = "completed"
    finalized = replace(
        batch,
        finished_at_utc=_execution_timestamp_utc(),
        status=status,
        candidate_count_succeeded=succeeded,
        candidate_count_failed=failed,
        recommended_clustering_run_id=(
            winner.clustering_run_id if winner is not None else None
        ),
        recommendation_rationale_json=(
            json_text(asdict(rationale), sort_keys=True)
            if rationale is not None
            else None
        ),
        # Largest cluster count among technically valid runs, if any.
        batch_max_cluster_count=(
            max(all_cluster_counts) if all_cluster_counts else None
        ),
    )
    store.finalize_profile_batch(finalized)
    return ProfileSweepResult(
        profile_batch_id=profile_batch_id,
        profile_id=profile.profile_id,
        clustering_run_ids=tuple(clustering_run_ids),
        recommended_for_profile_run_id=finalized.recommended_clustering_run_id,
        profile_suitable_count=len(ranked),
        technically_valid_count=technically_valid_count,
        batch_status=status,
    )
def new_profile_batch_id(
    *,
    snapshot_id: str,
    embedding_generation_id: str,
    profile_manifest_digest: str,
    candidate_space_digest: str,
    started_at_utc: str,
) -> str:
    """Derive a profile batch id from the batch's identifying fields.

    The five fields are joined with ``|`` in a fixed order and hashed; the id
    is ``pbatch-`` followed by the first 16 characters of the hex digest.
    """
    identity_fields = (
        snapshot_id,
        embedding_generation_id,
        profile_manifest_digest,
        candidate_space_digest,
        started_at_utc,
    )
    fingerprint = sha256_hex("|".join(identity_fields))
    return f"pbatch-{fingerprint[:16]}"
def _score_completed_run(
    *,
    store: CorpusStore,
    clustering_run_id: str,
    candidate: SweepCandidate,
) -> SweepCandidateResult:
    """Score a finished run from its stored cluster assignments.

    Counts distinct non-noise labels and the share of noise assignments, then
    feeds both to ``score_clustering_result``. A run with no assignments is
    treated as entirely noise (noise fraction ``1.0``).
    """
    assignments = store.list_assignments(clustering_run_id)
    labels = [row.cluster_label for row in assignments]
    noise_total = sum(1 for label in labels if label == NOISE_LABEL)
    cluster_total = len({label for label in labels if label != NOISE_LABEL})
    if assignments:
        noise_fraction = noise_total / len(assignments)
    else:
        noise_fraction = 1.0
    score = score_clustering_result(
        cluster_count=cluster_total,
        noise_fraction=noise_fraction,
        n_samples=len(assignments),
    )
    return SweepCandidateResult(
        candidate=candidate,
        score=score,
        cluster_count=cluster_total,
        noise_fraction=noise_fraction,
    )
isinstance(method, str) + or isinstance(n_samples, bool) + or not isinstance(n_samples, int) + or isinstance(n_features, bool) + or not isinstance(n_features, int) + ): + raise AnalyticsWorkflowError( + f"clustering run effective parameters are invalid: {run.clustering_run_id}" + ) + return EffectiveClusteringParameters( + pca_dimensions=pca_dimensions, + min_cluster_size=min_cluster_size, + min_samples=min_samples, + cluster_selection_method=method, + n_samples=n_samples, + n_features=n_features, + ) + + +def _execution_timestamp_utc() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ") + + +def _execute_single_run( + *, + store: SqliteCorpusAnalyticsStore, + snapshot_id: str, + embedding_generation_id: str, + item_ids: list[str], + items: Sequence[CorpusItemRecord], + vectors: list[list[float]], + requested: ClusteringParameters, + config: AnalyticsConfig, + recommended_by_heuristic: bool, +) -> str: + effective = resolve_effective_parameters( + requested, + n_samples=len(item_ids), + n_features=len(vectors[0]) if vectors else 0, + ) + if effective is None: + raise AnalyticsWorkflowError("clustering parameters produced no valid run") + run_id = f"run-{uuid.uuid4().hex[:16]}" + created_at = current_report_timestamp_utc() + algorithm_manifest = clustering_algorithm_manifest() + run = ClusteringRunRecord( + clustering_run_id=run_id, + snapshot_id=snapshot_id, + embedding_generation_id=embedding_generation_id, + requested_parameters_json=json_text( + { + "pca_dimensions": requested.pca_dimensions, + "min_cluster_size": requested.min_cluster_size, + "min_samples": requested.min_samples, + "cluster_selection_method": requested.cluster_selection_method, + }, + sort_keys=True, + ), + effective_parameters_json=json_text( + { + "pca_dimensions": effective.pca_dimensions, + "min_cluster_size": effective.min_cluster_size, + "min_samples": effective.min_samples, + "cluster_selection_method": effective.cluster_selection_method, + "n_samples": 
def _persist_run_artifacts(
    *,
    store: SqliteCorpusAnalyticsStore,
    run_id: str,
    item_ids: list[str],
    items: Sequence[CorpusItemRecord],
    pipeline: ClusteringPipelineResult,
    partitions: Sequence[ClusterPartition],
    coordinates: dict[str, tuple[float, ...]],
    config: AnalyticsConfig,
) -> None:
    """Write a run's per-item assignments and per-cluster summaries.

    Assignments are inserted first (one per item, in ``item_ids`` order), then
    one summary per partition. Non-noise summaries also record the display ids
    of their nearest clusters. Nothing is committed here.
    """
    digest_by_item = partition_membership_map(partitions)
    record_by_id = {record.snapshot_item_id: record for record in items}
    strength_by_item = dict(
        zip(item_ids, pipeline.membership_strengths, strict=True)
    )

    assignment_rows: list[ClusterAssignmentRecord] = [
        ClusterAssignmentRecord(
            clustering_run_id=run_id,
            snapshot_item_id=member_id,
            cluster_label=member_label,
            membership_strength=member_strength,
            # Items absent from the membership map get an empty digest.
            membership_digest=digest_by_item.get(member_id, ""),
        )
        for member_id, member_label, member_strength in zip(
            item_ids,
            pipeline.labels,
            pipeline.membership_strengths,
            strict=True,
        )
    ]
    store.insert_cluster_assignments(assignment_rows)

    display_ids = display_cluster_id_map(partitions)
    centroid_map = compute_centroids(partitions=partitions, coordinates=coordinates)
    corpus_size = len(items)
    summary_rows: list[ClusterSummaryRecord] = []
    for part in partitions:
        part_label = part.cluster_label
        report = build_cluster_diagnostics(
            partition=part,
            items_by_id=record_by_id,
            coordinates=coordinates,
            membership_strengths=strength_by_item,
            total_items=corpus_size,
            min_correlation_sample_size=config.min_correlation_sample_size,
        )
        if part_label != NOISE_LABEL:
            # Translate neighbouring labels to display ids, dropping any
            # label that has no display id.
            neighbour_ids = []
            for neighbour_label in nearest_cluster_ids(
                cluster_label=part_label,
                centroids=centroid_map,
            ):
                neighbour_display = display_ids.get(neighbour_label)
                if neighbour_display is not None:
                    neighbour_ids.append(neighbour_display)
            report["nearest_clusters"] = neighbour_ids
        summary_rows.append(
            ClusterSummaryRecord(
                clustering_run_id=run_id,
                cluster_label=part_label,
                display_cluster_id=display_ids.get(part_label),
                membership_digest=part.membership_digest,
                size=len(part.snapshot_item_ids),
                diagnostics_json=json_text(report, sort_keys=True),
            )
        )
    store.insert_cluster_summaries(summary_rows)
"select_cluster_run", +] diff --git a/codeclone/audit/__init__.py b/codeclone/audit/__init__.py new file mode 100644 index 00000000..7d475d64 --- /dev/null +++ b/codeclone/audit/__init__.py @@ -0,0 +1,109 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .events import ( + ANALYSIS_SOURCE_CLI, + ANALYSIS_SOURCE_MCP, + AUDIT_EVENT_CORE_VERSION, + EVENT_ANALYSIS_COMPLETED, + EVENT_BASELINE_ABUSE, + EVENT_BLAST_RADIUS, + EVENT_CLAIM_COMPLETED, + EVENT_CLAIM_VIOLATED, + EVENT_INTENT_CHECKED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_DECLARED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_EXPIRED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_INTENT_QUEUED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_VIOLATED, + EVENT_PATCH_BUDGET, + EVENT_PATCH_EXPIRED, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_RECEIPT_CREATED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, + KNOWN_AUDIT_SURFACES, + AnalysisSource, + AuditEvent, + AuditSurface, + derive_workflow_id, + event_core_for_event, + normalize_audit_surface, + repo_root_digest, +) +from .validation import ( + DEFAULT_AUDIT_PATH, + DEFAULT_AUDIT_PAYLOADS, + DEFAULT_AUDIT_RETENTION_DAYS, + DEFAULT_AUDIT_TOKEN_ESTIMATOR, + AuditConfigError, + AuditReadError, + AuditSchemaError, + AuditValidationError, + resolve_audit_path, + validate_payload_mode, + validate_retention_days, + validate_token_estimator, +) +from .writer import AuditWriter, NullAuditWriter, SqliteAuditWriter + +__all__ = [ + "ANALYSIS_SOURCE_CLI", + "ANALYSIS_SOURCE_MCP", + "AUDIT_EVENT_CORE_VERSION", + "DEFAULT_AUDIT_PATH", + "DEFAULT_AUDIT_PAYLOADS", + "DEFAULT_AUDIT_RETENTION_DAYS", + "DEFAULT_AUDIT_TOKEN_ESTIMATOR", + "EVENT_ANALYSIS_COMPLETED", + "EVENT_BASELINE_ABUSE", + 
"EVENT_BLAST_RADIUS", + "EVENT_CLAIM_COMPLETED", + "EVENT_CLAIM_VIOLATED", + "EVENT_INTENT_CHECKED", + "EVENT_INTENT_CLEARED", + "EVENT_INTENT_DECLARED", + "EVENT_INTENT_EXPANDED", + "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_PROMOTED", + "EVENT_INTENT_QUEUED", + "EVENT_INTENT_QUEUE_BLOCKED", + "EVENT_INTENT_RENEWED", + "EVENT_INTENT_VIOLATED", + "EVENT_PATCH_BUDGET", + "EVENT_PATCH_EXPIRED", + "EVENT_PATCH_VERIFIED", + "EVENT_PATCH_VIOLATED", + "EVENT_RECEIPT_CREATED", + "EVENT_WORKSPACE_CONFLICT", + "EVENT_WORKSPACE_GC", + "KNOWN_AUDIT_SURFACES", + "AnalysisSource", + "AuditConfigError", + "AuditEvent", + "AuditReadError", + "AuditSchemaError", + "AuditSurface", + "AuditValidationError", + "AuditWriter", + "NullAuditWriter", + "SqliteAuditWriter", + "derive_workflow_id", + "event_core_for_event", + "normalize_audit_surface", + "repo_root_digest", + "resolve_audit_path", + "validate_payload_mode", + "validate_retention_days", + "validate_token_estimator", +] diff --git a/codeclone/audit/analysis_completed.py b/codeclone/audit/analysis_completed.py new file mode 100644 index 00000000..c1a6bb53 --- /dev/null +++ b/codeclone/audit/analysis_completed.py @@ -0,0 +1,223 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from collections.abc import Mapping +from pathlib import Path + +from .. 
import __version__ +from .events import ( + ANALYSIS_SOURCE_CLI, + ANALYSIS_SOURCE_MCP, + EVENT_ANALYSIS_COMPLETED, + AnalysisSource, + AuditEvent, + repo_root_digest, +) +from .writer import AuditWriter, NullAuditWriter + + +def analysis_completed_payload( + *, + summary: Mapping[str, object], + source: AnalysisSource, +) -> dict[str, object]: + """Build the audit payload for ``analysis.completed`` from a run summary.""" + + health = _mapping(summary.get("health")) + findings = _findings_summary(summary) + inventory = _mapping(summary.get("inventory")) + diff = _mapping(summary.get("diff")) + return { + "source": source, + "focus": str(summary.get("focus", "repository")), + "mode": _analysis_mode(summary), + "schema": str(summary.get("schema", summary.get("report_schema_version", ""))), + "health": { + "score": health.get("score"), + "grade": health.get("grade"), + }, + "findings": { + "total": findings.get("total"), + "new": findings.get("new"), + }, + "inventory": { + "files": inventory.get("files"), + "lines": inventory.get("lines"), + "functions": inventory.get("functions"), + }, + "diff": { + "new_clones": diff.get("new_clones"), + "health_delta": diff.get("health_delta"), + }, + } + + +def analysis_completed_payload_from_report( + *, + report_document: Mapping[str, object], + source: AnalysisSource, + new_func_count: int, + new_block_count: int, +) -> dict[str, object]: + """Build an analysis.completed payload from a canonical report document.""" + + meta = _mapping(report_document.get("meta")) + runtime = _mapping(meta.get("runtime")) + inventory = _mapping(report_document.get("inventory")) + file_registry = _mapping(inventory.get("file_registry")) + findings = _mapping(report_document.get("findings")) + findings_summary = _mapping(findings.get("summary")) + metrics = _mapping(report_document.get("metrics")) + metrics_summary = _mapping(metrics.get("summary")) + health = _mapping(metrics_summary.get("health")) + return { + "source": source, + "focus": 
"repository", + "mode": str(runtime.get("analysis_mode", meta.get("analysis_mode", "full"))), + "schema": str(report_document.get("report_schema_version", "")), + "health": { + "score": health.get("score", meta.get("health_score")), + "grade": health.get("grade", meta.get("health_grade")), + }, + "findings": { + "total": findings_summary.get("total", findings.get("total")), + "new": findings_summary.get("new"), + }, + "inventory": { + "files": len(_sequence(file_registry.get("items"))), + "lines": inventory.get("lines"), + "functions": inventory.get("functions"), + }, + "diff": { + "new_clones": new_func_count + new_block_count, + "health_delta": None, + }, + } + + +def emit_analysis_completed( + *, + root_path: Path, + summary: Mapping[str, object], + source: AnalysisSource, + report_digest: str, + run_id: str, + agent_pid: int, + agent_start_epoch: int, + agent_label: str, + writer: AuditWriter | None = None, +) -> None: + """Append an ``analysis.completed`` audit row when audit is enabled.""" + + from .runtime import open_audit_writer_for_root + + active_writer = ( + writer if writer is not None else open_audit_writer_for_root(root_path) + ) + if isinstance(active_writer, NullAuditWriter): + return + payload = analysis_completed_payload(summary=summary, source=source) + status = _analysis_mode(summary) + active_writer.emit( + AuditEvent( + event_type=EVENT_ANALYSIS_COMPLETED, + severity="info", + repo_root_digest=repo_root_digest(root_path), + agent_pid=agent_pid, + agent_start_epoch=agent_start_epoch, + agent_label=agent_label, + run_id=run_id, + report_digest=report_digest, + status=status, + payload=payload, + surface=source, + tool_name=f"{source}:analysis", + ) + ) + + +def emit_analysis_completed_from_report( + *, + root_path: Path, + report_document: Mapping[str, object], + report_digest: str, + run_id: str, + source: AnalysisSource, + new_func_count: int, + new_block_count: int, + agent_pid: int | None = None, + agent_start_epoch: int | None = None, + 
agent_label: str | None = None, + writer: AuditWriter | None = None, +) -> None: + payload = analysis_completed_payload_from_report( + report_document=report_document, + source=source, + new_func_count=new_func_count, + new_block_count=new_block_count, + ) + summary = { + **payload, + "focus": payload["focus"], + "mode": payload["mode"], + "schema": payload["schema"], + "health": payload["health"], + "findings": payload["findings"], + "inventory": payload["inventory"], + "diff": payload["diff"], + } + emit_analysis_completed( + root_path=root_path, + summary=summary, + source=source, + report_digest=report_digest, + run_id=run_id, + agent_pid=agent_pid if agent_pid is not None else os.getpid(), + agent_start_epoch=agent_start_epoch if agent_start_epoch is not None else 0, + agent_label=agent_label or f"codeclone-cli/{__version__}", + writer=writer, + ) + + +def _analysis_mode(summary: Mapping[str, object]) -> str: + mode = summary.get("mode") or summary.get("analysis_mode") + if mode is None: + return "completed" + text = str(mode).strip() + return text or "completed" + + +def _findings_summary(summary: Mapping[str, object]) -> Mapping[str, object]: + findings = _mapping(summary.get("findings")) + if findings: + return findings + return _mapping(summary.get("findings_summary")) + + +def _mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _sequence(value: object) -> tuple[object, ...]: + if isinstance(value, str): + return () + if isinstance(value, list): + return tuple(value) + return () + + +__all__ = [ + "ANALYSIS_SOURCE_CLI", + "ANALYSIS_SOURCE_MCP", + "AnalysisSource", + "analysis_completed_payload", + "analysis_completed_payload_from_report", + "emit_analysis_completed", + "emit_analysis_completed_from_report", +] diff --git a/codeclone/audit/events.py b/codeclone/audit/events.py new file mode 100644 index 00000000..30a55c15 --- /dev/null +++ b/codeclone/audit/events.py @@ -0,0 +1,814 @@ +# This Source 
Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +import secrets +import time +from collections.abc import Callable, Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from typing import Final, Literal, cast + +AuditSeverity = Literal["info", "warn", "error"] +AuditPayloadMode = Literal["off", "compact", "full"] +AnalysisSource = Literal["mcp", "cli"] +AuditSurface = Literal["mcp", "cli", "hook", "ide", "ci", "unknown"] + +AUDIT_EVENT_CORE_VERSION: Final = "2" + +EVENT_INTENT_DECLARED = "intent.declared" +EVENT_INTENT_QUEUED = "intent.queued" +EVENT_INTENT_PROMOTED = "intent.promoted" +EVENT_INTENT_QUEUE_BLOCKED = "intent.queue_blocked" +EVENT_INTENT_CHECKED = "intent.checked" +EVENT_INTENT_EXPANDED = "intent.expanded" +EVENT_INTENT_VIOLATED = "intent.violated" +EVENT_INTENT_CLEARED = "intent.cleared" +EVENT_INTENT_RENEWED = "intent.renewed" +EVENT_INTENT_EXPIRED = "intent.expired" +EVENT_WORKSPACE_CONFLICT = "workspace.conflict_detected" +EVENT_WORKSPACE_GC = "workspace.gc_completed" +EVENT_BLAST_RADIUS = "blast_radius.computed" +EVENT_PATCH_BUDGET = "patch_budget.computed" +EVENT_PATCH_VERIFIED = "patch_contract.verified" +EVENT_PATCH_VIOLATED = "patch_contract.violated" +EVENT_PATCH_EXPIRED = "patch_contract.expired" +EVENT_CLAIM_COMPLETED = "claim_validation.completed" +EVENT_CLAIM_VIOLATED = "claim_validation.violated" +EVENT_RECEIPT_CREATED = "review_receipt.created" +EVENT_PATCH_TRAIL_COMPUTED = "patch_trail.computed" +EVENT_BASELINE_ABUSE = "baseline_abuse.detected" +EVENT_ANALYSIS_COMPLETED = "analysis.completed" + +ANALYSIS_SOURCE_MCP: AnalysisSource = "mcp" +ANALYSIS_SOURCE_CLI: AnalysisSource = "cli" + +KNOWN_AUDIT_SURFACES = 
frozenset({"mcp", "cli", "hook", "ide", "ci", "unknown"}) + +KNOWN_EVENT_TYPES = frozenset( + { + EVENT_INTENT_DECLARED, + EVENT_INTENT_QUEUED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_INTENT_CHECKED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_VIOLATED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_EXPIRED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, + EVENT_BLAST_RADIUS, + EVENT_PATCH_BUDGET, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_PATCH_EXPIRED, + EVENT_CLAIM_COMPLETED, + EVENT_CLAIM_VIOLATED, + EVENT_RECEIPT_CREATED, + EVENT_PATCH_TRAIL_COMPUTED, + EVENT_BASELINE_ABUSE, + EVENT_ANALYSIS_COMPLETED, + } +) + +PAYLOAD_MODES = frozenset({"off", "compact", "full"}) + +# Compact mode keeps the intent description as a bounded forensic field. +_COMPACT_TEXT_LIMIT = 500 +_EVENT_CORE_SCOPE_PATH_LIMIT = 50 +_EVENT_CORE_CITATION_LIMIT = 32 +_PROJECTION_SUPPLEMENT_FACT_KEYS = frozenset( + { + "scope_paths", + "declared_scope_paths", + "changed_files", + "untouched_in_declared", + "citations", + } +) + +# The summary column stores the human-authored essence of an event, +# independent of audit_payloads mode. Bounded to keep the column lean. +SUMMARY_TEXT_LIMIT = 2000 + +# Intent lifecycle events whose payload may carry the human intent +# description. Shared by compact payloads and the summary projection so the +# two stay in lockstep. 
+_INTENT_PAYLOAD_EVENTS = frozenset( + { + EVENT_INTENT_DECLARED, + EVENT_INTENT_QUEUED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_EXPIRED, + } +) + + +@dataclass(frozen=True, slots=True) +class AuditEvent: + event_type: str + severity: AuditSeverity + repo_root_digest: str + agent_pid: int + agent_label: str + agent_start_epoch: int | None = None + run_id: str | None = None + intent_id: str | None = None + report_digest: str | None = None + status: str | None = None + payload: Mapping[str, object] | None = None + workflow_id: str | None = None + surface: AuditSurface | None = None + tool_name: str | None = None + + +def generate_event_id() -> str: + timestamp = format(int(time.time() * 1000), "x") + return f"evt_{timestamp}_{secrets.token_hex(2)}" + + +def repo_root_digest(root_path: Path) -> str: + return hashlib.sha256(str(root_path).encode("utf-8")).hexdigest()[:16] + + +def derive_workflow_id(event: AuditEvent, event_id: str) -> str: + """Return the deterministic workflow grouping id for an audit row. + + Intent and run handles are grouping aids, not proof fields. Report content + identity stays in ``report_digest`` and in event-core facts when present. + """ + if event.intent_id: + return f"intent:{event.intent_id}" + if event.run_id: + return f"run:{event.run_id}" + explicit = _explicit_workflow_id(event) + if explicit: + return explicit + return f"event:{event_id}" + + +def normalize_audit_surface( + surface: AuditSurface | str | None, + *, + payload: Mapping[str, object] | None = None, +) -> AuditSurface: + if isinstance(surface, str): + normalized = surface.strip().lower() + if normalized in KNOWN_AUDIT_SURFACES: + return cast(AuditSurface, normalized) + payload_source = _payload_source(payload) + if payload_source in {ANALYSIS_SOURCE_MCP, ANALYSIS_SOURCE_CLI}: + return payload_source + return "unknown" + + +def event_core_for_event(event: AuditEvent) -> dict[str, object]: + """Build bounded machine facts used by trajectory replay. 
+ + This is deliberately separate from compact/full audit payloads: compact + payloads are human-friendly forensics, while event core is deterministic + replay input. It never copies unbounded payload lists or prose. + """ + facts, truncated = _event_core_facts(event.event_type, event.payload) + if event.intent_id: + facts.setdefault("intent_id", event.intent_id) + if event.run_id: + facts.setdefault("run_id", event.run_id) + if event.report_digest: + facts.setdefault("report_digest", event.report_digest) + return { + "core_schema_version": AUDIT_EVENT_CORE_VERSION, + "event_family": _event_family(event.event_type), + "event_type": event.event_type, + "status": event.status or str(facts.get("status", "")), + "facts": facts, + "truncated": truncated, + } + + +def compact_payload_for_event( + *, + event_type: str, + payload: Mapping[str, object] | None, +) -> dict[str, object]: + if payload is None: + return {} + if event_type in _INTENT_PAYLOAD_EVENTS: + return _compact_intent_payload(payload) + if event_type == EVENT_INTENT_QUEUE_BLOCKED: + return { + "intent_id": str(payload.get("intent_id", "")), + "blocking_count": _int_value(payload.get("blocking_count")), + } + if event_type in { + EVENT_INTENT_CHECKED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_VIOLATED, + }: + return _compact_check_payload(payload) + if event_type == EVENT_INTENT_CLEARED: + return { + "cleared": _int_value(payload.get("cleared")), + "workspace_cleared": bool(payload.get("workspace_cleared")), + } + if event_type == EVENT_WORKSPACE_CONFLICT: + return { + "concurrent_intents": _sequence_field_count( + payload, + "concurrent_intents", + ) + } + if event_type == EVENT_WORKSPACE_GC: + return { + "removed": _int_value(payload.get("removed")), + "stale_count": _int_value(payload.get("stale_count")), + "orphaned_count": _int_value(payload.get("orphaned_count")), + } + if event_type == EVENT_BLAST_RADIUS: + return _compact_blast_radius_payload(payload) + if event_type == EVENT_ANALYSIS_COMPLETED: + 
return _compact_analysis_completed_payload(payload) + if event_type == EVENT_PATCH_BUDGET: + return _compact_budget_payload(payload) + if event_type in { + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_PATCH_EXPIRED, + EVENT_BASELINE_ABUSE, + }: + return _compact_verify_payload(payload) + if event_type in {EVENT_CLAIM_COMPLETED, EVENT_CLAIM_VIOLATED}: + return { + "valid": bool(payload.get("valid")), + "violations": len(_sequence(payload.get("violations"))), + "warnings": len(_sequence(payload.get("warnings"))), + } + if event_type == EVENT_RECEIPT_CREATED: + receipt = _mapping(payload.get("receipt")) + return { + "format": str(payload.get("format", "")), + "verdict": str(receipt.get("verdict", "")), + "human_decisions": _sequence_field_count( + receipt, + "human_decision_points", + ), + } + if event_type == EVENT_PATCH_TRAIL_COMPUTED: + return _compact_patch_trail_payload(payload) + return _compact_identifiers(payload) + + +def _event_core_facts( + event_type: str, + payload: Mapping[str, object] | None, +) -> tuple[dict[str, object], bool]: + if payload is None: + return {}, False + if event_type in _INTENT_PAYLOAD_EVENTS: + core = dict(_compact_intent_payload(payload)) + core.pop("intent_description", None) + scope_paths, truncated = _bounded_scope_paths(payload) + if scope_paths: + core["scope_paths"] = list(scope_paths) + if truncated: + core["scope_paths_truncated"] = True + return core, truncated + if event_type == EVENT_INTENT_QUEUE_BLOCKED: + return { + "intent_id": str(payload.get("intent_id", "")), + "blocking_count": _int_value(payload.get("blocking_count")), + }, False + if event_type in { + EVENT_INTENT_CHECKED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_VIOLATED, + }: + return _check_event_core_facts(payload) + if event_type == EVENT_INTENT_CLEARED: + return { + "cleared": _int_value(payload.get("cleared")), + "workspace_cleared": bool(payload.get("workspace_cleared")), + }, False + if event_type == EVENT_WORKSPACE_CONFLICT: + return { + 
"concurrent_intents": _sequence_field_count( + payload, + "concurrent_intents", + ) + }, False + if event_type == EVENT_WORKSPACE_GC: + return { + "removed": _int_value(payload.get("removed")), + "stale_count": _int_value(payload.get("stale_count")), + "orphaned_count": _int_value(payload.get("orphaned_count")), + }, False + if event_type == EVENT_BLAST_RADIUS: + return _compact_blast_radius_payload(payload), False + if event_type == EVENT_ANALYSIS_COMPLETED: + return _compact_analysis_completed_payload(payload), False + if event_type == EVENT_PATCH_BUDGET: + return _compact_budget_payload(payload), False + if event_type in { + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, + EVENT_PATCH_EXPIRED, + EVENT_BASELINE_ABUSE, + }: + return _verify_event_core_facts(payload), False + if event_type in {EVENT_CLAIM_COMPLETED, EVENT_CLAIM_VIOLATED}: + return _claim_event_core_facts(payload) + if event_type == EVENT_RECEIPT_CREATED: + receipt = _mapping(payload.get("receipt")) + return { + "format": str(payload.get("format", "")), + "verdict": str(receipt.get("verdict", "")), + "human_decisions": _sequence_field_count( + receipt, + "human_decision_points", + ), + }, False + if event_type == EVENT_PATCH_TRAIL_COMPUTED: + return _patch_trail_event_core_facts(payload) + return _compact_identifiers(payload), False + + +def _compact_intent_payload(payload: Mapping[str, object]) -> dict[str, object]: + scope = _mapping(payload.get("scope")) + allowed = _sequence(scope.get("allowed_files")) + return { + # Compaction drops volume, not substance: the intent description is + # the key forensic field and survives (bounded) even in compact mode. 
+ "intent_description": _bounded_text( + payload.get("intent_description"), _COMPACT_TEXT_LIMIT + ), + "scope_file_count": len(allowed), + "concurrent_intents": len(_sequence(payload.get("concurrent_intents"))), + "workspace_registered": bool(payload.get("workspace_registered")), + "ttl_seconds": _int_value(payload.get("ttl_seconds")), + "lease_seconds": _int_value(payload.get("lease_seconds")), + } + + +def _bounded_scope_paths(payload: Mapping[str, object]) -> tuple[tuple[str, ...], bool]: + scope = _mapping(payload.get("scope")) + raw_paths = [ + *_sequence(scope.get("allowed_files")), + *_sequence(scope.get("allowed_related")), + ] + normalized: list[str] = [] + for raw_path in raw_paths: + path = _normalized_event_core_path(raw_path) + if path is not None: + normalized.append(path) + unique = tuple(sorted(set(normalized))) + return ( + unique[:_EVENT_CORE_SCOPE_PATH_LIMIT], + len(unique) > _EVENT_CORE_SCOPE_PATH_LIMIT, + ) + + +def _normalized_event_core_path(value: object) -> str | None: + if not isinstance(value, str): + return None + text = value.strip().replace("\\", "/") + while text.startswith("./"): + text = text[2:] + if not text or text in {".", ".."} or text.startswith("/"): + return None + path = PurePosixPath(text) + if any(part in {"", ".", ".."} for part in path.parts): + return None + return path.as_posix() + + +def event_summary( + event_type: str, + payload: Mapping[str, object] | None, +) -> str | None: + """Human-readable essence of an event for the summary column. + + Independent of audit_payloads mode: the summary is lightweight structured + metadata (like status or intent_id), not bulk payload, so it is captured + even when payloads are 'off' or 'compact'. Returns None when the event + carries no human-authored text. Bounded to ``SUMMARY_TEXT_LIMIT``. 
+ """ + if payload is None: + return None + if event_type in _INTENT_PAYLOAD_EVENTS: + text = _bounded_text(payload.get("intent_description"), SUMMARY_TEXT_LIMIT) + return text or None + if event_type == EVENT_ANALYSIS_COMPLETED: + return _analysis_completed_summary(payload) + incident = _incident_summary(event_type, payload) + return _bounded_text(incident, SUMMARY_TEXT_LIMIT) if incident else None + + +# Incident events whose summary is a labelled count of a payload list. +_COUNT_INCIDENTS: dict[str, tuple[str, str, str]] = { + EVENT_WORKSPACE_CONFLICT: ( + "concurrent_intents", + "workspace conflict", + "concurrent intent(s)", + ), + EVENT_CLAIM_VIOLATED: ("violations", "claim validation failed", "violation(s)"), +} + + +def _join_or(values: object, *, default: str) -> str: + items = [str(item) for item in _sequence(values)] + return ", ".join(items) if items else default + + +def _summary_patch_violated(payload: Mapping[str, object]) -> str: + delta = _mapping(payload.get("structural_delta")) + regressions = len(_sequence(delta.get("regressions"))) + detail = _join_or(payload.get("contract_violations"), default="none") + return f"patch contract violated: {regressions} regression(s); {detail}" + + +def _summary_baseline_abuse(payload: Mapping[str, object]) -> str: + abuse = _mapping(payload.get("baseline_abuse")) + detail = _join_or(abuse.get("triggers"), default="unspecified") + return f"baseline abuse detected: {detail}" + + +def _summary_receipt_created(payload: Mapping[str, object]) -> str: + receipt = _mapping(payload.get("receipt")) + verdict = str(receipt.get("verdict", "")).strip() or "unknown" + return f"review receipt: {verdict}" + + +# Incident events whose summary needs bespoke per-type field extraction. 
+_INCIDENT_BUILDERS: dict[str, Callable[[Mapping[str, object]], str]] = { + EVENT_PATCH_VIOLATED: _summary_patch_violated, + EVENT_BASELINE_ABUSE: _summary_baseline_abuse, + EVENT_RECEIPT_CREATED: _summary_receipt_created, +} + + +def _incident_summary(event_type: str, payload: Mapping[str, object]) -> str: + """Bounded human-readable line for an indexed incident event. + + Field paths mirror ``compact_payload_for_event`` so the summary and the + compact payload stay in lockstep. Non-incident event types yield "". + """ + count_spec = _COUNT_INCIDENTS.get(event_type) + if count_spec is not None: + key, prefix, noun = count_spec + return f"{prefix}: {len(_sequence(payload.get(key)))} {noun}" + builder = _INCIDENT_BUILDERS.get(event_type) + return builder(payload) if builder is not None else "" + + +def _compact_check_payload(payload: Mapping[str, object]) -> dict[str, object]: + return { + "status": str(payload.get("status", "")), + "unexpected_files": len(_sequence(payload.get("unexpected_files"))), + "forbidden_touched": len(_sequence(payload.get("forbidden_touched"))), + } + + +def _bounded_path_list( + value: object, +) -> tuple[tuple[str, ...], bool]: + normalized: list[str] = [] + for raw_path in _sequence(value): + path = _normalized_event_core_path(raw_path) + if path is not None: + normalized.append(path) + unique = tuple(sorted(set(normalized))) + return ( + unique[:_EVENT_CORE_SCOPE_PATH_LIMIT], + len(unique) > _EVENT_CORE_SCOPE_PATH_LIMIT, + ) + + +def _check_event_core_facts( + payload: Mapping[str, object], +) -> tuple[dict[str, object], bool]: + core = _compact_check_payload(payload) + truncated = False + changed, changed_truncated = _bounded_path_list(payload.get("actual_changed_files")) + declared, declared_truncated = _bounded_path_list(payload.get("declared_scope")) + unexpected, unexpected_truncated = _bounded_path_list( + payload.get("unexpected_files") + ) + forbidden, forbidden_truncated = _bounded_path_list( + payload.get("forbidden_touched") 
+ ) + if changed: + core["changed_files"] = list(changed) + if declared: + core["declared_scope_paths"] = list(declared) + if unexpected: + core["unexpected_files_list"] = list(unexpected) + if forbidden: + core["forbidden_touched_list"] = list(forbidden) + untouched = tuple(sorted(set(declared) - set(changed))) + if untouched: + bounded = untouched[:_EVENT_CORE_SCOPE_PATH_LIMIT] + core["untouched_in_declared"] = list(bounded) + if len(untouched) > _EVENT_CORE_SCOPE_PATH_LIMIT: + truncated = True + truncated = ( + truncated + or changed_truncated + or declared_truncated + or unexpected_truncated + or forbidden_truncated + ) + if truncated: + core["paths_truncated"] = True + return core, truncated + + +def _compact_analysis_completed_payload( + payload: Mapping[str, object], +) -> dict[str, object]: + health = _mapping(payload.get("health")) + findings = _mapping(payload.get("findings")) + inventory = _mapping(payload.get("inventory")) + return { + "source": str(payload.get("source", "")), + "mode": str(payload.get("mode", "")), + "focus": str(payload.get("focus", "")), + "health_score": _int_or_none(health.get("score")), + "health_grade": str(health.get("grade", "")), + "findings_total": _int_or_none(findings.get("total")), + "findings_new": _int_or_none(findings.get("new")), + "files": _int_or_none(inventory.get("files")), + } + + +def _analysis_completed_summary(payload: Mapping[str, object]) -> str: + health = _mapping(payload.get("health")) + score = health.get("score") + source = str(payload.get("source", "")).strip() or "unknown" + if isinstance(score, int) and not isinstance(score, bool): + return f"analysis completed ({source}): health={score}" + return f"analysis completed ({source})" + + +def _compact_blast_radius_payload(payload: Mapping[str, object]) -> dict[str, object]: + structural_risk = _mapping(payload.get("structural_risk")) + return { + "radius_level": str(payload.get("radius_level", "")), + "direct_dependents": 
len(_sequence(payload.get("direct_dependents"))), + "clone_cohort_members": len(_sequence(payload.get("clone_cohort_members"))), + "do_not_touch": len(_sequence(payload.get("do_not_touch"))), + "review_context": len(_sequence(payload.get("review_context"))), + "risk_keys": sorted(str(key) for key in structural_risk), + } + + +def _compact_budget_payload(payload: Mapping[str, object]) -> dict[str, object]: + blast = _mapping(payload.get("blast_radius_summary")) + gate = _mapping(payload.get("gate_preview")) + return { + "strictness": str(payload.get("strictness", "")), + "radius_level": str(blast.get("radius_level", "")), + "do_not_touch_count": _int_value(blast.get("do_not_touch_count")), + "review_context_count": _int_value(blast.get("review_context_count")), + "gate_would_fail": bool(gate.get("would_fail")), + } + + +def _compact_patch_trail_payload(payload: Mapping[str, object]) -> dict[str, object]: + counts = _patch_trail_counts(payload) + truncation = _mapping(payload.get("truncation")) + return { + "patch_trail_digest": str(payload.get("patch_trail_digest", "")), + "scope_check_status": str(payload.get("scope_check_status", "")), + "verification_status": str(payload.get("verification_status", "")), + "declared": _int_value(counts.get("declared")), + "changed": _int_value(counts.get("changed")), + "untouched_in_declared": _int_value(counts.get("untouched_in_declared")), + "unexpected": _int_value(counts.get("unexpected")), + "forbidden_touched": _int_value(counts.get("forbidden_touched")), + "truncation": bool(any(bool(value) for value in truncation.values())), + } + + +def _patch_trail_event_core_facts( + payload: Mapping[str, object], +) -> tuple[dict[str, object], bool]: + counts = _patch_trail_counts(payload) + truncation = _mapping(payload.get("truncation")) + truncated = bool(any(bool(value) for value in truncation.values())) + return { + "patch_trail_digest": str(payload.get("patch_trail_digest", "")), + "scope_check_status": 
str(payload.get("scope_check_status", "")), + "verification_status": str(payload.get("verification_status", "")), + "declared": _int_value(counts.get("declared")), + "changed": _int_value(counts.get("changed")), + "untouched_in_declared": _int_value(counts.get("untouched_in_declared")), + "unexpected": _int_value(counts.get("unexpected")), + "forbidden_touched": _int_value(counts.get("forbidden_touched")), + "truncation": truncated, + }, truncated + + +def _patch_trail_counts(payload: Mapping[str, object]) -> Mapping[str, object]: + counts = payload.get("counts") + if isinstance(counts, Mapping): + return counts + return { + "declared": len(_sequence(payload.get("declared_files"))), + "changed": len(_sequence(payload.get("changed_files"))), + "untouched_in_declared": len(_sequence(payload.get("untouched_in_declared"))), + "unexpected": len(_sequence(payload.get("unexpected_files"))), + "forbidden_touched": len(_sequence(payload.get("forbidden_touched"))), + } + + +def _compact_verify_payload(payload: Mapping[str, object]) -> dict[str, object]: + delta = _mapping(payload.get("structural_delta")) + baseline_abuse = _mapping(payload.get("baseline_abuse")) + return { + "status": str(payload.get("status", "")), + "regressions": len(_sequence(delta.get("regressions"))), + "improvements": len(_sequence(delta.get("improvements"))), + "health_delta": _int_or_none(delta.get("health_delta")), + "contract_violations": [ + str(item) for item in _sequence(payload.get("contract_violations")) + ], + "baseline_abuse": bool(baseline_abuse.get("detected")), + } + + +def _claim_event_core_facts( + payload: Mapping[str, object], +) -> tuple[dict[str, object], bool]: + core: dict[str, object] = { + "valid": bool(payload.get("valid")), + "violations": len(_sequence(payload.get("violations"))), + "warnings": len(_sequence(payload.get("warnings"))), + "citations_found": _int_value(payload.get("citations_found")), + } + truncated = False + citations: list[dict[str, object]] = [] + for raw 
in _sequence(payload.get("validated_citations")): + if isinstance(raw, Mapping): + entry = _validated_citation_entry(raw) + if entry is not None: + citations.append(entry) + if citations: + bounded = citations[:_EVENT_CORE_CITATION_LIMIT] + core["citations"] = bounded + if len(citations) > _EVENT_CORE_CITATION_LIMIT: + truncated = True + core["citations_truncated"] = True + return core, truncated + + +def _validated_citation_entry(raw: Mapping[str, object]) -> dict[str, object] | None: + cited_id = str(raw.get("cited_id", "")).strip() + kind = str(raw.get("kind", "")).strip() + if not cited_id or not kind: + return None + return { + "cited_id": cited_id, + "kind": kind, + "valid": bool(raw.get("valid")), + } + + +def projection_supplement_facts_from_payload( + event_type: str, + payload_json: str | None, +) -> dict[str, object]: + """Re-derive bounded replay facts from stored audit payload for projection.""" + if not payload_json or payload_json == "{}": + return {} + try: + parsed = json.loads(payload_json) + except json.JSONDecodeError: + return {} + if not isinstance(parsed, Mapping): + return {} + facts, _ = _event_core_facts(event_type, parsed) + return { + key: value + for key in sorted(_PROJECTION_SUPPLEMENT_FACT_KEYS) + if (value := facts.get(key)) + } + + +def _verify_event_core_facts(payload: Mapping[str, object]) -> dict[str, object]: + delta = _mapping(payload.get("structural_delta")) + baseline_abuse = _mapping(payload.get("baseline_abuse")) + return { + "status": str(payload.get("status", "")), + "regressions": len(_sequence(delta.get("regressions"))), + "improvements": len(_sequence(delta.get("improvements"))), + "health_delta": _int_or_none(delta.get("health_delta")), + "contract_violation_count": len(_sequence(payload.get("contract_violations"))), + "baseline_abuse": bool(baseline_abuse.get("detected")), + } + + +def _compact_identifiers(payload: Mapping[str, object]) -> dict[str, object]: + keys = ("mode", "status", "reason", "run_id", 
"intent_id") + return {key: payload[key] for key in keys if key in payload} + + +def _sequence_field_count(payload: Mapping[str, object], key: str) -> int: + return len(_sequence(payload.get(key))) + + +def _mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _sequence(value: object) -> Sequence[object]: + if isinstance(value, str): + return () + return value if isinstance(value, Sequence) else () + + +def _int_value(value: object) -> int: + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _int_or_none(value: object) -> int | None: + return value if isinstance(value, int) and not isinstance(value, bool) else None + + +def _bounded_text(value: object, limit: int) -> str: + text = str(value or "").strip() + return text[:limit] + + +def _event_family(event_type: str) -> str: + head, _, _tail = event_type.partition(".") + return head or "unknown" + + +def _explicit_workflow_id(event: AuditEvent) -> str: + candidates = (event.workflow_id, _mapping(event.payload).get("workflow_id")) + for candidate in candidates: + if isinstance(candidate, str): + stripped = candidate.strip() + if stripped: + return stripped + return "" + + +def _payload_source(payload: Mapping[str, object] | None) -> str: + source = _mapping(payload).get("source") + return source.strip().lower() if isinstance(source, str) else "" + + +__all__ = [ + "ANALYSIS_SOURCE_CLI", + "ANALYSIS_SOURCE_MCP", + "AUDIT_EVENT_CORE_VERSION", + "EVENT_ANALYSIS_COMPLETED", + "EVENT_BASELINE_ABUSE", + "EVENT_BLAST_RADIUS", + "EVENT_CLAIM_COMPLETED", + "EVENT_CLAIM_VIOLATED", + "EVENT_INTENT_CHECKED", + "EVENT_INTENT_CLEARED", + "EVENT_INTENT_DECLARED", + "EVENT_INTENT_EXPANDED", + "EVENT_INTENT_EXPIRED", + "EVENT_INTENT_PROMOTED", + "EVENT_INTENT_QUEUED", + "EVENT_INTENT_QUEUE_BLOCKED", + "EVENT_INTENT_RENEWED", + "EVENT_INTENT_VIOLATED", + "EVENT_PATCH_BUDGET", + "EVENT_PATCH_EXPIRED", + "EVENT_PATCH_VERIFIED", + 
"EVENT_PATCH_VIOLATED", + "EVENT_RECEIPT_CREATED", + "EVENT_WORKSPACE_CONFLICT", + "EVENT_WORKSPACE_GC", + "KNOWN_AUDIT_SURFACES", + "KNOWN_EVENT_TYPES", + "PAYLOAD_MODES", + "SUMMARY_TEXT_LIMIT", + "AnalysisSource", + "AuditEvent", + "AuditPayloadMode", + "AuditSeverity", + "AuditSurface", + "compact_payload_for_event", + "derive_workflow_id", + "event_core_for_event", + "event_summary", + "generate_event_id", + "normalize_audit_surface", + "projection_supplement_facts_from_payload", + "repo_root_digest", +] diff --git a/codeclone/audit/reader.py b/codeclone/audit/reader.py new file mode 100644 index 00000000..c0ba8c52 --- /dev/null +++ b/codeclone/audit/reader.py @@ -0,0 +1,780 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import sqlite3 +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path + +from ..utils.utc_timestamps import age_seconds_since_utc_timestamp +from .events import ( + ANALYSIS_SOURCE_CLI, + ANALYSIS_SOURCE_MCP, + EVENT_ANALYSIS_COMPLETED, + repo_root_digest, +) +from .schema import get_meta, open_audit_db_readonly +from .validation import AuditReadError, AuditSchemaError + + +@dataclass(frozen=True, slots=True) +class AnalysisRunSnapshot: + """Latest persisted analysis run summary from the audit trail.""" + + run_id: str | None + health: int | None + findings: int | None + files: int | None + age_seconds: int | None + source: str + + +@dataclass(frozen=True, slots=True) +class AuditRecord: + audit_sequence: int | None + event_id: str + event_type: str + severity: str + created_at_utc: str + run_id: str | None + intent_id: str | None + report_digest: str | None + workflow_id: str | None + surface: str | None + tool_name: 
@dataclass(frozen=True, slots=True)
class PayloadFootprint:
    """Aggregate payload cost analytics.

    Immutable summary of token usage over audit events. The per-field notes
    below describe how ``_read_payload_footprint`` in this module populates
    an instance; other constructors are free to supply different values.
    """

    # First non-NULL ``token_encoding`` found, or "unknown" when there is none.
    encoding: str
    # Number of events that carry a non-NULL ``estimated_tokens``.
    tool_calls: int
    # Sum of ``estimated_tokens`` over those events.
    total_tokens: int
    # Integer (floor) division of total_tokens by tool_calls.
    avg_tokens: int
    # Token count of the row found after skipping the top 5% most expensive
    # rows; falls back to max_tokens when that row is not available.
    p95_tokens: int
    # Largest single ``estimated_tokens`` value.
    max_tokens: int
    # Per event-type totals, ordered by summed tokens descending.
    by_type: tuple[TypeTokenProfile, ...]
    # The most expensive individual payloads (at most five).
    top_payloads: tuple[TopPayload, ...]
    # The most expensive workflow groups (at most five); optional.
    top_workflows: tuple[WorkflowTokenProfile, ...] = ()
def read_latest_analysis_run(
    *,
    db_path: Path,
    repo_root: Path,
) -> AnalysisRunSnapshot | None:
    """Look up the most recent ``analysis.completed`` event for ``repo_root``.

    Returns ``None`` when the audit database file does not exist or holds no
    matching row. Raises ``AuditReadError`` when the database cannot be opened
    or queried. Health, findings and file counts are taken from the nested
    payload sections first and fall back to the flat payload keys.
    """
    if not db_path.is_file():
        return None
    root_digest = repo_root_digest(repo_root.resolve())
    try:
        connection = open_audit_db_readonly(db_path)
    except (sqlite3.Error, AuditSchemaError, OSError) as exc:
        raise AuditReadError(f"cannot open audit database: {exc}") from exc
    latest_sql = (
        "SELECT run_id, created_at_utc, payload_json "
        "FROM controller_events "
        "WHERE event_type = ? AND repo_root_digest = ? "
        "ORDER BY created_at_utc DESC, id DESC "
        "LIMIT 1"
    )
    try:
        latest = connection.execute(
            latest_sql,
            (EVENT_ANALYSIS_COMPLETED, root_digest),
        ).fetchone()
    except (sqlite3.Error, AuditSchemaError) as exc:
        raise AuditReadError(f"cannot read audit database: {exc}") from exc
    finally:
        connection.close()
    if latest is None:
        return None

    payload = _analysis_payload_from_json(latest[2])

    def metric(section: str, field: str, flat_key: str) -> int | None:
        # Prefer payload[section][field]; only a missing/non-int nested value
        # (not a legitimate 0) triggers the flat-key fallback.
        nested = _int_or_none(_mapping(payload.get(section)).get(field))
        if nested is not None:
            return nested
        return _int_or_none(payload.get(flat_key))

    return AnalysisRunSnapshot(
        run_id=_short_run_id(_str_or_none(latest[0]), payload),
        health=metric("health", "score", "health_score"),
        findings=metric("findings", "total", "findings_total"),
        files=metric("inventory", "files", "files"),
        age_seconds=age_seconds_since_utc_timestamp(_str_or_empty(latest[1])),
        source=_analysis_run_source_label(str(payload.get("source", ""))),
    )
Path, limit: int = 50) -> AuditSummary: + if not db_path.is_file(): + raise AuditReadError("no audit data") + try: + conn = open_audit_db_readonly(db_path) + except (sqlite3.Error, AuditSchemaError, OSError) as exc: + raise AuditReadError(f"cannot open audit database: {exc}") from exc + try: + retention_days = _int_meta(conn, "retention_days") + total = _count(conn, "SELECT COUNT(*) FROM controller_events") + intent_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events WHERE event_type LIKE 'intent.%'", + ) + contract_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE event_type IN (" + "'patch_budget.computed'," + "'patch_contract.verified'," + "'patch_contract.violated'," + "'patch_contract.expired'" + ")", + ) + receipt_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE event_type = 'review_receipt.created'", + ) + violation_events = _count( + conn, + "SELECT COUNT(*) FROM controller_events " + "WHERE severity IN ('warn', 'error')", + ) + oldest = _text_scalar(conn, "SELECT MIN(created_at_utc) FROM controller_events") + latest = _text_scalar(conn, "SELECT MAX(created_at_utc) FROM controller_events") + event_columns = _event_columns(conn) + token_cols = _has_token_columns(event_columns) + select_prefix = _audit_record_select_prefix(event_columns) + if token_cols: + rows = conn.execute( + f"SELECT {select_prefix}, " + f"{_column_or_null(event_columns, 'summary')}, " + "estimated_tokens, token_encoding, payload_characters " + "FROM controller_events " + "ORDER BY created_at_utc DESC, id DESC " + "LIMIT ?", + (max(1, int(limit)),), + ).fetchall() + token_summary = _token_summary(conn) + else: + rows = conn.execute( + f"SELECT {select_prefix} " + "FROM controller_events " + "ORDER BY created_at_utc DESC, id DESC " + "LIMIT ?", + (max(1, int(limit)),), + ).fetchall() + token_summary = (None, None, 0) + footprint = _read_payload_footprint(conn) if token_cols else None + except (sqlite3.Error, 
def read_intent_declared_records(
    *,
    db_path: Path,
    repo_root_digest: str,
) -> tuple[AuditRecord, ...]:
    """Load every ``intent.declared`` audit row of one repository.

    Rows come back in ascending ``id`` order. A missing database file yields
    an empty tuple; failures to open or query the database raise
    ``AuditReadError``.
    """
    from .events import EVENT_INTENT_DECLARED

    if not db_path.is_file():
        return ()
    try:
        connection = open_audit_db_readonly(db_path)
    except (sqlite3.Error, AuditSchemaError, OSError) as exc:
        raise AuditReadError(f"cannot open audit database: {exc}") from exc
    declared_sql = (
        "SELECT id, event_id, event_type, severity, created_at_utc, run_id, "
        "intent_id, report_digest, workflow_id, surface, tool_name, "
        "event_core_json, event_core_sha256, payload_sha256, "
        "status, agent_label, summary, "
        "estimated_tokens, token_encoding, payload_characters, payload_json "
        "FROM controller_events "
        "WHERE repo_root_digest = ? AND event_type = ? "
        "ORDER BY id ASC"
    )
    try:
        fetched = connection.execute(
            declared_sql,
            (repo_root_digest, EVENT_INTENT_DECLARED),
        ).fetchall()
    except (sqlite3.Error, AuditSchemaError) as exc:
        raise AuditReadError(f"cannot read audit database: {exc}") from exc
    finally:
        # Always release the read-only handle, even when the query failed.
        connection.close()
    return tuple(map(_record_from_row, fetched))
def count_audit_event_core_gaps(
    *,
    db_path: Path,
    repo_root_digest: str,
) -> int:
    """Count this repository's rows that trajectory projection cannot use.

    A row counts as a gap when its ``workflow_id`` is NULL or empty, or when
    ``event_core_json`` or ``event_core_sha256`` is NULL. A missing database
    file yields ``0``; open or query failures raise ``AuditReadError``.
    """
    if not db_path.is_file():
        return 0
    try:
        connection = open_audit_db_readonly(db_path)
    except (sqlite3.Error, AuditSchemaError, OSError) as exc:
        raise AuditReadError(f"cannot open audit database: {exc}") from exc
    gaps_sql = (
        "SELECT COUNT(*) FROM controller_events "
        "WHERE repo_root_digest = ? "
        "AND (workflow_id IS NULL OR workflow_id = '' "
        "OR event_core_json IS NULL OR event_core_sha256 IS NULL)"
    )
    try:
        result = connection.execute(gaps_sql, (repo_root_digest,)).fetchone()
    except (sqlite3.Error, AuditSchemaError) as exc:
        raise AuditReadError(f"cannot read audit database: {exc}") from exc
    finally:
        connection.close()
    if result is None:
        return 0
    gap_count = result[0]
    return int(gap_count) if isinstance(gap_count, int) else 0
def payload_footprint_to_dict(fp: PayloadFootprint) -> dict[str, object]:
    """Convert a ``PayloadFootprint`` into a plain, JSON-friendly dict.

    Scalar statistics are copied as-is. ``by_type`` becomes a mapping keyed by
    event type; ``top_payloads`` and ``top_workflows`` become lists of dicts
    that keep the order of the source tuples.
    """
    per_type: dict[str, dict[str, int]] = {}
    for profile in fp.by_type:
        per_type[profile.event_type] = {
            "count": profile.call_count,
            "tokens": profile.total_tokens,
            "max": profile.max_tokens,
        }

    payload_rows: list[dict[str, object]] = []
    for item in fp.top_payloads:
        payload_rows.append(
            {
                "event_type": item.event_type,
                "event_id": item.event_id,
                "tokens": item.estimated_tokens,
                "created_at_utc": item.created_at_utc,
                "intent_id": item.intent_id,
                "run_id": item.run_id,
                "agent_label": item.agent_label,
            }
        )

    workflow_rows: list[dict[str, object]] = []
    for group in fp.top_workflows:
        workflow_rows.append(
            {
                "workflow_kind": group.workflow_kind,
                "workflow_id": group.workflow_id,
                "calls": group.call_count,
                "tokens": group.total_tokens,
                "max": group.max_tokens,
                "first_event_utc": group.first_event_utc,
                "latest_event_utc": group.latest_event_utc,
                "agent_label": group.agent_label,
            }
        )

    serialized: dict[str, object] = {
        "encoding": fp.encoding,
        "tool_calls": fp.tool_calls,
        "total_tokens": fp.total_tokens,
        "avg_tokens": fp.avg_tokens,
        "p95_tokens": fp.p95_tokens,
        "max_tokens": fp.max_tokens,
    }
    serialized["by_type"] = per_type
    serialized["top_payloads"] = payload_rows
    serialized["top_workflows"] = workflow_rows
    return serialized
controller_events WHERE estimated_tokens IS NOT NULL " + "ORDER BY estimated_tokens DESC LIMIT 5" + ).fetchall() + top_payloads = tuple( + TopPayload( + event_type=_str_or_empty(r[0]), + event_id=_str_or_empty(r[1]), + estimated_tokens=r[2] if isinstance(r[2], int) else 0, + created_at_utc=_str_or_empty(r[3]), + intent_id=_str_or_none(r[4]), + run_id=_str_or_none(r[5]), + agent_label=_str_or_empty(r[6]), + ) + for r in top_rows + ) + + # Encoding (single value for the session) + enc_row = conn.execute( + "SELECT token_encoding FROM controller_events " + "WHERE token_encoding IS NOT NULL LIMIT 1" + ).fetchone() + encoding = _str_or_none(enc_row[0]) if enc_row else "unknown" + + return PayloadFootprint( + encoding=encoding or "unknown", + tool_calls=tool_calls, + total_tokens=total_tokens, + avg_tokens=avg_tokens, + p95_tokens=p95_tokens, + max_tokens=max_tokens, + by_type=by_type, + top_payloads=top_payloads, + top_workflows=top_workflows, + ) + + +def _read_top_workflows( + conn: sqlite3.Connection, +) -> tuple[WorkflowTokenProfile, ...]: + rows = conn.execute( + """ + SELECT + CASE + WHEN workflow_id IS NOT NULL + AND workflow_id LIKE 'intent:%' THEN 'intent' + WHEN workflow_id IS NOT NULL AND workflow_id LIKE 'run:%' THEN 'run' + WHEN workflow_id IS NOT NULL AND workflow_id LIKE 'event:%' THEN 'event' + WHEN workflow_id IS NOT NULL AND workflow_id != '' THEN 'workflow' + WHEN intent_id IS NOT NULL AND intent_id != '' THEN 'intent' + WHEN run_id IS NOT NULL AND run_id != '' THEN 'run' + ELSE 'event' + END AS workflow_kind, + CASE + WHEN workflow_id IS NOT NULL + AND workflow_id LIKE 'intent:%' THEN substr(workflow_id, 8) + WHEN workflow_id IS NOT NULL + AND workflow_id LIKE 'run:%' THEN substr(workflow_id, 5) + WHEN workflow_id IS NOT NULL + AND workflow_id LIKE 'event:%' THEN substr(workflow_id, 7) + WHEN workflow_id IS NOT NULL AND workflow_id != '' THEN workflow_id + WHEN intent_id IS NOT NULL AND intent_id != '' THEN intent_id + WHEN run_id IS NOT NULL AND 
run_id != '' THEN run_id + ELSE event_id + END AS workflow_group_id, + COUNT(*) AS call_count, + SUM(estimated_tokens) AS total_tokens, + MAX(estimated_tokens) AS max_tokens, + MIN(created_at_utc) AS first_event_utc, + MAX(created_at_utc) AS latest_event_utc, + MIN(agent_label) AS agent_label + FROM controller_events + WHERE estimated_tokens IS NOT NULL + GROUP BY workflow_kind, workflow_group_id + ORDER BY total_tokens DESC, max_tokens DESC, workflow_group_id ASC + LIMIT 5 + """ + ).fetchall() + return tuple( + WorkflowTokenProfile( + workflow_kind=_str_or_empty(row[0]), + workflow_id=_str_or_empty(row[1]), + call_count=row[2] if isinstance(row[2], int) else 0, + total_tokens=row[3] if isinstance(row[3], int) else 0, + max_tokens=row[4] if isinstance(row[4], int) else 0, + first_event_utc=_str_or_empty(row[5]), + latest_event_utc=_str_or_empty(row[6]), + agent_label=_str_or_empty(row[7]), + ) + for row in rows + ) + + +def _count(conn: sqlite3.Connection, sql: str) -> int: + value = conn.execute(sql).fetchone() + if value is None: + return 0 + item = value[0] + return item if isinstance(item, int) else 0 + + +def _text_scalar(conn: sqlite3.Connection, sql: str) -> str | None: + row = conn.execute(sql).fetchone() + if row is None: + return None + return _str_or_none(row[0]) + + +def _int_meta(conn: sqlite3.Connection, key: str) -> int | None: + value = get_meta(conn, key) + if value is None: + return None + try: + return int(value) + except ValueError: + return None + + +def _db_size(path: Path) -> int: + try: + return path.stat().st_size + except OSError: + return 0 + + +def _str_or_empty(value: object) -> str: + return value if isinstance(value, str) else "" + + +def _str_or_none(value: object) -> str | None: + return value if isinstance(value, str) else None + + +def _int_or_none(value: object) -> int | None: + return value if isinstance(value, int) and not isinstance(value, bool) else None + + +def _mapping(value: object) -> dict[str, object]: + return value if 
# Public surface of the audit reader module. Every public (non-underscore)
# dataclass and reader function defined here is listed, so star-imports and
# export linters agree with the module's actual API.
# ``list_workflow_ids_with_events_after`` was previously defined but not
# exported.
__all__ = [
    "AnalysisRunSnapshot",
    "AuditRecord",
    "AuditSummary",
    "PayloadFootprint",
    "TopPayload",
    "TypeTokenProfile",
    "WorkflowTokenProfile",
    "count_audit_event_core_gaps",
    "list_workflow_ids_with_events_after",
    "payload_footprint_to_dict",
    "read_audit_event_core_records",
    "read_audit_summary",
    "read_intent_declared_records",
    "read_latest_analysis_run",
]
def open_audit_writer_for_root(root_path: Path) -> AuditWriter:
    """Build the audit writer configured for the project at ``root_path``.

    Falls back to ``NullAuditWriter`` whenever auditing cannot be set up: the
    pyproject configuration fails to load, ``audit_enabled`` is falsy, or any
    error occurs while validating the audit settings or constructing the
    SQLite writer. This function itself never raises for those cases.
    """
    try:
        settings = load_pyproject_config(root_path)
    except (ConfigValidationError, OSError):
        return NullAuditWriter()
    if not settings.get("audit_enabled", False):
        return NullAuditWriter()
    try:
        # Keyword arguments are evaluated top to bottom, so settings are
        # validated in the order path, payloads, retention, estimator.
        return SqliteAuditWriter(
            db_path=resolve_audit_path(
                root_path=root_path,
                value=settings.get("audit_path", DEFAULT_AUDIT_PATH),
            ),
            payloads=validate_payload_mode(
                settings.get("audit_payloads", DEFAULT_AUDIT_PAYLOADS)
            ),
            retention_days=validate_retention_days(
                settings.get("audit_retention_days", DEFAULT_AUDIT_RETENTION_DAYS)
            ),
            token_estimator=validate_token_estimator(
                settings.get("audit_token_estimator", DEFAULT_AUDIT_TOKEN_ESTIMATOR)
            ),
        )
    except Exception:
        # Deliberate best effort: a broken audit setup must not break callers.
        return NullAuditWriter()
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from .. import __version__ +from ..report.meta import current_report_timestamp_utc +from ..utils.sqlite_store import ( + get_meta_value, + initialize_schema_v1, +) +from .validation import AUDIT_SCHEMA_VERSION, AuditSchemaError + +_AUDIT_META_TABLE = "audit_meta" + +_CREATE_EVENTS_SQL = """ +CREATE TABLE IF NOT EXISTS controller_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_id TEXT NOT NULL UNIQUE, + event_type TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', + created_at_utc TEXT NOT NULL, + + repo_root_digest TEXT NOT NULL, + run_id TEXT, + intent_id TEXT, + report_digest TEXT, + workflow_id TEXT, + surface TEXT, + tool_name TEXT, + event_core_json TEXT, + event_core_sha256 TEXT, + payload_sha256 TEXT, + agent_label TEXT NOT NULL DEFAULT '', + agent_pid INTEGER NOT NULL, + + status TEXT, + payload_json TEXT NOT NULL DEFAULT '{}', + agent_start_epoch INTEGER, + + estimated_tokens INTEGER, + token_encoding TEXT, + payload_characters INTEGER, + summary TEXT +) +""" + +_CREATE_META_SQL = """ +CREATE TABLE IF NOT EXISTS audit_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) +""" + +_INDEX_SQL = ( + "CREATE INDEX IF NOT EXISTS idx_events_intent ON controller_events(intent_id)", + "CREATE INDEX IF NOT EXISTS idx_events_run ON controller_events(run_id)", + "CREATE INDEX IF NOT EXISTS idx_events_workflow ON controller_events(workflow_id)", + "CREATE INDEX IF NOT EXISTS idx_events_surface_tool " + "ON controller_events(surface, tool_name)", + "CREATE INDEX IF NOT EXISTS idx_events_type_time " + "ON controller_events(event_type, created_at_utc)", + "CREATE INDEX IF NOT EXISTS idx_events_created " + "ON controller_events(created_at_utc)", + "CREATE INDEX IF NOT EXISTS idx_events_analysis_repo " + "ON controller_events(event_type, repo_root_digest, created_at_utc)", + "CREATE INDEX IF NOT EXISTS 
def ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the audit schema or bring an existing one up to date.

    With no recorded ``schema_version`` the schema is created from scratch.
    A recorded version outside ``_MIGRATABLE_VERSIONS`` raises
    ``AuditSchemaError``. Otherwise missing columns and indexes are added and
    the recorded version is advanced to ``AUDIT_SCHEMA_VERSION`` if it differs.
    """
    recorded = get_meta(conn, "schema_version")
    if recorded is None:
        create_schema_v2(conn)
    elif recorded in _MIGRATABLE_VERSIONS:
        # Idempotent self-heal: add whatever columns/indexes are missing, then
        # record the current version. Safe to repeat on every open.
        _ensure_event_columns(conn)
        _ensure_event_indexes(conn)
        if recorded != AUDIT_SCHEMA_VERSION:
            _set_meta(conn, "schema_version", AUDIT_SCHEMA_VERSION)
    else:
        raise AuditSchemaError(f"Unsupported audit schema version: {recorded}")
def _validate_readonly_schema(conn: sqlite3.Connection) -> None:
    """Check that an audit database can be read, without modifying it.

    Raises ``AuditSchemaError`` when a recorded ``schema_version`` is not one
    of ``_MIGRATABLE_VERSIONS`` (an absent version is accepted), or when
    ``controller_events`` lacks any column in ``_READABLE_EVENT_COLUMNS``.
    """
    version = get_meta(conn, "schema_version")
    version_supported = version is None or version in _MIGRATABLE_VERSIONS
    if not version_supported:
        raise AuditSchemaError(f"Unsupported audit schema version: {version}")
    present = _event_columns(conn)
    absent = sorted(name for name in _READABLE_EVENT_COLUMNS if name not in present)
    if absent:
        raise AuditSchemaError(
            "Audit database is missing required columns: " + ", ".join(absent)
        )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +from dataclasses import dataclass +from pathlib import Path + +from ..budget.estimator import ( + TOKEN_ESTIMATOR_CHARS_APPROX, + TOKEN_ESTIMATOR_MODES, + TOKEN_ESTIMATOR_TIKTOKEN, + TokenEstimatorMode, +) +from ..utils.repo_paths import ( + PathOutsideRepoError, + RepoPathError, + RepoPathPolicy, + resolve_under_repo_root, +) +from .events import ( + AUDIT_EVENT_CORE_VERSION, + KNOWN_AUDIT_SURFACES, + KNOWN_EVENT_TYPES, + PAYLOAD_MODES, + SUMMARY_TEXT_LIMIT, + AuditPayloadMode, + AuditSeverity, +) + +AUDIT_SCHEMA_VERSION = "4" +DEFAULT_AUDIT_PATH = ".codeclone/db/audit.sqlite3" +DEFAULT_AUDIT_PAYLOADS: AuditPayloadMode = "compact" +DEFAULT_AUDIT_RETENTION_DAYS = 30 +DEFAULT_AUDIT_TOKEN_ESTIMATOR: TokenEstimatorMode = TOKEN_ESTIMATOR_CHARS_APPROX +MIN_AUDIT_RETENTION_DAYS = 1 +MAX_AUDIT_RETENTION_DAYS = 365 + +_VALID_AUDIT_SUFFIXES = frozenset({".sqlite3", ".db"}) +_MAX_EVENT_ID_LEN = 48 +_MAX_EVENT_TYPE_LEN = 64 +_MAX_SEVERITY_LEN = 8 +_MAX_TIMESTAMP_LEN = 40 +_MAX_DIGEST_LEN = 128 +_MAX_RUN_ID_LEN = 128 +_MAX_INTENT_ID_LEN = 128 +_MAX_WORKFLOW_ID_LEN = 192 +_MAX_SURFACE_LEN = 16 +_MAX_TOOL_NAME_LEN = 128 +_MAX_AGENT_LABEL_LEN = 128 +_MAX_STATUS_LEN = 32 +MAX_PAYLOAD_JSON_LEN = 262_144 +MAX_EVENT_CORE_JSON_LEN = 65_536 + + +class AuditConfigError(ValueError): + """Raised for invalid audit configuration.""" + + +class AuditValidationError(ValueError): + """Raised when an audit event row violates the storage contract.""" + + +class AuditSchemaError(RuntimeError): + """Raised for unsupported or corrupt audit database schemas.""" + + +class AuditReadError(RuntimeError): + """Raised when a CLI audit read cannot be completed safely.""" + + +@dataclass(frozen=True, slots=True) +class EventRow: + event_id: str + event_type: str + severity: AuditSeverity + created_at_utc: str + repo_root_digest: str + run_id: str | None + 
intent_id: str | None + report_digest: str | None + agent_label: str + agent_pid: int + status: str | None + payload_json: str + workflow_id: str | None = None + surface: str | None = None + tool_name: str | None = None + event_core_json: str | None = None + event_core_sha256: str | None = None + payload_sha256: str | None = None + agent_start_epoch: int | None = None + estimated_tokens: int | None = None + token_encoding: str | None = None + payload_characters: int | None = None + summary: str | None = None + + def as_tuple(self) -> tuple[object, ...]: + return ( + self.event_id, + self.event_type, + self.severity, + self.created_at_utc, + self.repo_root_digest, + self.run_id, + self.intent_id, + self.report_digest, + self.workflow_id, + self.surface, + self.tool_name, + self.event_core_json, + self.event_core_sha256, + self.payload_sha256, + self.agent_label, + self.agent_pid, + self.status, + self.payload_json, + self.agent_start_epoch, + self.estimated_tokens, + self.token_encoding, + self.payload_characters, + self.summary, + ) + + +def resolve_audit_path(*, root_path: Path, value: object) -> Path: + if not isinstance(value, str): + raise AuditConfigError("audit_path must be a string") + raw = value.strip() + if not raw: + raise AuditConfigError("audit_path must not be empty") + path = Path(raw) + if any(part in {"", ".", ".."} for part in path.parts): + raise AuditConfigError("audit_path must not contain empty, '.', or '..' 
parts")
+    if path.suffix not in _VALID_AUDIT_SUFFIXES:
+        raise AuditConfigError("audit_path must end with .sqlite3 or .db")
+    try:
+        return resolve_under_repo_root(
+            root_path,
+            path,
+            policy=RepoPathPolicy(),
+        )
+    except PathOutsideRepoError as exc:
+        raise AuditConfigError(
+            "audit_path must be relative to the repository root"
+        ) from exc
+    except RepoPathError as exc:
+        raise AuditConfigError(f"invalid audit_path: {exc}") from exc
+
+
+def validate_payload_mode(value: object) -> AuditPayloadMode:
+    """Validate the ``audit_payloads`` setting and return it as a literal.
+
+    Raises:
+        AuditConfigError: if ``value`` is not a string listed in
+            ``PAYLOAD_MODES``.
+    """
+    # Type check first: an unhashable value (list, dict) would make a
+    # membership test against a set raise TypeError instead of
+    # AuditConfigError.
+    if not isinstance(value, str) or value not in PAYLOAD_MODES:
+        expected = ", ".join(sorted(PAYLOAD_MODES))
+        raise AuditConfigError(f"audit_payloads must be one of: {expected}")
+    # Literals are returned instead of ``value`` -- presumably so the result
+    # type-checks as AuditPayloadMode.
+    if value == "off":
+        return "off"
+    if value == "full":
+        return "full"
+    return "compact"
+
+
+def validate_retention_days(value: object) -> int:
+    """Validate the ``audit_retention_days`` setting and return it.
+
+    ``bool`` is rejected explicitly because it is a subclass of ``int``.
+
+    Raises:
+        AuditConfigError: if ``value`` is not an integer or lies outside
+            ``MIN_AUDIT_RETENTION_DAYS``..``MAX_AUDIT_RETENTION_DAYS``.
+    """
+    if not isinstance(value, int) or isinstance(value, bool):
+        raise AuditConfigError("audit_retention_days must be an integer")
+    if not MIN_AUDIT_RETENTION_DAYS <= value <= MAX_AUDIT_RETENTION_DAYS:
+        raise AuditConfigError(
+            "audit_retention_days must be between "
+            f"{MIN_AUDIT_RETENTION_DAYS} and {MAX_AUDIT_RETENTION_DAYS}"
+        )
+    return value
+
+
+def validate_token_estimator(value: object) -> TokenEstimatorMode:
+    """Validate the ``audit_token_estimator`` setting and return the mode.
+
+    Raises:
+        AuditConfigError: if ``value`` is not a string listed in
+            ``TOKEN_ESTIMATOR_MODES``.
+    """
+    # TOKEN_ESTIMATOR_MODES is a frozenset, so an unhashable value must be
+    # rejected before the membership test or it raises TypeError.
+    if not isinstance(value, str) or value not in TOKEN_ESTIMATOR_MODES:
+        expected = ", ".join(sorted(TOKEN_ESTIMATOR_MODES))
+        raise AuditConfigError(f"audit_token_estimator must be one of: {expected}")
+    if value == TOKEN_ESTIMATOR_TIKTOKEN:
+        return TOKEN_ESTIMATOR_TIKTOKEN
+    return TOKEN_ESTIMATOR_CHARS_APPROX
+
+
+def validate_event_row(row: EventRow) -> None:
+    """Run every storage-contract check on ``row``.
+
+    Raises:
+        AuditValidationError: on the first field that violates the contract.
+    """
+    _validate_event_identity(row)
+    _validate_event_references(row)
+    _validate_surface(row.surface)
+    _validate_agent_identity(row)
+    _validate_payload_contract(row)
+
+
+def _validate_event_identity(row: EventRow) -> None:
+    _validate_text(row.event_id, "event_id", max_len=_MAX_EVENT_ID_LEN)
+    _validate_text(row.event_type, "event_type", max_len=_MAX_EVENT_TYPE_LEN)
+    if row.event_type not in KNOWN_EVENT_TYPES:
+        raise
AuditValidationError(f"unknown event_type: {row.event_type}") + _validate_text(row.severity, "severity", max_len=_MAX_SEVERITY_LEN) + if row.severity not in {"info", "warn", "error"}: + raise AuditValidationError(f"invalid severity: {row.severity}") + _validate_text(row.created_at_utc, "created_at_utc", max_len=_MAX_TIMESTAMP_LEN) + _validate_text(row.repo_root_digest, "repo_root_digest", max_len=_MAX_DIGEST_LEN) + + +def _validate_event_references(row: EventRow) -> None: + _validate_optional_text(row.run_id, "run_id", max_len=_MAX_RUN_ID_LEN) + _validate_optional_text(row.intent_id, "intent_id", max_len=_MAX_INTENT_ID_LEN) + _validate_optional_text(row.report_digest, "report_digest", max_len=_MAX_DIGEST_LEN) + _validate_optional_text( + row.workflow_id, + "workflow_id", + max_len=_MAX_WORKFLOW_ID_LEN, + ) + _validate_optional_text(row.tool_name, "tool_name", max_len=_MAX_TOOL_NAME_LEN) + _validate_optional_event_core(row.event_core_json, row.event_core_sha256) + + +def _validate_surface(surface: str | None) -> None: + _validate_optional_text(surface, "surface", max_len=_MAX_SURFACE_LEN) + if surface is not None and surface not in KNOWN_AUDIT_SURFACES: + raise AuditValidationError(f"invalid surface: {surface}") + + +def _validate_agent_identity(row: EventRow) -> None: + _validate_text(row.agent_label, "agent_label", max_len=_MAX_AGENT_LABEL_LEN) + if not isinstance(row.agent_pid, int) or isinstance(row.agent_pid, bool): + raise AuditValidationError("agent_pid must be an integer") + if row.agent_pid <= 0: + raise AuditValidationError("agent_pid must be positive") + _validate_agent_start_epoch(row.agent_start_epoch) + + +def _validate_agent_start_epoch(value: int | None) -> None: + if value is None: + return + if not isinstance(value, int) or isinstance(value, bool): + raise AuditValidationError("agent_start_epoch must be an integer") + if value < 0: + raise AuditValidationError("agent_start_epoch must be non-negative") + + +def _validate_payload_contract(row: 
EventRow) -> None: + _validate_optional_text(row.status, "status", max_len=_MAX_STATUS_LEN) + _validate_text(row.payload_json, "payload_json", max_len=MAX_PAYLOAD_JSON_LEN) + _validate_optional_payload_hash(row.payload_json, row.payload_sha256) + _validate_optional_text(row.summary, "summary", max_len=SUMMARY_TEXT_LIMIT) + + +def _validate_optional_event_core( + event_core_json: str | None, + event_core_sha256: str | None, +) -> None: + if event_core_json is None: + if event_core_sha256 is not None: + raise AuditValidationError("event_core_sha256 requires event_core_json") + return + _validate_text( + event_core_json, + "event_core_json", + max_len=MAX_EVENT_CORE_JSON_LEN, + ) + _validate_optional_sha256(event_core_sha256, "event_core_sha256") + if event_core_sha256 is None: + raise AuditValidationError("event_core_sha256 must not be empty") + try: + parsed = json.loads(event_core_json) + except json.JSONDecodeError as exc: + raise AuditValidationError("event_core_json must be JSON") from exc + if not isinstance(parsed, dict): + raise AuditValidationError("event_core_json must be a JSON object") + if parsed.get("core_schema_version") != AUDIT_EVENT_CORE_VERSION: + raise AuditValidationError( + "event_core_json has unsupported core_schema_version" + ) + if _sha256_text(event_core_json) != event_core_sha256: + raise AuditValidationError("event_core_sha256 does not match event_core_json") + + +def _validate_optional_payload_hash( + payload_json: str, + payload_sha256: str | None, +) -> None: + _validate_optional_sha256(payload_sha256, "payload_sha256") + if payload_sha256 is None: + return + if _sha256_text(payload_json) != payload_sha256: + raise AuditValidationError("payload_sha256 does not match payload_json") + + +def _validate_optional_sha256(value: str | None, field: str) -> None: + if value is None: + return + _validate_text(value, field, max_len=64) + if len(value) != 64 or any(ch not in "0123456789abcdef" for ch in value): + raise 
AuditValidationError(f"{field} must be lowercase sha256 hex") + + +def _sha256_text(value: str) -> str: + return hashlib.sha256(value.encode("utf-8")).hexdigest() + + +def _validate_optional_text(value: str | None, field: str, *, max_len: int) -> None: + if value is None: + return + _validate_text(value, field, max_len=max_len) + + +def _validate_text(value: str, field: str, *, max_len: int) -> None: + if not isinstance(value, str): + raise AuditValidationError(f"{field} must be a string") + if not value and field not in {"agent_label", "payload_json"}: + raise AuditValidationError(f"{field} must not be empty") + if len(value) > max_len: + raise AuditValidationError(f"{field} too long") + if "\x00" in value: + raise AuditValidationError(f"{field} contains NUL byte") + + +__all__ = [ + "AUDIT_EVENT_CORE_VERSION", + "AUDIT_SCHEMA_VERSION", + "DEFAULT_AUDIT_PATH", + "DEFAULT_AUDIT_PAYLOADS", + "DEFAULT_AUDIT_RETENTION_DAYS", + "DEFAULT_AUDIT_TOKEN_ESTIMATOR", + "MAX_EVENT_CORE_JSON_LEN", + "MAX_PAYLOAD_JSON_LEN", + "AuditConfigError", + "AuditReadError", + "AuditSchemaError", + "AuditValidationError", + "EventRow", + "resolve_audit_path", + "validate_event_row", + "validate_payload_mode", + "validate_retention_days", + "validate_token_estimator", +] diff --git a/codeclone/audit/writer.py b/codeclone/audit/writer.py new file mode 100644 index 00000000..7dab3e9d --- /dev/null +++ b/codeclone/audit/writer.py @@ -0,0 +1,275 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +import threading +from collections.abc import Mapping +from contextlib import suppress +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Protocol + +from ..report.meta import current_report_timestamp_utc +from .events import ( + AUDIT_EVENT_CORE_VERSION, + AuditEvent, + AuditPayloadMode, + compact_payload_for_event, + derive_workflow_id, + event_core_for_event, + event_summary, + generate_event_id, + normalize_audit_surface, +) +from .schema import open_audit_db +from .validation import EventRow, validate_event_row + +if TYPE_CHECKING: + from ..budget.estimator import TokenEstimate, TokenEstimatorMode + +_INSERT_SQL = """ +INSERT INTO controller_events( + event_id, + event_type, + severity, + created_at_utc, + repo_root_digest, + run_id, + intent_id, + report_digest, + workflow_id, + surface, + tool_name, + event_core_json, + event_core_sha256, + payload_sha256, + agent_label, + agent_pid, + status, + payload_json, + agent_start_epoch, + estimated_tokens, + token_encoding, + payload_characters, + summary +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +""" + + +class AuditWriter(Protocol): + def emit(self, event: AuditEvent) -> int | None: ... + def close(self) -> None: ... 
+
+
+class NullAuditWriter:
+    """Audit writer that discards every event."""
+
+    def emit(self, event: AuditEvent) -> int | None:
+        return None
+
+    def close(self) -> None:
+        return None
+
+
+class SqliteAuditWriter:
+    """Audit writer that stores events in the SQLite audit database."""
+
+    def __init__(
+        self,
+        *,
+        db_path: Path,
+        payloads: AuditPayloadMode,
+        retention_days: int,
+        token_estimator: TokenEstimatorMode = "chars_approx",
+    ) -> None:
+        """Open the audit database and record the retention setting.
+
+        Args:
+            db_path: database file, opened through ``open_audit_db``.
+            payloads: payload mode applied when events become rows.
+            retention_days: age limit used by the retention GC; also stored
+                in the meta table under the ``retention_days`` key.
+            token_estimator: estimator mode used for payload token counts.
+        """
+        self._conn = open_audit_db(db_path)
+        self._payloads = payloads
+        self._retention_days = retention_days
+        self._token_estimator = token_estimator
+        self._lock = threading.Lock()
+        self._closed = False
+        self._gc_counter = 0
+        # Retention GC runs once every this many successful inserts.
+        self._gc_interval = 100
+        # NOTE(review): the table name is hard-coded here while schema.py
+        # writes meta rows through _AUDIT_META_TABLE -- confirm they match.
+        try:
+            self._conn.execute(
+                "INSERT OR REPLACE INTO audit_meta(key, value) VALUES (?, ?)",
+                ("retention_days", str(retention_days)),
+            )
+            self._conn.commit()
+        except BaseException:
+            # Construction failed, so the caller never receives an object it
+            # could close(): release the connection here before re-raising.
+            with suppress(Exception):
+                self._conn.close()
+            raise
+
+    def emit(self, event: AuditEvent) -> int | None:
+        """Write ``event``; return ``None`` instead of raising on failure."""
+        try:
+            return self._emit_impl(event)
+        except Exception:
+            # Audit writes are non-fatal by contract: a failure must never
+            # break the controller. Count the drop as observability telemetry
+            # (no-op when observability is disabled) so silent audit gaps stay
+            # countable. Telemetry is itself best-effort and never re-raises.
+ with suppress(Exception): + from ..observability import record_counter + + record_counter("audit.emit_dropped") + return None + + def close(self) -> None: + with self._lock: + if self._closed: + return + try: + self._run_retention_gc() + finally: + self._conn.close() + self._closed = True + + def _emit_impl(self, event: AuditEvent) -> int | None: + row = event_to_row( + event=event, + payloads=self._payloads, + token_estimator=self._token_estimator, + ) + validate_event_row(row) + with self._lock: + if self._closed: + return None + cursor = self._conn.execute(_INSERT_SQL, row.as_tuple()) + self._conn.commit() + self._gc_counter += 1 + if self._gc_counter >= self._gc_interval: + self._run_retention_gc() + self._gc_counter = 0 + return cursor.lastrowid + + def _run_retention_gc(self) -> None: + cutoff = datetime.now(timezone.utc) - timedelta(days=self._retention_days) + cutoff_text = cutoff.replace(microsecond=0).isoformat().replace("+00:00", "Z") + self._conn.execute( + "DELETE FROM controller_events WHERE created_at_utc < ?", + (cutoff_text,), + ) + self._conn.commit() + + +def event_to_row( + *, + event: AuditEvent, + payloads: AuditPayloadMode, + token_estimator: TokenEstimatorMode = "chars_approx", +) -> EventRow: + event_id = generate_event_id() + payload_json = _payload_json(event=event, payloads=payloads) + event_core_json = _event_core_json(event) + token_estimate = _estimate_payload_tokens( + event.payload, + token_estimator=token_estimator, + ) + return EventRow( + event_id=event_id, + event_type=event.event_type, + severity=event.severity, + created_at_utc=current_report_timestamp_utc(), + repo_root_digest=event.repo_root_digest, + run_id=event.run_id, + intent_id=event.intent_id, + report_digest=event.report_digest, + workflow_id=derive_workflow_id(event, event_id), + surface=normalize_audit_surface(event.surface, payload=event.payload), + tool_name=_normalized_tool_name(event.tool_name), + event_core_json=event_core_json, + 
event_core_sha256=_sha256_text(event_core_json), + payload_sha256=None if payloads == "off" else _sha256_text(payload_json), + agent_label=event.agent_label, + agent_pid=event.agent_pid, + status=event.status, + payload_json=payload_json, + agent_start_epoch=event.agent_start_epoch, + estimated_tokens=token_estimate.tokens if token_estimate else None, + token_encoding=token_estimate.encoding if token_estimate else None, + payload_characters=token_estimate.characters if token_estimate else None, + summary=event_summary(event.event_type, event.payload), + ) + + +def _estimate_payload_tokens( + payload: Mapping[str, object] | None, + *, + token_estimator: TokenEstimatorMode = "chars_approx", +) -> TokenEstimate | None: + """Estimate token count for the full original payload. + + Lazy import of ``codeclone.budget.estimator``. Any failure + (ImportError, encoding error, etc.) returns None — the audit writer + never fails because of token estimation. + """ + if payload is None: + return None + try: + from ..budget.estimator import estimate_payload + + return estimate_payload(payload, estimator=token_estimator) + except Exception: + return None + + +def _payload_json(*, event: AuditEvent, payloads: AuditPayloadMode) -> str: + if payloads == "off": + return "{}" + payload = ( + event.payload + if payloads == "full" + else compact_payload_for_event( + event_type=event.event_type, + payload=event.payload, + ) + ) + if payload is None: + return "{}" + try: + return _canonical_json(payload) + except (TypeError, ValueError): + return "{}" + + +def _event_core_json(event: AuditEvent) -> str: + try: + return _canonical_json(event_core_for_event(event)) + except (TypeError, ValueError): + return _canonical_json( + { + "core_schema_version": AUDIT_EVENT_CORE_VERSION, + "event_family": "unknown", + "event_type": event.event_type, + "status": event.status or "", + "facts": {}, + "truncated": True, + } + ) + + +def _canonical_json(payload: Mapping[str, object]) -> str: + return 
json.dumps( + payload, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + default=str, + ) + + +def _sha256_text(value: str) -> str: + return hashlib.sha256(value.encode("utf-8")).hexdigest() + + +def _normalized_tool_name(value: str | None) -> str | None: + if value is None: + return None + text = value.strip() + return text or None + + +__all__ = [ + "AuditWriter", + "NullAuditWriter", + "SqliteAuditWriter", + "event_to_row", +] diff --git a/codeclone/budget/__init__.py b/codeclone/budget/__init__.py new file mode 100644 index 00000000..eb319e1b --- /dev/null +++ b/codeclone/budget/__init__.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP payload token budget estimation (optional leaf module). + +Requires the ``codeclone[token-bench]`` extra for exact BPE counts. +Falls back to character-based approximation when ``tiktoken`` is absent. + +This module must not import from ``codeclone.surfaces`` or +``codeclone.audit``. Dependency direction: ``audit -> budget``. +""" + +from __future__ import annotations diff --git a/codeclone/budget/estimator.py b/codeclone/budget/estimator.py new file mode 100644 index 00000000..52eac785 --- /dev/null +++ b/codeclone/budget/estimator.py @@ -0,0 +1,162 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic token-count estimator for MCP JSON payloads. + +Defaults to ``ceil(chars / 4)`` so long-lived MCP processes do not import +``tiktoken`` just because the optional package is installed. 
Exact BPE +counting remains available through explicit ``estimator="tiktoken"`` opt-in. + +The payload is serialized to canonical JSON (sorted keys, compact separators, +no ASCII escaping) before counting. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Final, Literal + +TokenEstimatorMode = Literal["chars_approx", "tiktoken"] + +TOKEN_ESTIMATOR_CHARS_APPROX: Final[TokenEstimatorMode] = "chars_approx" +TOKEN_ESTIMATOR_TIKTOKEN: Final[TokenEstimatorMode] = "tiktoken" +TOKEN_ESTIMATOR_MODES: Final[frozenset[str]] = frozenset( + {TOKEN_ESTIMATOR_CHARS_APPROX, TOKEN_ESTIMATOR_TIKTOKEN} +) + + +@dataclass(frozen=True, slots=True) +class TokenEstimate: + """Result of a payload token estimation.""" + + encoding: str # e.g. "o200k_base" or "chars_approx" + characters: int + tokens: int + method: str # "tiktoken" | "chars_approx" + + +def estimate_payload( + payload: Mapping[str, object], + *, + encoding: str = "o200k_base", + estimator: TokenEstimatorMode = TOKEN_ESTIMATOR_CHARS_APPROX, +) -> TokenEstimate: + """Estimate token count for a canonical JSON payload. + + Character approximation is the default because this function is used by + long-lived MCP audit paths. ``tiktoken`` is imported only when explicitly + requested. If exact estimation is requested but unavailable, the function + falls back to approximation without failing audit writes. 
+ """ + text = _canonical_json(payload) + return estimate_text_token( + text, + encoding=encoding, + estimator=estimator, + ) + + +def estimate_text_token( + text: str, + *, + encoding: str = "o200k_base", + estimator: TokenEstimatorMode = TOKEN_ESTIMATOR_CHARS_APPROX, +) -> TokenEstimate: + """Estimate token count for raw text using the shared estimator contract.""" + characters = len(text) + if estimator not in TOKEN_ESTIMATOR_MODES: + expected = ", ".join(sorted(TOKEN_ESTIMATOR_MODES)) + raise ValueError(f"token estimator must be one of: {expected}") + if estimator == TOKEN_ESTIMATOR_TIKTOKEN: + return _tiktoken_or_chars_estimate( + text, + encoding=encoding, + characters=characters, + ) + return _chars_approx_estimate(characters) + + +def estimate_texts_token_counts( + texts: Sequence[str], + *, + encoding: str = "o200k_base", + estimator: TokenEstimatorMode = TOKEN_ESTIMATOR_CHARS_APPROX, +) -> tuple[int, ...]: + """Batch token counts for raw text using the shared estimator contract.""" + return tuple( + estimate_text_token(text, encoding=encoding, estimator=estimator).tokens + for text in texts + ) + + +def approx_tokens_from_chars(characters: int) -> int: + """Rough approximation: 1 token ~ 4 characters.""" + return -(-characters // 4) # ceil division + + +def _canonical_json(payload: Mapping[str, object]) -> str: + return json.dumps( + payload, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=False, + default=str, + ) + + +class _TiktokenUnavailable(Exception): + pass + + +def _tiktoken_estimate(text: str, *, encoding: str) -> TokenEstimate: + try: + import tiktoken + except ImportError as exc: + raise _TiktokenUnavailable from exc + enc = tiktoken.get_encoding(encoding) + tokens = len(enc.encode(text)) + return TokenEstimate( + encoding=encoding, + characters=len(text), + tokens=tokens, + method="tiktoken", + ) + + +def _tiktoken_or_chars_estimate( + text: str, + *, + encoding: str, + characters: int, +) -> TokenEstimate: + try: + return 
_tiktoken_estimate(text, encoding=encoding) + except _TiktokenUnavailable: + return _chars_approx_estimate(characters) + + +def _chars_approx_estimate(characters: int) -> TokenEstimate: + return TokenEstimate( + encoding=TOKEN_ESTIMATOR_CHARS_APPROX, + characters=characters, + tokens=approx_tokens_from_chars(characters), + method=TOKEN_ESTIMATOR_CHARS_APPROX, + ) + + +__all__ = [ + "TOKEN_ESTIMATOR_CHARS_APPROX", + "TOKEN_ESTIMATOR_MODES", + "TOKEN_ESTIMATOR_TIKTOKEN", + "TokenEstimate", + "TokenEstimatorMode", + "approx_tokens_from_chars", + "estimate_payload", + "estimate_text_token", + "estimate_texts_token_counts", +] diff --git a/codeclone/cache/_canonicalize.py b/codeclone/cache/_canonicalize.py index 911184bf..02432e5a 100644 --- a/codeclone/cache/_canonicalize.py +++ b/codeclone/cache/_canonicalize.py @@ -14,6 +14,7 @@ _is_class_metrics_dict, _is_dead_candidate_dict, _is_file_stat_dict, + _is_function_relationship_facts_dict, _is_module_api_surface_dict, _is_module_dep_dict, _is_module_docstring_coverage_dict, @@ -32,11 +33,13 @@ ClassMetricsDict, DeadCandidateDict, FileStat, + FunctionRelationshipFactsDict, ModuleApiSurfaceDict, ModuleDepDict, ModuleDocstringCoverageDict, ModuleTypingCoverageDict, PublicSymbolDict, + RelationshipRecordDict, RuntimeReachabilityFactDict, SecuritySurfaceDict, SegmentDict, @@ -124,6 +127,12 @@ def _as_typed_runtime_reachability_list( return _as_typed_list(value, predicate=_is_runtime_reachability_fact_dict) +def _as_typed_function_relationship_facts_list( + value: object, +) -> list[FunctionRelationshipFactsDict] | None: + return _as_typed_list(value, predicate=_is_function_relationship_facts_dict) + + def _as_typed_string_list(value: object) -> list[str] | None: return _as_typed_list(value, predicate=_is_str_item) @@ -187,6 +196,7 @@ def _has_cache_entry_container_shape(entry: Mapping[str, object]) -> bool: "runtime_reachability", "security_surfaces", "structural_findings", + "function_relationship_facts", ) if not 
all(isinstance(entry.get(key, []), list) for key in optional_list_keys): return False @@ -217,6 +227,7 @@ def _decode_optional_cache_sections( list[str], list[RuntimeReachabilityFactDict], list[SecuritySurfaceDict], + list[FunctionRelationshipFactsDict], ModuleTypingCoverageDict | None, ModuleDocstringCoverageDict | None, ModuleApiSurfaceDict | None, @@ -242,6 +253,9 @@ def _decode_optional_cache_sections( security_surfaces_raw = _as_typed_security_surfaces_list( entry.get("security_surfaces", []) ) + function_relationship_facts_raw = _as_typed_function_relationship_facts_list( + entry.get("function_relationship_facts", []) + ) if ( class_metrics_raw is None or module_deps_raw is None @@ -252,6 +266,7 @@ def _decode_optional_cache_sections( or class_names_raw is None or runtime_reachability_raw is None or security_surfaces_raw is None + or function_relationship_facts_raw is None ): return None typing_coverage_raw = _as_module_typing_coverage_dict(entry.get("typing_coverage")) @@ -274,6 +289,7 @@ def _decode_optional_cache_sections( class_names_raw, runtime_reachability_raw, security_surfaces_raw, + function_relationship_facts_raw, typing_coverage_raw, docstring_coverage_raw, api_surface_raw, @@ -290,6 +306,7 @@ def _attach_optional_cache_sections( api_surface: ModuleApiSurfaceDict | None = None, runtime_reachability: list[RuntimeReachabilityFactDict] | None = None, security_surfaces: list[SecuritySurfaceDict] | None = None, + function_relationship_facts: list[FunctionRelationshipFactsDict] | None = None, source_stats: SourceStatsDict | None = None, structural_findings: list[StructuralFindingGroupDict] | None = None, ) -> CacheEntry: @@ -303,6 +320,8 @@ def _attach_optional_cache_sections( entry["runtime_reachability"] = runtime_reachability if security_surfaces is not None: entry["security_surfaces"] = security_surfaces + if function_relationship_facts is not None: + entry["function_relationship_facts"] = function_relationship_facts if source_stats is not None: 
entry["source_stats"] = source_stats if structural_findings is not None: @@ -361,6 +380,41 @@ def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry: tuple(item.get("suppressed_rules", [])), ), ) + function_relationship_facts = [ + FunctionRelationshipFactsDict( + source_qualname=facts["source_qualname"], + relationships=sorted( + ( + RelationshipRecordDict( + relation_kind=record["relation_kind"], + resolution_status=record["resolution_status"], + origin_lane=record["origin_lane"], + source_qualname=record["source_qualname"], + target_qualname=record["target_qualname"], + path=record["path"], + line=record["line"], + expression=record["expression"], + resolution_rule=record["resolution_rule"], + ) + for record in facts["relationships"] + ), + key=lambda record: ( + record["relation_kind"], + record["origin_lane"], + record["target_qualname"] or "", + record["path"], + record["line"], + record["resolution_status"], + record["resolution_rule"] or "", + record["expression"] or "", + ), + ), + ) + for facts in sorted( + entry.get("function_relationship_facts", []), + key=lambda item: item["source_qualname"], + ) + ] result: CacheEntry = { "stat": entry["stat"], @@ -396,6 +450,7 @@ def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry: item["evidence_symbol"], ), ), + "function_relationship_facts": function_relationship_facts, } typing_coverage = entry.get("typing_coverage") if typing_coverage is not None: @@ -472,6 +527,7 @@ def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry: "_as_typed_block_list", "_as_typed_class_metrics_list", "_as_typed_dead_candidates_list", + "_as_typed_function_relationship_facts_list", "_as_typed_module_deps_list", "_as_typed_runtime_reachability_list", "_as_typed_security_surfaces_list", diff --git a/codeclone/cache/_validators.py b/codeclone/cache/_validators.py index c3987117..04638278 100644 --- a/codeclone/cache/_validators.py +++ b/codeclone/cache/_validators.py @@ -15,16 +15,21 @@ ClassMetricsDict, 
DeadCandidateDict, FileStat, + FunctionRelationshipFactsDict, ModuleApiSurfaceDict, ModuleDepDict, ModuleDocstringCoverageDict, ModuleTypingCoverageDict, PublicSymbolDict, + RelationshipRecordDict, RuntimeReachabilityFactDict, SecuritySurfaceDict, SegmentDict, SourceStatsDict, UnitDict, + _as_relationship_kind, + _as_relationship_origin_lane, + _as_relationship_resolution_status, ) @@ -259,6 +264,55 @@ def _is_runtime_reachability_fact_dict( ) +def _is_relationship_record_dict( + value: object, +) -> TypeGuard[RelationshipRecordDict]: + if not isinstance(value, dict): + return False + relation_kind = _as_relationship_kind(value.get("relation_kind")) + resolution_status = _as_relationship_resolution_status( + value.get("resolution_status") + ) + origin_lane = _as_relationship_origin_lane(value.get("origin_lane")) + target_qualname = value.get("target_qualname") + expression = value.get("expression") + resolution_rule = value.get("resolution_rule") + line = value.get("line") + if ( + relation_kind is None + or resolution_status is None + or origin_lane is None + or not isinstance(value.get("source_qualname"), str) + or not isinstance(value.get("path"), str) + or not isinstance(line, int) + or line < 1 + or (expression is not None and not isinstance(expression, str)) + or (resolution_rule is not None and not isinstance(resolution_rule, str)) + ): + return False + if resolution_status == "resolved": + return isinstance(target_qualname, str) + return target_qualname is None + + +def _is_function_relationship_facts_dict( + value: object, +) -> TypeGuard[FunctionRelationshipFactsDict]: + if not isinstance(value, dict): + return False + source_qualname = value.get("source_qualname") + relationships = value.get("relationships") + return ( + isinstance(source_qualname, str) + and isinstance(relationships, list) + and all( + _is_relationship_record_dict(record) + and record["source_qualname"] == source_qualname + for record in relationships + ) + ) + + def 
_is_string_list(value: object) -> TypeGuard[list[str]]: return isinstance(value, list) and all(isinstance(item, str) for item in value) @@ -281,11 +335,13 @@ def _has_typed_fields( "_is_class_metrics_dict", "_is_dead_candidate_dict", "_is_file_stat_dict", + "_is_function_relationship_facts_dict", "_is_module_api_surface_dict", "_is_module_dep_dict", "_is_module_docstring_coverage_dict", "_is_module_typing_coverage_dict", "_is_public_symbol_dict", + "_is_relationship_record_dict", "_is_runtime_reachability_fact_dict", "_is_security_surface_dict", "_is_segment_dict", diff --git a/codeclone/cache/_wire_decode.py b/codeclone/cache/_wire_decode.py index e74784a6..2138ddfc 100644 --- a/codeclone/cache/_wire_decode.py +++ b/codeclone/cache/_wire_decode.py @@ -32,11 +32,13 @@ ClassMetricsDict, DeadCandidateDict, FileStat, + FunctionRelationshipFactsDict, ModuleApiSurfaceDict, ModuleDepDict, ModuleDocstringCoverageDict, ModuleTypingCoverageDict, PublicSymbolDict, + RelationshipRecordDict, RuntimeReachabilityFactDict, SecuritySurfaceDict, SegmentDict, @@ -44,6 +46,9 @@ StructuralFindingGroupDict, StructuralFindingOccurrenceDict, UnitDict, + _as_relationship_kind, + _as_relationship_origin_lane, + _as_relationship_resolution_status, _as_runtime_reachability_confidence, _as_runtime_reachability_edge_kind, _as_runtime_reachability_framework, @@ -143,10 +148,18 @@ def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None: obj=obj, filepath=filepath, ) + function_relationship_facts = _decode_optional_wire_function_relationship_facts( + obj=obj, + filepath=filepath, + ) coupled_classes_map = _decode_optional_wire_coupled_classes(obj=obj, key="cc") if coupled_classes_map is None: return None - if runtime_reachability is None or security_surfaces is None: + if ( + runtime_reachability is None + or security_surfaces is None + or function_relationship_facts is None + ): return None for metric in class_metrics: @@ -178,6 +191,7 @@ def _decode_wire_file_entry(value: 
def _decode_optional_wire_function_relationship_facts(
    *,
    obj: dict[str, object],
    filepath: str,
) -> list[FunctionRelationshipFactsDict] | None:
    """Decode the optional ``"fr"`` section of a wire file entry.

    Each wire row is a two-item row: the source qualname followed by a list of
    relationship record rows.

    Returns:
        An empty list when the ``"fr"`` key is absent (or ``None``), the
        decoded facts when every row is well-formed, and ``None`` as soon as
        any row or nested record fails to decode.
    """
    if (payload := obj.get("fr")) is None:
        return []
    if (rows := _as_list(payload)) is None:
        return None

    result: list[FunctionRelationshipFactsDict] = []
    for raw_row in rows:
        pair = _decode_wire_row(raw_row, valid_lengths={2})
        if pair is None:
            return None
        owner = _as_str(pair[0])
        raw_records = _as_list(pair[1])
        if owner is None or raw_records is None:
            return None

        records: list[RelationshipRecordDict] = []
        for raw_record in raw_records:
            record = _decode_wire_relationship_record(
                raw_record,
                source_qualname=owner,
                filepath=filepath,
            )
            if record is None:
                # One bad record invalidates the whole section.
                return None
            records.append(record)

        result.append(
            FunctionRelationshipFactsDict(
                source_qualname=owner,
                relationships=records,
            )
        )
    return result
row[6] + if ( + relation_kind is None + or resolution_status is None + or origin_lane is None + or line is None + or line < 1 + or (target_qualname is not None and not isinstance(target_qualname, str)) + or (expression is not None and not isinstance(expression, str)) + or (resolution_rule is not None and not isinstance(resolution_rule, str)) + ): + return None + if resolution_status == "resolved" and not isinstance(target_qualname, str): + return None + if resolution_status == "unresolved" and target_qualname is not None: + return None + return RelationshipRecordDict( + relation_kind=relation_kind, + resolution_status=resolution_status, + origin_lane=origin_lane, + source_qualname=source_qualname, + target_qualname=target_qualname, + path=filepath, + line=line, + expression=expression, + resolution_rule=resolution_rule, + ) + + def _decode_wire_runtime_reachability( row_raw: object, filepath: str, diff --git a/codeclone/cache/_wire_encode.py b/codeclone/cache/_wire_encode.py index dd1ec089..965f7a64 100644 --- a/codeclone/cache/_wire_encode.py +++ b/codeclone/cache/_wire_encode.py @@ -263,6 +263,32 @@ def _encode_runtime_reachability(entry: CacheEntry, wire: dict[str, object]) -> ] +def _encode_function_relationship_facts( + entry: CacheEntry, + wire: dict[str, object], +) -> None: + facts_rows = entry.get("function_relationship_facts", []) + if facts_rows: + wire["fr"] = [ + [ + facts["source_qualname"], + [ + [ + record["relation_kind"], + record["resolution_status"], + record["origin_lane"], + record["target_qualname"], + record["line"], + record["expression"], + record["resolution_rule"], + ] + for record in facts["relationships"] + ], + ] + for facts in facts_rows + ] + + def _encode_optional_metrics_sections( entry: CacheEntry, wire: dict[str, object] ) -> None: @@ -342,6 +368,7 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: _encode_dead_candidates(entry, wire) _encode_name_lists(entry, wire) _encode_runtime_reachability(entry, wire) 
+ _encode_function_relationship_facts(entry, wire) _encode_security_surfaces(entry, wire) _encode_optional_metrics_sections(entry, wire) _encode_structural_findings(entry, wire) diff --git a/codeclone/cache/entries.py b/codeclone/cache/entries.py index 67d33c24..e8e0db9e 100644 --- a/codeclone/cache/entries.py +++ b/codeclone/cache/entries.py @@ -16,10 +16,12 @@ ClassMetrics, DeadCandidate, FunctionGroupItem, + FunctionRelationshipFacts, ModuleApiSurface, ModuleDep, ModuleDocstringCoverage, ModuleTypingCoverage, + RelationshipRecord, RuntimeReachabilityFact, SecuritySurface, SegmentGroupItem, @@ -42,6 +44,23 @@ class SourceStatsDict(TypedDict): classes: int +class RelationshipRecordDict(TypedDict): + relation_kind: str + resolution_status: str + origin_lane: str + source_qualname: str + target_qualname: str | None + path: str + line: int + expression: str | None + resolution_rule: str | None + + +class FunctionRelationshipFactsDict(TypedDict): + source_qualname: str + relationships: list[RelationshipRecordDict] + + UnitDict = FunctionGroupItem BlockDict = BlockGroupItem SegmentDict = SegmentGroupItem @@ -201,17 +220,22 @@ class _FileEntryReportFacts(TypedDict, total=False): structural_findings: list[StructuralFindingGroupDict] -class _FileEntryV27( +class _FileEntryRelationshipFacts(TypedDict, total=False): + function_relationship_facts: list[FunctionRelationshipFactsDict] + + +class _FileEntryV29( _FileEntryBase, _FileEntryAnalysisFacts, _FileEntryQualityFacts, _FileEntryReportFacts, + _FileEntryRelationshipFacts, ): pass CacheEntryBase = _FileEntryBase -CacheEntry = _FileEntryV27 +CacheEntry = _FileEntryV29 def _normalize_cached_structural_group( @@ -287,6 +311,34 @@ def _as_risk_literal(value: object) -> Literal["low", "medium", "high"] | None: return None +def _as_relationship_kind(value: object) -> Literal["call", "reference"] | None: + match value: + case "call" | "reference": + return value + case _: + return None + + +def _as_relationship_resolution_status( 
def _relationship_record_dict_from_model(
    record: RelationshipRecord,
    *,
    source_qualname: str,
    filepath: str,
) -> RelationshipRecordDict:
    """Convert one relationship record model into its cache dict form.

    ``path`` is taken from ``filepath`` and ``source_qualname`` from the
    enclosing facts container, not from the record itself.

    Raises:
        ValueError: if the record's own ``source_qualname`` differs from the
            container's.
    """
    if record.source_qualname == source_qualname:
        return RelationshipRecordDict(
            relation_kind=record.relation_kind,
            resolution_status=record.resolution_status,
            origin_lane=record.origin_lane,
            source_qualname=source_qualname,
            target_qualname=record.target_qualname,
            path=filepath,
            line=record.line,
            expression=record.expression,
            resolution_rule=record.resolution_rule,
        )
    raise ValueError(
        "Relationship record source_qualname must match its facts container"
    )


def _function_relationship_facts_dict_from_model(
    facts: FunctionRelationshipFacts,
    *,
    filepath: str,
) -> FunctionRelationshipFactsDict:
    """Convert a facts container and all of its records into cache dict form.

    Every record is converted with the container's ``source_qualname``, so a
    record that names a different source raises ``ValueError``.
    """
    converted: list[RelationshipRecordDict] = []
    for item in facts.relationships:
        converted.append(
            _relationship_record_dict_from_model(
                item,
                source_qualname=facts.source_qualname,
                filepath=filepath,
            )
        )
    return FunctionRelationshipFactsDict(
        source_qualname=facts.source_qualname,
        relationships=converted,
    )
"RuntimeReachabilityFactDict", "SecuritySurfaceDict", "SegmentDict", @@ -640,6 +735,9 @@ def _structural_group_dict_from_model( "StructuralFindingOccurrenceDict", "UnitDict", "_api_surface_dict_from_model", + "_as_relationship_kind", + "_as_relationship_origin_lane", + "_as_relationship_resolution_status", "_as_risk_literal", "_as_runtime_reachability_confidence", "_as_runtime_reachability_edge_kind", @@ -653,10 +751,12 @@ def _structural_group_dict_from_model( "_class_metrics_dict_from_model", "_dead_candidate_dict_from_model", "_docstring_coverage_dict_from_model", + "_function_relationship_facts_dict_from_model", "_module_dep_dict_from_model", "_new_optional_metrics_payload", "_normalize_cached_structural_group", "_normalize_cached_structural_groups", + "_relationship_record_dict_from_model", "_runtime_reachability_dict_from_model", "_security_surface_dict_from_model", "_segment_dict_from_model", diff --git a/codeclone/cache/integrity.py b/codeclone/cache/integrity.py index 12086b1c..977dafaa 100644 --- a/codeclone/cache/integrity.py +++ b/codeclone/cache/integrity.py @@ -54,8 +54,10 @@ def verify_cache_payload_signature( return hmac.compare_digest(signature, sign_cache_payload(payload)) -def read_json_document(path: Path) -> object: - return _read_json_document(path) +def read_json_document(path: Path, *, max_bytes: int | None = None) -> object: + if max_bytes is None: + return _read_json_document(path) + return _read_json_document(path, max_bytes=max_bytes) def write_json_document_atomically(path: Path, document: object) -> None: diff --git a/codeclone/cache/projection.py b/codeclone/cache/projection.py index 49b2db37..6575a63a 100644 --- a/codeclone/cache/projection.py +++ b/codeclone/cache/projection.py @@ -11,6 +11,7 @@ from typing import TypedDict from ..models import SegmentGroupItem +from ..utils.repo_paths import RepoPathPolicy, resolve_under_repo_root from .integrity import ( as_int_or_none, as_object_list, @@ -51,14 +52,16 @@ def 
runtime_filepath_from_wire( root: Path | None, ) -> str: wire_path = Path(wire_filepath) - if root is None or wire_path.is_absolute(): + if root is None: return str(wire_path) - combined = root / wire_path - try: - return str(combined.resolve(strict=False)) - except OSError: - return str(combined) + return str( + resolve_under_repo_root( + root, + wire_path, + policy=RepoPathPolicy(allow_absolute=True), + ) + ) class SegmentReportProjection(TypedDict): diff --git a/codeclone/cache/store.py b/codeclone/cache/store.py index b90f73da..03659d58 100644 --- a/codeclone/cache/store.py +++ b/codeclone/cache/store.py @@ -7,7 +7,7 @@ from __future__ import annotations import os -from collections.abc import Collection +from collections.abc import Collection, Sequence from json import JSONDecodeError from pathlib import Path from typing import Protocol @@ -24,7 +24,14 @@ DEFAULT_SEGMENT_MIN_STMT, ) from ..contracts.errors import CacheError -from ..models import BlockUnit, FileMetrics, SegmentUnit, StructuralFindingGroup, Unit +from ..models import ( + BlockUnit, + FileMetrics, + FunctionRelationshipFacts, + SegmentUnit, + StructuralFindingGroup, + Unit, +) from ._canonicalize import ( _as_file_stat_dict, _as_typed_block_list, @@ -46,6 +53,7 @@ _class_metrics_dict_from_model, _dead_candidate_dict_from_model, _docstring_coverage_dict_from_model, + _function_relationship_facts_dict_from_model, _module_dep_dict_from_model, _new_optional_metrics_payload, _normalize_cached_structural_groups, @@ -283,7 +291,7 @@ def load(self) -> None: ) return - raw_obj = read_json_document(self.path) + raw_obj = read_json_document(self.path, max_bytes=self.max_size_bytes) parsed = self._load_and_validate(raw_obj) if parsed is None: return @@ -505,6 +513,7 @@ def get_file_entry(self, filepath: str) -> CacheEntry | None: class_names_raw, runtime_reachability_raw, security_surfaces_raw, + function_relationship_facts_raw, typing_coverage_raw, docstring_coverage_raw, api_surface_raw, @@ -533,6 +542,7 @@ 
def get_file_entry(self, filepath: str) -> CacheEntry | None: api_surface=api_surface_raw, runtime_reachability=runtime_reachability_raw, security_surfaces=security_surfaces_raw, + function_relationship_facts=function_relationship_facts_raw, source_stats=source_stats, structural_findings=structural_findings, ) @@ -553,6 +563,7 @@ def put_file_entry( source_stats: SourceStatsDict | None = None, file_metrics: FileMetrics | None = None, structural_findings: list[StructuralFindingGroup] | None = None, + function_relationship_facts: Sequence[FunctionRelationshipFacts] | None = None, ) -> None: runtime_path = runtime_filepath_from_wire( wire_filepath_from_runtime(filepath, root=self.root), @@ -564,6 +575,20 @@ def put_file_entry( segment_rows = [ _segment_dict_from_model(segment, runtime_path) for segment in segments ] + effective_relationship_facts = function_relationship_facts + if effective_relationship_facts is None: + effective_relationship_facts = ( + file_metrics.function_relationship_facts + if file_metrics is not None + else () + ) + function_relationship_fact_rows = [ + _function_relationship_facts_dict_from_model( + facts, + filepath=runtime_path, + ) + for facts in effective_relationship_facts + ] ( class_metrics_rows, @@ -637,6 +662,7 @@ def put_file_entry( class_names=class_names, runtime_reachability=runtime_reachability, security_surfaces=security_surfaces, + function_relationship_facts=function_relationship_fact_rows, ) if typing_coverage is not None: entry_dict["typing_coverage"] = typing_coverage diff --git a/codeclone/config/analytics.py b/codeclone/config/analytics.py new file mode 100644 index 00000000..0e2bd7cb --- /dev/null +++ b/codeclone/config/analytics.py @@ -0,0 +1,294 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, field_validator + +from ..audit.validation import DEFAULT_AUDIT_PATH +from ..utils.repo_paths import RepoPathPolicy, resolve_under_repo_root +from .analytics_specs import ANALYTICS_NESTED_TABLE_KEY +from .memory import resolve_memory_config +from .pyproject_loader import load_pyproject_config + +DEFAULT_ANALYTICS_DB_RELATIVE = ".codeclone/analytics/corpus_clustering.sqlite3" +DEFAULT_ANALYTICS_VECTORS_RELATIVE = ".codeclone/analytics/corpus_vectors" +DEFAULT_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5" +DEFAULT_EMBEDDING_DIMENSION = 384 +DEFAULT_EMBEDDING_PROVIDER = "fastembed" +DEFAULT_MIN_CORRELATION_SAMPLE_SIZE = 5 +DEFAULT_CLUSTER_RANDOM_SEED = 42 +DEFAULT_PCA_DIMENSIONS = 64 +DEFAULT_MIN_CLUSTER_SIZE = 8 +DEFAULT_MIN_SAMPLES = 3 +DEFAULT_CLUSTER_SELECTION_METHOD = "eom" +DEFAULT_SWEEP_PCA_DIMENSIONS = (32, 64, 128) +DEFAULT_SWEEP_MIN_CLUSTER_SIZES = (5, 8, 12, 15) +DEFAULT_SWEEP_MIN_SAMPLES = (1, 3, 5) +DEFAULT_SWEEP_SELECTION_METHODS: tuple[Literal["eom", "leaf"], ...] 
class AnalyticsPyprojectTable(BaseModel):
    """Validated ``[tool.codeclone.analytics]`` table.

    Every option is optional: ``None`` (or an empty ``profile_paths``) means
    the key was not set. Unknown keys are rejected and instances are frozen.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    db_path: str | None = None
    vectors_path: str | None = None
    embedding_model: str | None = None
    embedding_dimension: int | None = Field(default=None, gt=0)
    embedding_provider: Literal["fastembed"] | None = None
    embedding_cache_dir: str | None = None
    min_correlation_sample_size: int | None = Field(default=None, gt=0)
    cluster_random_seed: int | None = None
    default_pca_dimensions: int | None = Field(default=None, gt=0)
    default_min_cluster_size: int | None = Field(default=None, gt=0)
    default_min_samples: int | None = Field(default=None, gt=0)
    default_cluster_selection_method: Literal["eom", "leaf"] | None = None
    default_profile_id: str | None = None
    profile_paths: tuple[str, ...] = ()
    sweep_pca_dimensions: tuple[int, ...] | None = None
    sweep_min_cluster_sizes: tuple[int, ...] | None = None
    sweep_min_samples: tuple[int, ...] | None = None
    sweep_selection_methods: tuple[Literal["eom", "leaf"], ...] | None = None
    allow_model_download: bool | None = None

    @field_validator(
        "sweep_pca_dimensions",
        "sweep_min_cluster_sizes",
        "sweep_min_samples",
    )
    @classmethod
    def _positive_sweep_axis(
        cls,
        value: tuple[int, ...] | None,
    ) -> tuple[int, ...] | None:
        """Require a non-empty axis of positive ints; return it sorted and deduplicated."""
        if value is None:
            return None
        axis_is_valid = len(value) > 0 and all(item > 0 for item in value)
        if not axis_is_valid:
            raise ValueError("analytics sweep axes require positive integers")
        return tuple(sorted(set(value)))

    @field_validator("sweep_selection_methods")
    @classmethod
    def _selection_methods(
        cls,
        value: tuple[Literal["eom", "leaf"], ...] | None,
    ) -> tuple[Literal["eom", "leaf"], ...] | None:
        """Require at least one selection method; return them sorted and deduplicated."""
        if value is None:
            return None
        if len(value) == 0:
            raise ValueError("analytics sweep selection methods must not be empty")
        unique_methods = set(value)
        return tuple(sorted(unique_methods))

    @field_validator("profile_paths")
    @classmethod
    def _profile_paths(cls, value: tuple[str, ...]) -> tuple[str, ...]:
        """Reject blank (empty or whitespace-only) profile path entries."""
        for candidate in value:
            if not candidate.strip():
                raise ValueError("analytics profile paths must not be empty")
        return value

    @field_validator("default_profile_id")
    @classmethod
    def _default_profile_id(cls, value: str | None) -> str | None:
        """Strip surrounding whitespace; reject an id that is blank afterwards."""
        if value is None:
            return None
        if cleaned := value.strip():
            return cleaned
        raise ValueError("default_profile_id must not be empty")
def resolve_analytics_config(root_path: Path) -> AnalyticsConfig:
    """Build the effective analytics configuration for ``root_path``.

    Values come from the ``analytics`` table of the loaded pyproject config.
    Any option left unset falls back to its module-level default, except the
    embedding cache directory and the model-download policy, which fall back
    to the resolved memory semantic config.
    """
    resolved_root = root_path.resolve()
    payload = load_pyproject_config(resolved_root)
    raw_table = payload.get(ANALYTICS_NESTED_TABLE_KEY)
    # A missing (or non-table) section is treated as an empty table: every
    # option is then unset and the defaults below apply.
    table = (
        AnalyticsPyprojectTable.model_validate(raw_table)
        if isinstance(raw_table, dict)
        else AnalyticsPyprojectTable()
    )
    # The FastEmbed model artifact is a multi-hundred-MB download; analytics
    # vectors are kept separate (own LanceDB sidecar + embedding_generation_id),
    # but the model weights are shared with Engineering Memory rather than
    # re-downloaded into a second cache. Default the model cache + download
    # policy to the resolved memory semantic config (single source of truth).
    memory_semantic = resolve_memory_config(resolved_root).semantic

    default_profile_id = table.default_profile_id
    profile_paths = _resolve_profile_paths(resolved_root, table.profile_paths)
    if default_profile_id is not None or profile_paths:
        from ..analytics.profiles.registry import resolve_profile_registry

        # Called only for its effect; the returned registry is discarded.
        # NOTE(review): presumably raises on an invalid profile set - confirm.
        resolve_profile_registry(
            profile_paths=profile_paths,
            default_profile_id=default_profile_id,
        )

    db_path = _resolve_path(
        resolved_root,
        table.db_path,
        DEFAULT_ANALYTICS_DB_RELATIVE,
    )
    vectors_path = _resolve_path(
        resolved_root,
        table.vectors_path,
        DEFAULT_ANALYTICS_VECTORS_RELATIVE,
    )
    # The audit path lives in the top-level config, not the analytics table.
    raw_audit_path = payload.get("audit_path")
    audit_db_path = _resolve_path(
        resolved_root,
        None if raw_audit_path is None else str(raw_audit_path),
        DEFAULT_AUDIT_PATH,
    )
    embedding_cache_dir = _resolve_path(
        resolved_root,
        table.embedding_cache_dir,
        memory_semantic.embedding_cache_dir,
    )

    return AnalyticsConfig(
        db_path=db_path,
        vectors_path=vectors_path,
        audit_db_path=audit_db_path,
        embedding_model=(
            DEFAULT_EMBEDDING_MODEL
            if table.embedding_model is None
            else table.embedding_model
        ),
        embedding_dimension=(
            DEFAULT_EMBEDDING_DIMENSION
            if table.embedding_dimension is None
            else table.embedding_dimension
        ),
        embedding_provider=(
            DEFAULT_EMBEDDING_PROVIDER
            if table.embedding_provider is None
            else table.embedding_provider
        ),
        embedding_cache_dir=embedding_cache_dir,
        min_correlation_sample_size=(
            DEFAULT_MIN_CORRELATION_SAMPLE_SIZE
            if table.min_correlation_sample_size is None
            else table.min_correlation_sample_size
        ),
        cluster_random_seed=(
            DEFAULT_CLUSTER_RANDOM_SEED
            if table.cluster_random_seed is None
            else table.cluster_random_seed
        ),
        default_pca_dimensions=(
            DEFAULT_PCA_DIMENSIONS
            if table.default_pca_dimensions is None
            else table.default_pca_dimensions
        ),
        default_min_cluster_size=(
            DEFAULT_MIN_CLUSTER_SIZE
            if table.default_min_cluster_size is None
            else table.default_min_cluster_size
        ),
        default_min_samples=(
            DEFAULT_MIN_SAMPLES
            if table.default_min_samples is None
            else table.default_min_samples
        ),
        default_cluster_selection_method=(
            DEFAULT_CLUSTER_SELECTION_METHOD
            if table.default_cluster_selection_method is None
            else table.default_cluster_selection_method
        ),
        default_profile_id=default_profile_id,
        profile_paths=profile_paths,
        sweep_pca_dimensions=(
            DEFAULT_SWEEP_PCA_DIMENSIONS
            if table.sweep_pca_dimensions is None
            else table.sweep_pca_dimensions
        ),
        sweep_min_cluster_sizes=(
            DEFAULT_SWEEP_MIN_CLUSTER_SIZES
            if table.sweep_min_cluster_sizes is None
            else table.sweep_min_cluster_sizes
        ),
        sweep_min_samples=(
            DEFAULT_SWEEP_MIN_SAMPLES
            if table.sweep_min_samples is None
            else table.sweep_min_samples
        ),
        sweep_selection_methods=(
            DEFAULT_SWEEP_SELECTION_METHODS
            if table.sweep_selection_methods is None
            else table.sweep_selection_methods
        ),
        allow_model_download=(
            memory_semantic.allow_model_download
            if table.allow_model_download is None
            else table.allow_model_download
        ),
    )
+++ b/codeclone/config/analytics_specs.py @@ -0,0 +1,24 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +ANALYTICS_NESTED_TABLE_KEY: Final = "analytics" + +ANALYTICS_PATH_CONFIG_KEYS: Final = frozenset( + { + "db_path", + "vectors_path", + "embedding_cache_dir", + } +) + +__all__ = [ + "ANALYTICS_NESTED_TABLE_KEY", + "ANALYTICS_PATH_CONFIG_KEYS", +] diff --git a/codeclone/config/argparse_builder.py b/codeclone/config/argparse_builder.py index aec5a63f..4b02d043 100644 --- a/codeclone/config/argparse_builder.py +++ b/codeclone/config/argparse_builder.py @@ -1,3 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import argparse diff --git a/codeclone/config/intent_registry.py b/codeclone/config/intent_registry.py new file mode 100644 index 00000000..d01855d9 --- /dev/null +++ b/codeclone/config/intent_registry.py @@ -0,0 +1,181 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def resolve_intent_registry_retention_days(
    value: object = None,
    *,
    env_value: object = None,
) -> int:
    """Resolve the intent registry retention period, in days.

    Args:
        value: The configured value (e.g. from pyproject); must be an ``int``.
        env_value: The environment override. Takes precedence over ``value``
            whenever it is not ``None``. Environment variables are always
            strings, so a string here is parsed as a base-10 integer
            (surrounding whitespace ignored).

    Returns:
        The validated number of days, or the default when neither input is set.

    Raises:
        IntentRegistryConfigError: if the selected value is not an integer
            (booleans are rejected too), if a string override cannot be parsed,
            or if the result is below the allowed minimum.
    """
    raw = env_value if env_value is not None else value
    if raw is None:
        return DEFAULT_INTENT_REGISTRY_RETENTION_DAYS
    if env_value is not None and isinstance(raw, str):
        # ``os.environ`` only yields strings; without this conversion the
        # override could never pass the integer check below.
        try:
            raw = int(raw.strip())
        except ValueError as exc:
            raise IntentRegistryConfigError(
                "intent_registry_retention_days must be an integer"
            ) from exc
    # ``bool`` is a subclass of ``int``; reject it explicitly.
    if not isinstance(raw, int) or isinstance(raw, bool):
        raise IntentRegistryConfigError(
            "intent_registry_retention_days must be an integer"
        )
    if raw < MIN_INTENT_REGISTRY_RETENTION_DAYS:
        raise IntentRegistryConfigError(
            "intent_registry_retention_days must be at least "
            f"{MIN_INTENT_REGISTRY_RETENTION_DAYS}"
        )
    return raw
def resolve_intent_registry_db_path(*, root_path: Path, value: object) -> Path:
    """Validate a configured registry database path and resolve it under the repo root.

    The value must be a non-blank string with no ``..`` (or empty / ``.``)
    path parts and a ``.sqlite3`` or ``.db`` suffix.

    Raises:
        IntentRegistryConfigError: for any validation failure, and for any
            repo-path error raised while resolving under ``root_path``.
    """
    if not isinstance(value, str):
        raise IntentRegistryConfigError("intent_registry_path must be a string")
    trimmed = value.strip()
    if trimmed == "":
        raise IntentRegistryConfigError("intent_registry_path must not be empty")

    candidate = Path(trimmed)
    forbidden_parts = {"", ".", ".."}
    if not forbidden_parts.isdisjoint(candidate.parts):
        raise IntentRegistryConfigError(
            "intent_registry_path must not contain empty, '.', or '..' parts"
        )
    if candidate.suffix not in _VALID_DB_SUFFIXES:
        raise IntentRegistryConfigError(
            "intent_registry_path must end with .sqlite3 or .db"
        )

    try:
        resolved = resolve_under_repo_root(
            root_path,
            candidate,
            policy=RepoPathPolicy(),
        )
    except PathOutsideRepoError as exc:
        # Handled before the broader RepoPathError so this case keeps its own message.
        raise IntentRegistryConfigError(
            "intent_registry_path must be relative to the repository root"
        ) from exc
    except RepoPathError as exc:
        raise IntentRegistryConfigError(f"invalid intent_registry_path: {exc}") from exc
    return resolved
def intent_registry_summary(root: Path) -> dict[str, str]:
    """Return the resolved intent registry settings as display strings.

    The storage location is shown relative to the resolved ``root`` when it
    lies beneath it, and as the full storage path otherwise.
    """
    resolved = resolve_intent_registry_config(root)
    location = resolved.storage_path
    try:
        shown = str(location.relative_to(root.resolve()))
    except ValueError:
        # ``relative_to`` raises when the storage path is not under the root.
        shown = str(location)
    return {
        "registry_backend": resolved.backend,
        "registry_storage": shown,
        "registry_retention_days": str(resolved.retention_days),
    }
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final, Literal + +IntentRegistryBackend = Literal["file", "sqlite"] + +DEFAULT_INTENT_REGISTRY_BACKEND: Final[IntentRegistryBackend] = "file" +DEFAULT_INTENT_REGISTRY_DB_PATH: Final = ".codeclone/db/intents.sqlite3" +# Closed-row retention for the SQLite intent registry. A sensible local default; +# there is no edition cap — operators may set any positive number of days. +DEFAULT_INTENT_REGISTRY_RETENTION_DAYS: Final = 14 +MIN_INTENT_REGISTRY_RETENTION_DAYS: Final = 1 + +__all__ = [ + "DEFAULT_INTENT_REGISTRY_BACKEND", + "DEFAULT_INTENT_REGISTRY_DB_PATH", + "DEFAULT_INTENT_REGISTRY_RETENTION_DAYS", + "MIN_INTENT_REGISTRY_RETENTION_DAYS", + "IntentRegistryBackend", +] diff --git a/codeclone/config/memory.py b/codeclone/config/memory.py new file mode 100644 index 00000000..3fd264c6 --- /dev/null +++ b/codeclone/config/memory.py @@ -0,0 +1,438 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator + +from ..utils.repo_paths import ( + PathOutsideRepoError, + RepoPathError, + RepoPathPolicy, + resolve_under_repo_root, +) +from .memory_defaults import ( + DEFAULT_INGEST_CONTRACT_CONSTANTS_PATHS, + DEFAULT_INGEST_DOCUMENT_LINK_PATHS, + DEFAULT_INGEST_MCP_TOOL_COUNT_DOC_PATHS, + DEFAULT_INGEST_MCP_TOOL_SCHEMA_SNAPSHOT_PATH, + DEFAULT_SEMANTIC_ALLOW_MODEL_DOWNLOAD, + DEFAULT_SEMANTIC_BACKEND, + DEFAULT_SEMANTIC_DIMENSION, + DEFAULT_SEMANTIC_EMBED_MAX_DOCUMENTS_PER_BATCH, + DEFAULT_SEMANTIC_EMBED_MAX_PADDED_TOKENS_PER_BATCH, + DEFAULT_SEMANTIC_EMBEDDING_CACHE_DIR, + DEFAULT_SEMANTIC_EMBEDDING_PROVIDER, + DEFAULT_SEMANTIC_ENABLED, + DEFAULT_SEMANTIC_FASTEMBED_DIMENSION, + DEFAULT_SEMANTIC_FASTEMBED_MODEL, + DEFAULT_SEMANTIC_INDEX_AUDIT, + DEFAULT_SEMANTIC_INDEX_PATH, + DEFAULT_SEMANTIC_MAX_RESULTS, + DEFAULT_SEMANTIC_PROJECTION_TOKEN_ESTIMATOR, + MEMORY_ENV_DB_PATH, + MEMORY_ENV_PROJECTION_REBUILD_POLICY, + MEMORY_ENV_SEMANTIC_ALLOW_MODEL_DOWNLOAD, + MEMORY_ENV_SEMANTIC_EMBEDDING_CACHE_DIR, + MEMORY_ENV_SEMANTIC_EMBEDDING_MODEL, + MEMORY_ENV_SEMANTIC_EMBEDDING_PROVIDER, + MEMORY_ENV_SEMANTIC_ENABLED, + MEMORY_ENV_SEMANTIC_INDEX_PATH, + MemoryBackend, + MemoryMcpSyncPolicy, + MemoryProjectionRebuildPolicy, + SemanticBackend, + SemanticEmbeddingProvider, + SemanticProjectionTokenEstimator, +) +from .memory_specs import ( + INGEST_NESTED_TABLE_KEY, + MEMORY_CONFIG_DEFAULTS, + SEMANTIC_NESTED_TABLE_KEY, +) +from .pyproject_loader import load_pyproject_config + +_VALID_BACKENDS = frozenset({"sqlite", "postgres"}) +_VALID_MCP_SYNC_POLICIES = frozenset( + {"off", "bootstrap_if_missing", "refresh_when_stale"}, +) +_VALID_PROJECTION_REBUILD_POLICIES = frozenset({"off", "enqueue_when_stale"}) + 
class SemanticConfig(BaseModel):
    """Validated semantic-retrieval config (Phase 20).

    The single validation authority for ``[tool.codeclone.memory.semantic]``:
    ``frozen`` + ``extra="forbid"`` reject unknown keys, bad literals, and
    non-positive sizes here, so no flat ConfigKeySpec table duplicates these
    field definitions. ``enabled=false`` + ``diagnostic`` keep the default
    offline and zero-extra-dependency. ``fastembed`` is the community local
    quality provider and remains opt-in.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Switch, backend and on-disk index location.
    enabled: bool = DEFAULT_SEMANTIC_ENABLED
    backend: SemanticBackend = DEFAULT_SEMANTIC_BACKEND
    index_path: str = Field(default=DEFAULT_SEMANTIC_INDEX_PATH, min_length=1)

    # Embedding provider selection; the model may stay unset (None).
    embedding_provider: SemanticEmbeddingProvider = DEFAULT_SEMANTIC_EMBEDDING_PROVIDER
    embedding_model: str | None = Field(default=None, min_length=1)
    embedding_cache_dir: str = Field(
        default=DEFAULT_SEMANTIC_EMBEDDING_CACHE_DIR,
        min_length=1,
    )
    allow_model_download: bool = DEFAULT_SEMANTIC_ALLOW_MODEL_DOWNLOAD

    # Sizes: every integer below must be strictly positive (gt=0).
    dimension: int = Field(default=DEFAULT_SEMANTIC_DIMENSION, gt=0)
    max_results: int = Field(default=DEFAULT_SEMANTIC_MAX_RESULTS, gt=0)
    index_audit: bool = DEFAULT_SEMANTIC_INDEX_AUDIT
    embed_max_documents_per_batch: int = Field(
        default=DEFAULT_SEMANTIC_EMBED_MAX_DOCUMENTS_PER_BATCH, gt=0
    )
    embed_max_padded_tokens_per_batch: int = Field(
        default=DEFAULT_SEMANTIC_EMBED_MAX_PADDED_TOKENS_PER_BATCH, gt=0
    )
    projection_token_estimator: SemanticProjectionTokenEstimator = (
        DEFAULT_SEMANTIC_PROJECTION_TOKEN_ESTIMATOR
    )

    @model_validator(mode="before")
    @classmethod
    def _apply_provider_defaults(cls, data: object) -> object:
        """Fill in fastembed-specific defaults before field validation runs.

        Non-dict input is returned untouched. Dict input is shallow-copied;
        when its ``embedding_provider`` equals ``"fastembed"``, the
        ``embedding_model`` and ``dimension`` keys receive the fastembed
        defaults if (and only if) they are absent. Keys already present keep
        the caller's value.
        """
        if isinstance(data, dict):
            patched = {**data}
            if patched.get("embedding_provider") == "fastembed":
                fastembed_fallbacks = (
                    ("embedding_model", DEFAULT_SEMANTIC_FASTEMBED_MODEL),
                    ("dimension", DEFAULT_SEMANTIC_FASTEMBED_DIMENSION),
                )
                for field_name, fallback in fastembed_fallbacks:
                    if field_name not in patched:
                        patched[field_name] = fallback
            return patched
        return data
= DEFAULT_INGEST_MCP_TOOL_COUNT_DOC_PATHS + + @model_validator(mode="before") + @classmethod + def _normalize_path_lists(cls, data: object) -> object: + if not isinstance(data, dict): + return data + normalized = dict(data) + for key in ( + "contract_constants_paths", + "document_link_paths", + "mcp_tool_count_doc_paths", + ): + raw = normalized.get(key) + if raw is None: + continue + if isinstance(raw, str): + normalized[key] = (raw,) + elif isinstance(raw, list): + normalized[key] = tuple(str(item) for item in raw) + snapshot = normalized.get("mcp_tool_schema_snapshot_path") + if snapshot == "": + normalized["mcp_tool_schema_snapshot_path"] = None + return normalized + + +@dataclass(frozen=True, slots=True) +class MemoryConfig: + backend: MemoryBackend + db_path: Path + active_retention_days: int + stale_retention_days: int + draft_retention_days: int + rejected_retention_days: int + archived_retention_days: int + receipt_retention_days: int + max_records: int + max_candidates: int + max_evidence_per_record: int + max_statement_chars: int + max_blast_radius_cache_entries: int + git_hotspot_period_days: int + git_hotspot_min_changes: int + mcp_sync_policy: MemoryMcpSyncPolicy + projection_rebuild_policy: MemoryProjectionRebuildPolicy + projection_rebuild_running_timeout_seconds: int + projection_rebuild_spawn_worker: bool + projection_rebuild_coalesce_window_seconds: int + projection_rebuild_coalesce_min_delta: int + trajectories_enabled: bool + trajectory_retention_days: int + trajectory_export_enabled: bool + trajectory_export_include_payloads: bool + trajectory_export_max_record_bytes: int + trajectory_export_max_file_bytes: int + semantic: SemanticConfig = field(default_factory=SemanticConfig) + ingest: IngestConfig = field(default_factory=IngestConfig) + + +def _memory_int(value: object, *, key: str) -> int: + if isinstance(value, bool): + msg = f"Invalid tool.codeclone.memory.{key}: expected integer" + raise ValueError(msg) + if isinstance(value, int): + 
return value + if isinstance(value, str) and value.strip().isdigit(): + return int(value.strip()) + msg = f"Invalid tool.codeclone.memory.{key}: expected integer" + raise ValueError(msg) + + +def _memory_bool(value: object, *, key: str) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, str): + raw = value.strip().lower() + if raw in {"1", "true", "yes", "on"}: + return True + if raw in {"0", "false", "no", "off"}: + return False + msg = f"Invalid tool.codeclone.memory.{key}: expected boolean" + raise ValueError(msg) + + +def _memory_choice(value: object, *, key: str, valid: frozenset[str]) -> str: + raw = str(value).strip().lower() + if raw not in valid: + msg = f"Invalid tool.codeclone.memory.{key}: expected one of {sorted(valid)}" + raise ValueError(msg) + return raw + + +def _format_nested_memory_config_error( + *, + section: str, + exc: ValidationError, +) -> str: + errors = exc.errors() + if not errors: + return f"Invalid tool.codeclone.memory.{section} configuration" + first = errors[0] + loc = ".".join(str(part) for part in first.get("loc", ())) + message = first.get("msg", "invalid value") + suffix = f".{loc}" if loc else "" + return f"Invalid tool.codeclone.memory.{section}{suffix}: {message}" + + +def _resolve_ingest_config(raw: object) -> IngestConfig: + data: dict[str, object] = dict(raw) if isinstance(raw, dict) else {} + try: + return IngestConfig.model_validate(data) + except ValidationError as exc: + raise ValueError( + _format_nested_memory_config_error(section="ingest", exc=exc) + ) from exc + + +def _resolve_semantic_config(raw: object, *, root_path: Path) -> SemanticConfig: + data: dict[str, object] = dict(raw) if isinstance(raw, dict) else {} + for env_var, field_name in _SEMANTIC_ENV_OVERRIDES.items(): + env_value = os.environ.get(env_var) + if env_value is not None: + data[field_name] = env_value + try: + config = SemanticConfig.model_validate(data) + except ValidationError as exc: + raise ValueError( + 
def _resolve_memory_state_path(*, key: str, value: object, root_path: Path) -> Path:
    """Resolve a configured memory state path against the repository root.

    Args:
        key: Dotted config key (for example ``memory.db_path``); used only in
            error messages.
        value: Raw configured path; must be a ``str``.
        root_path: Repository root passed to ``resolve_under_repo_root``.

    Returns:
        Whatever ``resolve_under_repo_root`` returns for ``value`` under a
        policy built with ``allow_absolute=True``.

    Raises:
        TypeError: If ``value`` is not a string.
        ValueError: If the resolver raises ``PathOutsideRepoError`` or any
            other ``RepoPathError``; the original error is chained.
    """
    if isinstance(value, str):
        try:
            policy = RepoPathPolicy(allow_absolute=True)
            resolved = resolve_under_repo_root(root_path, value, policy=policy)
        except PathOutsideRepoError as exc:
            # Handled before the broader RepoPathError so the "outside the
            # repository" case keeps its dedicated message.
            outside_msg = f"{key} must stay under the repository root"
            raise ValueError(outside_msg) from exc
        except RepoPathError as exc:
            invalid_msg = f"Invalid tool.codeclone.{key}: {exc}"
            raise ValueError(invalid_msg) from exc
        return resolved
    raise TypeError(f"{key} must resolve to a string path")
valid=_VALID_PROJECTION_REBUILD_POLICIES, + ) + + env_db_path = os.environ.get(MEMORY_ENV_DB_PATH) + db_path_raw: object = env_db_path if env_db_path is not None else merged["db_path"] + db_path_value = _resolve_memory_state_path( + key="memory.db_path", + value=db_path_raw, + root_path=root_path, + ) + + return MemoryConfig( + backend=backend_raw, # type: ignore[arg-type] + db_path=db_path_value, + active_retention_days=_memory_int( + merged["active_retention_days"], key="active_retention_days" + ), + stale_retention_days=_memory_int( + merged["stale_retention_days"], key="stale_retention_days" + ), + draft_retention_days=_memory_int( + merged["draft_retention_days"], key="draft_retention_days" + ), + rejected_retention_days=_memory_int( + merged["rejected_retention_days"], key="rejected_retention_days" + ), + archived_retention_days=_memory_int( + merged["archived_retention_days"], key="archived_retention_days" + ), + receipt_retention_days=_memory_int( + merged["receipt_retention_days"], key="receipt_retention_days" + ), + max_records=_memory_int(merged["max_records"], key="max_records"), + max_candidates=_memory_int(merged["max_candidates"], key="max_candidates"), + max_evidence_per_record=_memory_int( + merged["max_evidence_per_record"], key="max_evidence_per_record" + ), + max_statement_chars=_memory_int( + merged["max_statement_chars"], key="max_statement_chars" + ), + max_blast_radius_cache_entries=_memory_int( + merged["max_blast_radius_cache_entries"], + key="max_blast_radius_cache_entries", + ), + git_hotspot_period_days=_memory_int( + merged["git_hotspot_period_days"], + key="git_hotspot_period_days", + ), + git_hotspot_min_changes=_memory_int( + merged["git_hotspot_min_changes"], + key="git_hotspot_min_changes", + ), + mcp_sync_policy=policy_raw, # type: ignore[arg-type] + projection_rebuild_policy=projection_policy_raw, # type: ignore[arg-type] + projection_rebuild_running_timeout_seconds=_memory_int( + 
merged["projection_rebuild_running_timeout_seconds"], + key="projection_rebuild_running_timeout_seconds", + ), + projection_rebuild_spawn_worker=_memory_bool( + merged["projection_rebuild_spawn_worker"], + key="projection_rebuild_spawn_worker", + ), + projection_rebuild_coalesce_window_seconds=_memory_int( + merged["projection_rebuild_coalesce_window_seconds"], + key="projection_rebuild_coalesce_window_seconds", + ), + projection_rebuild_coalesce_min_delta=_memory_int( + merged["projection_rebuild_coalesce_min_delta"], + key="projection_rebuild_coalesce_min_delta", + ), + trajectories_enabled=_memory_bool( + merged["trajectories_enabled"], key="trajectories_enabled" + ), + trajectory_retention_days=_memory_int( + merged["trajectory_retention_days"], key="trajectory_retention_days" + ), + trajectory_export_enabled=_memory_bool( + merged["trajectory_export_enabled"], key="trajectory_export_enabled" + ), + trajectory_export_include_payloads=_memory_bool( + merged["trajectory_export_include_payloads"], + key="trajectory_export_include_payloads", + ), + trajectory_export_max_record_bytes=_memory_int( + merged["trajectory_export_max_record_bytes"], + key="trajectory_export_max_record_bytes", + ), + trajectory_export_max_file_bytes=_memory_int( + merged["trajectory_export_max_file_bytes"], + key="trajectory_export_max_file_bytes", + ), + semantic=_resolve_semantic_config( + merged.get(SEMANTIC_NESTED_TABLE_KEY), + root_path=root_path, + ), + ingest=_resolve_ingest_config(merged.get(INGEST_NESTED_TABLE_KEY)), + ) + + +__all__ = ["IngestConfig", "MemoryConfig", "SemanticConfig", "resolve_memory_config"] diff --git a/codeclone/config/memory_defaults.py b/codeclone/config/memory_defaults.py new file mode 100644 index 00000000..f9291f55 --- /dev/null +++ b/codeclone/config/memory_defaults.py @@ -0,0 +1,159 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final, Literal + +MemoryBackend = Literal["sqlite", "postgres"] +MemoryMcpSyncPolicy = Literal["off", "bootstrap_if_missing", "refresh_when_stale"] +MemoryProjectionRebuildPolicy = Literal["off", "enqueue_when_stale"] +SemanticBackend = Literal["lancedb"] +SemanticEmbeddingProvider = Literal["diagnostic", "fastembed", "local_model", "api"] +SemanticProjectionTokenEstimator = Literal["chars_approx", "tiktoken"] + +DEFAULT_MEMORY_BACKEND: Final[MemoryBackend] = "sqlite" +DEFAULT_MEMORY_DB_PATH: Final = ".codeclone/memory/engineering_memory.sqlite3" +DEFAULT_MEMORY_ACTIVE_RETENTION_DAYS: Final = -1 +DEFAULT_MEMORY_STALE_RETENTION_DAYS: Final = 180 +DEFAULT_MEMORY_DRAFT_RETENTION_DAYS: Final = 14 +DEFAULT_MEMORY_REJECTED_RETENTION_DAYS: Final = 30 +DEFAULT_MEMORY_ARCHIVED_RETENTION_DAYS: Final = 365 +DEFAULT_MEMORY_RECEIPT_RETENTION_DAYS: Final = 90 +DEFAULT_MEMORY_MAX_RECORDS: Final = 10_000 +DEFAULT_MEMORY_MAX_CANDIDATES: Final = 1_000 +DEFAULT_MEMORY_MAX_EVIDENCE_PER_RECORD: Final = 20 +DEFAULT_MEMORY_TARGET_STATEMENT_CHARS: Final = 300 +DEFAULT_MEMORY_SOFT_STATEMENT_CHARS: Final = 500 +DEFAULT_MEMORY_MAX_STATEMENT_CHARS: Final = 1_000 +DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS: Final = 160 +DEFAULT_MEMORY_MAX_BLAST_RADIUS_CACHE_ENTRIES: Final = 500 +DEFAULT_MEMORY_GIT_HOTSPOT_PERIOD_DAYS: Final = 90 +DEFAULT_MEMORY_GIT_HOTSPOT_MIN_CHANGES: Final = 5 +DEFAULT_MEMORY_MCP_SYNC_POLICY: Final[MemoryMcpSyncPolicy] = "bootstrap_if_missing" +DEFAULT_MEMORY_TRAJECTORIES_ENABLED: Final = True +DEFAULT_MEMORY_TRAJECTORY_RETENTION_DAYS: Final = 365 +DEFAULT_MEMORY_TRAJECTORY_EXPORT_ENABLED: Final = False +DEFAULT_MEMORY_TRAJECTORY_EXPORT_INCLUDE_PAYLOADS: Final = False +DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_RECORD_BYTES: Final = 65_536 
+DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_FILE_BYTES: Final = 10_485_760 +DEFAULT_MEMORY_PROJECTION_REBUILD_POLICY: Final[MemoryProjectionRebuildPolicy] = "off" +DEFAULT_MEMORY_PROJECTION_REBUILD_RUNNING_TIMEOUT_SECONDS: Final = 1800 +DEFAULT_MEMORY_PROJECTION_REBUILD_SPAWN_WORKER: Final = True +# Coalesce sub-threshold projection rebuilds: defer the worker spawn until the +# window elapses since the last reindex, batching bursts into one model load. +# 0 disables coalescing (spawn immediately, legacy behavior). Only consulted +# under projection_rebuild_policy="enqueue_when_stale". +DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_WINDOW_SECONDS: Final = 60 +# A reindex bypasses the coalesce window when the active-record delta since the +# last applied stimulus reaches this magnitude (a large content change is worth +# an immediate model load). Counts memory records only, not audit events. +DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_MIN_DELTA: Final = 25 + +DEFAULT_INGEST_CONTRACT_CONSTANTS_PATHS: Final[tuple[str, ...]] = () +DEFAULT_INGEST_DOCUMENT_LINK_PATHS: Final[tuple[str, ...]] = () +DEFAULT_INGEST_MCP_TOOL_SCHEMA_SNAPSHOT_PATH: Final[str | None] = None +DEFAULT_INGEST_MCP_TOOL_COUNT_DOC_PATHS: Final[tuple[str, ...]] = () + +# Semantic retrieval index (Phase 20). Default OFF + "diagnostic" keep the +# community default zero-extra-dependency and offline; a real recall model is +# opt-in (fastembed/community local, api/paid later). 
+DEFAULT_SEMANTIC_ENABLED: Final = False +DEFAULT_SEMANTIC_BACKEND: Final[SemanticBackend] = "lancedb" +DEFAULT_SEMANTIC_INDEX_PATH: Final = ".codeclone/memory/semantic_index.lance" +DEFAULT_SEMANTIC_EMBEDDING_PROVIDER: Final[SemanticEmbeddingProvider] = "diagnostic" +DEFAULT_SEMANTIC_FASTEMBED_MODEL: Final = "BAAI/bge-small-en-v1.5" +DEFAULT_SEMANTIC_FASTEMBED_DIMENSION: Final = 384 +DEFAULT_SEMANTIC_EMBEDDING_CACHE_DIR: Final = ".codeclone/memory/fastembed" +DEFAULT_SEMANTIC_ALLOW_MODEL_DOWNLOAD: Final = False +DEFAULT_SEMANTIC_DIMENSION: Final = 256 +DEFAULT_SEMANTIC_MAX_RESULTS: Final = 20 +DEFAULT_SEMANTIC_INDEX_AUDIT: Final = True +DEFAULT_SEMANTIC_EMBED_MAX_DOCUMENTS_PER_BATCH: Final = 64 +DEFAULT_SEMANTIC_EMBED_MAX_PADDED_TOKENS_PER_BATCH: Final = 8192 +DEFAULT_SEMANTIC_PROJECTION_TOKEN_ESTIMATOR: Final[SemanticProjectionTokenEstimator] = ( + "chars_approx" +) + +MEMORY_ENV_DB_PATH: Final = "CODECLONE_MEMORY_DB_PATH" +MEMORY_ENV_SEMANTIC_ENABLED: Final = "CODECLONE_MEMORY_SEMANTIC_ENABLED" +MEMORY_ENV_SEMANTIC_EMBEDDING_PROVIDER: Final = ( + "CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER" +) +MEMORY_ENV_SEMANTIC_EMBEDDING_MODEL: Final = "CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL" +MEMORY_ENV_SEMANTIC_EMBEDDING_CACHE_DIR: Final = ( + "CODECLONE_MEMORY_SEMANTIC_EMBEDDING_CACHE_DIR" +) +MEMORY_ENV_SEMANTIC_ALLOW_MODEL_DOWNLOAD: Final = ( + "CODECLONE_MEMORY_SEMANTIC_ALLOW_MODEL_DOWNLOAD" +) +MEMORY_ENV_SEMANTIC_INDEX_PATH: Final = "CODECLONE_MEMORY_SEMANTIC_INDEX_PATH" +MEMORY_ENV_PROJECTION_REBUILD_POLICY: Final = "CODECLONE_PROJECTION_REBUILD_POLICY" + +__all__ = [ + "DEFAULT_INGEST_CONTRACT_CONSTANTS_PATHS", + "DEFAULT_INGEST_DOCUMENT_LINK_PATHS", + "DEFAULT_INGEST_MCP_TOOL_COUNT_DOC_PATHS", + "DEFAULT_INGEST_MCP_TOOL_SCHEMA_SNAPSHOT_PATH", + "DEFAULT_MEMORY_ACTIVE_RETENTION_DAYS", + "DEFAULT_MEMORY_ARCHIVED_RETENTION_DAYS", + "DEFAULT_MEMORY_BACKEND", + "DEFAULT_MEMORY_DB_PATH", + "DEFAULT_MEMORY_DRAFT_RETENTION_DAYS", + 
"DEFAULT_MEMORY_GIT_HOTSPOT_MIN_CHANGES", + "DEFAULT_MEMORY_GIT_HOTSPOT_PERIOD_DAYS", + "DEFAULT_MEMORY_MAX_BLAST_RADIUS_CACHE_ENTRIES", + "DEFAULT_MEMORY_MAX_CANDIDATES", + "DEFAULT_MEMORY_MAX_EVIDENCE_PER_RECORD", + "DEFAULT_MEMORY_MAX_RECORDS", + "DEFAULT_MEMORY_MAX_STATEMENT_CHARS", + "DEFAULT_MEMORY_MCP_SYNC_POLICY", + "DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_MIN_DELTA", + "DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_WINDOW_SECONDS", + "DEFAULT_MEMORY_PROJECTION_REBUILD_POLICY", + "DEFAULT_MEMORY_PROJECTION_REBUILD_RUNNING_TIMEOUT_SECONDS", + "DEFAULT_MEMORY_PROJECTION_REBUILD_SPAWN_WORKER", + "DEFAULT_MEMORY_RECEIPT_RETENTION_DAYS", + "DEFAULT_MEMORY_REJECTED_RETENTION_DAYS", + "DEFAULT_MEMORY_SOFT_STATEMENT_CHARS", + "DEFAULT_MEMORY_STALE_RETENTION_DAYS", + "DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS", + "DEFAULT_MEMORY_TARGET_STATEMENT_CHARS", + "DEFAULT_MEMORY_TRAJECTORIES_ENABLED", + "DEFAULT_MEMORY_TRAJECTORY_EXPORT_ENABLED", + "DEFAULT_MEMORY_TRAJECTORY_EXPORT_INCLUDE_PAYLOADS", + "DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_FILE_BYTES", + "DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_RECORD_BYTES", + "DEFAULT_MEMORY_TRAJECTORY_RETENTION_DAYS", + "DEFAULT_SEMANTIC_ALLOW_MODEL_DOWNLOAD", + "DEFAULT_SEMANTIC_BACKEND", + "DEFAULT_SEMANTIC_DIMENSION", + "DEFAULT_SEMANTIC_EMBEDDING_CACHE_DIR", + "DEFAULT_SEMANTIC_EMBEDDING_PROVIDER", + "DEFAULT_SEMANTIC_EMBED_MAX_DOCUMENTS_PER_BATCH", + "DEFAULT_SEMANTIC_EMBED_MAX_PADDED_TOKENS_PER_BATCH", + "DEFAULT_SEMANTIC_ENABLED", + "DEFAULT_SEMANTIC_FASTEMBED_DIMENSION", + "DEFAULT_SEMANTIC_FASTEMBED_MODEL", + "DEFAULT_SEMANTIC_INDEX_AUDIT", + "DEFAULT_SEMANTIC_INDEX_PATH", + "DEFAULT_SEMANTIC_MAX_RESULTS", + "DEFAULT_SEMANTIC_PROJECTION_TOKEN_ESTIMATOR", + "MEMORY_ENV_DB_PATH", + "MEMORY_ENV_PROJECTION_REBUILD_POLICY", + "MEMORY_ENV_SEMANTIC_ALLOW_MODEL_DOWNLOAD", + "MEMORY_ENV_SEMANTIC_EMBEDDING_CACHE_DIR", + "MEMORY_ENV_SEMANTIC_EMBEDDING_MODEL", + "MEMORY_ENV_SEMANTIC_EMBEDDING_PROVIDER", + "MEMORY_ENV_SEMANTIC_ENABLED", + 
"MEMORY_ENV_SEMANTIC_INDEX_PATH", + "MemoryBackend", + "MemoryMcpSyncPolicy", + "MemoryProjectionRebuildPolicy", + "SemanticBackend", + "SemanticEmbeddingProvider", + "SemanticProjectionTokenEstimator", +] diff --git a/codeclone/config/memory_specs.py b/codeclone/config/memory_specs.py new file mode 100644 index 00000000..4a42e4dd --- /dev/null +++ b/codeclone/config/memory_specs.py @@ -0,0 +1,130 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Final + +from .memory_defaults import ( + DEFAULT_MEMORY_ACTIVE_RETENTION_DAYS, + DEFAULT_MEMORY_ARCHIVED_RETENTION_DAYS, + DEFAULT_MEMORY_BACKEND, + DEFAULT_MEMORY_DB_PATH, + DEFAULT_MEMORY_DRAFT_RETENTION_DAYS, + DEFAULT_MEMORY_GIT_HOTSPOT_MIN_CHANGES, + DEFAULT_MEMORY_GIT_HOTSPOT_PERIOD_DAYS, + DEFAULT_MEMORY_MAX_BLAST_RADIUS_CACHE_ENTRIES, + DEFAULT_MEMORY_MAX_CANDIDATES, + DEFAULT_MEMORY_MAX_EVIDENCE_PER_RECORD, + DEFAULT_MEMORY_MAX_RECORDS, + DEFAULT_MEMORY_MAX_STATEMENT_CHARS, + DEFAULT_MEMORY_MCP_SYNC_POLICY, + DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_MIN_DELTA, + DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_WINDOW_SECONDS, + DEFAULT_MEMORY_PROJECTION_REBUILD_POLICY, + DEFAULT_MEMORY_PROJECTION_REBUILD_RUNNING_TIMEOUT_SECONDS, + DEFAULT_MEMORY_PROJECTION_REBUILD_SPAWN_WORKER, + DEFAULT_MEMORY_RECEIPT_RETENTION_DAYS, + DEFAULT_MEMORY_REJECTED_RETENTION_DAYS, + DEFAULT_MEMORY_STALE_RETENTION_DAYS, + DEFAULT_MEMORY_TRAJECTORIES_ENABLED, + DEFAULT_MEMORY_TRAJECTORY_EXPORT_ENABLED, + DEFAULT_MEMORY_TRAJECTORY_EXPORT_INCLUDE_PAYLOADS, + DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_FILE_BYTES, + DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_RECORD_BYTES, + DEFAULT_MEMORY_TRAJECTORY_RETENTION_DAYS, +) +from .spec import ConfigKeySpec + 
+MEMORY_CONFIG_KEY_SPECS: Final[dict[str, ConfigKeySpec]] = { + "backend": ConfigKeySpec(expected_type=str), + "db_path": ConfigKeySpec(expected_type=str), + "active_retention_days": ConfigKeySpec(expected_type=int), + "stale_retention_days": ConfigKeySpec(expected_type=int), + "draft_retention_days": ConfigKeySpec(expected_type=int), + "rejected_retention_days": ConfigKeySpec(expected_type=int), + "archived_retention_days": ConfigKeySpec(expected_type=int), + "receipt_retention_days": ConfigKeySpec(expected_type=int), + "max_records": ConfigKeySpec(expected_type=int), + "max_candidates": ConfigKeySpec(expected_type=int), + "max_evidence_per_record": ConfigKeySpec(expected_type=int), + "max_statement_chars": ConfigKeySpec(expected_type=int), + "max_blast_radius_cache_entries": ConfigKeySpec(expected_type=int), + "git_hotspot_period_days": ConfigKeySpec(expected_type=int), + "git_hotspot_min_changes": ConfigKeySpec(expected_type=int), + "mcp_sync_policy": ConfigKeySpec(expected_type=str), + "projection_rebuild_policy": ConfigKeySpec(expected_type=str), + "projection_rebuild_running_timeout_seconds": ConfigKeySpec(expected_type=int), + "projection_rebuild_spawn_worker": ConfigKeySpec(expected_type=bool), + "projection_rebuild_coalesce_window_seconds": ConfigKeySpec(expected_type=int), + "projection_rebuild_coalesce_min_delta": ConfigKeySpec(expected_type=int), + "trajectories_enabled": ConfigKeySpec(expected_type=bool), + "trajectory_retention_days": ConfigKeySpec(expected_type=int), + "trajectory_export_enabled": ConfigKeySpec(expected_type=bool), + "trajectory_export_include_payloads": ConfigKeySpec(expected_type=bool), + "trajectory_export_max_record_bytes": ConfigKeySpec(expected_type=int), + "trajectory_export_max_file_bytes": ConfigKeySpec(expected_type=int), +} + +MEMORY_PATH_CONFIG_KEYS: Final[frozenset[str]] = frozenset({"db_path"}) + +MEMORY_CONFIG_DEFAULTS: Final[dict[str, object]] = { + "backend": DEFAULT_MEMORY_BACKEND, + "db_path": 
DEFAULT_MEMORY_DB_PATH, + "active_retention_days": DEFAULT_MEMORY_ACTIVE_RETENTION_DAYS, + "stale_retention_days": DEFAULT_MEMORY_STALE_RETENTION_DAYS, + "draft_retention_days": DEFAULT_MEMORY_DRAFT_RETENTION_DAYS, + "rejected_retention_days": DEFAULT_MEMORY_REJECTED_RETENTION_DAYS, + "archived_retention_days": DEFAULT_MEMORY_ARCHIVED_RETENTION_DAYS, + "receipt_retention_days": DEFAULT_MEMORY_RECEIPT_RETENTION_DAYS, + "max_records": DEFAULT_MEMORY_MAX_RECORDS, + "max_candidates": DEFAULT_MEMORY_MAX_CANDIDATES, + "max_evidence_per_record": DEFAULT_MEMORY_MAX_EVIDENCE_PER_RECORD, + "max_statement_chars": DEFAULT_MEMORY_MAX_STATEMENT_CHARS, + "max_blast_radius_cache_entries": DEFAULT_MEMORY_MAX_BLAST_RADIUS_CACHE_ENTRIES, + "git_hotspot_period_days": DEFAULT_MEMORY_GIT_HOTSPOT_PERIOD_DAYS, + "git_hotspot_min_changes": DEFAULT_MEMORY_GIT_HOTSPOT_MIN_CHANGES, + "mcp_sync_policy": DEFAULT_MEMORY_MCP_SYNC_POLICY, + "projection_rebuild_policy": DEFAULT_MEMORY_PROJECTION_REBUILD_POLICY, + "projection_rebuild_running_timeout_seconds": ( + DEFAULT_MEMORY_PROJECTION_REBUILD_RUNNING_TIMEOUT_SECONDS + ), + "projection_rebuild_spawn_worker": DEFAULT_MEMORY_PROJECTION_REBUILD_SPAWN_WORKER, + "projection_rebuild_coalesce_window_seconds": ( + DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_WINDOW_SECONDS + ), + "projection_rebuild_coalesce_min_delta": ( + DEFAULT_MEMORY_PROJECTION_REBUILD_COALESCE_MIN_DELTA + ), + "trajectories_enabled": DEFAULT_MEMORY_TRAJECTORIES_ENABLED, + "trajectory_retention_days": DEFAULT_MEMORY_TRAJECTORY_RETENTION_DAYS, + "trajectory_export_enabled": DEFAULT_MEMORY_TRAJECTORY_EXPORT_ENABLED, + "trajectory_export_include_payloads": ( + DEFAULT_MEMORY_TRAJECTORY_EXPORT_INCLUDE_PAYLOADS + ), + "trajectory_export_max_record_bytes": ( + DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_RECORD_BYTES + ), + "trajectory_export_max_file_bytes": DEFAULT_MEMORY_TRAJECTORY_EXPORT_MAX_FILE_BYTES, +} + +MEMORY_NESTED_TABLE_KEY: Final = "memory" +INGEST_NESTED_TABLE_KEY: Final = "ingest" + 
+# Nested sub-table under [tool.codeclone.memory]. Field-level validation is +# owned by the pydantic SemanticConfig (codeclone/config/memory.py), so there +# is intentionally no flat SEMANTIC_CONFIG_KEY_SPECS here — a single +# validation authority, no duplicated key specs. +SEMANTIC_NESTED_TABLE_KEY: Final = "semantic" + +__all__ = [ + "INGEST_NESTED_TABLE_KEY", + "MEMORY_CONFIG_DEFAULTS", + "MEMORY_CONFIG_KEY_SPECS", + "MEMORY_NESTED_TABLE_KEY", + "MEMORY_PATH_CONFIG_KEYS", + "SEMANTIC_NESTED_TABLE_KEY", +] diff --git a/codeclone/config/observability.py b/codeclone/config/observability.py new file mode 100644 index 00000000..e9574ebf --- /dev/null +++ b/codeclone/config/observability.py @@ -0,0 +1,101 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Platform observability configuration (Phase 29, Track A). + +Env-first resolution. Default OFF — when disabled, this does the minimal env +check and never imports psutil, never opens a store, never parses a pyproject +observability section (the near-zero-overhead contract, §4.2). The pyproject +``[tool.codeclone.observability]`` table is a later-cycle convenience; for now +every knob is an environment override. 
def resolve_observability_config(
    *, environ: Mapping[str, str] | None = None
) -> ObservabilityConfig:
    """Build the observability config from environment variables only.

    Reads ``environ`` when supplied, otherwise ``os.environ``. Collection is
    switched on only when ``CODECLONE_OBSERVABILITY_ENABLED`` holds a truthy
    token; every other outcome returns the shared disabled config. ``psutil``
    is probed (via ``find_spec``) only after collection is enabled and
    profiling was requested.

    Raises:
        ObservabilityConfigError: If the reserved payload-snapshot flag is
            set, or profiling is requested while ``psutil`` cannot be found.
    """
    source = os.environ if environ is None else environ
    token = source.get("CODECLONE_OBSERVABILITY_ENABLED", "").strip().lower()
    if token in _FALSE:
        return _DISABLED

    explicit_on = token in _TRUE
    force = _env_flag(source, "CODECLONE_OBSERVABILITY_FORCE")
    # CI disables collection unless explicitly enabled or forced; FORCE is
    # meant to lift only the CI gate, never to enable collection by itself.
    # NOTE(review): as written, the `not explicit_on` test below already
    # returns the disabled config in every case this gate catches, so FORCE
    # and CI detection never change the outcome — confirm that is intended.
    blocked_by_ci = is_ci_environment(source) and not force and not explicit_on
    if blocked_by_ci or not explicit_on:
        return _DISABLED

    if _env_flag(source, "CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT"):
        raise ObservabilityConfigError(
            "observability payload_snapshot is reserved and rejected (MVP)."
        )

    wants_profile = _env_flag(source, "CODECLONE_OBSERVABILITY_PROFILE")
    if wants_profile and find_spec("psutil") is None:
        raise ObservabilityConfigError(
            "observability profile=true requires the codeclone[perf] extra (psutil)."
        )

    persist = _env_flag(source, "CODECLONE_OBSERVABILITY_PERSIST", default=True)
    capture_sizes = _env_flag(
        source, "CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES", default=True
    )
    return ObservabilityConfig(
        enabled=True,
        persist=persist,
        profile=wants_profile,
        capture_payload_sizes=capture_sizes,
    )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import importlib +import os import sys from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, BinaryIO from ..findings.clones.golden_fixtures import ( GoldenFixturePatternError, normalize_golden_fixture_patterns, ) +from .analytics_specs import ( + ANALYTICS_NESTED_TABLE_KEY, + ANALYTICS_PATH_CONFIG_KEYS, +) +from .memory_specs import ( + INGEST_NESTED_TABLE_KEY, + MEMORY_CONFIG_KEY_SPECS, + MEMORY_NESTED_TABLE_KEY, + MEMORY_PATH_CONFIG_KEYS, + SEMANTIC_NESTED_TABLE_KEY, +) from .spec import CONFIG_KEY_SPECS, PATH_CONFIG_KEYS, ConfigKeySpec if TYPE_CHECKING: @@ -75,6 +92,8 @@ def load_pyproject_config( ) -> dict[str, object]: config_path = root_path / "pyproject.toml" if not config_path.exists(): + if config_path.is_symlink(): + raise ConfigValidationError("pyproject.toml must not be a symlink.") return {} load_toml_fn = _load_toml if load_toml is None else load_toml @@ -82,6 +101,8 @@ def load_pyproject_config( payload: object try: payload = load_toml_fn(config_path) + except ConfigValidationError: + raise except OSError as exc: raise ConfigValidationError( f"Cannot read pyproject.toml at {config_path}: {exc}" @@ -111,7 +132,11 @@ def load_pyproject_config( f"{config_path}: 'tool.codeclone' must be object" ) - unknown = sorted(set(codeclone_obj.keys()) - set(config_key_specs)) + unknown = sorted( + set(codeclone_obj.keys()) + - set(config_key_specs) + - {MEMORY_NESTED_TABLE_KEY, ANALYTICS_NESTED_TABLE_KEY} + ) if unknown: raise ConfigValidationError( "Unknown key(s) in tool.codeclone: " + ", ".join(unknown) @@ -119,6 +144,8 @@ def load_pyproject_config( validated: dict[str, object] = {} for key in sorted(codeclone_obj.keys()): + if key in {MEMORY_NESTED_TABLE_KEY, ANALYTICS_NESTED_TABLE_KEY}: + continue value = validate_config_value( key=key, value=codeclone_obj[key], @@ -130,9 +157,129 @@ def load_pyproject_config( 
root_path=root_path, path_config_keys=path_config_keys, ) + + memory_obj = codeclone_obj.get(MEMORY_NESTED_TABLE_KEY) + if memory_obj is not None: + validated[MEMORY_NESTED_TABLE_KEY] = _validate_nested_memory_table( + memory_obj=memory_obj, + root_path=root_path, + config_path=config_path, + ) + analytics_obj = codeclone_obj.get(ANALYTICS_NESTED_TABLE_KEY) + if analytics_obj is not None: + validated[ANALYTICS_NESTED_TABLE_KEY] = _validate_nested_analytics_table( + analytics_obj=analytics_obj, + root_path=root_path, + config_path=config_path, + ) return validated +def _validate_nested_analytics_table( + *, + analytics_obj: object, + root_path: Path, + config_path: Path, +) -> dict[str, object]: + if not isinstance(analytics_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone.analytics' must be object" + ) + normalized: dict[str, object] = {} + for key in sorted(analytics_obj.keys()): + value = analytics_obj[key] + if key in ANALYTICS_PATH_CONFIG_KEYS and isinstance(value, str): + normalized[key] = normalize_path_config_value( + key=key, + value=value, + root_path=root_path, + path_config_keys=ANALYTICS_PATH_CONFIG_KEYS, + ) + else: + normalized[key] = value + return normalized + + +def _validate_nested_memory_table( + *, + memory_obj: object, + root_path: Path, + config_path: Path, +) -> dict[str, object]: + if not isinstance(memory_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone.memory' must be object" + ) + unknown = sorted( + set(memory_obj.keys()) + - set(MEMORY_CONFIG_KEY_SPECS) + - {SEMANTIC_NESTED_TABLE_KEY, INGEST_NESTED_TABLE_KEY} + ) + if unknown: + raise ConfigValidationError( + "Unknown key(s) in tool.codeclone.memory: " + ", ".join(unknown) + ) + validated: dict[str, object] = {} + for key in sorted(memory_obj.keys()): + if key in {SEMANTIC_NESTED_TABLE_KEY, INGEST_NESTED_TABLE_KEY}: + continue + value = validate_config_value( + 
key=key, + value=memory_obj[key], + config_key_specs=MEMORY_CONFIG_KEY_SPECS, + ) + validated[key] = normalize_path_config_value( + key=key, + value=value, + root_path=root_path, + path_config_keys=MEMORY_PATH_CONFIG_KEYS, + ) + semantic_obj = memory_obj.get(SEMANTIC_NESTED_TABLE_KEY) + if semantic_obj is not None: + validated[SEMANTIC_NESTED_TABLE_KEY] = _validate_nested_semantic_table( + semantic_obj=semantic_obj, + config_path=config_path, + ) + ingest_obj = memory_obj.get(INGEST_NESTED_TABLE_KEY) + if ingest_obj is not None: + validated[INGEST_NESTED_TABLE_KEY] = _validate_nested_ingest_table( + ingest_obj=ingest_obj, + config_path=config_path, + ) + return validated + + +def _validate_nested_ingest_table( + *, + ingest_obj: object, + config_path: Path, +) -> dict[str, object]: + if not isinstance(ingest_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone.memory.ingest' must be object" + ) + return dict(ingest_obj) + + +def _validate_nested_semantic_table( + *, + semantic_obj: object, + config_path: Path, +) -> dict[str, object]: + # Structural boundary only: ensure it is a table. Field-level validation + # (allowed keys, types, literals, ranges) is owned by the pydantic + # SemanticConfig in resolve_memory_config — one validation authority. 
+ if not isinstance(semantic_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + f"{config_path}: 'tool.codeclone.memory.semantic' must be object" + ) + return dict(semantic_obj) + + def normalize_path_config_value( *, key: str, @@ -187,7 +334,7 @@ def _load_toml(path: Path) -> object: if sys.version_info >= (3, 11): import tomllib - with path.open("rb") as config_file: + with _open_toml_file_no_follow(path) as config_file: return tomllib.load(config_file) try: @@ -201,10 +348,29 @@ def _load_toml(path: Path) -> object: if not callable(load_fn): raise ConfigValidationError("Invalid 'tomli' module: missing callable 'load'.") - with path.open("rb") as config_file: + with _open_toml_file_no_follow(path) as config_file: return load_fn(config_file) +def open_repo_config(root_path: Path) -> BinaryIO: + """Open repo ``pyproject.toml`` through the security-hardened config path.""" + + return _open_toml_file_no_follow(root_path / "pyproject.toml") + + +def _open_toml_file_no_follow(path: Path) -> BinaryIO: + if path.is_symlink(): + raise ConfigValidationError("pyproject.toml must not be a symlink.") + if getattr(sys, "platform", "") == "win32": + return path.open("rb") + flags = os.O_RDONLY + nofollow = getattr(os, "O_NOFOLLOW", 0) + if isinstance(nofollow, int): + flags |= nofollow + fd = os.open(path, flags) + return os.fdopen(fd, "rb") + + __all__ = [ "CONFIG_KEY_SPECS", "PATH_CONFIG_KEYS", @@ -212,5 +378,6 @@ def _load_toml(path: Path) -> object: "_load_toml", "load_pyproject_config", "normalize_path_config_value", + "open_repo_config", "validate_config_value", ] diff --git a/codeclone/config/resolver.py b/codeclone/config/resolver.py index 03ef8964..f90e2d79 100644 --- a/codeclone/config/resolver.py +++ b/codeclone/config/resolver.py @@ -1,3 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations from dataclasses import dataclass diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 798e2bf4..6413427a 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -1,9 +1,20 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations from dataclasses import dataclass from typing import Final, Literal from .. import ui_messages as ui +from ..audit.validation import ( + DEFAULT_AUDIT_PATH, + DEFAULT_AUDIT_PAYLOADS, + DEFAULT_AUDIT_RETENTION_DAYS, + DEFAULT_AUDIT_TOKEN_ESTIMATOR, +) from ..contracts import ( DEFAULT_BASELINE_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -27,6 +38,11 @@ DEFAULT_SEGMENT_MIN_STMT, DEFAULT_TEXT_REPORT_PATH, ) +from .intent_registry_defaults import ( + DEFAULT_INTENT_REGISTRY_BACKEND, + DEFAULT_INTENT_REGISTRY_DB_PATH, + DEFAULT_INTENT_REGISTRY_RETENTION_DAYS, +) CliKind = Literal[ "positional", @@ -231,6 +247,105 @@ def _option( metavar="GIT_REF", help_text=ui.HELP_PATHS_FROM_GIT_DIFF, ), + _option( + dest="blast_radius", + group="Analysis", + cli_kind="value", + flags=("--blast-radius",), + default=None, + nargs="+", + metavar="FILE", + help_text=ui.HELP_BLAST_RADIUS, + ), + _option( + dest="patch_verify", + group="Analysis", + cli_kind="store_true", + flags=("--patch-verify",), + default=False, + help_text=ui.HELP_PATCH_VERIFY, + ), + _option( + dest="strictness", + group="Analysis", + cli_kind="value", + flags=("--strictness",), + default="ci", + metavar="LEVEL", + help_text=ui.HELP_STRICTNESS, + ), + _option( + dest="session_stats", + 
group="Analysis", + cli_kind="store_true", + flags=("--session-stats",), + default=False, + help_text=ui.HELP_SESSION_STATS, + ), + _option( + dest="audit", + group="Analysis", + cli_kind="store_true", + flags=("--audit",), + default=False, + help_text=ui.HELP_AUDIT, + ), + _option( + dest="audit_json", + group="Analysis", + cli_kind="store_true", + flags=("--audit-json",), + default=False, + help_text=ui.HELP_AUDIT_JSON, + ), + _option( + dest="audit_enabled", + group=None, + default=False, + pyproject_type=bool, + ), + _option( + dest="audit_path", + group=None, + default=DEFAULT_AUDIT_PATH, + pyproject_type=str, + ), + _option( + dest="audit_payloads", + group=None, + default=DEFAULT_AUDIT_PAYLOADS, + pyproject_type=str, + ), + _option( + dest="audit_retention_days", + group=None, + default=DEFAULT_AUDIT_RETENTION_DAYS, + pyproject_type=int, + ), + _option( + dest="audit_token_estimator", + group=None, + default=DEFAULT_AUDIT_TOKEN_ESTIMATOR, + pyproject_type=str, + ), + _option( + dest="intent_registry_backend", + group=None, + default=DEFAULT_INTENT_REGISTRY_BACKEND, + pyproject_type=str, + ), + _option( + dest="intent_registry_path", + group=None, + default=DEFAULT_INTENT_REGISTRY_DB_PATH, + pyproject_type=str, + ), + _option( + dest="intent_registry_retention_days", + group=None, + default=DEFAULT_INTENT_REGISTRY_RETENTION_DAYS, + pyproject_type=int, + ), _option( dest="cache_path", group="Analysis", diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index f7ff9e51..083c00e6 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -12,9 +12,44 @@ BASELINE_SCHEMA_VERSION: Final = "2.1" BASELINE_FINGERPRINT_VERSION: Final = "1" -CACHE_VERSION: Final = "2.8" +CACHE_VERSION: Final = "2.10" REPORT_SCHEMA_VERSION: Final = "2.11" METRICS_BASELINE_SCHEMA_VERSION: Final = "1.2" +ENGINEERING_MEMORY_SCHEMA_VERSION: Final = "1.7" +# Semantic retrieval index (Phase 20). 
Derived, rebuildable sidecar — NOT +# covered by ENGINEERING_MEMORY_SCHEMA_VERSION. Bump to invalidate the index +# on an incompatible projection/row-format change (forces a rebuild, not a +# SQLite migration). +SEMANTIC_INDEX_FORMAT_VERSION: Final = "2" +PATCH_TRAIL_SCHEMA_VERSION: Final = "1" +# Platform observability sqlite store (.codeclone/db/platform_observability.sqlite3): +# a runtime-profiling plane separate from audit/memory. Bump on an incompatible +# observability schema change. +PLATFORM_OBSERVABILITY_SCHEMA_VERSION: Final = "1.1" + +# Memory-derived projection/derivation versions. NOT persistence schema +# versions: bump to supersede previously derived rows on an incompatible +# projection/scoring/distillation change (re-projection, not a SQLite +# migration). Defined here so all version constants live in one place; the +# owning modules re-export these names. +TRAJECTORY_PROJECTION_VERSION: Final = "trajectory-v3" +TRAJECTORY_PROJECTION_VERSION_V1: Final = "trajectory-v1" +TRAJECTORY_QUALITY_SCORE_VERSION: Final = "2" +EXPERIENCE_DISTILLATION_VERSION: Final = "experience-v1" +# IDE governance HMAC attestation protocol version (VS Code Memory channel). +IDE_GOVERNANCE_PROTOCOL_VERSION: Final = 2 + +# Corpus analytics store (.codeclone/analytics/corpus_clustering.sqlite3) and +# derived export/representation contracts. Bump independently from memory schema. 
+CORPUS_ANALYTICS_STORE_SCHEMA_VERSION: Final = "1.2" +CORPUS_EXPORT_SCHEMA_VERSION: Final = "1.3" +CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION: Final = "1" +CORPUS_CONTROL_PLANE_CONTRACT_VERSION: Final = "1.0" +CORPUS_REPRESENTATION_CONTRACT_VERSION: Final = "3" +CORPUS_NORMALIZER_VERSION: Final = "1" +CORPUS_EMBEDDING_CONTRACT_VERSION: Final = "2" +CORPUS_AGENT_LABEL_CONTRACT_VERSION: Final = "1" +CORPUS_PARTITION_MAP_VERSION: Final = "1" DEFAULT_COMPLEXITY_THRESHOLD: Final = 20 DEFAULT_COUPLING_THRESHOLD: Final = 10 @@ -35,11 +70,11 @@ DEFAULT_MAX_BASELINE_SIZE_MB: Final = 5 DEFAULT_COVERAGE_MIN: Final = 50 DEFAULT_BASELINE_PATH: Final = "codeclone.baseline.json" -DEFAULT_HTML_REPORT_PATH: Final = ".cache/codeclone/report.html" -DEFAULT_JSON_REPORT_PATH: Final = ".cache/codeclone/report.json" -DEFAULT_MARKDOWN_REPORT_PATH: Final = ".cache/codeclone/report.md" -DEFAULT_SARIF_REPORT_PATH: Final = ".cache/codeclone/report.sarif" -DEFAULT_TEXT_REPORT_PATH: Final = ".cache/codeclone/report.txt" +DEFAULT_HTML_REPORT_PATH: Final = ".codeclone/report.html" +DEFAULT_JSON_REPORT_PATH: Final = ".codeclone/report.json" +DEFAULT_MARKDOWN_REPORT_PATH: Final = ".codeclone/report.md" +DEFAULT_SARIF_REPORT_PATH: Final = ".codeclone/report.sarif" +DEFAULT_TEXT_REPORT_PATH: Final = ".codeclone/report.txt" COMPLEXITY_RISK_LOW_MAX: Final = 10 COMPLEXITY_RISK_MEDIUM_MAX: Final = 20 @@ -100,6 +135,15 @@ def cli_help_epilog() -> str: "COHESION_RISK_MEDIUM_MAX", "COMPLEXITY_RISK_LOW_MAX", "COMPLEXITY_RISK_MEDIUM_MAX", + "CORPUS_AGENT_LABEL_CONTRACT_VERSION", + "CORPUS_ANALYTICS_STORE_SCHEMA_VERSION", + "CORPUS_CONTROL_PLANE_CONTRACT_VERSION", + "CORPUS_EMBEDDING_CONTRACT_VERSION", + "CORPUS_EXPORT_SCHEMA_VERSION", + "CORPUS_NORMALIZER_VERSION", + "CORPUS_PARTITION_MAP_VERSION", + "CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION", + "CORPUS_REPRESENTATION_CONTRACT_VERSION", "COUPLING_RISK_LOW_MAX", "COUPLING_RISK_MEDIUM_MAX", "DEFAULT_BASELINE_PATH", @@ -127,15 +171,23 @@ def cli_help_epilog() -> str: 
"DEFAULT_SEGMENT_MIN_STMT", "DEFAULT_TEXT_REPORT_PATH", "DOCS_URL", + "ENGINEERING_MEMORY_SCHEMA_VERSION", + "EXPERIENCE_DISTILLATION_VERSION", "HEALTH_DEPENDENCY_CYCLE_PENALTY", "HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER", "HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY", "HEALTH_DEPENDENCY_DEPTH_P95_MARGIN", "HEALTH_WEIGHTS", + "IDE_GOVERNANCE_PROTOCOL_VERSION", "ISSUES_URL", "METRICS_BASELINE_SCHEMA_VERSION", + "PATCH_TRAIL_SCHEMA_VERSION", "REPORT_SCHEMA_VERSION", "REPOSITORY_URL", + "SEMANTIC_INDEX_FORMAT_VERSION", + "TRAJECTORY_PROJECTION_VERSION", + "TRAJECTORY_PROJECTION_VERSION_V1", + "TRAJECTORY_QUALITY_SCORE_VERSION", "ExitCode", "cli_help_epilog", ] diff --git a/codeclone/controller_insights/__init__.py b/codeclone/controller_insights/__init__.py new file mode 100644 index 00000000..c35d2a31 --- /dev/null +++ b/codeclone/controller_insights/__init__.py @@ -0,0 +1,23 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .audit_trail import controller_audit_trail_payload +from .session_stats import ( + SessionSnapshot, + collect_session_snapshot, + session_snapshot_to_payload, + workspace_session_stats_payload, +) + +__all__ = [ + "SessionSnapshot", + "collect_session_snapshot", + "controller_audit_trail_payload", + "session_snapshot_to_payload", + "workspace_session_stats_payload", +] diff --git a/codeclone/controller_insights/audit_trail.py b/codeclone/controller_insights/audit_trail.py new file mode 100644 index 00000000..e472f98a --- /dev/null +++ b/codeclone/controller_insights/audit_trail.py @@ -0,0 +1,133 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +from ..audit.reader import ( + AuditSummary, + payload_footprint_to_dict, + read_audit_summary, +) +from ..audit.validation import ( + DEFAULT_AUDIT_PATH, + AuditConfigError, + AuditReadError, + resolve_audit_path, +) +from ..config.pyproject_loader import load_pyproject_config + +AUDIT_NOT_ENABLED_MESSAGE = ( + "Controller audit trail is disabled. Set audit_enabled=true in pyproject.toml " + "to record MCP controller events." +) + + +def _load_audit_db_path(root_path: Path, audit_path_value: str | None) -> Path: + config = load_pyproject_config(root_path) + if not bool(config.get("audit_enabled", False)): + raise AuditConfigError(AUDIT_NOT_ENABLED_MESSAGE) + configured = audit_path_value or config.get("audit_path", DEFAULT_AUDIT_PATH) + return resolve_audit_path(root_path=root_path, value=configured) + + +def audit_summary_to_payload(summary: AuditSummary) -> dict[str, object]: + events = [ + { + "event_id": event.event_id, + "event_type": event.event_type, + "severity": event.severity, + "created_at_utc": event.created_at_utc, + "run_id": event.run_id, + "intent_id": event.intent_id, + "status": event.status, + "agent_label": event.agent_label, + "summary": event.summary, + "estimated_tokens": event.estimated_tokens, + "token_encoding": event.token_encoding, + "payload_characters": event.payload_characters, + } + for event in summary.events + ] + footprint = ( + payload_footprint_to_dict(summary.payload_footprint) + if summary.payload_footprint is not None + else None + ) + return { + "status": "ok", + "database": { + "path": str(summary.db_path), + "size_bytes": summary.db_size_bytes, + "retention_days": summary.retention_days, + }, + "counts": { + "total_events": summary.total_events, + "intent_events": 
summary.intent_events, + "contract_events": summary.contract_events, + "receipt_events": summary.receipt_events, + "violation_events": summary.violation_events, + }, + "time_range": { + "oldest_event_utc": summary.oldest_event_utc, + "latest_event_utc": summary.latest_event_utc, + }, + "token_summary": { + "total_estimated_tokens": summary.total_estimated_tokens, + "token_encoding": summary.token_encoding, + "token_event_count": summary.token_event_count, + }, + "payload_footprint": footprint, + "events": events, + } + + +def controller_audit_trail_payload( + root_path: Path, + *, + limit: int = 50, + audit_path_value: str | None = None, +) -> dict[str, object]: + try: + db_path = _load_audit_db_path(root_path, audit_path_value) + summary = read_audit_summary(db_path=db_path, limit=limit) + except AuditConfigError as exc: + return { + "status": "disabled", + "message": str(exc), + "counts": { + "total_events": 0, + "intent_events": 0, + "contract_events": 0, + "receipt_events": 0, + "violation_events": 0, + }, + "events": [], + "payload_footprint": None, + } + except AuditReadError as exc: + return { + "status": "empty", + "message": str(exc), + "counts": { + "total_events": 0, + "intent_events": 0, + "contract_events": 0, + "receipt_events": 0, + "violation_events": 0, + }, + "events": [], + "payload_footprint": None, + } + return audit_summary_to_payload(summary) + + +__all__ = [ + "AUDIT_NOT_ENABLED_MESSAGE", + "audit_summary_to_payload", + "controller_audit_trail_payload", +] diff --git a/codeclone/controller_insights/session_stats.py b/codeclone/controller_insights/session_stats.py new file mode 100644 index 00000000..1e041182 --- /dev/null +++ b/codeclone/controller_insights/session_stats.py @@ -0,0 +1,638 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import os +import time +from collections import defaultdict +from collections.abc import Mapping +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..audit.reader import AnalysisRunSnapshot + from ..surfaces.mcp._workspace_intents import WorkspaceIntentRecord + +from ..paths.workspace import REPORT_JSON_PARTS as _REPORT_PATH_PARTS +from ..utils.utc_timestamps import age_seconds_since_utc_timestamp + +_MAX_ALLOWED_FILES_SHOWN = 2 +_MAX_TOP_WORKFLOWS_SHOWN = 3 +_PLAIN_LABEL_WIDTH = 25 + + +@dataclass(frozen=True, slots=True) +class WorkflowFootprintSnapshot: + workflow_kind: str + workflow_id: str + call_count: int + total_tokens: int + max_tokens: int + agent_label: str + + +@dataclass(frozen=True, slots=True) +class IntentSnapshot: + intent_id: str + status: str + ownership: str + scope_file_count: int + allowed_files: tuple[str, ...] + declared_at_utc: str + lease_remaining_seconds: int + + +@dataclass(frozen=True, slots=True) +class AgentSnapshot: + pid: int + start_epoch: int + label: str + alive: bool + intents: tuple[IntentSnapshot, ...] + + +@dataclass(frozen=True, slots=True) +class SessionSnapshot: + root: Path + agents: tuple[AgentSnapshot, ...] 
+ stale_count: int + expired_count: int + recoverable_count: int + latest_run_id: str | None + latest_run_health: int | None + latest_run_findings: int | None + latest_run_files: int | None + latest_run_age_seconds: int | None + latest_run_source: str | None + cache_present: bool + workspace_health: str + intent_registry_backend: str + intent_registry_storage: str + audit_enabled: bool = False + audit_storage: str | None = None + mcp_token_footprint: int | None = None + mcp_token_encoding: str | None = None + mcp_token_event_count: int = 0 + top_workflows: tuple[WorkflowFootprintSnapshot, ...] = () + + +def collect_session_snapshot(root_path: Path) -> SessionSnapshot: + from ..surfaces.mcp._workspace_intents import ( + IntentOwnership, + classify_intent_ownership, + list_workspace_intent_records_for_recovery, + utc_now, + ) + + now = utc_now() + own_pid = os.getpid() + own_start_epoch = _process_start_epoch() + + try: + records = list_workspace_intent_records_for_recovery(root=root_path) + except Exception: + records = () + + stale_count = 0 + expired_count = 0 + recoverable_count = 0 + agent_intents: dict[tuple[int, int], list[IntentSnapshot]] = defaultdict(list) + agent_labels: dict[tuple[int, int], str] = {} + agent_alive: dict[tuple[int, int], bool] = {} + + for record in records: + ownership = classify_intent_ownership( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + now=now, + ) + + if ownership == IntentOwnership.EXPIRED: + expired_count += 1 + continue + if ownership == IntentOwnership.OWN_STALE: + stale_count += 1 + if ownership == IntentOwnership.RECOVERABLE: + recoverable_count += 1 + + lease_remaining = _lease_remaining_seconds(record, now) + scope = record.scope + allowed_files: list[str] = [] + if isinstance(scope, dict): + raw_files = scope.get("allowed_files") + if isinstance(raw_files, list): + allowed_files = [str(f) for f in raw_files] + + agent_key = (record.agent_pid, record.agent_start_epoch) + agent_labels[agent_key] = 
record.agent_label + if agent_key not in agent_alive: + agent_alive[agent_key] = _is_pid_alive(record.agent_pid) + + agent_intents[agent_key].append( + IntentSnapshot( + intent_id=record.intent_id, + status=record.status, + ownership=ownership.value, + scope_file_count=len(allowed_files), + allowed_files=tuple(sorted(allowed_files)), + declared_at_utc=record.declared_at_utc, + lease_remaining_seconds=lease_remaining, + ) + ) + + agents: list[AgentSnapshot] = [] + for agent_key in sorted(agent_intents): + pid, start_epoch = agent_key + agents.append( + AgentSnapshot( + pid=pid, + start_epoch=start_epoch, + label=agent_labels.get(agent_key, ""), + alive=agent_alive.get(agent_key, False), + intents=tuple(agent_intents[agent_key]), + ) + ) + + audit_enabled, audit_storage = _read_audit_config(root_path) + ( + latest_run_id, + latest_run_health, + latest_run_findings, + latest_run_files, + latest_run_age_seconds, + latest_run_source, + cache_present, + ) = _resolve_latest_run(root_path, audit_enabled=audit_enabled) + + workspace_health = _classify_workspace_health( + agents=agents, + stale_count=stale_count, + expired_count=expired_count, + ) + + mcp_tokens, mcp_enc, mcp_count, top_workflows = _read_audit_token_footprint( + root_path + ) + from ..config.intent_registry import intent_registry_summary + + registry = intent_registry_summary(root_path) + + return SessionSnapshot( + root=root_path, + agents=tuple(agents), + stale_count=stale_count, + expired_count=expired_count, + recoverable_count=recoverable_count, + latest_run_id=latest_run_id, + latest_run_health=latest_run_health, + latest_run_findings=latest_run_findings, + latest_run_files=latest_run_files, + latest_run_age_seconds=latest_run_age_seconds, + latest_run_source=latest_run_source, + cache_present=cache_present, + workspace_health=workspace_health, + intent_registry_backend=registry["registry_backend"], + intent_registry_storage=registry["registry_storage"], + audit_enabled=audit_enabled, + 
audit_storage=audit_storage, + mcp_token_footprint=mcp_tokens, + mcp_token_encoding=mcp_enc, + mcp_token_event_count=mcp_count, + top_workflows=top_workflows, + ) + + +def _live_agent_count(snapshot: SessionSnapshot) -> int: + return sum(1 for agent in snapshot.agents if agent.alive) + + +def _active_intent_count(snapshot: SessionSnapshot) -> int: + return sum( + 1 + for agent in snapshot.agents + for intent in agent.intents + if agent.alive and intent.status == "active" + ) + + +def _visible_intent_count(snapshot: SessionSnapshot) -> int: + return sum(len(agent.intents) for agent in snapshot.agents) + + +def _classify_workspace_health( + *, + agents: list[AgentSnapshot] | tuple[AgentSnapshot, ...], + stale_count: int, + expired_count: int, +) -> str: + live_agents = [a for a in agents if a.alive] + if not live_agents: + return "idle" + + active_intent_agents = [ + agent + for agent in live_agents + if any(intent.status == "active" for intent in agent.intents) + ] + + if not active_intent_agents: + return "clean" + + if len(active_intent_agents) >= 2 and _has_scope_overlap(active_intent_agents): + return "contested" + + return "active" + + +def _has_scope_overlap(agents: list[AgentSnapshot]) -> bool: + all_files: list[set[str]] = [] + for agent in agents: + agent_files: set[str] = set() + for intent in agent.intents: + if intent.status == "active": + agent_files.update(intent.allowed_files) + if agent_files: + all_files.append(agent_files) + + for i in range(len(all_files)): + for j in range(i + 1, len(all_files)): + if all_files[i] & all_files[j]: + return True + return False + + +def _resolve_latest_run( + root_path: Path, + *, + audit_enabled: bool, +) -> tuple[ + str | None, + int | None, + int | None, + int | None, + int | None, + str | None, + bool, +]: + disk = _read_disk_report(root_path) + if audit_enabled: + audit_run = _read_audit_latest_run(root_path) + if audit_run is not None: + return ( + audit_run.run_id, + audit_run.health, + audit_run.findings, + 
audit_run.files, + audit_run.age_seconds, + audit_run.source, + disk[5], + ) + if disk[0] is not None: + return (*disk[:5], "disk_report", disk[5]) + return None, None, None, None, None, None, disk[5] + + +def _read_audit_latest_run(root_path: Path) -> AnalysisRunSnapshot | None: + try: + from ..audit.reader import read_latest_analysis_run + from ..audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path + from ..config.pyproject_loader import load_pyproject_config + + config = load_pyproject_config(root_path) + db_path = resolve_audit_path( + root_path=root_path, + value=config.get("audit_path", DEFAULT_AUDIT_PATH), + ) + return read_latest_analysis_run(db_path=db_path, repo_root=root_path) + except Exception: + return None + + +def _read_disk_report( + root_path: Path, +) -> tuple[str | None, int | None, int | None, int | None, int | None, bool]: + report_path = root_path.joinpath(*_REPORT_PATH_PARTS) + if not report_path.is_file(): + return None, None, None, None, None, False + try: + with open(report_path, "rb") as fh: + data = json.load(fh) + except Exception: + return None, None, None, None, None, False + + run_id: str | None = None + health: int | None = None + findings: int | None = None + files: int | None = None + age_seconds: int | None = None + + data_mapping = data if isinstance(data, dict) else {} + digest_value = _string_field( + _mapping_at(data_mapping, ("integrity", "digest")), "value" + ) + if digest_value is not None and len(digest_value) >= 8: + run_id = digest_value[:8] + + files = _list_field_len( + _mapping_at(data_mapping, ("inventory", "file_registry")), + "items", + ) + if _mapping_at(data_mapping, ("metrics", "families")) is not None: + health = _int_field(_mapping_at(data_mapping, ("health",)), "score") + findings = _int_field(_mapping_at(data_mapping, ("findings",)), "total") + + generated_at = _string_field( + _mapping_at(data_mapping, ("meta", "runtime")), + "report_generated_at_utc", + ) + if generated_at is None: + 
generated_at = _string_field( + _mapping_at(data_mapping, ("meta",)), + "report_generated_at_utc", + ) + age_seconds = age_seconds_since_utc_timestamp(generated_at) + if age_seconds is None: + try: + mtime = report_path.stat().st_mtime + age_seconds = max(0, int(time.time() - mtime)) + except OSError: + pass + + return run_id, health, findings, files, age_seconds, True + + +def _mapping_at( + payload: Mapping[str, object], + keys: tuple[str, ...], +) -> Mapping[str, object] | None: + current: object = payload + for key in keys: + if not isinstance(current, dict): + return None + current = current.get(key) + return current if isinstance(current, dict) else None + + +def _string_field(payload: Mapping[str, object] | None, key: str) -> str | None: + if payload is None: + return None + value = payload.get(key) + return value if isinstance(value, str) else None + + +def _int_field(payload: Mapping[str, object] | None, key: str) -> int | None: + if payload is None: + return None + value = payload.get(key) + return value if isinstance(value, int) else None + + +def _list_field_len(payload: Mapping[str, object] | None, key: str) -> int | None: + if payload is None: + return None + value = payload.get(key) + return len(value) if isinstance(value, list) else None + + +def _lease_remaining_seconds(record: WorkspaceIntentRecord, now: datetime) -> int: + from ..surfaces.mcp._workspace_intents import _lease_expiry + + expiry = _lease_expiry(record) + if expiry is None: + return 0 + delta = (expiry - now).total_seconds() + return max(0, int(delta)) + + +def _is_pid_alive(pid: int) -> bool: + if pid <= 0: + return False + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True + return True + + +def _process_start_epoch() -> int: + return int(time.time()) + + +def _read_audit_config(root_path: Path) -> tuple[bool, str | None]: + try: + from ..audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path + from 
..config.pyproject_loader import ( + ConfigValidationError, + load_pyproject_config, + ) + + config = load_pyproject_config(root_path) + except (ConfigValidationError, OSError): + return False, None + if not bool(config.get("audit_enabled", False)): + return False, None + try: + db_path = resolve_audit_path( + root_path=root_path, + value=config.get("audit_path", DEFAULT_AUDIT_PATH), + ) + except Exception: + return True, None + try: + storage = str(db_path.relative_to(root_path.resolve())) + except ValueError: + storage = str(db_path) + return True, storage + + +def _read_audit_token_footprint( + root_path: Path, +) -> tuple[int | None, str | None, int, tuple[WorkflowFootprintSnapshot, ...]]: + """Read aggregate token estimation from audit trail, if available.""" + try: + from ..audit.reader import read_audit_summary + from ..audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path + from ..config.pyproject_loader import ( + load_pyproject_config, + ) + + config = load_pyproject_config(root_path) + if not bool(config.get("audit_enabled", False)): + return None, None, 0, () + db_path = resolve_audit_path( + root_path=root_path, + value=config.get("audit_path", DEFAULT_AUDIT_PATH), + ) + if not db_path.is_file(): + return None, None, 0, () + summary = read_audit_summary(db_path=db_path, limit=1) + footprint = summary.payload_footprint + if footprint is not None: + workflows = tuple( + WorkflowFootprintSnapshot( + workflow_kind=workflow.workflow_kind, + workflow_id=workflow.workflow_id, + call_count=workflow.call_count, + total_tokens=workflow.total_tokens, + max_tokens=workflow.max_tokens, + agent_label=workflow.agent_label, + ) + for workflow in footprint.top_workflows[:_MAX_TOP_WORKFLOWS_SHOWN] + ) + return ( + footprint.total_tokens, + footprint.encoding, + footprint.tool_calls, + workflows, + ) + return ( + summary.total_estimated_tokens, + summary.token_encoding, + summary.token_event_count, + (), + ) + except Exception: + return None, None, 0, () + + 
+def _format_age(seconds: int | None) -> str: + if seconds is None or seconds < 0: + return "unknown" + if seconds < 60: + return f"{seconds}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + remaining_minutes = minutes % 60 + if remaining_minutes: + return f"{hours}h{remaining_minutes}m ago" + return f"{hours}h ago" + + +def latest_run_source_label(source: str | None) -> str | None: + from .. import ui_messages as ui + + labels = { + "disk_report": ui.SESSION_STATS_LATEST_RUN_SOURCE_DISK, + "audit_mcp": ui.SESSION_STATS_LATEST_RUN_SOURCE_AUDIT_MCP, + "audit_cli": ui.SESSION_STATS_LATEST_RUN_SOURCE_AUDIT_CLI, + } + if source is None: + return None + return labels.get(source) + + +def _format_duration(seconds: int) -> str: + if seconds <= 0: + return "expired" + if seconds < 60: + return f"{seconds}s" + minutes = seconds // 60 + remaining_seconds = seconds % 60 + if remaining_seconds: + return f"{minutes}m{remaining_seconds}s" + return f"{minutes}m" + + +def session_snapshot_to_payload(snapshot: SessionSnapshot) -> dict[str, object]: + agents = [ + { + "pid": agent.pid, + "start_epoch": agent.start_epoch, + "label": agent.label, + "alive": agent.alive, + "intents": [ + { + "intent_id": intent.intent_id, + "status": intent.status, + "ownership": intent.ownership, + "scope_file_count": intent.scope_file_count, + "allowed_files": list(intent.allowed_files), + "declared_at_utc": intent.declared_at_utc, + "lease_remaining_seconds": intent.lease_remaining_seconds, + } + for intent in agent.intents + ], + } + for agent in snapshot.agents + ] + workflows = [ + { + "workflow_kind": workflow.workflow_kind, + "workflow_id": workflow.workflow_id, + "call_count": workflow.call_count, + "total_tokens": workflow.total_tokens, + "max_tokens": workflow.max_tokens, + "agent_label": workflow.agent_label, + } + for workflow in snapshot.top_workflows + ] + return { + "status": "ok", + "workspace": { + "root": str(snapshot.root), + 
"health": snapshot.workspace_health, + "intent_registry_backend": snapshot.intent_registry_backend, + "intent_registry_storage": snapshot.intent_registry_storage, + }, + "counts": { + "live_agents": _live_agent_count(snapshot), + "active_intents": _active_intent_count(snapshot), + "visible_intents": _visible_intent_count(snapshot), + "stale": snapshot.stale_count, + "expired": snapshot.expired_count, + "recoverable": snapshot.recoverable_count, + }, + "latest_run": { + "run_id": snapshot.latest_run_id, + "health": snapshot.latest_run_health, + "findings": snapshot.latest_run_findings, + "files": snapshot.latest_run_files, + "age_seconds": snapshot.latest_run_age_seconds, + "source": snapshot.latest_run_source, + "cache_present": snapshot.cache_present, + }, + "audit": { + "enabled": snapshot.audit_enabled, + "storage": snapshot.audit_storage, + }, + "token_footprint": { + "total_tokens": snapshot.mcp_token_footprint, + "encoding": snapshot.mcp_token_encoding, + "tool_calls": snapshot.mcp_token_event_count, + }, + "top_workflows": workflows, + "agents": agents, + } + + +def workspace_session_stats_payload(root_path: Path) -> dict[str, object]: + return session_snapshot_to_payload(collect_session_snapshot(root_path)) + + +__all__ = [ + "AgentSnapshot", + "IntentSnapshot", + "SessionSnapshot", + "WorkflowFootprintSnapshot", + "_active_intent_count", + "_format_age", + "_format_duration", + "_live_agent_count", + "_visible_intent_count", + "collect_session_snapshot", + "latest_run_source_label", + "session_snapshot_to_payload", + "workspace_session_stats_payload", +] diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py index 0d67cf5b..6b7dd25a 100644 --- a/codeclone/core/_types.py +++ b/codeclone/core/_types.py @@ -24,6 +24,7 @@ CoverageJoinResult, DeadCandidate, FileMetrics, + FunctionRelationshipFacts, GroupItem, GroupItemLike, ModuleApiSurface, @@ -89,6 +90,7 @@ class DiscoveryResult: cached_docstring_modules: tuple[ModuleDocstringCoverage, ...] 
= () cached_api_modules: tuple[ModuleApiSurface, ...] = () cached_structural_findings: tuple[StructuralFindingGroup, ...] = () + cached_function_relationship_facts: tuple[FunctionRelationshipFacts, ...] = () cached_segment_report_projection: SegmentReportProjection | None = None cached_lines: int = 0 cached_functions: int = 0 @@ -139,6 +141,7 @@ class ProcessingResult: docstring_modules: tuple[ModuleDocstringCoverage, ...] = () api_modules: tuple[ModuleApiSurface, ...] = () structural_findings: tuple[StructuralFindingGroup, ...] = () + function_relationship_facts: tuple[FunctionRelationshipFacts, ...] = () source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] = () diff --git a/codeclone/core/discovery.py b/codeclone/core/discovery.py index 984f5321..eab28dc3 100644 --- a/codeclone/core/discovery.py +++ b/codeclone/core/discovery.py @@ -7,16 +7,22 @@ from __future__ import annotations from collections.abc import Mapping, Sequence +from typing import cast from ..cache.store import Cache, file_stat_signature from ..models import ( ClassMetrics, DeadCandidate, + FunctionRelationshipFacts, GroupItem, ModuleApiSurface, ModuleDep, ModuleDocstringCoverage, ModuleTypingCoverage, + RelationshipKind, + RelationshipOriginLane, + RelationshipRecord, + RelationshipResolutionStatus, RuntimeReachabilityFact, SecuritySurface, StructuralFindingGroup, @@ -40,6 +46,47 @@ ) from .discovery_cache import usable_cached_source_stats as _usable_cached_source_stats + +def _decode_cached_function_relationship_facts( + rows: Sequence[Mapping[str, object]], +) -> list[FunctionRelationshipFacts]: + """Reconstruct typed relationship facts from a trusted cache entry. + + Kept local to discovery (not in the shared cached-metrics decoder) so the + canonical ``load_cached_metrics_extended`` path stays byte-identical. The + cache is already integrity-validated on store, so this is a lean rehydration. 
+ """ + facts: list[FunctionRelationshipFacts] = [] + for row in rows: + relationships = row.get("relationships") + source_qualname = row.get("source_qualname") + if not isinstance(relationships, list) or not isinstance(source_qualname, str): + continue + records = tuple( + RelationshipRecord( + relation_kind=cast(RelationshipKind, record["relation_kind"]), + resolution_status=cast( + RelationshipResolutionStatus, record["resolution_status"] + ), + origin_lane=cast(RelationshipOriginLane, record["origin_lane"]), + source_qualname=str(record["source_qualname"]), + target_qualname=cast("str | None", record["target_qualname"]), + path=str(record["path"]), + line=cast(int, record["line"]), + expression=cast("str | None", record["expression"]), + resolution_rule=cast("str | None", record["resolution_rule"]), + ) + for record in relationships + if isinstance(record, Mapping) + ) + facts.append( + FunctionRelationshipFacts( + source_qualname=source_qualname, relationships=records + ) + ) + return facts + + DiscoveryBuffers = tuple[ list[GroupItem], list[GroupItem], @@ -94,11 +141,9 @@ def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: skipped_warnings, ) = _new_discovery_buffers() cached_sf: list[StructuralFindingGroup] = [] + cached_relationship_facts: list[FunctionRelationshipFacts] = [] cached_source_stats_by_file: list[tuple[str, int, int, int, int]] = [] - cached_lines = 0 - cached_functions = 0 - cached_methods = 0 - cached_classes = 0 + cached_lines = cached_functions = cached_methods = cached_classes = 0 all_file_paths: list[str] = [] for filepath in iter_py_files(str(boot.root)): @@ -163,6 +208,11 @@ def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: _decode_cached_structural_finding_group(group_dict, filepath) for group_dict in cached.get("structural_findings") or [] ) + cached_relationship_facts.extend( + _decode_cached_function_relationship_facts( + cached.get("function_relationship_facts") or [] + ) + ) continue 
files_to_process.append(filepath) @@ -228,6 +278,9 @@ def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: files_to_process=tuple(files_to_process), skipped_warnings=tuple(sorted(skipped_warnings)), cached_structural_findings=tuple(cached_sf), + cached_function_relationship_facts=tuple( + sorted(cached_relationship_facts, key=lambda facts: facts.source_qualname) + ), cached_segment_report_projection=cached_segment_projection, cached_lines=cached_lines, cached_functions=cached_functions, diff --git a/codeclone/core/parallelism.py b/codeclone/core/parallelism.py index a1375e5f..b3bcbfe1 100644 --- a/codeclone/core/parallelism.py +++ b/codeclone/core/parallelism.py @@ -14,6 +14,7 @@ from ..models import ( ClassMetrics, DeadCandidate, + FunctionRelationshipFacts, GroupItem, ModuleApiSurface, ModuleDep, @@ -95,6 +96,7 @@ def process( failed_files=(), source_read_failures=(), structural_findings=discovery.cached_structural_findings, + function_relationship_facts=discovery.cached_function_relationship_facts, source_stats_by_file=discovery.cached_source_stats_by_file, ) @@ -136,6 +138,9 @@ def process( all_structural_findings: list[StructuralFindingGroup] = list( discovery.cached_structural_findings ) + all_function_relationship_facts: list[FunctionRelationshipFacts] = list( + discovery.cached_function_relationship_facts + ) source_stats_by_file: dict[str, tuple[int, int, int, int]] = { filepath: (lines, functions, methods, classes) for ( @@ -233,6 +238,9 @@ def _accept_result(result: FileProcessResult) -> None: result.file_metrics.runtime_reachability ) all_security_surfaces.extend(result.file_metrics.security_surfaces) + all_function_relationship_facts.extend( + result.file_metrics.function_relationship_facts + ) if result.file_metrics.typing_coverage is not None: all_typing_modules.append(result.file_metrics.typing_coverage) if result.file_metrics.docstring_coverage is not None: @@ -370,6 +378,12 @@ def _run_sequential(files: Sequence[str]) -> None: 
failed_files=tuple(sorted(failed_files)), source_read_failures=tuple(sorted(source_read_failures)), structural_findings=tuple(all_structural_findings), + function_relationship_facts=tuple( + sorted( + all_function_relationship_facts, + key=lambda facts: facts.source_qualname, + ) + ), source_stats_by_file=tuple( (filepath, *stats) for filepath, stats in sorted(source_stats_by_file.items()) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index 4cbd52ca..4e45a0ec 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -9,7 +9,7 @@ import inspect import os from collections.abc import Callable -from pathlib import Path +from functools import lru_cache from ..analysis.normalizer import NormalizationConfig from ..analysis.units import extract_units_and_stats_from_source @@ -20,7 +20,7 @@ DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, ) -from ..scanner import module_name_from_path +from ..scanner import module_name_from_path, resolved_path_under_root from ._types import MAX_FILE_SIZE, FileProcessResult @@ -39,8 +39,16 @@ def process_file( segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, ) -> FileProcessResult: try: + resolved = resolved_path_under_root(filepath, root) + if resolved is None: + return FileProcessResult( + filepath=filepath, + success=False, + error="Source path resolves outside repository root.", + error_kind="source_read_error", + ) try: - stat_result = os.stat(filepath) + stat_result = os.stat(resolved) if stat_result.st_size > MAX_FILE_SIZE: return FileProcessResult( filepath=filepath, @@ -63,7 +71,7 @@ def process_file( "size": stat_result.st_size, } try: - source = Path(filepath).read_text("utf-8") + source = resolved.read_text("utf-8") except UnicodeDecodeError as exc: return FileProcessResult( filepath=filepath, @@ -143,24 +151,16 @@ def _invoke_process_file( "segment_min_loc": segment_min_loc, "segment_min_stmt": segment_min_stmt, } - try: - signature = inspect.signature(process_file) - except (TypeError, 
ValueError): + process_callable: Callable[..., FileProcessResult] = process_file + supported_names = _supported_process_file_kwarg_names(process_callable) + if supported_names is None: supported_kwargs = optional_kwargs else: - parameters = tuple(signature.parameters.values()) - if any( - parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters - ): - supported_kwargs = optional_kwargs - else: - supported_names = {parameter.name for parameter in parameters} - supported_kwargs = { - key: value - for key, value in optional_kwargs.items() - if key in supported_names - } - process_callable: Callable[..., FileProcessResult] = process_file + supported_kwargs = { + key: value + for key, value in optional_kwargs.items() + if key in supported_names + } return process_callable( filepath, root, @@ -169,3 +169,17 @@ def _invoke_process_file( min_stmt, **supported_kwargs, ) + + +@lru_cache(maxsize=32) +def _supported_process_file_kwarg_names( + process_callable: Callable[..., FileProcessResult], +) -> frozenset[str] | None: + try: + signature = inspect.signature(process_callable) + except (TypeError, ValueError): + return None + parameters = tuple(signature.parameters.values()) + if any(parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters): + return None + return frozenset(parameter.name for parameter in parameters) diff --git a/codeclone/domain/findings.py b/codeclone/domain/findings.py index 686e44fd..0bdb392f 100644 --- a/codeclone/domain/findings.py +++ b/codeclone/domain/findings.py @@ -35,6 +35,7 @@ CATEGORY_DEAD_CODE: Final = "dead_code" CATEGORY_DEPENDENCY: Final = "dependency" CATEGORY_COVERAGE: Final = "coverage" +CATEGORY_DESIGN: Final = "design" FINDING_KIND_CLONE_GROUP: Final = "clone_group" FINDING_KIND_UNUSED_SYMBOL: Final = "unused_symbol" @@ -45,6 +46,19 @@ FINDING_KIND_COVERAGE_HOTSPOT: Final = "coverage_hotspot" FINDING_KIND_COVERAGE_SCOPE_GAP: Final = "coverage_scope_gap" +DESIGN_KIND_INSTANCE_INDEPENDENT_METHOD: 
Final = "instance_independent_method" + +# Classifications for instance-independent method occurrences (Phase 21). +# Only ``candidate`` is a default-surfaced signal; the rest are context or +# suppressed so default payloads avoid noisy contract methods. +IIM_CLASSIFICATION_CANDIDATE: Final = "candidate" +IIM_CLASSIFICATION_DECORATED_CONTEXT: Final = "decorated_context" +IIM_CLASSIFICATION_INTERFACE_CONTRACT: Final = "interface_contract" +IIM_CLASSIFICATION_OVERRIDE_CONTEXT: Final = "override_context" +IIM_CLASSIFICATION_PROPERTY_LIKE: Final = "property_like" +IIM_CLASSIFICATION_DUNDER_PROTOCOL: Final = "dunder_protocol" +IIM_CLASSIFICATION_NOOP_STUB: Final = "noop_stub" + STRUCTURAL_KIND_DUPLICATED_BRANCHES: Final = "duplicated_branches" STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: Final = "clone_guard_exit_divergence" STRUCTURAL_KIND_CLONE_COHORT_DRIFT: Final = "clone_cohort_drift" @@ -57,12 +71,14 @@ "CATEGORY_COVERAGE", "CATEGORY_DEAD_CODE", "CATEGORY_DEPENDENCY", + "CATEGORY_DESIGN", "CATEGORY_STRUCTURAL", "CLONE_KIND_BLOCK", "CLONE_KIND_FUNCTION", "CLONE_KIND_SEGMENT", "CLONE_NOVELTY_KNOWN", "CLONE_NOVELTY_NEW", + "DESIGN_KIND_INSTANCE_INDEPENDENT_METHOD", "FAMILY_CLONE", "FAMILY_CLONES", "FAMILY_DEAD_CODE", @@ -77,6 +93,13 @@ "FINDING_KIND_FUNCTION_HOTSPOT", "FINDING_KIND_UNTESTED_HOTSPOT", "FINDING_KIND_UNUSED_SYMBOL", + "IIM_CLASSIFICATION_CANDIDATE", + "IIM_CLASSIFICATION_DECORATED_CONTEXT", + "IIM_CLASSIFICATION_DUNDER_PROTOCOL", + "IIM_CLASSIFICATION_INTERFACE_CONTRACT", + "IIM_CLASSIFICATION_NOOP_STUB", + "IIM_CLASSIFICATION_OVERRIDE_CONTEXT", + "IIM_CLASSIFICATION_PROPERTY_LIKE", "STRUCTURAL_KIND_CLONE_COHORT_DRIFT", "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", "STRUCTURAL_KIND_DUPLICATED_BRANCHES", diff --git a/codeclone/findings/design/__init__.py b/codeclone/findings/design/__init__.py new file mode 100644 index 00000000..ae0aa738 --- /dev/null +++ b/codeclone/findings/design/__init__.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the 
terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Report-only structural design signals (Phase 21). + +Design signals are advisory review context. They never affect gates, health, +baseline, fingerprints, patch verification acceptance, ``edit_allowed``, +blast-radius permissions, or Engineering Memory truth. +""" + +from __future__ import annotations + +from .instance_methods import ( + DesignFindingGroup, + DesignFindingSignature, + InstanceIndependentMethodOccurrence, + collect_instance_independent_methods, + group_instance_independent_methods, +) + +__all__ = [ + "DesignFindingGroup", + "DesignFindingSignature", + "InstanceIndependentMethodOccurrence", + "collect_instance_independent_methods", + "group_instance_independent_methods", +] diff --git a/codeclone/findings/design/instance_methods.py b/codeclone/findings/design/instance_methods.py new file mode 100644 index 00000000..f965d150 --- /dev/null +++ b/codeclone/findings/design/instance_methods.py @@ -0,0 +1,549 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Instance-independent method detection (Phase 21, report-only design signal). + +A method is *instance-independent* when it declares ``self`` but its executable +body never reads the instance receiver. This is a deterministic AST signal: +a method either loads its receiver binding or it does not. 
The interpretation +stays advisory — "does not read self" is **not** the same as "pure", and the +remediation language is always "review whether this belongs on the instance, as +a ``@staticmethod``, or as a module-level helper", never "convert to +``@staticmethod``". + +The detector is bounded: Python AST only, one walk per method body, no import +execution, no type checker, no MRO resolution, and no framework heuristics. +""" + +from __future__ import annotations + +import ast +import hashlib +from collections.abc import Iterable, Sequence +from dataclasses import dataclass + +from ...domain.findings import ( + DESIGN_KIND_INSTANCE_INDEPENDENT_METHOD, + IIM_CLASSIFICATION_CANDIDATE, + IIM_CLASSIFICATION_DECORATED_CONTEXT, + IIM_CLASSIFICATION_DUNDER_PROTOCOL, + IIM_CLASSIFICATION_INTERFACE_CONTRACT, + IIM_CLASSIFICATION_NOOP_STUB, + IIM_CLASSIFICATION_OVERRIDE_CONTEXT, + IIM_CLASSIFICATION_PROPERTY_LIKE, +) + +_RECEIVER_NAME = "self" + +# Decorators that mean the method is not an instance method at all, or is an +# overload stub — never emitted as an instance-independent candidate. +_IGNORED_DECORATORS = frozenset({"staticmethod", "classmethod", "overload"}) +_PROPERTY_DECORATORS = frozenset({"property", "setter", "deleter"}) +_ABSTRACT_DECORATORS = frozenset({"abstractmethod", "abstractproperty"}) +_OVERRIDE_DECORATORS = frozenset({"override"}) +# Decorators that do not change the receiver-independence interpretation. +_KNOWN_SAFE_DECORATORS = frozenset({"final"}) +_INTERFACE_BASE_NAMES = frozenset({"Protocol", "ABC"}) +_INTERFACE_METACLASS_NAMES = frozenset({"ABCMeta"}) + +# Classification precedence: lower rank wins for mixed cases (e.g. an abstract +# property is property_like, a decorated dunder is dunder_protocol). 
_CLASSIFICATION_RANK: dict[str, int] = {
    IIM_CLASSIFICATION_NOOP_STUB: 2,
    IIM_CLASSIFICATION_PROPERTY_LIKE: 3,
    IIM_CLASSIFICATION_DUNDER_PROTOCOL: 4,
    IIM_CLASSIFICATION_INTERFACE_CONTRACT: 5,
    IIM_CLASSIFICATION_OVERRIDE_CONTEXT: 6,
    IIM_CLASSIFICATION_DECORATED_CONTEXT: 7,
    IIM_CLASSIFICATION_CANDIDATE: 8,
}

# Classification buckets: membership tests against these three sets decide
# which counter of a group signature an occurrence contributes to.
_DEFAULT_CLASSIFICATIONS = frozenset({IIM_CLASSIFICATION_CANDIDATE})
_CONTEXT_CLASSIFICATIONS = frozenset(
    {
        IIM_CLASSIFICATION_INTERFACE_CONTRACT,
        IIM_CLASSIFICATION_OVERRIDE_CONTEXT,
        IIM_CLASSIFICATION_DECORATED_CONTEXT,
    }
)
_SUPPRESSED_CLASSIFICATIONS = frozenset(
    {
        IIM_CLASSIFICATION_PROPERTY_LIKE,
        IIM_CLASSIFICATION_DUNDER_PROTOCOL,
        IIM_CLASSIFICATION_NOOP_STUB,
    }
)

# AST node types treated as "a function definition" (sync and async).
_FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)


@dataclass(frozen=True, slots=True)
class InstanceIndependentMethodOccurrence:
    """One method whose first parameter is ``self`` but whose body never reads it."""

    file_path: str
    class_qualname: str
    # ``<class_qualname>.<method_name>``
    method_qualname: str
    method_name: str
    # First and last source line of the method definition.
    start: int
    end: int
    # One of the ``IIM_CLASSIFICATION_*`` values.
    classification: str
    receiver_name: str
    # Sorted last-component decorator names.
    decorators: tuple[str, ...]
    # Last-component base-class names, in declaration order.
    class_bases: tuple[str, ...]
    receiver_reads: int
    nested_receiver_reads: int


@dataclass(frozen=True, slots=True)
class DesignFindingSignature:
    """Per-group tallies of occurrences, split by classification bucket."""

    candidate_count: int
    production_candidate_count: int
    test_candidate_count: int
    context_count: int
    suppressed_count: int


@dataclass(frozen=True, slots=True)
class DesignFindingGroup:
    """All instance-independent method occurrences found on a single class."""

    finding_kind: str
    finding_key: str
    file_path: str
    class_qualname: str
    class_bases: tuple[str, ...]
    signature: DesignFindingSignature
    items: tuple[InstanceIndependentMethodOccurrence, ...]


def _simple_decorator_name(node: ast.expr) -> str:
    """Return the trailing dotted component of a decorator expression.

    A call such as ``@cache(maxsize=1)`` is unwrapped once to its callee.
    ``@staticmethod`` -> ``staticmethod``; ``@typing.override`` -> ``override``;
    ``@x.setter`` -> ``setter``. Any other expression shape yields ``""``.
    """
    callee = node.func if isinstance(node, ast.Call) else node
    if isinstance(callee, ast.Attribute):
        return callee.attr
    return callee.id if isinstance(callee, ast.Name) else ""


def _normalized_decorators(
    method: ast.FunctionDef | ast.AsyncFunctionDef,
) -> tuple[str, ...]:
    """Return the method's non-empty simple decorator names, sorted."""
    resolved = (_simple_decorator_name(decorator) for decorator in method.decorator_list)
    return tuple(sorted(filter(None, resolved)))


def _simple_base_name(node: ast.expr) -> str:
    """Return the trailing name of a base-class expression.

    A subscript (``Generic[T]``) is unwrapped once to its value; unsupported
    expression shapes yield ``""``.
    """
    base = node.value if isinstance(node, ast.Subscript) else node
    if isinstance(base, ast.Attribute):
        return base.attr
    return base.id if isinstance(base, ast.Name) else ""


def _class_base_names(class_node: ast.ClassDef) -> tuple[str, ...]:
    """Return the non-empty simple base names of a class, in declaration order."""
    return tuple(filter(None, map(_simple_base_name, class_node.bases)))


def _class_is_interface(class_node: ast.ClassDef, base_names: Sequence[str]) -> bool:
    """True when a base name or the ``metaclass=`` keyword marks an interface."""
    if not _INTERFACE_BASE_NAMES.isdisjoint(base_names):
        return True
    return any(
        keyword.arg == "metaclass"
        and _simple_base_name(keyword.value) in _INTERFACE_METACLASS_NAMES
        for keyword in class_node.keywords
    )


def _first_positional_arg(method: ast.FunctionDef | ast.AsyncFunctionDef) -> str | None:
    """Return the name of the first positional parameter, or ``None`` if absent."""
    signature = method.args
    # Positional-only parameters precede regular ones, so they win when present.
    leading = signature.posonlyargs or signature.args
    return leading[0].arg if leading else None


def _function_param_names(
    func: ast.FunctionDef | ast.AsyncFunctionDef | ast.Lambda,
) -> set[str]:
    """Return every parameter name declared by ``func``, including ``*``/``**``."""
    signature = func.args
    declared = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
    declared.extend(
        variadic
        for variadic in (signature.vararg, signature.kwarg)
        if variadic is not None
    )
    return {parameter.arg for parameter in declared}
class _ReceiverUseVisitor(ast.NodeVisitor):
    """Counts lexical reads of the outer method receiver.

    Descends into ``lambda`` and nested functions so a closure reading the outer
    ``self`` still proves instance dependence. A nested function whose own
    parameters shadow ``self`` masks the outer receiver for its body. Its
    decorators and parameter defaults are evaluated in the enclosing scope, so
    they are scanned before that shadowing takes effect. Nested ``ClassDef``
    bodies introduce a new receiver context and are not descended as
    outer-receiver evidence.
    """

    __slots__ = (
        "_depth",
        "_shadowed",
        "nested_receiver_reads",
        "receiver_reads",
    )

    def __init__(self) -> None:
        self._depth = 0
        self._shadowed = False
        self.receiver_reads = 0
        self.nested_receiver_reads = 0

    def _record_read(self) -> None:
        """Attribute one read to the direct body or to a nested callable."""
        if self._depth == 0:
            self.receiver_reads += 1
        else:
            self.nested_receiver_reads += 1

    def visit_Name(self, node: ast.Name) -> None:
        if (
            node.id == _RECEIVER_NAME
            and isinstance(node.ctx, ast.Load)
            and not self._shadowed
        ):
            self._record_read()

    def visit_Call(self, node: ast.Call) -> None:
        # Direct method-body zero-arg ``super()`` depends on the receiver even
        # though the AST has no ``Name("self")``. Nested zero-arg ``super()`` is
        # not inferred as outer-receiver use without full compiler semantics.
        if (
            self._depth == 0
            and not self._shadowed
            and isinstance(node.func, ast.Name)
            and node.func.id == "super"
            and not node.args
            and not node.keywords
        ):
            self._record_read()
        self.generic_visit(node)

    def _visit_parameter_annotations(self, signature: ast.arguments) -> None:
        """Visit every parameter annotation of ``signature`` (defaults excluded)."""
        parameters = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
        parameters.extend(
            variadic
            for variadic in (signature.vararg, signature.kwarg)
            if variadic is not None
        )
        for parameter in parameters:
            if parameter.annotation is not None:
                self.visit(parameter.annotation)

    def _visit_nested_callable(
        self,
        node: ast.FunctionDef | ast.AsyncFunctionDef | ast.Lambda,
    ) -> None:
        """Scan a nested ``def``/``lambda`` with correct scoping of its parts."""
        signature = node.args
        # Decorators and default values are evaluated when the ``def``/``lambda``
        # itself executes, i.e. in the enclosing scope. Scan them before the
        # nested depth/shadow state applies so ``lambda self=self: ...`` still
        # counts as a read of the outer receiver.
        decorators: list[ast.expr] = getattr(node, "decorator_list", [])
        for expression in (*decorators, *signature.defaults, *signature.kw_defaults):
            # ``kw_defaults`` holds ``None`` for keyword-only parameters that
            # have no default value.
            if expression is not None:
                self.visit(expression)

        previous_shadowed = self._shadowed
        self._depth += 1
        self._shadowed = previous_shadowed or (
            _RECEIVER_NAME in _function_param_names(node)
        )
        try:
            # Everything else (annotations, return annotation, body, ...) keeps
            # the nested treatment it had before.
            for field_name, value in ast.iter_fields(node):
                if field_name == "decorator_list":
                    continue  # already scanned in the enclosing scope
                if field_name == "args":
                    self._visit_parameter_annotations(signature)
                    continue
                children = value if isinstance(value, list) else (value,)
                for child in children:
                    if isinstance(child, ast.AST):
                        self.visit(child)
        finally:
            self._depth -= 1
            self._shadowed = previous_shadowed

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._visit_nested_callable(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._visit_nested_callable(node)

    def visit_Lambda(self, node: ast.Lambda) -> None:
        self._visit_nested_callable(node)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        # A nested class introduces a fresh receiver context. Its bases,
        # decorators, and keyword arguments still execute in the method body,
        # so scan those, but do not descend into its method bodies.
        for child in (*node.bases, *node.keywords, *node.decorator_list):
            self.visit(child)


def _count_receiver_usage(
    method: ast.FunctionDef | ast.AsyncFunctionDef,
) -> tuple[int, int]:
    """Return ``(direct_reads, nested_reads)`` of the receiver in the method body.

    Only ``method.body`` is scanned; the method's own signature and decorators
    are not.
    """
    visitor = _ReceiverUseVisitor()
    for statement in method.body:
        visitor.visit(statement)
    return visitor.receiver_reads, visitor.nested_receiver_reads


def _is_docstring_statement(statement: ast.stmt) -> bool:
    """True for any bare string-literal expression statement."""
    return (
        isinstance(statement, ast.Expr)
        and isinstance(statement.value, ast.Constant)
        and isinstance(statement.value.value, str)
    )


def _is_stub_statement(statement: ast.stmt) -> bool:
    """True for ``pass``, an ``...`` ellipsis expression, or ``NotImplementedError``."""
    if isinstance(statement, ast.Pass):
        return True
    if isinstance(statement, ast.Expr) and isinstance(statement.value, ast.Constant):
        return statement.value.value is Ellipsis
    if isinstance(statement, ast.Raise):
        return _raises_not_implemented(statement)
    return False


def _is_noop_stub(method: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
    """True when the body, ignoring string-literal statements, is only stubs."""
    body = [
        statement for statement in method.body if not _is_docstring_statement(statement)
    ]
    # A docstring-only body collapses to an empty list and counts as a stub.
    return all(_is_stub_statement(statement) for statement in body)


def _raises_not_implemented(statement: ast.Raise) -> bool:
    """True for ``raise NotImplementedError`` with or without call arguments."""
    exc = statement.exc
    if exc is None:
        # Bare ``raise`` re-raises; it is not a stub marker.
        return False
    target: ast.expr = exc.func if isinstance(exc, ast.Call) else exc
    return isinstance(target, ast.Name) and target.id == "NotImplementedError"


def _classify_method(
    *,
    method: ast.FunctionDef | ast.AsyncFunctionDef,
    decorators: Sequence[str],
    class_is_interface: bool,
) -> str:
    """Pick the single classification for a receiver-independent method.

    Every applicable classification is collected, then the one with the lowest
    ``_CLASSIFICATION_RANK`` wins. With nothing applicable the method is a
    plain ``candidate``.
    """
    decorator_set = set(decorators)
    candidates: list[str] = []

    if _is_noop_stub(method):
        candidates.append(IIM_CLASSIFICATION_NOOP_STUB)
    if decorator_set & _PROPERTY_DECORATORS:
        candidates.append(IIM_CLASSIFICATION_PROPERTY_LIKE)
    if _is_dunder(method.name):
        candidates.append(IIM_CLASSIFICATION_DUNDER_PROTOCOL)
    if class_is_interface or (decorator_set & _ABSTRACT_DECORATORS):
        candidates.append(IIM_CLASSIFICATION_INTERFACE_CONTRACT)
    if decorator_set & _OVERRIDE_DECORATORS:
        candidates.append(IIM_CLASSIFICATION_OVERRIDE_CONTEXT)

    # Any decorator not covered by the named sets makes the method
    # "decorated context".
    classified_names = (
        _PROPERTY_DECORATORS
        | _ABSTRACT_DECORATORS
        | _OVERRIDE_DECORATORS
        | _KNOWN_SAFE_DECORATORS
    )
    if decorator_set - classified_names:
        candidates.append(IIM_CLASSIFICATION_DECORATED_CONTEXT)

    if not candidates:
        return IIM_CLASSIFICATION_CANDIDATE
    return min(candidates, key=lambda name: _CLASSIFICATION_RANK[name])


def _is_dunder(name: str) -> bool:
    """True for ``__x__``-style names with at least one inner character."""
    return len(name) > 4 and name.startswith("__") and name.endswith("__")


def _node_end_line(node: ast.AST, fallback: int) -> int:
    """Return ``node.end_lineno`` when it is an int, otherwise ``fallback``."""
    end = getattr(node, "end_lineno", None)
    if isinstance(end, int):
        return end
    return fallback


def _iter_class_defs(tree: ast.Module) -> Iterable[tuple[ast.ClassDef, str]]:
    """Yield classes with a dotted qualname built from enclosing names.

    The walk recurses only through class and function definitions, so classes
    nested in those are found; a class defined under another statement (for
    example inside an ``if`` or ``try`` block) is not visited.
    """

    def walk(node: ast.AST, prefix: str) -> Iterable[tuple[ast.ClassDef, str]]:
        for child in ast.iter_child_nodes(node):
            if isinstance(child, ast.ClassDef):
                qualname = f"{prefix}{child.name}"
                yield child, qualname
                yield from walk(child, f"{qualname}.")
            elif isinstance(child, _FUNCTION_NODES):
                yield from walk(child, f"{prefix}{child.name}.")

    yield from walk(tree, "")


def _direct_methods(
    class_node: ast.ClassDef,
) -> Iterable[ast.FunctionDef | ast.AsyncFunctionDef]:
    """Yield function definitions that are direct statements of the class body."""
    for statement in class_node.body:
        if isinstance(statement, _FUNCTION_NODES):
            yield statement


def _occurrence_for_method(
    method: ast.FunctionDef | ast.AsyncFunctionDef,
    *,
    class_qualname: str,
    base_names: tuple[str, ...],
    class_is_interface: bool,
    file_path: str,
) -> InstanceIndependentMethodOccurrence | None:
    """Build an occurrence for a method, or ``None`` when it is not a candidate."""
    decorators = _normalized_decorators(method)
    # Not an instance method at all: wrong first parameter or an ignored decorator.
    if (
        _first_positional_arg(method) != _RECEIVER_NAME
        or set(decorators) & _IGNORED_DECORATORS
    ):
        return None
    receiver_reads, nested_receiver_reads = _count_receiver_usage(method)
    # Any read, direct or through a closure, proves instance dependence.
    if receiver_reads or nested_receiver_reads:
        return None
    start = method.lineno
    return InstanceIndependentMethodOccurrence(
        file_path=file_path,
        class_qualname=class_qualname,
        method_qualname=f"{class_qualname}.{method.name}",
        method_name=method.name,
        start=start,
        end=_node_end_line(method, start),
        classification=_classify_method(
            method=method,
            decorators=decorators,
            class_is_interface=class_is_interface,
        ),
        receiver_name=_RECEIVER_NAME,
        decorators=decorators,
        class_bases=base_names,
        receiver_reads=receiver_reads,
        nested_receiver_reads=nested_receiver_reads,
    )


def collect_instance_independent_methods(
    *,
    tree: ast.Module,
    file_path: str,
) -> tuple[InstanceIndependentMethodOccurrence, ...]:
    """Collect instance-independent method occurrences for one module AST.

    The result is sorted by ``(file_path, start, method_qualname)``.
    """
    occurrences: list[InstanceIndependentMethodOccurrence] = []
    for class_node, class_qualname in _iter_class_defs(tree):
        base_names = _class_base_names(class_node)
        class_is_interface = _class_is_interface(class_node, base_names)
        for method in _direct_methods(class_node):
            occurrence = _occurrence_for_method(
                method,
                class_qualname=class_qualname,
                base_names=base_names,
                class_is_interface=class_is_interface,
                file_path=file_path,
            )
            if occurrence is not None:
                occurrences.append(occurrence)
    occurrences.sort(
        key=lambda item: (item.file_path, item.start, item.method_qualname)
    )
    return tuple(occurrences)


def _group_finding_key(file_path: str, class_qualname: str) -> str:
    """Return a stable hex digest identifying one (file, class) group."""
    payload = (
        f"design:{DESIGN_KIND_INSTANCE_INDEPENDENT_METHOD}:v1\n"
        f"{file_path}\n{class_qualname}"
    )
    # SHA-1 serves as a stable identifier here, not as a security primitive.
    return hashlib.sha1(payload.encode("utf-8"), usedforsecurity=False).hexdigest()


def group_instance_independent_methods(
    occurrences: Sequence[InstanceIndependentMethodOccurrence],
    *,
    test_paths: frozenset[str] = frozenset(),
) -> tuple[DesignFindingGroup, ...]:
    """Group occurrences by class and compute per-group signature counts.

    ``test_paths`` carries repo-relative paths classified as test source so the
    signature can split production vs test candidates without re-deriving
    source-kind policy here. Groups are returned with the highest candidate
    count first, ties broken by file path, class qualname and finding key.
    """
    by_class: dict[tuple[str, str], list[InstanceIndependentMethodOccurrence]] = {}
    class_bases: dict[tuple[str, str], tuple[str, ...]] = {}
    for occurrence in occurrences:
        key = (occurrence.file_path, occurrence.class_qualname)
        by_class.setdefault(key, []).append(occurrence)
        # The first occurrence seen for a class supplies its recorded bases.
        class_bases.setdefault(key, occurrence.class_bases)

    groups: list[DesignFindingGroup] = []
    for (file_path, class_qualname), items in by_class.items():
        ordered = tuple(
            sorted(
                items,
                key=lambda item: (item.file_path, item.start, item.method_qualname),
            )
        )
        is_test = file_path in test_paths
        candidate_count = 0
        production_candidate_count = 0
        test_candidate_count = 0
        context_count = 0
        suppressed_count = 0
        for item in ordered:
            if item.classification in _DEFAULT_CLASSIFICATIONS:
                candidate_count += 1
                if is_test:
                    test_candidate_count += 1
                else:
                    production_candidate_count += 1
            elif item.classification in _CONTEXT_CLASSIFICATIONS:
                context_count += 1
            elif item.classification in _SUPPRESSED_CLASSIFICATIONS:
                suppressed_count += 1
        groups.append(
            DesignFindingGroup(
                finding_kind=DESIGN_KIND_INSTANCE_INDEPENDENT_METHOD,
                finding_key=_group_finding_key(file_path, class_qualname),
                file_path=file_path,
                class_qualname=class_qualname,
                class_bases=class_bases[(file_path, class_qualname)],
                signature=DesignFindingSignature(
                    candidate_count=candidate_count,
                    production_candidate_count=production_candidate_count,
                    test_candidate_count=test_candidate_count,
                    context_count=context_count,
                    suppressed_count=suppressed_count,
                ),
                items=ordered,
            )
        )

    groups.sort(
        key=lambda group: (
            -group.signature.candidate_count,
            group.file_path,
            group.class_qualname,
            group.finding_key,
        )
    )
    return tuple(groups)


__all__ = [
    "DesignFindingGroup",
    "DesignFindingSignature",
    "InstanceIndependentMethodOccurrence",
    "collect_instance_independent_methods",
    "group_instance_independent_methods",
]
a/codeclone/memory/__init__.py b/codeclone/memory/__init__.py new file mode 100644 index 00000000..5765aa8b --- /dev/null +++ b/codeclone/memory/__init__.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ..contracts import ENGINEERING_MEMORY_SCHEMA_VERSION + +__all__ = ["ENGINEERING_MEMORY_SCHEMA_VERSION"] diff --git a/codeclone/memory/coverage.py b/codeclone/memory/coverage.py new file mode 100644 index 00000000..b2922dd4 --- /dev/null +++ b/codeclone/memory/coverage.py @@ -0,0 +1,73 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from .paths import normalize_memory_scope_paths +from .retrieval.service import path_has_memory +from .sqlite_store import SqliteEngineeringMemoryStore + + +@dataclass(frozen=True, slots=True) +class ScopeCoverageReport: + scope_paths: tuple[str, ...] + scope_paths_with_memory: int + scope_paths_total: int + scope_coverage_percent: int + uncovered_paths: tuple[str, ...] 
+ + +def compute_scope_coverage( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + scope_paths: Sequence[str], +) -> ScopeCoverageReport: + normalized = normalize_memory_scope_paths(scope_paths) + with_memory = 0 + uncovered: list[str] = [] + for scope_path in normalized: + if path_has_memory( + store, + project_id=project_id, + rel_path=scope_path, + ): + with_memory += 1 + else: + uncovered.append(scope_path) + total = len(normalized) + percent = round(with_memory * 100 / total) if total else 100 + return ScopeCoverageReport( + scope_paths=normalized, + scope_paths_with_memory=with_memory, + scope_paths_total=total, + scope_coverage_percent=percent, + uncovered_paths=tuple(uncovered), + ) + + +def coverage_delta( + before: ScopeCoverageReport, + after: ScopeCoverageReport, +) -> dict[str, object]: + before_uncovered = set(before.uncovered_paths) + after_uncovered = set(after.uncovered_paths) + newly_uncovered = sorted(after_uncovered - before_uncovered) + return { + "scope_coverage_before": before.scope_coverage_percent, + "scope_coverage_after": after.scope_coverage_percent, + "new_uncovered_paths": newly_uncovered, + } + + +__all__ = [ + "ScopeCoverageReport", + "compute_scope_coverage", + "coverage_delta", +] diff --git a/codeclone/memory/display.py b/codeclone/memory/display.py new file mode 100644 index 00000000..3483c4ee --- /dev/null +++ b/codeclone/memory/display.py @@ -0,0 +1,65 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from collections.abc import Mapping + +_NUMBERED_HEADING_RE = re.compile(r"^(\d+)\)\s*(.+)$") + + +def normalize_doc_heading(raw: str) -> str: + """Normalize markdown headings for stable, readable link statements.""" + text = raw.strip() + if not text: + return "root" + match = _NUMBERED_HEADING_RE.match(text) + if match is not None: + return f"§{match.group(1)} · {match.group(2).strip()}" + return text + + +def format_document_link_statement( + *, + doc_file: str, + heading: str, + anchored_path: str, +) -> str: + normalized = normalize_doc_heading(heading) + return f"{doc_file} · {normalized} → {anchored_path}" + + +def format_memory_record_line(item: Mapping[str, object]) -> str: + record_type = str(item.get("type", "?")) + statement = str(item.get("statement", "")) + if record_type != "document_link": + return statement + payload = item.get("payload") + if not isinstance(payload, dict): + return statement + doc_file = payload.get("doc_file") + heading = payload.get("heading") + anchored = payload.get("anchored_symbols") + if not isinstance(doc_file, str) or not isinstance(heading, str): + return statement + path = "" + if isinstance(anchored, list) and anchored: + path = str(anchored[0]) + if not path: + return statement + return format_document_link_statement( + doc_file=doc_file, + heading=heading, + anchored_path=path, + ) + + +__all__ = [ + "format_document_link_statement", + "format_memory_record_line", + "normalize_doc_heading", +] diff --git a/codeclone/memory/embedding/__init__.py b/codeclone/memory/embedding/__init__.py new file mode 100644 index 00000000..8e851236 --- /dev/null +++ b/codeclone/memory/embedding/__init__.py @@ -0,0 +1,168 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import math +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +from ...observability import is_observability_enabled, span +from ..exceptions import MemorySemanticUnavailableError +from .length import estimate_token_counts_from_chars + +if TYPE_CHECKING: + from ...config.memory import SemanticConfig + +DIAGNOSTIC_EMBEDDING_MODEL_ID = "diagnostic-hash-v1" + + +class EmbeddingProvider(Protocol): + """Maps text to fixed-dimension vectors. Real providers are optional and + loaded lazily by the factory; the diagnostic provider is always available. + """ + + model_id: str + dimension: int + + def embed(self, texts: Sequence[str]) -> list[list[float]]: ... + + +@runtime_checkable +class _QueryEmbeddingProvider(Protocol): + def embed_query(self, text: str) -> list[float]: ... + + +@runtime_checkable +class _DocumentEmbeddingProvider(Protocol): + def embed_documents( + self, + texts: Sequence[str], + *, + infer_counters: Mapping[str, int] | None = None, + ) -> list[list[float]]: ... + + +class DeterministicHashEmbeddingProvider: + """Diagnostic/test-only embedding: deterministic sha256-derived vectors. + + Stable (same text -> same vector, across runs and platforms) so the test + suite stays fully deterministic, but it carries NO semantic meaning. Its + hits must never be presented as real-model recall — callers surface + ``provider="diagnostic"`` in status/envelope. 
+ """ + + model_id = DIAGNOSTIC_EMBEDDING_MODEL_ID + + def __init__(self, *, dimension: int) -> None: + if dimension <= 0: + raise ValueError("embedding dimension must be positive") + self.dimension = dimension + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + return [self._embed_one(text) for text in texts] + + def embed_query(self, text: str) -> list[float]: + return self._embed_one(text) + + def embed_documents( + self, + texts: Sequence[str], + *, + infer_counters: Mapping[str, int] | None = None, + ) -> list[list[float]]: + return self.embed(texts) + + def estimate_token_counts(self, texts: Sequence[str]) -> tuple[int, ...]: + return estimate_token_counts_from_chars(texts) + + def max_sequence_tokens(self) -> int | None: # codeclone: ignore[dead-code] + return None + + def _embed_one(self, text: str) -> list[float]: + values: list[float] = [] + counter = 0 + while len(values) < self.dimension: + digest = hashlib.sha256(f"{text}\x00{counter}".encode()).digest() + for offset in range(0, len(digest), 4): + if len(values) >= self.dimension: + break + chunk = int.from_bytes(digest[offset : offset + 4], "big") + values.append((chunk / 0xFFFFFFFF) * 2.0 - 1.0) + counter += 1 + norm = math.sqrt(sum(value * value for value in values)) or 1.0 + return [value / norm for value in values] + + +def embed_query(provider: EmbeddingProvider, text: str) -> list[float]: + with span(name="memory.embedding.query") as embed_span: + if is_observability_enabled(): + embed_span.set_counter("chars", len(text)) + if isinstance(provider, _QueryEmbeddingProvider): + return provider.embed_query(text) + (vector,) = provider.embed([text]) + return vector + + +def embed_documents( + provider: EmbeddingProvider, + texts: Sequence[str], + *, + infer_counters: Mapping[str, int] | None = None, +) -> list[list[float]]: + with span(name="memory.embedding.documents") as embed_span: + if is_observability_enabled(): + embed_span.set_counter("count", len(texts)) + if 
isinstance(provider, _DocumentEmbeddingProvider): + return provider.embed_documents(texts, infer_counters=infer_counters) + return provider.embed(texts) + + +def _resolve_fastembed_provider(config: SemanticConfig) -> EmbeddingProvider: + from .fastembed_provider import FastEmbedEmbeddingProvider + + model_name = config.embedding_model or "BAAI/bge-small-en-v1.5" + return FastEmbedEmbeddingProvider( + model_name=model_name, + dimension=config.dimension, + cache_dir=Path(config.embedding_cache_dir), + allow_model_download=config.allow_model_download, + ) + + +def resolve_embedding_provider(config: SemanticConfig) -> EmbeddingProvider: + """Resolve the embedding provider for the given config. + + ``diagnostic`` is always available (no deps). ``fastembed`` is the + community local-quality provider and is loaded lazily from the optional + ``semantic-fastembed`` extra. ``api`` is reserved for paid/cloud providers. + """ + kind = config.embedding_provider + if kind == "diagnostic": + return DeterministicHashEmbeddingProvider(dimension=config.dimension) + if kind == "fastembed": + return _resolve_fastembed_provider(config) + if kind == "local_model": + raise MemorySemanticUnavailableError( + "local_model embedding provider is not available yet; use " + "embedding_provider='fastembed' for community local semantic search" + ) + raise MemorySemanticUnavailableError( + "api embedding provider is not available yet (Phase 20.6); " + "use embedding_provider='diagnostic'" + ) + + +__all__ = [ + "DIAGNOSTIC_EMBEDDING_MODEL_ID", + "DeterministicHashEmbeddingProvider", + "EmbeddingProvider", + "embed_documents", + "embed_query", + "resolve_embedding_provider", +] diff --git a/codeclone/memory/embedding/batching.py b/codeclone/memory/embedding/batching.py new file mode 100644 index 00000000..c50afc08 --- /dev/null +++ b/codeclone/memory/embedding/batching.py @@ -0,0 +1,144 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Generic, TypeVar + +T = TypeVar("T") + + +@dataclass(frozen=True, slots=True) +class EmbedBatchLimits: + """Adaptive embed batch contract: document count and padded token volume.""" + + max_documents: int = 64 + max_padded_tokens: int = 8192 + + +@dataclass(frozen=True, slots=True) +class LengthScoredItem(Generic[T]): + item: T + char_count: int + token_count: int + source_kind: str + source_id: str + + +@dataclass(frozen=True, slots=True) +class EmbedBatchPlan(Generic[T]): + """One adaptive inference batch with padding telemetry.""" + + items: tuple[LengthScoredItem[T], ...] + total_chars: int + max_chars: int + total_tokens: int + max_tokens: int + padded_tokens: int + padding_amplification_permille: int + + +def length_sort_key(item: LengthScoredItem[T]) -> tuple[int, str, str]: + return (item.token_count, item.source_kind, item.source_id) + + +def score_lengths( + items: Sequence[T], + *, + char_counts: Sequence[int], + token_counts: Sequence[int], + source_kinds: Sequence[str], + source_ids: Sequence[str], +) -> tuple[LengthScoredItem[T], ...]: + if not ( + len(items) + == len(char_counts) + == len(token_counts) + == len(source_kinds) + == len(source_ids) + ): + raise ValueError("length score inputs must align with items") + scored = [ + LengthScoredItem( + item=item, + char_count=char_count, + token_count=token_count, + source_kind=source_kind, + source_id=source_id, + ) + for item, char_count, token_count, source_kind, source_id in zip( + items, + char_counts, + token_counts, + source_kinds, + source_ids, + strict=True, + ) + ] + return tuple(sorted(scored, key=length_sort_key)) + + +def pack_adaptive_batches( + scored_items: 
Sequence[LengthScoredItem[T]], + *, + limits: EmbedBatchLimits, +) -> list[EmbedBatchPlan[T]]: + if limits.max_documents <= 0 or limits.max_padded_tokens <= 0: + raise ValueError("embed batch limits must be positive") + if not scored_items: + return [] + batches: list[list[LengthScoredItem[T]]] = [] + current: list[LengthScoredItem[T]] = [] + current_max_tokens = 0 + + for item in scored_items: + next_size = len(current) + 1 + next_max_tokens = max(current_max_tokens, item.token_count) + padded = next_size * next_max_tokens + if current and ( + next_size > limits.max_documents or padded > limits.max_padded_tokens + ): + batches.append(current) + current = [item] + current_max_tokens = item.token_count + else: + current.append(item) + current_max_tokens = next_max_tokens + if current: + batches.append(current) + return [_plan_batch(batch) for batch in batches] + + +def _plan_batch(batch: Sequence[LengthScoredItem[T]]) -> EmbedBatchPlan[T]: + char_counts = [item.char_count for item in batch] + token_counts = [item.token_count for item in batch] + total_tokens = sum(token_counts) + max_tokens = max(token_counts) + padded_tokens = len(batch) * max_tokens + amplification = ( + round((padded_tokens * 1000) / total_tokens) if total_tokens else 1000 + ) + return EmbedBatchPlan( + items=tuple(batch), + total_chars=sum(char_counts), + max_chars=max(char_counts), + total_tokens=total_tokens, + max_tokens=max_tokens, + padded_tokens=padded_tokens, + padding_amplification_permille=int(amplification), + ) + + +__all__ = [ + "EmbedBatchLimits", + "EmbedBatchPlan", + "LengthScoredItem", + "length_sort_key", + "pack_adaptive_batches", + "score_lengths", +] diff --git a/codeclone/memory/embedding/fastembed_provider.py b/codeclone/memory/embedding/fastembed_provider.py new file mode 100644 index 00000000..e67d042b --- /dev/null +++ b/codeclone/memory/embedding/fastembed_provider.py @@ -0,0 +1,386 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 
2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +from collections.abc import Callable, Iterable, Mapping, Sequence +from pathlib import Path +from typing import Protocol, cast + +from ...budget.estimator import ( + TOKEN_ESTIMATOR_CHARS_APPROX, + estimate_texts_token_counts, +) +from ...observability import is_observability_enabled, span +from ..exceptions import MemorySemanticUnavailableError, SemanticChunkingInvariantError +from .length import PassageTokenCounts + +_PASSAGE_PREFIX = "passage: " + +_KNOWN_MODEL_MAX_TOKENS: dict[str, int] = { + "baai/bge-small-en-v1.5": 512, + "baai/bge-small-en": 512, + "baai/bge-base-en-v1.5": 512, + "baai/bge-base-en": 512, + "baai/bge-large-en-v1.5": 512, +} + + +def known_model_max_tokens(model_name: str) -> int: + return _KNOWN_MODEL_MAX_TOKENS.get(model_name.lower(), 512) + + +def _tokenizer_max_length(tokenizer: object) -> int | None: + truncation = getattr(tokenizer, "truncation", None) + if truncation is None: + return None + max_length = getattr(truncation, "max_length", None) + if isinstance(max_length, int) and max_length > 0: + return max_length + return None + + +def _encoding_length(encoding: object) -> int: + ids = getattr(encoding, "ids", None) + if isinstance(ids, list): + return len(ids) + return 0 + + +def _tokenizer_encode_ops( + tokenizer: object, +) -> ( + tuple[ + Callable[..., object], + Callable[[list[int]], str], + Callable[[], None], + Callable[..., None], + ] + | None +): + encode = getattr(tokenizer, "encode", None) + decode = getattr(tokenizer, "decode", None) + no_truncation = getattr(tokenizer, "no_truncation", None) + enable_truncation = getattr(tokenizer, "enable_truncation", None) + if not all( + callable(value) for value in (encode, decode, no_truncation, enable_truncation) + ): + return None + 
return ( + cast("Callable[..., object]", encode), + cast("Callable[[list[int]], str]", decode), + cast("Callable[[], None]", no_truncation), + cast("Callable[..., None]", enable_truncation), + ) + + +def _restore_tokenizer_truncation(tokenizer: object, *, max_length: int) -> None: + enable_truncation = getattr(tokenizer, "enable_truncation", None) + if callable(enable_truncation): + enable_truncation(max_length=max_length) + + +def _special_token_count(encode: Callable[..., object]) -> int: + with_special = _encoding_length(encode("x", add_special_tokens=True)) + without_special = _encoding_length(encode("x", add_special_tokens=False)) + return max(0, with_special - without_special) + + +def _passage_prefix_token_count(encode: Callable[..., object]) -> int: + return _encoding_length(encode(_PASSAGE_PREFIX, add_special_tokens=False)) + + +def _passage_model_input_token_count( + encode: Callable[..., object], + chunk_text: str, +) -> int: + return _encoding_length( + encode(f"{_PASSAGE_PREFIX}{chunk_text}", add_special_tokens=True) + ) + + +def _chunk_payload_token_budget( + encode: Callable[..., object], + *, + model_max_tokens: int, +) -> int: + special_tokens = _special_token_count(encode) + prefix_tokens = _passage_prefix_token_count(encode) + return max(1, model_max_tokens - special_tokens - prefix_tokens) + + +def _verify_chunk_passage_input( + encode: Callable[..., object], + chunk_text: str, + *, + model_max_tokens: int, +) -> None: + raw_tokens = _passage_model_input_token_count(encode, chunk_text) + if raw_tokens > model_max_tokens: + raise SemanticChunkingInvariantError( + "passage chunk exceeds model token window: " + f"raw_tokens={raw_tokens}, model_max_tokens={model_max_tokens}" + ) + + +class _TextEmbeddingModel(Protocol): + model: object + + def embed(self, texts: list[str]) -> Iterable[object]: ... + + +class _TokenizingTextModel(Protocol): + tokenizer: object | None + + def tokenize(self, documents: list[str]) -> list[object]: ... 
+ + +class FastEmbedEmbeddingProvider: + """Local FastEmbed provider for semantic-quality community retrieval. + + FastEmbed remains an optional dependency. The provider runs local ONNX + embeddings and uses explicit query/passage prefixes for retrieval models + such as BAAI/bge-small-en-v1.5. Model download is disabled by default; users + must opt in or pre-populate the cache. + """ + + def __init__( + self, + *, + model_name: str, + dimension: int, + cache_dir: Path, + allow_model_download: bool, + ) -> None: + self.model_name = model_name + self.model_id = f"fastembed:{model_name}" + self.dimension = dimension + self.cache_dir = cache_dir + self.allow_model_download = allow_model_download + # Verify the optional package eagerly (cheap) so "extra not installed" + # still fails at construction, but defer the expensive ONNX model load + # (~hundreds of MB / seconds) to the first embed. A provider that is + # built but never embeds — e.g. a semantic query against an index that + # turns out to be unavailable — then costs nothing. Callers degrade + # gracefully when the model is unavailable at embed time. 
+ self._text_embedding = self._resolve_text_embedding() + self._model: _TextEmbeddingModel | None = None + + def _resolve_text_embedding(self) -> Callable[..., object]: + try: + fastembed = importlib.import_module("fastembed") + except ImportError as exc: + raise MemorySemanticUnavailableError( + "fastembed embedding provider requires the optional " + "`codeclone[semantic-fastembed]` extra" + ) from exc + text_embedding = getattr(fastembed, "TextEmbedding", None) + if text_embedding is None: + raise MemorySemanticUnavailableError( + "fastembed package does not expose TextEmbedding" + ) + return cast("Callable[..., object]", text_embedding) + + def _get_model(self) -> _TextEmbeddingModel: + if self._model is not None: + return self._model + with span(name="memory.embedding.model_load"): + try: + model = self._text_embedding( + model_name=self.model_name, + cache_dir=str(self.cache_dir), + local_files_only=not self.allow_model_download, + ) + except Exception as exc: + mode = ( + "download disabled" + if not self.allow_model_download + else "download allowed" + ) + raise MemorySemanticUnavailableError( + "fastembed embedding model is unavailable " + f"({self.model_name}; {mode}; cache={self.cache_dir}): {exc}" + ) from exc + self._model = cast(_TextEmbeddingModel, model) + return self._model + + def _inner_text_model(self) -> _TokenizingTextModel: + return cast(_TokenizingTextModel, self._get_model().model) + + def max_sequence_tokens(self) -> int | None: # codeclone: ignore[dead-code] + if self._model is not None: + inner = self._inner_text_model() + tokenizer = inner.tokenizer + if tokenizer is not None: + truncation = getattr(tokenizer, "truncation", None) + if truncation is not None: + max_length = getattr(truncation, "max_length", None) + if isinstance(max_length, int) and max_length > 0: + return max_length + return known_model_max_tokens(self.model_name) + + @property + def estimator_label(self) -> str: + return "fastembed_tokenizer" + + def 
probe_passage_token_counts( + self, + texts: Sequence[str], + ) -> tuple[PassageTokenCounts, ...]: + prefixed = [f"passage: {text}" for text in texts] + inner = self._inner_text_model() + tokenizer = inner.tokenizer + tokenize = getattr(inner, "tokenize", None) + if tokenizer is None or tokenize is None: + counts = estimate_texts_token_counts( + prefixed, + estimator=TOKEN_ESTIMATOR_CHARS_APPROX, + ) + return tuple( + PassageTokenCounts(raw=count, effective=count) for count in counts + ) + max_length = _tokenizer_max_length(tokenizer) or known_model_max_tokens( + self.model_name + ) + encode_batch = getattr(tokenizer, "encode_batch", None) + encode_ops = _tokenizer_encode_ops(tokenizer) + if encode_ops is not None and callable(encode_batch): + _, _, no_truncation, enable_truncation = encode_ops + no_truncation() + raw_encodings = encode_batch(prefixed) + raw_counts = tuple(_encoding_length(encoding) for encoding in raw_encodings) + enable_truncation(max_length=max_length) + effective_encodings = tokenize(prefixed) + effective_counts = tuple( + _encoding_length(encoding) for encoding in effective_encodings + ) + return tuple( + PassageTokenCounts(raw=raw, effective=effective) + for raw, effective in zip(raw_counts, effective_counts, strict=True) + ) + effective_counts = self.estimate_token_counts(texts) + return tuple( + PassageTokenCounts(raw=count, effective=count) for count in effective_counts + ) + + def chunk_text(self, text: str) -> tuple[str, ...]: + inner = self._inner_text_model() + tokenizer = inner.tokenizer + if tokenizer is None: + return (text,) + max_length = _tokenizer_max_length(tokenizer) or known_model_max_tokens( + self.model_name + ) + encode_ops = _tokenizer_encode_ops(tokenizer) + if encode_ops is None: + return (text,) + encode, decode, no_truncation, _enable_truncation = encode_ops + no_truncation() + try: + if _passage_model_input_token_count(encode, text) <= max_length: + _verify_chunk_passage_input(encode, text, 
model_max_tokens=max_length) + return (text,) + content_encoding = encode(text, add_special_tokens=False) + content_ids = list(getattr(content_encoding, "ids", ())) + payload_budget = _chunk_payload_token_budget( + encode, + model_max_tokens=max_length, + ) + chunks: list[str] = [] + start = 0 + while start < len(content_ids): + end = min(start + payload_budget, len(content_ids)) + while end > start: + chunk = str(decode(content_ids[start:end])) + if _passage_model_input_token_count(encode, chunk) <= max_length: + break + end -= 1 + if end <= start: + raise SemanticChunkingInvariantError( + "unable to fit passage chunk within model token window " + f"at content offset {start}" + ) + _verify_chunk_passage_input(encode, chunk, model_max_tokens=max_length) + chunks.append(chunk) + start = end + return tuple(chunks) + finally: + _restore_tokenizer_truncation(tokenizer, max_length=max_length) + + def estimate_token_counts(self, texts: Sequence[str]) -> tuple[int, ...]: + prefixed = [f"passage: {text}" for text in texts] + if self._model is None: + return estimate_texts_token_counts( + prefixed, + estimator=TOKEN_ESTIMATOR_CHARS_APPROX, + ) + inner = self._inner_text_model() + tokenize = getattr(inner, "tokenize", None) + if tokenize is None: + return estimate_texts_token_counts( + prefixed, + estimator=TOKEN_ESTIMATOR_CHARS_APPROX, + ) + encodings = tokenize(prefixed) + return tuple(len(getattr(encoding, "ids", ())) for encoding in encodings) + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> list[float]: + (vector,) = self._embed_prefixed([f"query: {text}"]) + return vector + + def embed_documents( + self, + texts: Sequence[str], + *, + infer_counters: Mapping[str, int] | None = None, + ) -> list[list[float]]: + return self._embed_prefixed( + [f"passage: {text}" for text in texts], + infer_counters=infer_counters, + ) + + def _embed_prefixed( + self, + texts: Sequence[str], + *, + 
infer_counters: Mapping[str, int] | None = None, + ) -> list[list[float]]: + with span(name="memory.embedding.infer") as infer_span: + if is_observability_enabled(): + infer_span.set_counter("batch", len(texts)) + if infer_counters is not None: + for key, value in sorted(infer_counters.items()): + infer_span.set_counter(key, value) + try: + raw_vectors = list(self._get_model().embed(list(texts))) + except Exception as exc: + raise MemorySemanticUnavailableError( + f"fastembed embedding failed for model {self.model_name}: {exc}" + ) from exc + vectors = [self._coerce_vector(vector) for vector in raw_vectors] + for vector in vectors: + if len(vector) != self.dimension: + raise MemorySemanticUnavailableError( + "fastembed embedding dimension mismatch: " + f"expected {self.dimension}, got {len(vector)} for " + f"{self.model_name}" + ) + return vectors + + @staticmethod + def _coerce_vector(raw_vector: object) -> list[float]: + if not isinstance(raw_vector, Iterable) or isinstance(raw_vector, str): + raise MemorySemanticUnavailableError( + "fastembed returned a non-iterable embedding vector" + ) + return [float(value) for value in raw_vector] + + +__all__ = ["FastEmbedEmbeddingProvider", "known_model_max_tokens"] diff --git a/codeclone/memory/embedding/length.py b/codeclone/memory/embedding/length.py new file mode 100644 index 00000000..ad51e7f6 --- /dev/null +++ b/codeclone/memory/embedding/length.py @@ -0,0 +1,239 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +from ...budget.estimator import ( + TOKEN_ESTIMATOR_CHARS_APPROX, + TokenEstimatorMode, + approx_tokens_from_chars, + estimate_texts_token_counts, +) + +if TYPE_CHECKING: + from ...config.memory import SemanticConfig + +_KNOWN_EMBEDDING_MODEL_MAX_TOKENS: dict[str, int] = { + "baai/bge-small-en-v1.5": 512, + "baai/bge-small-en": 512, + "baai/bge-base-en-v1.5": 512, + "baai/bge-base-en": 512, + "baai/bge-large-en-v1.5": 512, +} + + +@dataclass(frozen=True, slots=True) +class LengthDistribution: + min: int + p50: int + p75: int + p95: int + p99: int + max: int + + +@dataclass(frozen=True, slots=True) +class TokenOverflowStats: + model_max_tokens: int | None + over_model_limit: int + max_overflow_tokens: int + + +@dataclass(frozen=True, slots=True) +class PassageTokenCounts: + raw: int + effective: int + + +@dataclass(frozen=True, slots=True) +class TruncationStats: + documents: int + max_dropped_tokens: int + + +@runtime_checkable +class TokenEstimatingProvider(Protocol): + """Optional provider surface for batch planning and projection probes.""" + + def estimate_token_counts(self, texts: Sequence[str]) -> tuple[int, ...]: ... + + def max_sequence_tokens(self) -> int | None: ... + + +@runtime_checkable +class ProjectionTokenProber(Protocol): + """Probe surface for semantic projection length measurement.""" + + def probe_passage_token_counts( + self, + texts: Sequence[str], + ) -> tuple[PassageTokenCounts, ...]: ... + + def max_sequence_tokens(self) -> int | None: ... + + @property + def estimator_label(self) -> str: ... 
+ + +def truncation_stats( + raw_counts: Sequence[int], + effective_counts: Sequence[int], +) -> TruncationStats: + documents = 0 + max_dropped = 0 + for raw, effective in zip(raw_counts, effective_counts, strict=True): + dropped = raw - effective + if dropped > 0: + documents += 1 + max_dropped = max(max_dropped, dropped) + return TruncationStats(documents=documents, max_dropped_tokens=max_dropped) + + +def estimate_tokens_from_chars(char_count: int) -> int: + return approx_tokens_from_chars(char_count) + + +def estimate_char_counts(texts: Sequence[str]) -> tuple[int, ...]: + return tuple(len(text) for text in texts) + + +def estimate_token_counts_from_chars(texts: Sequence[str]) -> tuple[int, ...]: + return estimate_texts_token_counts( + texts, + estimator=TOKEN_ESTIMATOR_CHARS_APPROX, + ) + + +def token_overflow_stats( + token_counts: Sequence[int], + *, + model_max_tokens: int | None, +) -> TokenOverflowStats: + if model_max_tokens is None or model_max_tokens <= 0: + return TokenOverflowStats( + model_max_tokens=model_max_tokens, + over_model_limit=0, + max_overflow_tokens=0, + ) + over = 0 + max_overflow = 0 + for count in token_counts: + if count > model_max_tokens: + over += 1 + max_overflow = max(max_overflow, count - model_max_tokens) + return TokenOverflowStats( + model_max_tokens=model_max_tokens, + over_model_limit=over, + max_overflow_tokens=max_overflow, + ) + + +def length_distribution(values: Sequence[int]) -> LengthDistribution: + if not values: + return LengthDistribution(min=0, p50=0, p75=0, p95=0, p99=0, max=0) + ordered = sorted(values) + return LengthDistribution( + min=ordered[0], + p50=_percentile(ordered, 50), + p75=_percentile(ordered, 75), + p95=_percentile(ordered, 95), + p99=_percentile(ordered, 99), + max=ordered[-1], + ) + + +def _percentile(ordered: Sequence[int], percentile: float) -> int: + if len(ordered) == 1: + return ordered[0] + index = int((len(ordered) - 1) * (percentile / 100.0)) + return ordered[index] + + 
+@dataclass(frozen=True, slots=True) +class PlanningTextTokenEstimator: + """Cheap token planning via the shared budget estimator contract.""" + + mode: TokenEstimatorMode + model_max_tokens: int | None + encoding: str = "o200k_base" + + def estimate_token_counts(self, texts: Sequence[str]) -> tuple[int, ...]: + return estimate_texts_token_counts( + texts, + encoding=self.encoding, + estimator=self.mode, + ) + + def max_sequence_tokens(self) -> int | None: + return self.model_max_tokens + + def probe_passage_token_counts( + self, + texts: Sequence[str], + ) -> tuple[PassageTokenCounts, ...]: + counts = self.estimate_token_counts(texts) + return tuple(PassageTokenCounts(raw=count, effective=count) for count in counts) + + @property + def estimator_label(self) -> str: + return self.mode + + +def resolve_semantic_model_max_tokens(config: SemanticConfig) -> int | None: + if config.embedding_provider == "fastembed": + model_name = config.embedding_model or "BAAI/bge-small-en-v1.5" + return _KNOWN_EMBEDDING_MODEL_MAX_TOKENS.get(model_name.lower(), 512) + return None + + +def resolve_planning_token_estimator( + config: SemanticConfig, +) -> PlanningTextTokenEstimator: + return PlanningTextTokenEstimator( + mode=config.projection_token_estimator, + model_max_tokens=resolve_semantic_model_max_tokens(config), + ) + + +def resolve_token_estimator(provider: object) -> TokenEstimatingProvider: + if isinstance(provider, TokenEstimatingProvider): + return provider + return PlanningTextTokenEstimator( + mode=TOKEN_ESTIMATOR_CHARS_APPROX, + model_max_tokens=None, + ) + + +def estimate_document_tokens( + provider: object, + texts: Sequence[str], +) -> tuple[int, ...]: + return resolve_token_estimator(provider).estimate_token_counts(texts) + + +__all__ = [ + "LengthDistribution", + "PassageTokenCounts", + "PlanningTextTokenEstimator", + "ProjectionTokenProber", + "TokenEstimatingProvider", + "TokenOverflowStats", + "TruncationStats", + "estimate_char_counts", + 
"estimate_document_tokens", + "estimate_token_counts_from_chars", + "estimate_tokens_from_chars", + "length_distribution", + "resolve_planning_token_estimator", + "resolve_semantic_model_max_tokens", + "resolve_token_estimator", + "token_overflow_stats", + "truncation_stats", +] diff --git a/codeclone/memory/enums.py b/codeclone/memory/enums.py new file mode 100644 index 00000000..63e33d63 --- /dev/null +++ b/codeclone/memory/enums.py @@ -0,0 +1,125 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Literal + +MemoryRecordType = Literal[ + "module_role", + "contract_note", + "test_anchor", + "document_link", + "risk_note", + "public_surface", + "contradiction_note", + "architecture_decision", + "change_rationale", + "protocol_rule", + "stale_marker", + "human_note", +] + +MemoryStatus = Literal[ + "draft", + "active", + "historical", + "stale", + "superseded", + "rejected", + "archived", +] + +MemoryConfidence = Literal["inferred", "supported", "verified"] + +MemoryOrigin = Literal["system", "agent", "human"] + +MemoryIngestSource = Literal[ + "analysis", + "contract", + "doc", + "test", + "git", + "receipt", + "audit", + "agent", + "human", + "snapshot", +] + +SubjectKind = Literal[ + "path", + "symbol", + "module", + "package", + "test", + "doc", + "contract", + "mcp_tool", + "mcp_resource", + "cli_option", + "report_field", + "baseline_schema", + "cache_schema", + "config_key", + "plugin_surface", +] + +SubjectRelation = Literal[ + "about", + "owns", + "tests", + "documents", + "depends_on", + "imports", + "exports", +] + +EvidenceKind = Literal[ + "code", + "test", + "doc", + "spec", + "receipt", + "git_commit", + "report", + "baseline", + "cache", + "audit_event", + 
"trajectory", + "external_url", +] + +LinkRelation = Literal[ + "supersedes", + "depends_on", + "contradicts", + "explains", + "implements", + "tests", + "documents", + "deprecates", + "related_to", + "implicit_coupling", +] + +IngestionMode = Literal["init", "refresh"] + +IngestionRunStatus = Literal["running", "completed", "failed", "partial"] + +__all__ = [ + "EvidenceKind", + "IngestionMode", + "IngestionRunStatus", + "LinkRelation", + "MemoryConfidence", + "MemoryIngestSource", + "MemoryOrigin", + "MemoryRecordType", + "MemoryStatus", + "SubjectKind", + "SubjectRelation", +] diff --git a/codeclone/memory/exceptions.py b/codeclone/memory/exceptions.py new file mode 100644 index 00000000..c9d97337 --- /dev/null +++ b/codeclone/memory/exceptions.py @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + + +class MemoryError(RuntimeError): + """Base error for engineering memory operations.""" + + +class MemorySchemaError(MemoryError): + """Raised for unsupported or corrupt engineering memory database schemas.""" + + +class MemoryContractError(MemoryError): + """Raised when memory record or config contracts are violated.""" + + +class MemoryInitLockError(MemoryError): + """Raised when the memory init advisory lock cannot be acquired.""" + + +class MemoryCapacityError(MemoryContractError): + """Raised when memory store capacity limits are exceeded.""" + + +class MemorySemanticUnavailableError(MemoryError): + """Raised when a semantic provider/backend is required but unavailable. + + Read paths never raise this — they degrade to FTS/structural and report + ``semantic.used=false``. It is raised only by explicit semantic operations + (e.g. 
resolving a real embedding provider whose dependency is missing). + """ + + +class SemanticChunkingInvariantError(MemoryContractError): + """Raised when passage model input still exceeds the token window after chunking.""" + + +__all__ = [ + "MemoryCapacityError", + "MemoryContractError", + "MemoryError", + "MemoryInitLockError", + "MemorySchemaError", + "MemorySemanticUnavailableError", + "SemanticChunkingInvariantError", +] diff --git a/codeclone/memory/experience/__init__.py b/codeclone/memory/experience/__init__.py new file mode 100644 index 00000000..89cdd166 --- /dev/null +++ b/codeclone/memory/experience/__init__.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Experience Layer: deterministic distillation of structural regularities +from the trajectory corpus (advisory, machine-owned, human-promotable).""" + +from __future__ import annotations + +from .distiller import ( + EXPERIENCE_MIN_SUPPORT, + MIN_INFORMATION_VALUE, + distill_experiences, + information_value, + pattern_keys, +) +from .models import ( + EXPERIENCE_DISTILLATION_VERSION, + Experience, + ExperienceEvidence, + ExperienceFacet, + ExperienceFacetKind, + ExperienceStatus, + PatternKey, +) +from .store import ( + count_experiences, + list_experiences, + list_experiences_for_subject_family, + replace_experiences, +) + +__all__ = [ + "EXPERIENCE_DISTILLATION_VERSION", + "EXPERIENCE_MIN_SUPPORT", + "MIN_INFORMATION_VALUE", + "Experience", + "ExperienceEvidence", + "ExperienceFacet", + "ExperienceFacetKind", + "ExperienceStatus", + "PatternKey", + "count_experiences", + "distill_experiences", + "information_value", + "list_experiences", + "list_experiences_for_subject_family", + "pattern_keys", + "replace_experiences", +] diff --git 
a/codeclone/memory/experience/distillation_workflow.py b/codeclone/memory/experience/distillation_workflow.py new file mode 100644 index 00000000..8aedb330 --- /dev/null +++ b/codeclone/memory/experience/distillation_workflow.py @@ -0,0 +1,110 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Experience distillation runner. + +Mirrors ``trajectory/rebuild_workflow.py``: a derived-state recompute that reads +the project's canonical trajectories, distills deterministic Experiences, and +replaces the project's experience set wholesale. Runs on the async projection +queue right after the trajectory rebuild (same lifecycle, no ritual).""" + +from __future__ import annotations + +from pathlib import Path +from typing import Literal, TypedDict + +from ...config.memory import MemoryConfig +from ...report.meta import current_report_timestamp_utc +from ..exceptions import MemoryContractError +from ..models import MemoryProject +from ..project import resolve_memory_db_path, resolve_project_identity +from ..sqlite_store import SqliteEngineeringMemoryStore +from .distiller import distill_experiences +from .models import EXPERIENCE_DISTILLATION_VERSION + + +class DistillExperiencesMeta(TypedDict): + action: Literal["distill_experiences"] + distillation_version: str + + +class DistillExperiencesOkPayload(DistillExperiencesMeta): + status: Literal["ok"] + experiences_distilled: int + trajectories_considered: int + + +class DistillExperiencesSkippedPayload(DistillExperiencesMeta): + status: Literal["skipped"] + reason: str + experiences_distilled: int + + +DistillExperiencesPayload = ( + DistillExperiencesOkPayload | DistillExperiencesSkippedPayload +) + + +def execute_experience_distillation( + *, + root_path: Path, + config: MemoryConfig, + store: 
SqliteEngineeringMemoryStore | None = None, + project: MemoryProject | None = None, +) -> DistillExperiencesPayload: + base: DistillExperiencesMeta = { + "action": "distill_experiences", + "distillation_version": EXPERIENCE_DISTILLATION_VERSION, + } + if not config.trajectories_enabled: + # Experiences are distilled from trajectories; no trajectories, nothing + # to distill. + return { + **base, + "status": "skipped", + "reason": "trajectories_disabled", + "experiences_distilled": 0, + } + owns_store = store is None + active_store = store + try: + resolved_project = project or resolve_project_identity(root_path) + if active_store is None: + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + raise MemoryContractError( + f"Engineering memory database not found: {db_path}. " + "Run memory init or " + "manage_engineering_memory(action='refresh_from_run')." + ) + active_store = SqliteEngineeringMemoryStore(db_path) + trajectories = active_store.list_canonical_trajectories_for_export( + project_id=resolved_project.id + ) + considered = len(trajectories) + experiences = distill_experiences( + trajectories, now=current_report_timestamp_utc() + ) + distilled = active_store.replace_experiences( + project_id=resolved_project.id, experiences=experiences + ) + finally: + if owns_store and active_store is not None: + active_store.close() + return { + **base, + "status": "ok", + "experiences_distilled": distilled, + "trajectories_considered": considered, + } + + +__all__ = [ + "DistillExperiencesOkPayload", + "DistillExperiencesPayload", + "DistillExperiencesSkippedPayload", + "execute_experience_distillation", +] diff --git a/codeclone/memory/experience/distiller.py b/codeclone/memory/experience/distiller.py new file mode 100644 index 00000000..4b1326e4 --- /dev/null +++ b/codeclone/memory/experience/distiller.py @@ -0,0 +1,262 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic Experience distillation over the trajectory corpus. + +Given a set of *canonical* trajectories (deduped by workflow, newest projection +preferred — the caller's responsibility), this groups them by a structural +``PatternKey`` and emits an Experience for each key that passes BOTH filters: + +1. **support** — at least ``min_support`` distinct trajectories, and +2. **informativeness** — ``information_value`` over the threshold; a pattern + explained by a single tool identity (one ``agent_family``) is a tool quirk, + not a system regularity, and is rejected. + +The pattern key never contains a tool identity (agent / profile / intent) — those +are recorded as facets. Same trajectory set always yields byte-identical +experiences: ordering is sorted, the digest is an orjson canonical over the key +and the sorted member ids. + +Pure function: no DB, no surfaces, no clock (the caller passes ``now``). The +patch-trail-based process-artifact refinement (``changed=0``) is added when the +patch trail is wired (step 3); the single-facet guard already rejects the +single-agent ``verification_incomplete`` pattern. +""" + +from __future__ import annotations + +import hashlib +from collections import defaultdict +from collections.abc import Sequence +from pathlib import PurePosixPath + +from ...utils.json_io import json_text +from ..paths import normalize_repo_path +from ..trajectory.models import Trajectory +from .models import ( + EXPERIENCE_DISTILLATION_VERSION, + Experience, + ExperienceEvidence, + ExperienceFacet, + PatternKey, +) + +EXPERIENCE_MIN_SUPPORT = 5 +MIN_INFORMATION_VALUE = 50 +MAX_EVIDENCE = 20 +MAX_FAMILIES_PER_TRAJECTORY = 8 + +# Labels carried by every change-control cycle — not informative as signals. 
+_UBIQUITOUS_LABELS = frozenset( + {"change_control_workflow", "patch_trail_recorded", "receipt_issued"} +) + +# Signal scoring weights (see information_value). +_MULTI_AGENT_SCORE = 60 +_STRUCTURAL_SIGNAL_SCORE = 25 + +# Outcome-derived signals (not labels): lower-confidence, may be process noise. +_SIGNAL_VERIFICATION_INCOMPLETE = "verification_incomplete" +_SIGNAL_INCIDENT_PRESENT = "incident_present" + + +def _agent_family(agent_key: str) -> str: + return agent_key.split("/", 1)[0] + + +def _path_family(path_key: str) -> str | None: + try: + normalized = normalize_repo_path(path_key) + except ValueError: + return None + parent = PurePosixPath(normalized).parent.as_posix() + if parent in {"", "."}: + return None + return parent + + +def _path_families(trajectory: Trajectory) -> frozenset[str]: + families = { + family + for subject in trajectory.subjects + if subject.subject_kind == "path" + for family in (_path_family(subject.subject_key),) + if family is not None + } + return frozenset(families) + + +def _agent_families(trajectory: Trajectory) -> frozenset[str]: + return frozenset( + _agent_family(subject.subject_key) + for subject in trajectory.subjects + if subject.subject_kind == "agent" + ) + + +def _outcome_class(trajectory: Trajectory) -> str: + return f"{trajectory.outcome}:{trajectory.quality_tier}" + + +def _signals(trajectory: Trajectory) -> frozenset[str]: + signals: set[str] = { + label for label in trajectory.labels if label not in _UBIQUITOUS_LABELS + } + if trajectory.outcome in {"partial", "blocked"} and ( + "verified_finish" not in trajectory.labels + ): + signals.add(_SIGNAL_VERIFICATION_INCOMPLETE) + if trajectory.incident_count > 0: + signals.add(_SIGNAL_INCIDENT_PRESENT) + return frozenset(signals) + + +def pattern_keys(trajectory: Trajectory) -> frozenset[PatternKey]: + """Structural keys a trajectory contributes to (family x signal x outcome).""" + families = sorted(_path_families(trajectory))[:MAX_FAMILIES_PER_TRAJECTORY] + signals = 
_signals(trajectory) + outcome_class = _outcome_class(trajectory) + return frozenset( + PatternKey(family, signal, outcome_class) + for family in families + for signal in signals + ) + + +def _structural_signal(signal: str) -> bool: + return signal not in {_SIGNAL_VERIFICATION_INCOMPLETE, _SIGNAL_INCIDENT_PRESENT} + + +def information_value(key: PatternKey, members: Sequence[Trajectory]) -> int: + """Deterministic 0-100 score: is this a system regularity or a tool quirk? + + Cross-agent recurrence is the system signal; a structural label cause adds + confidence. A single-agent pattern scores below the threshold by design. + """ + agents = {family for member in members for family in _agent_families(member)} + score = 0 + if len(agents) >= 2: + score += _MULTI_AGENT_SCORE + if _structural_signal(key.signal): + score += _STRUCTURAL_SIGNAL_SCORE + return min(100, score) + + +def _aggregate_facets(members: Sequence[Trajectory]) -> tuple[ExperienceFacet, ...]: + counts: dict[str, int] = defaultdict(int) + for member in members: + for family in _agent_families(member): + counts[family] += 1 + return tuple( + ExperienceFacet("agent_family", value, counts[value]) + for value in sorted(counts) + ) + + +def _build_evidence(members: Sequence[Trajectory]) -> tuple[ExperienceEvidence, ...]: + ordered = sorted(members, key=lambda member: (member.finished_at_utc, member.id)) + return tuple( + ExperienceEvidence(member.id, member.outcome, member.finished_at_utc) + for member in ordered[:MAX_EVIDENCE] + ) + + +def _experience_digest(key: PatternKey, member_ids: Sequence[str]) -> str: + payload = { + "subject_family": key.subject_family, + "signal": key.signal, + "outcome_class": key.outcome_class, + "members": list(member_ids), + "distillation_version": EXPERIENCE_DISTILLATION_VERSION, + } + canonical = json_text(payload, sort_keys=True) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _statement( + key: PatternKey, + members: Sequence[Trajectory], + facets: 
tuple[ExperienceFacet, ...], +) -> str: + agents = ", ".join(f"{facet.facet_value}x{facet.count}" for facet in facets) + return ( + f"Change-control cycles under {key.subject_family} recurrently show " + f"'{key.signal}' with outcome {key.outcome_class} " + f"(support {len(members)}; agents: {agents})." + ) + + +def _build_experience( + key: PatternKey, + members: Sequence[Trajectory], + *, + info: int, + now: str, +) -> Experience: + member_ids = sorted(member.id for member in members) + digest = _experience_digest(key, member_ids) + facets = _aggregate_facets(members) + anchor = min(members, key=lambda member: member.id) + return Experience( + id=f"exp-{digest[:32]}", + project_id=anchor.project_id, + repo_root_digest=anchor.repo_root_digest, + subject_family=key.subject_family, + signal=key.signal, + outcome_class=key.outcome_class, + support=len(members), + quality_min=min(member.quality_score for member in members), + information_value=info, + status="active", + statement=_statement(key, members, facets), + experience_digest=digest, + distillation_version=EXPERIENCE_DISTILLATION_VERSION, + first_observed_at_utc=min(member.finished_at_utc for member in members), + last_observed_at_utc=max(member.finished_at_utc for member in members), + distilled_at_utc=now, + updated_at_utc=now, + facets=facets, + evidence=_build_evidence(members), + ) + + +def distill_experiences( + trajectories: Sequence[Trajectory], + *, + now: str, + min_support: int = EXPERIENCE_MIN_SUPPORT, +) -> list[Experience]: + """Distill Experiences from canonical trajectories. Deterministic. + + ``trajectories`` must already be canonical (one per workflow). ``now`` stamps + ``distilled_at_utc`` and is excluded from the identity digest. 
+ """ + buckets: dict[PatternKey, list[Trajectory]] = defaultdict(list) + for trajectory in trajectories: + for key in pattern_keys(trajectory): + buckets[key].append(trajectory) + + experiences: list[Experience] = [] + for key in sorted( + buckets, key=lambda item: (item.subject_family, item.signal, item.outcome_class) + ): + members = buckets[key] + if len(members) < min_support: + continue + info = information_value(key, members) + if info >= MIN_INFORMATION_VALUE: + experiences.append(_build_experience(key, members, info=info, now=now)) + return experiences + + +__all__ = [ + "EXPERIENCE_MIN_SUPPORT", + "MAX_EVIDENCE", + "MIN_INFORMATION_VALUE", + "distill_experiences", + "information_value", + "pattern_keys", +] diff --git a/codeclone/memory/experience/models.py b/codeclone/memory/experience/models.py new file mode 100644 index 00000000..802e479c --- /dev/null +++ b/codeclone/memory/experience/models.py @@ -0,0 +1,100 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Experience Layer domain model. + +An Experience is a deterministic *signal extraction* over the trajectory +corpus: a structural regularity observed across many trajectories, with those +trajectories as evidence. It is the third knowledge tier after Engineering +Memory ("what we know") and Trajectory ("what happened"): "what we have +repeatedly observed". + +Invariant: the pattern key describes a *structural situation*, never a tool +identity. Agent / profile / intent are recorded as facets, never folded into +``PatternKey`` — keying by agent would fragment support and hide cross-agent +regularities. See ``specs/rfc-experience-layer.md``. + +This module is pure data (frozen dataclasses); distillation lives in +``distiller`` and persistence in ``store``. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from ...contracts import EXPERIENCE_DISTILLATION_VERSION + +ExperienceStatus = Literal["active", "dormant"] +ExperienceFacetKind = Literal["agent_family", "analysis_profile", "intent_class"] + + +@dataclass(frozen=True, slots=True) +class PatternKey: + """Structural identity of an Experience. Never includes tool identity.""" + + subject_family: str + signal: str + outcome_class: str + + +@dataclass(frozen=True, slots=True) +class ExperienceFacet: + """A non-key breakdown dimension (e.g. agent_family) with its member count.""" + + facet_kind: ExperienceFacetKind + facet_value: str + count: int + + +@dataclass(frozen=True, slots=True) +class ExperienceEvidence: + """A contributing trajectory: the proof that the pattern was observed.""" + + trajectory_id: str + outcome: str + finished_at_utc: str + + +@dataclass(frozen=True, slots=True) +class Experience: + """A distilled structural regularity over the trajectory corpus. + + Advisory and machine-owned: it never asserts truth and never authorizes + edits. Promotion into durable Engineering Memory is a separate, optional, + human-governed step. + """ + + id: str + project_id: str + repo_root_digest: str + subject_family: str + signal: str + outcome_class: str + support: int + quality_min: int + information_value: int + status: ExperienceStatus + statement: str + experience_digest: str + distillation_version: str + first_observed_at_utc: str + last_observed_at_utc: str + distilled_at_utc: str + updated_at_utc: str + facets: tuple[ExperienceFacet, ...] + evidence: tuple[ExperienceEvidence, ...] 
+ + +__all__ = [ + "EXPERIENCE_DISTILLATION_VERSION", + "Experience", + "ExperienceEvidence", + "ExperienceFacet", + "ExperienceFacetKind", + "ExperienceStatus", + "PatternKey", +] diff --git a/codeclone/memory/experience/store.py b/codeclone/memory/experience/store.py new file mode 100644 index 00000000..d2132c60 --- /dev/null +++ b/codeclone/memory/experience/store.py @@ -0,0 +1,226 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Experience persistence: replace-all per project, with cascading facets and +evidence. Experiences are derived state — a distillation run replaces the +project's experiences wholesale (dormant lifecycle is deferred to E.2).""" + +from __future__ import annotations + +import sqlite3 +from collections.abc import Sequence + +from .models import ( + Experience, + ExperienceEvidence, + ExperienceFacet, + ExperienceFacetKind, + ExperienceStatus, +) + + +def _use_row_factory(conn: sqlite3.Connection) -> None: + conn.row_factory = sqlite3.Row + + +def replace_experiences( + conn: sqlite3.Connection, + *, + project_id: str, + experiences: Sequence[Experience], +) -> int: + """Replace all experiences for a project with the distilled set.""" + conn.execute("DELETE FROM memory_experiences WHERE project_id=?", (project_id,)) + for experience in experiences: + _insert_experience(conn, experience) + conn.commit() + return len(experiences) + + +def _insert_experience(conn: sqlite3.Connection, experience: Experience) -> None: + conn.execute( + """ + INSERT INTO memory_experiences( + id, project_id, repo_root_digest, subject_family, signal, + outcome_class, support, quality_min, information_value, status, + statement, experience_digest, distillation_version, + first_observed_at_utc, last_observed_at_utc, distilled_at_utc, 
+ updated_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + experience.id, + experience.project_id, + experience.repo_root_digest, + experience.subject_family, + experience.signal, + experience.outcome_class, + experience.support, + experience.quality_min, + experience.information_value, + experience.status, + experience.statement, + experience.experience_digest, + experience.distillation_version, + experience.first_observed_at_utc, + experience.last_observed_at_utc, + experience.distilled_at_utc, + experience.updated_at_utc, + ), + ) + conn.executemany( + "INSERT INTO memory_experience_facets(" + "experience_id, facet_kind, facet_value, count) VALUES (?, ?, ?, ?)", + [ + (experience.id, facet.facet_kind, facet.facet_value, facet.count) + for facet in experience.facets + ], + ) + conn.executemany( + "INSERT INTO memory_experience_evidence(" + "experience_id, trajectory_id, outcome, finished_at_utc) VALUES (?, ?, ?, ?)", + [ + (experience.id, item.trajectory_id, item.outcome, item.finished_at_utc) + for item in experience.evidence + ], + ) + + +def count_experiences(conn: sqlite3.Connection, *, project_id: str) -> int: + row = conn.execute( + "SELECT COUNT(*) FROM memory_experiences WHERE project_id=?", + (project_id,), + ).fetchone() + return int(row[0]) + + +def list_experiences( + conn: sqlite3.Connection, + *, + project_id: str, +) -> list[Experience]: + _use_row_factory(conn) + rows = conn.execute( + "SELECT * FROM memory_experiences WHERE project_id=? " + "ORDER BY subject_family ASC, signal ASC, outcome_class ASC", + (project_id,), + ).fetchall() + return [_row_to_experience(conn, row) for row in rows] + + +def list_experiences_for_subject_family( + conn: sqlite3.Connection, + *, + project_id: str, + subject_family: str, +) -> list[Experience]: + _use_row_factory(conn) + rows = conn.execute( + "SELECT * FROM memory_experiences WHERE project_id=? AND subject_family=? 
" + "ORDER BY signal ASC, outcome_class ASC", + (project_id, subject_family), + ).fetchall() + return [_row_to_experience(conn, row) for row in rows] + + +def find_experience( + conn: sqlite3.Connection, + *, + experience_id: str, +) -> Experience | None: + _use_row_factory(conn) + row = conn.execute( + "SELECT * FROM memory_experiences WHERE id=?", + (experience_id,), + ).fetchone() + return _row_to_experience(conn, row) if row is not None else None + + +def _facets_for_experience( + conn: sqlite3.Connection, + experience_id: str, +) -> tuple[ExperienceFacet, ...]: + rows = conn.execute( + "SELECT facet_kind, facet_value, count FROM memory_experience_facets " + "WHERE experience_id=? ORDER BY facet_kind ASC, facet_value ASC", + (experience_id,), + ).fetchall() + return tuple( + ExperienceFacet( + facet_kind=_facet_kind(str(row["facet_kind"])), + facet_value=str(row["facet_value"]), + count=int(row["count"]), + ) + for row in rows + ) + + +def _evidence_for_experience( + conn: sqlite3.Connection, + experience_id: str, +) -> tuple[ExperienceEvidence, ...]: + rows = conn.execute( + "SELECT trajectory_id, outcome, finished_at_utc " + "FROM memory_experience_evidence WHERE experience_id=? 
" + "ORDER BY finished_at_utc ASC, trajectory_id ASC", + (experience_id,), + ).fetchall() + return tuple( + ExperienceEvidence( + trajectory_id=str(row["trajectory_id"]), + outcome=str(row["outcome"]), + finished_at_utc=str(row["finished_at_utc"]), + ) + for row in rows + ) + + +def _facet_kind(value: str) -> ExperienceFacetKind: + if value in ("agent_family", "analysis_profile", "intent_class"): + return value # type: ignore[return-value] + msg = f"unknown experience facet kind: {value!r}" + raise ValueError(msg) + + +def _row_to_experience(conn: sqlite3.Connection, row: sqlite3.Row) -> Experience: + experience_id = str(row["id"]) + return Experience( + id=experience_id, + project_id=str(row["project_id"]), + repo_root_digest=str(row["repo_root_digest"]), + subject_family=str(row["subject_family"]), + signal=str(row["signal"]), + outcome_class=str(row["outcome_class"]), + support=int(row["support"]), + quality_min=int(row["quality_min"]), + information_value=int(row["information_value"]), + status=_status(str(row["status"])), + statement=str(row["statement"]), + experience_digest=str(row["experience_digest"]), + distillation_version=str(row["distillation_version"]), + first_observed_at_utc=str(row["first_observed_at_utc"]), + last_observed_at_utc=str(row["last_observed_at_utc"]), + distilled_at_utc=str(row["distilled_at_utc"]), + updated_at_utc=str(row["updated_at_utc"]), + facets=_facets_for_experience(conn, experience_id), + evidence=_evidence_for_experience(conn, experience_id), + ) + + +def _status(value: str) -> ExperienceStatus: + if value in ("active", "dormant"): + return value # type: ignore[return-value] + msg = f"unknown experience status: {value!r}" + raise ValueError(msg) + + +__all__ = [ + "count_experiences", + "find_experience", + "list_experiences", + "list_experiences_for_subject_family", + "replace_experiences", +] diff --git a/codeclone/memory/governance.py b/codeclone/memory/governance.py new file mode 100644 index 00000000..e45bf013 --- 
/dev/null +++ b/codeclone/memory/governance.py @@ -0,0 +1,665 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import re +from dataclasses import dataclass +from pathlib import Path +from typing import TypeAlias + +from ..config.memory_defaults import ( + DEFAULT_MEMORY_MAX_STATEMENT_CHARS, + DEFAULT_MEMORY_SOFT_STATEMENT_CHARS, + DEFAULT_MEMORY_TARGET_STATEMENT_CHARS, +) +from ..report.meta import current_report_timestamp_utc +from .enums import MemoryRecordType +from .exceptions import MemoryCapacityError, MemoryContractError +from .identity import make_identity_key +from .models import ( + MemoryEvidence, + MemoryProject, + MemoryQuery, + MemoryRecord, + MemoryRevision, + MemorySubject, + generate_memory_id, +) +from .paths import normalize_memory_scope_path +from .project import ( + code_fingerprint_for_memory_subject, + read_git_provenance, +) +from .sqlite_store import SqliteEngineeringMemoryStore + +_NEGATION_WINDOW = re.compile( + r"(?:cannot|can't|can not|does not|doesn't|do not|don't|never|not)\s+" + r"(?:\w+\s+){0,4}$", + re.IGNORECASE, +) + +_FORBIDDEN_LITERALS = ( + "edit allowed", + "do_not_touch cleared", + "gate passed because memory", + "scope expanded because memory", + "expanded scope because memory", +) + +_FORBIDDEN_NEGATABLE = ( + "override finding", + "override findings", + "overrides finding", + "overrides findings", +) + +_ForbiddenClaimRule: TypeAlias = tuple[str, re.Pattern[str]] + +_FORBIDDEN_APPROVE_DRAFT_RULES: tuple[_ForbiddenClaimRule, ...] 
= ( + ( + "agent or MCP self-approving memory drafts as active policy", + re.compile( + r"\b(?:mcp|memory)\b[^.]{0,80}\bapprove\b[^.]{0,80}\bdraft", + re.I, + ), + ), + ( + "approving memory drafts as active or verified policy", + re.compile( + r"\bapprove\b[^.]{0,80}\b(?:memory|draft)\b[^.]{0,80}" + r"\b(?:active|policy|verified)\b", + re.I, + ), + ), +) +_FORBIDDEN_OTHER_RULES: tuple[_ForbiddenClaimRule, ...] = ( + ( + "memory authorizing edits, changes, or touching paths", + re.compile( + r"\b(?:engineering )?memory\b[^.]{0,60}\b(?:allows?|permits?|authoriz\w+)\b" + r"[^.]{0,40}\b(?:edit\w*|chang\w*|touch\w*)\b", + re.I, + ), + ), + ( + "scope or intent expansion via memory", + re.compile( + r"\b(?:scope|intent)\b[^.]{0,50}\b(?:expand|widened|broadened)\b", + re.I, + ), + ), + ( + "findings or structural checks cleared by memory", + re.compile( + r"\b(?:findings?|codeclone|structural)\b[^.]{0,50}" + r"\b(?:clear\w*|resolved|gone|passed|clean\w*)\b", + re.I, + ), + ), +) + +MEMORY_STATEMENT_TOO_LONG_ERROR = ( + "Memory candidate is too long for a durable card. " + "Compress it into one evidence-linked conclusion; store details in " + "receipt/spec/docs." 
+) + +_VS_CODE_CHANNEL_RE = re.compile(r"\bvs\s*code\b|\bvscode\b", re.IGNORECASE) +_HUMAN_GOVERNANCE_MARKERS = ( + "human", + "operator", + "maintainer", + "ide channel", + "human review", + "not mcp", + "not available through mcp", +) + + +def _is_vscode_human_approval_descriptor(text: str) -> bool: + """Describe IDE human governance, not agent/MCP self-grant of approval power.""" + lowered = text.lower() + if _VS_CODE_CHANNEL_RE.search(text) is None: + return False + if "memory view" not in lowered: + return False + if "approve" not in lowered or "draft" not in lowered: + return False + return any(marker in lowered for marker in _HUMAN_GOVERNANCE_MARKERS) + + +def _phrase_is_negated(text: str, phrase: str, *, start: int) -> bool: + del phrase + return _match_is_negated(text, start=start) + + +def _match_is_negated(text: str, *, start: int) -> bool: + window = text[max(0, start - 48) : start] + return _NEGATION_WINDOW.search(window) is not None + + +_PERMISSION_VERB_IN_MATCH = re.compile( + r"\b(approve\w*|allow\w*|permits?\w*|authoriz\w+|clear\w*|" + r"expand\w*|widened|broadened|resolved|gone|passed|clean\w*)\b", + re.IGNORECASE, +) + + +def _pattern_matches_unnegated(text: str, pattern: re.Pattern[str]) -> bool: + for match in pattern.finditer(text): + span_start, span_end = match.span() + segment = text[span_start:span_end] + anchors = list(_PERMISSION_VERB_IN_MATCH.finditer(segment)) + if not anchors: + if not _match_is_negated(text, start=span_start): + return True + continue + if any( + not _match_is_negated(text, start=span_start + anchor.start()) + for anchor in anchors + ): + return True + return False + + +def _contains_unnegated_phrase(text: str, phrase: str) -> bool: + lowered = text.lower() + needle = phrase.lower() + start = 0 + while True: + index = lowered.find(needle, start) + if index < 0: + return False + if not _phrase_is_negated(lowered, needle, start=index): + return True + start = index + len(needle) + return False + + +def 
_permission_claim_error(description: str) -> str: + return f"Claim may grant permission memory cannot provide: {description}." + + +def _forbidden_claim_errors(text: str) -> tuple[str, ...]: + lowered = text.lower() + errors = [ + _permission_claim_error(phrase) + for phrase in _FORBIDDEN_LITERALS + if phrase in lowered + ] + errors.extend( + _permission_claim_error(f"unnegated '{phrase}'") + for phrase in _FORBIDDEN_NEGATABLE + if _contains_unnegated_phrase(lowered, phrase) + ) + approve_rules = ( + () + if _is_vscode_human_approval_descriptor(text) + else _FORBIDDEN_APPROVE_DRAFT_RULES + ) + errors.extend( + _permission_claim_error(label) + for label, pattern in approve_rules + _FORBIDDEN_OTHER_RULES + if _pattern_matches_unnegated(text, pattern) + ) + return tuple(errors) + + +@dataclass(frozen=True, slots=True) +class ClaimValidationResult: + valid: bool + warnings: tuple[str, ...] + errors: tuple[str, ...] + + +def _require_record( + store: SqliteEngineeringMemoryStore, + record_id: str, +) -> MemoryRecord: + record = store.find_record(record_id) + if record is None: + msg = f"Memory record not found: {record_id}" + raise MemoryContractError(msg) + return record + + +def _write_governance_revision( + store: SqliteEngineeringMemoryStore, + record: MemoryRecord, + *, + record_id: str, + reason: str, + changed_by: str, + now: str, +) -> None: + store.write_revision( + MemoryRevision( + id=generate_memory_id(prefix="rev"), + memory_id=record_id, + revision_number=store.next_revision_number(record_id), + previous_statement=record.statement, + new_statement=record.statement, + previous_payload=record.payload, + new_payload=record.payload, + reason=reason, + changed_by=changed_by, + changed_at_utc=now, + branch=record.verified_on_branch, + commit=record.verified_at_commit, + ) + ) + + +def _finalize_governance_record( + store: SqliteEngineeringMemoryStore, + record_id: str, +) -> MemoryRecord: + store.commit() + updated = store.find_record(record_id) + assert updated 
is not None + return updated + + +def _ensure_approval_evidence( + store: SqliteEngineeringMemoryStore, + record: MemoryRecord, + *, + record_id: str, + approved_by: str, + now: str, +) -> None: + """Record the human approval as the warrant for an evidence-less record. + + Every active record must carry at least one evidence link. Agent + candidates are approved with no ingested evidence, so the approval itself + is the recorded warrant — keeping the store evidence-linked rather than + leaving active records with no provenance. Records that already carry + evidence (system-ingested facts that went stale and are re-approved) are + left untouched. + """ + if store.count_evidence_for_memory(record_id) > 0: + return + branch = record.verified_on_branch or "" + commit = record.verified_at_commit or "" + locator = f"{branch}@{commit}".strip("@") or None + store.write_evidence( + MemoryEvidence( + id=generate_memory_id(prefix="evid"), + memory_id=record_id, + evidence_kind="audit_event", + ref=f"human_approval:{approved_by}", + locator=locator, + quote=None, + digest=None, + created_at_utc=now, + ) + ) + + +def approve_record( + store: SqliteEngineeringMemoryStore, + *, + record_id: str, + approved_by: str, + revision_reason: str = "human_approve", +) -> MemoryRecord: + record = _require_record(store, record_id) + if record.status not in {"draft", "stale"}: + msg = f"Cannot approve record in status '{record.status}'" + raise MemoryContractError(msg) + now = current_report_timestamp_utc() + store.update_record_status( + record_id, + status="active", + approved_by=approved_by, + approved_at_utc=now, + stale_reason=None, + ) + _ensure_approval_evidence( + store, + record, + record_id=record_id, + approved_by=approved_by, + now=now, + ) + _write_governance_revision( + store, + record, + record_id=record_id, + reason=revision_reason, + changed_by=approved_by, + now=now, + ) + return _finalize_governance_record(store, record_id) + + +def reject_record( + store: 
SqliteEngineeringMemoryStore, + *, + record_id: str, + rejected_by: str, + reason: str | None = None, + revision_reason: str | None = None, +) -> MemoryRecord: + record = _require_record(store, record_id) + if record.status != "draft": + msg = f"Cannot reject record in status '{record.status}'" + raise MemoryContractError(msg) + now = current_report_timestamp_utc() + store.update_record_status( + record_id, + status="rejected", + stale_reason=reason, + ) + _write_governance_revision( + store, + record, + record_id=record_id, + reason=revision_reason or reason or "human_reject", + changed_by=rejected_by, + now=now, + ) + return _finalize_governance_record(store, record_id) + + +def archive_record( + store: SqliteEngineeringMemoryStore, + *, + record_id: str, + archived_by: str, + revision_reason: str = "human_archive", +) -> MemoryRecord: + record = _require_record(store, record_id) + if record.status != "active": + msg = f"Cannot archive record in status '{record.status}'" + raise MemoryContractError(msg) + now = current_report_timestamp_utc() + store.update_record_status(record_id, status="archived") + _write_governance_revision( + store, + record, + record_id=record_id, + reason=revision_reason, + changed_by=archived_by, + now=now, + ) + return _finalize_governance_record(store, record_id) + + +def _statement_length_warnings( + length: int, + *, + target_limit: int = DEFAULT_MEMORY_TARGET_STATEMENT_CHARS, + soft_limit: int = DEFAULT_MEMORY_SOFT_STATEMENT_CHARS, +) -> tuple[str, ...]: + if length > soft_limit: + return ( + f"Statement length {length} exceeds soft limit ({soft_limit} chars); " + "compress to one durable fact before record_candidate.", + ) + if length > target_limit: + return ( + f"Statement length {length} exceeds target ({target_limit} chars); " + "prefer <= 300 chars for durable cards.", + ) + return () + + +def _new_draft_record( + *, + project: MemoryProject, + record_type: MemoryRecordType, + identity: str, + statement: str, + payload: 
dict[str, object], + now: str, + created_by: str, + code_fingerprint: str | None, + created_on_branch: str | None, + created_at_commit: str | None, +) -> MemoryRecord: + """Build a draft agent record with the shared field defaults (status=draft, + confidence=inferred, origin=agent). Single source for the draft shape used by + record_candidate and promote_experience.""" + return MemoryRecord( + id=generate_memory_id(), + project_id=project.id, + identity_key=identity, + type=record_type, + status="draft", + confidence="inferred", + origin="agent", + ingest_source="agent", + statement=statement, + summary=None, + payload=payload, + created_at_utc=now, + updated_at_utc=now, + last_verified_at_utc=now, + expires_at_utc=None, + created_by=created_by, + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest=None, + code_fingerprint=code_fingerprint, + stale_reason=None, + created_on_branch=created_on_branch, + created_at_commit=created_at_commit, + verified_on_branch=None, + verified_at_commit=None, + ) + + +def record_candidate( + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + record_type: MemoryRecordType, + statement: str, + subject_path: str | None = None, + root_path: Path | None = None, + created_by: str = "agent", + max_candidates: int, + max_statement_chars: int = DEFAULT_MEMORY_MAX_STATEMENT_CHARS, +) -> MemoryRecord: + stripped = statement.strip() + if not stripped: + raise MemoryContractError("Candidate statement must not be empty.") + if len(stripped) > max_statement_chars: + raise MemoryContractError(MEMORY_STATEMENT_TOO_LONG_ERROR) + if subject_path is None or not subject_path.strip(): + raise MemoryContractError( + "record_candidate requires subject_path linking the observation to a " + "repo file." 
+ ) + draft_count = store.count_records_by_status(project.id, "draft") + if draft_count >= max_candidates: + raise MemoryCapacityError( + f"max_candidates_reached: {draft_count}/{max_candidates}" + ) + now = current_report_timestamp_utc() + normalized_path = normalize_memory_scope_path(subject_path) + statement_digest = hashlib.sha256(statement.strip().encode("utf-8")).hexdigest()[ + :12 + ] + subject_key = normalized_path + identity = make_identity_key( + type=record_type, + subject_kind="path", + subject_key=subject_key.replace("/", ".").removesuffix(".py"), + discriminator=f"agent_candidate:{statement_digest}", + ) + if store.find_by_identity_key(project.id, identity) is not None: + msg = f"Candidate already exists for identity_key={identity}" + raise MemoryContractError(msg) + + resolved_root = (root_path or Path(project.root)).resolve() + git = read_git_provenance(resolved_root) + code_fingerprint = code_fingerprint_for_memory_subject( + resolved_root, + subject_path=normalized_path, + ) + anchor_available = git.available and code_fingerprint is not None + + record = _new_draft_record( + project=project, + record_type=record_type, + identity=identity, + statement=stripped, + payload={"subject_path": normalized_path}, + now=now, + created_by=created_by, + code_fingerprint=code_fingerprint, + created_on_branch=git.branch if anchor_available else None, + created_at_commit=git.head if anchor_available else None, + ) + store.write_record(record) + from .paths import repo_path_to_module_key + + store.write_subject( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record.id, + subject_kind="path", + subject_key=normalized_path, + relation="about", + ) + ) + store.write_subject( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record.id, + subject_kind="module", + subject_key=repo_path_to_module_key(normalized_path), + relation="about", + ) + ) + store.sync_fts_record(record.id) + store.commit() + return record + + +def 
promote_experience( + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + experience_id: str, + record_type: MemoryRecordType = "risk_note", + created_by: str = "agent", + max_candidates: int, +) -> MemoryRecord: + """Promote a distilled Experience into a human-approvable draft record. + + The draft carries the experience statement and one ``evidence_kind=trajectory`` + row per proof trajectory, then follows the normal draft -> human approve path. + Idempotent: re-promoting the same experience is rejected. The experience keeps + informing agents advisorily whether or not it is ever promoted. + """ + experience = store.find_experience(experience_id) + if experience is None or experience.project_id != project.id: + raise MemoryContractError(f"Experience not found: {experience_id}") + draft_count = store.count_records_by_status(project.id, "draft") + if draft_count >= max_candidates: + raise MemoryCapacityError( + f"max_candidates_reached: {draft_count}/{max_candidates}" + ) + now = current_report_timestamp_utc() + family = experience.subject_family + identity = make_identity_key( + type=record_type, + subject_kind="path", + subject_key=family.replace("/", "."), + discriminator=f"experience:{experience.experience_digest[:12]}", + ) + existing = store.find_by_identity_key(project.id, identity) + if existing is not None: + raise MemoryContractError(f"Experience already promoted: record={existing.id}") + git = read_git_provenance(Path(project.root).resolve()) + record = _new_draft_record( + project=project, + record_type=record_type, + identity=identity, + statement=experience.statement, + payload={ + "subject_path": family, + "promoted_from_experience": experience.id, + "experience_digest": experience.experience_digest, + "support": experience.support, + }, + now=now, + created_by=created_by, + code_fingerprint=None, + created_on_branch=git.branch if git.available else None, + created_at_commit=git.head if git.available else None, + ) + 
store.write_record(record) + store.write_subject( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record.id, + subject_kind="path", + subject_key=family, + relation="about", + ) + ) + for item in experience.evidence: + store.write_evidence( + MemoryEvidence( + id=generate_memory_id(prefix="evid"), + memory_id=record.id, + evidence_kind="trajectory", + ref=item.trajectory_id, + locator=None, + quote=None, + digest=None, + created_at_utc=now, + ) + ) + store.sync_fts_record(record.id) + store.commit() + return record + + +def validate_memory_claims( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + text: str, +) -> ClaimValidationResult: + warnings: list[str] = list(_statement_length_warnings(len(text.strip()))) + lowered = text.lower() + errors = list(_forbidden_claim_errors(text)) + if "inferred" in lowered and "established fact" in lowered: + warnings.append("Treat inferred memory as hypothesis, not established fact.") + stale_hits = store.query_records( + MemoryQuery( + project_id=project_id, + statuses=("stale",), + limit=5, + ) + ) + if stale_hits and "no stale" in lowered: + warnings.append("Active stale records exist; do not claim freshness.") + return ClaimValidationResult( + valid=not errors, + warnings=tuple(warnings), + errors=tuple(errors), + ) + + +__all__ = [ + "MEMORY_STATEMENT_TOO_LONG_ERROR", + "ClaimValidationResult", + "approve_record", + "archive_record", + "promote_experience", + "record_candidate", + "reject_record", + "validate_memory_claims", +] diff --git a/codeclone/memory/ide_governance.py b/codeclone/memory/ide_governance.py new file mode 100644 index 00000000..ab21287d --- /dev/null +++ b/codeclone/memory/ide_governance.py @@ -0,0 +1,561 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import hmac +import secrets +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal, NoReturn + +from ..contracts import IDE_GOVERNANCE_PROTOCOL_VERSION +from .exceptions import MemoryContractError +from .governance import approve_record, archive_record, reject_record +from .models import MemoryRecord +from .project import compute_project_id +from .sqlite_store import SqliteEngineeringMemoryStore + +IDE_GOVERNANCE_TICKET_TTL_SECONDS = 120 +IDE_GOVERNANCE_MIN_KEY_BYTES = 32 +IDE_GOVERNANCE_MAX_COMMIT_ATTEMPTS = 100 +IDE_GOVERNANCE_ALLOWED_CLIENTS = frozenset({"CodeClone VS Code"}) + +GovernanceDecision = Literal["approve", "reject", "archive"] +GovernanceAction = Literal[ + "register_ide_governance", + "prepare_governance", + "commit_governance", +] + +GOVERNANCE_MODE_UNAVAILABLE_MESSAGE = ( + "This action is only available through the CodeClone VS Code IDE governance " + "channel." +) +GOVERNANCE_MODE_UNAVAILABLE_NEXT_STEP = ( + "Use the Memory view in the CodeClone extension to approve or reject draft records." 
+) + + +@dataclass(slots=True) +class IdeGovernanceTicket: + ticket_id: str + record_id: str + decision: GovernanceDecision + confirmation_nonce: str + project_id: str + statement_digest: str + expires_at_unix: float + consumed: bool = False + + +@dataclass(slots=True) +class IdeGovernanceSessionState: + channel_enabled: bool = False + governance_key: bytes | None = None + client_name: str | None = None + client_version: str | None = None + tickets: dict[str, IdeGovernanceTicket] = field(default_factory=dict) + commit_attempts: int = 0 + + +def compute_statement_digest(statement: str) -> str: + normalized = statement.strip() + return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:32] + + +def _canonical_proof_message( + *, + ticket_id: str, + record_id: str, + decision: str, + confirmation_nonce: str, + project_id: str, + statement_digest: str, + protocol: int, +) -> bytes: + return ( + f"v{protocol}|{ticket_id}|{record_id}|{decision}|{confirmation_nonce}|" + f"{project_id}|{statement_digest}" + ).encode() + + +def compute_governance_proof( + key: bytes, + *, + ticket_id: str, + record_id: str, + decision: str, + confirmation_nonce: str, + project_id: str, + statement_digest: str, + protocol: int = IDE_GOVERNANCE_PROTOCOL_VERSION, +) -> str: + message = _canonical_proof_message( + ticket_id=ticket_id, + record_id=record_id, + decision=decision, + confirmation_nonce=confirmation_nonce, + project_id=project_id, + statement_digest=statement_digest, + protocol=protocol, + ) + return hmac.new(key, message, hashlib.sha256).hexdigest() + + +def _raise_memory_contract( + message: str, + *, + cause: BaseException | None = None, +) -> NoReturn: + if cause is None: + raise MemoryContractError(message) + raise MemoryContractError(message) from cause + + +def _parse_governance_key(raw_key: str) -> bytes: + cleaned = raw_key.strip().lower() + if cleaned.startswith("0x"): + cleaned = cleaned[2:] + if len(cleaned) % 2 != 0: + _raise_memory_contract("ide_governance_key hex 
length must be even.") + try: + key = bytes.fromhex(cleaned) + except ValueError as exc: + _raise_memory_contract( + "ide_governance_key must be valid hexadecimal.", + cause=exc, + ) + if len(key) < IDE_GOVERNANCE_MIN_KEY_BYTES: + _raise_memory_contract( + "ide_governance_key must be at least 32 bytes (64 hex characters)." + ) + return key + + +def _validate_ide_governance_protocol(protocol: int) -> None: + if protocol != IDE_GOVERNANCE_PROTOCOL_VERSION: + _raise_memory_contract( + f"Unsupported ide_attestation protocol {protocol!r}. " + f"Expected {IDE_GOVERNANCE_PROTOCOL_VERSION}." + ) + + +def _governance_key_or_reject( + state: IdeGovernanceSessionState, + *, + action: str, +) -> bytes | dict[str, object]: + key = state.governance_key + if key is None: + return _governance_rejected( + action, + reason="governance_key_missing", + ) + return key + + +def _assert_ticket_request_matches( + ticket: IdeGovernanceTicket, + *, + record_id: str, + decision: GovernanceDecision, + project_id: str, + statement_digest: str, +) -> None: + if ( + ticket.record_id != record_id + or ticket.decision != decision + or ticket.project_id != project_id + or ticket.statement_digest != statement_digest + ): + _raise_memory_contract("Governance ticket does not match the commit request.") + + +def _require_matching_confirmation_nonce( + ticket: IdeGovernanceTicket, + confirmation_nonce: str, +) -> None: + if confirmation_nonce != ticket.confirmation_nonce: + _raise_memory_contract( + "confirmation_nonce does not match the prepared governance ticket." 
+ ) + + +def _require_valid_governance_proof( + *, + key: bytes, + ticket_id: str, + record_id: str, + decision: GovernanceDecision, + confirmation_nonce: str, + project_id: str, + statement_digest: str, + protocol: int, + proof: str, +) -> None: + expected_proof = compute_governance_proof( + key, + ticket_id=ticket_id, + record_id=record_id, + decision=decision, + confirmation_nonce=confirmation_nonce, + project_id=project_id, + statement_digest=statement_digest, + protocol=protocol, + ) + if not hmac.compare_digest(expected_proof, proof.strip().lower()): + _raise_memory_contract("Invalid IDE governance proof.") + + +def _register_commit_attempt( + state: IdeGovernanceSessionState, + *, + action: str, +) -> dict[str, object] | None: + if state.commit_attempts >= IDE_GOVERNANCE_MAX_COMMIT_ATTEMPTS: + return _governance_rejected( + action, + reason="governance_rate_limited", + message=( + "IDE governance commit attempt limit reached for this MCP session. " + "Restart the CodeClone MCP session before further governance commits." 
+ ), + ) + state.commit_attempts += 1 + return None + + +def _resolve_client_label(state: IdeGovernanceSessionState) -> str: + name = state.client_name or "unknown-client" + version = state.client_version + if version: + return f"{name}/{version}" + return name + + +def _governance_rejected( + action: str, + *, + reason: str = "governance_mode_unavailable", + message: str = GOVERNANCE_MODE_UNAVAILABLE_MESSAGE, +) -> dict[str, object]: + return { + "action": action, + "status": "rejected", + "reason": reason, + "message": message, + "next_step": GOVERNANCE_MODE_UNAVAILABLE_NEXT_STEP, + } + + +def _require_governance_channel( + state: IdeGovernanceSessionState, + *, + action: str, +) -> dict[str, object] | None: + if not state.channel_enabled: + return _governance_rejected(action) + if state.governance_key is None and action != "register_ide_governance": + return _governance_rejected( + action, + reason="governance_key_missing", + message=( + "IDE governance channel is active but no session key is registered. " + "Reconnect the CodeClone VS Code extension." + ), + ) + if ( + action != "register_ide_governance" + and state.client_name not in IDE_GOVERNANCE_ALLOWED_CLIENTS + ): + return _governance_rejected(action) + return None + + +def _validate_repository_project(project_id: str, root_path: str | Path) -> None: + expected_project_id = compute_project_id(Path(root_path)) + if project_id != expected_project_id: + _raise_memory_contract( + "Memory project identity does not match the requested repository root." 
+ ) + + +def _find_project_record( + store: SqliteEngineeringMemoryStore, + *, + record_id: str, + project_id: str, +) -> MemoryRecord | None: + record = store.find_record(record_id) + if record is None or record.project_id != project_id: + return None + return record + + +def _validate_decision(decision: str) -> GovernanceDecision: + normalized = decision.strip().lower() + if normalized not in {"approve", "reject", "archive"}: + _raise_memory_contract(f"Unknown governance decision: {decision!r}") + return normalized # type: ignore[return-value] + + +def _validate_record_for_decision( + record: MemoryRecord, + decision: GovernanceDecision, +) -> None: + # Mirror governance.py: approve accepts {draft, stale}, reject accepts only + # draft (stale is discarded via vacuum, never human-rejected — see the + # trust-and-lifecycle state machine), archive accepts only active. Keeps the + # IDE channel consistent with reject_record so a VS Code reject on a stale + # record fails here with a clear message instead of raising downstream. 
+ allowed_by_decision: dict[GovernanceDecision, frozenset[str]] = { + "approve": frozenset({"draft", "stale"}), + "reject": frozenset({"draft"}), + "archive": frozenset({"active"}), + } + if record.status not in allowed_by_decision[decision]: + _raise_memory_contract(f"Cannot {decision} record in status '{record.status}'") + + +def register_ide_governance( + state: IdeGovernanceSessionState, + *, + ide_governance_key: str, + client_name: str, + client_version: str | None, +) -> dict[str, object]: + rejected = _require_governance_channel(state, action="register_ide_governance") + if rejected is not None: + return rejected + if client_name not in IDE_GOVERNANCE_ALLOWED_CLIENTS: + return _governance_rejected("register_ide_governance") + key = _parse_governance_key(ide_governance_key) + state.governance_key = key + state.client_name = client_name + state.client_version = client_version + state.tickets.clear() + state.commit_attempts = 0 + return { + "action": "register_ide_governance", + "status": "ok", + "protocol": IDE_GOVERNANCE_PROTOCOL_VERSION, + "client_name": client_name, + "client_version": client_version, + "max_commit_attempts": IDE_GOVERNANCE_MAX_COMMIT_ATTEMPTS, + } + + +def prepare_governance( + state: IdeGovernanceSessionState, + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + root_path: str | Path, + record_id: str, + decision: str, +) -> dict[str, object]: + rejected = _require_governance_channel(state, action="prepare_governance") + if rejected is not None: + return rejected + normalized_decision = _validate_decision(decision) + record = _find_project_record( + store, + record_id=record_id, + project_id=project_id, + ) + if record is None: + return { + "action": "prepare_governance", + "status": "not_found", + "record_id": record_id, + } + _validate_record_for_decision(record, normalized_decision) + _validate_repository_project(project_id, root_path) + statement_digest = compute_statement_digest(record.statement) + ticket_id = 
secrets.token_hex(16) + nonce = secrets.token_hex(16) + ticket = IdeGovernanceTicket( + ticket_id=ticket_id, + record_id=record_id, + decision=normalized_decision, + confirmation_nonce=nonce, + project_id=project_id, + statement_digest=statement_digest, + expires_at_unix=time.time() + IDE_GOVERNANCE_TICKET_TTL_SECONDS, + ) + state.tickets[ticket_id] = ticket + subjects = store.list_subjects_for_memory(record.id) + return { + "action": "prepare_governance", + "status": "ok", + "protocol": IDE_GOVERNANCE_PROTOCOL_VERSION, + "governance_ticket": ticket_id, + "expires_at_unix": ticket.expires_at_unix, + "confirmation_nonce": nonce, + "project_id": project_id, + "statement_digest": statement_digest, + "record": { + "id": record.id, + "type": record.type, + "status": record.status, + "statement": record.statement, + "confidence": record.confidence, + "subjects": [ + { + "subject_kind": item.subject_kind, + "subject_key": item.subject_key, + "relation": item.relation, + } + for item in subjects + ], + }, + } + + +def _consume_ticket( + state: IdeGovernanceSessionState, + *, + ticket_id: str, + record_id: str, + decision: GovernanceDecision, + project_id: str, + statement_digest: str, +) -> IdeGovernanceTicket: + ticket = state.tickets.get(ticket_id) + if ticket is None: + _raise_memory_contract(f"Unknown or expired governance ticket: {ticket_id!r}") + if ticket.consumed: + _raise_memory_contract("Governance ticket was already used.") + if time.time() > ticket.expires_at_unix: + state.tickets.pop(ticket_id, None) + _raise_memory_contract( + "Governance ticket expired. Prepare governance again from the IDE." 
+ ) + _assert_ticket_request_matches( + ticket, + record_id=record_id, + decision=decision, + project_id=project_id, + statement_digest=statement_digest, + ) + ticket.consumed = True + return ticket + + +def commit_governance( + state: IdeGovernanceSessionState, + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + root_path: str | Path, + record_id: str, + decision: str, + governance_ticket: str, + confirmation_nonce: str, + proof: str, + actor: str, + protocol: int, +) -> dict[str, object]: + rejected = _require_governance_channel(state, action="commit_governance") + if rejected is not None: + return rejected + rate_limited = _register_commit_attempt(state, action="commit_governance") + if rate_limited is not None: + return rate_limited + _validate_ide_governance_protocol(protocol) + key_or_rejected = _governance_key_or_reject( + state, + action="commit_governance", + ) + if isinstance(key_or_rejected, dict): + return key_or_rejected + key = key_or_rejected + normalized_decision = _validate_decision(decision) + record = _find_project_record( + store, + record_id=record_id, + project_id=project_id, + ) + if record is None: + return { + "action": "commit_governance", + "status": "not_found", + "record_id": record_id, + } + _validate_repository_project(project_id, root_path) + statement_digest = compute_statement_digest(record.statement) + ticket = _consume_ticket( + state, + ticket_id=governance_ticket, + record_id=record_id, + decision=normalized_decision, + project_id=project_id, + statement_digest=statement_digest, + ) + _require_matching_confirmation_nonce(ticket, confirmation_nonce) + _require_valid_governance_proof( + key=key, + ticket_id=governance_ticket, + record_id=record_id, + decision=normalized_decision, + confirmation_nonce=confirmation_nonce, + project_id=project_id, + statement_digest=statement_digest, + protocol=protocol, + proof=proof, + ) + _validate_record_for_decision(record, normalized_decision) + actor_label = actor.strip() or 
_resolve_client_label(state) + if normalized_decision == "approve": + updated = approve_record( + store, + record_id=record_id, + approved_by=actor_label, + revision_reason="ide_govern_approve", + ) + elif normalized_decision == "reject": + updated = reject_record( + store, + record_id=record_id, + rejected_by=actor_label, + reason="ide_govern_reject", + revision_reason="ide_govern_reject", + ) + else: + updated = archive_record( + store, + record_id=record_id, + archived_by=actor_label, + revision_reason="ide_govern_archive", + ) + state.tickets.pop(governance_ticket, None) + return { + "action": "commit_governance", + "status": "ok", + "record_id": updated.id, + "record_status": updated.status, + "approved_by": updated.approved_by, + } + + +__all__ = [ + "GOVERNANCE_MODE_UNAVAILABLE_MESSAGE", + "GOVERNANCE_MODE_UNAVAILABLE_NEXT_STEP", + "IDE_GOVERNANCE_ALLOWED_CLIENTS", + "IDE_GOVERNANCE_MAX_COMMIT_ATTEMPTS", + "IDE_GOVERNANCE_PROTOCOL_VERSION", + "IDE_GOVERNANCE_TICKET_TTL_SECONDS", + "IdeGovernanceSessionState", + "IdeGovernanceTicket", + "commit_governance", + "compute_governance_proof", + "compute_statement_digest", + "prepare_governance", + "register_ide_governance", +] diff --git a/codeclone/memory/identity.py b/codeclone/memory/identity.py new file mode 100644 index 00000000..cac10174 --- /dev/null +++ b/codeclone/memory/identity.py @@ -0,0 +1,39 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def _encode_segment(value: str) -> str:
    """Percent-encode one free-form key segment.

    Backslashes become forward slashes and surrounding whitespace is removed
    before encoding. No character is treated as safe, so ``/`` and ``:`` are
    escaped as well.
    """
    return quote(value.replace("\\", "/").strip(), safe="")


def make_identity_key(
    *,
    type: MemoryRecordType | str,
    subject_kind: SubjectKind | str,
    subject_key: str,
    discriminator: str,
) -> str:
    """Build a deterministic opaque identity key for idempotent upsert.

    The key is ``type:subject_kind:subject_key:discriminator``. The first two
    parts are stringified and stripped; the last two are percent-encoded so
    they cannot contain the ``:`` separator.

    Raises:
        ValueError: If any of the four parts is empty after normalisation.
    """
    type_part = str(type).strip()
    kind_part = str(subject_kind).strip()
    key_part = _encode_segment(subject_key)
    tail_part = _encode_segment(discriminator)
    for part in (type_part, kind_part, key_part, tail_part):
        if not part:
            msg = "identity key segments must be non-empty"
            raise ValueError(msg)
    return f"{type_part}:{kind_part}:{key_part}:{tail_part}"
@dataclass(frozen=True, slots=True)
class InitOptions:
    """Immutable switches for one memory-init run.

    NOTE(review): the meaning of each flag is inferred from its name; the
    consumer is outside this block -- confirm against the runner.
    """

    # Presumably plans the ingest without writing to the store -- TODO confirm.
    dry_run: bool = False
    # Presumably selects refresh of an existing store over a first bootstrap.
    refresh: bool = False
    # Optional path to a report file; None by default.
    from_report: Path | None = None
    # Documentation and test extractors are enabled by default.
    include_docs: bool = True
    include_tests: bool = True


@dataclass
class InitReport:
    """Mutable result of a memory-init run.

    Collection fields use ``default_factory`` so each instance gets its own
    dict/list.
    """

    project_id: str
    # None is allowed; presumably when no database was touched -- TODO confirm.
    db_path: Path | None
    dry_run: bool
    analysis_fingerprint: str | None
    # Counter maps keyed by name; the key schema is defined by the producer.
    stats: dict[str, int] = field(default_factory=dict)
    planned_counts: dict[str, int] = field(default_factory=dict)
    git: GitProvenance | None = None
    warnings: list[str] = field(default_factory=list)
    # Defaults to "init".
    ingestion_mode: str = "init"
    records_marked_stale: int = 0
    vacuum_deleted: int = 0
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +import re +import subprocess +from collections import Counter +from collections.abc import Mapping, Sequence +from pathlib import Path, PurePosixPath + +import orjson + +from ...config.memory import IngestConfig +from ...report.meta import current_report_timestamp_utc +from ...utils.coerce import as_mapping, as_sequence +from ..display import format_document_link_statement +from ..identity import make_identity_key +from ..models import ( + MemoryEvidence, + MemoryProject, + MemoryRecord, + MemorySubject, + RecordBatch, + generate_memory_id, +) +from ..project import GitProvenance, code_fingerprint_for_memory_subject +from .paths import ( + resolve_contract_constants_paths, + resolve_document_link_paths, + resolve_mcp_tool_contradiction_sources, + resolve_mcp_tool_schema_snapshot_path, +) + +_CODE_PATH_RE = re.compile(r"`([a-zA-Z0-9_./-]+\.(?:py|md|json|toml|yml))`") +_CONTRACT_CONSTANT_SUFFIX = "_VERSION" + + +def _literal_constant_value(node: ast.expr) -> str | None: + if isinstance(node, ast.Constant) and isinstance(node.value, (str, int, float)): + return repr(node.value) + return None + + +def _parse_contract_constants(path: Path) -> dict[str, str]: + try: + tree = ast.parse(path.read_text(encoding="utf-8")) + except (OSError, SyntaxError, UnicodeDecodeError): + return {} + constants: dict[str, str] = {} + for node in tree.body: + if isinstance(node, ast.Assign): + value = _literal_constant_value(node.value) + if value is None: + continue + for target in node.targets: + if isinstance(target, ast.Name) and target.id.endswith( + _CONTRACT_CONSTANT_SUFFIX + ): + constants[target.id] = value + elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name): + if not node.target.id.endswith(_CONTRACT_CONSTANT_SUFFIX): + continue + if node.value is None: + continue + value = _literal_constant_value(node.value) + if value is not 
None: + constants[node.target.id] = value + return dict(sorted(constants.items())) + + +def _new_metrics_batch( + report_document: Mapping[str, object], +) -> tuple[RecordBatch, str, Mapping[str, object]]: + return ( + RecordBatch(), + current_report_timestamp_utc(), + as_mapping(report_document.get("metrics")), + ) + + +def _inventory_module_key(file_item: object, seen: set[str]) -> str | None: + file_path = str(file_item).replace("\\", "/").strip("/") + if not file_path.endswith(".py"): + return None + module_path = file_path.removesuffix(".py").replace("/", ".") + if module_path.endswith(".__init__"): + module_path = module_path[: -len(".__init__")] + if not module_path or module_path in seen: + return None + seen.add(module_path) + return module_path + + +def _normalized_mapping_path( + mapping: Mapping[str, object], + *field_names: str, +) -> str | None: + for field_name in field_names: + raw = mapping.get(field_name) + if raw is None: + continue + path = str(raw).strip() + if path: + return path + return None + + +def _iter_mapping_paths( + items: Sequence[object], + *field_names: str, +) -> list[tuple[Mapping[str, object], str]]: + pairs: list[tuple[Mapping[str, object], str]] = [] + for item in items: + mapping = as_mapping(item) + path = _normalized_mapping_path(mapping, *field_names) + if path is not None: + pairs.append((mapping, path)) + return pairs + + +def _append_path_risk_note( + batch: RecordBatch, + *, + project: MemoryProject, + root_path: Path, + path: str, + now: str, + git: GitProvenance, + report_digest: str | None, + analysis_fingerprint: str | None, + discriminator: str, + statement: str, + payload: Mapping[str, object], + confidence: str, +) -> None: + identity = make_identity_key( + type="risk_note", + subject_kind="path", + subject_key=path, + discriminator=discriminator, + ) + record_id = generate_memory_id() + batch.records.append( + MemoryRecord( + id=record_id, + project_id=project.id, + identity_key=identity, + type="risk_note", + 
def extract_module_roles(
    *,
    project: MemoryProject,
    root_path: Path,
    report_document: Mapping[str, object],
    git: GitProvenance,
    report_digest: str | None,
    analysis_fingerprint: str | None,
) -> RecordBatch:
    """Emit one ``module_role`` record per Python module in the inventory.

    Modules come from ``inventory.file_registry.items`` of the report. Each
    distinct module key produces one record plus one ``module`` subject that
    points back to it.
    """
    result = RecordBatch()
    inventory = as_mapping(report_document.get("inventory"))
    registry = as_mapping(inventory.get("file_registry"))
    entries = as_sequence(registry.get("items"))
    timestamp = current_report_timestamp_utc()
    emitted: set[str] = set()
    for entry in entries:
        module_key = _inventory_module_key(entry, emitted)
        if module_key is None:
            continue
        identity_key = make_identity_key(
            type="module_role",
            subject_kind="module",
            subject_key=module_key,
            discriminator="inventory_module",
        )
        memory_id = generate_memory_id()
        fingerprint = code_fingerprint_for_memory_subject(
            root_path,
            module_key=module_key,
            analysis_fingerprint=analysis_fingerprint,
        )
        record = MemoryRecord(
            id=memory_id,
            project_id=project.id,
            identity_key=identity_key,
            type="module_role",
            status="active",
            confidence="supported",
            origin="system",
            ingest_source="analysis",
            statement=(
                f"{module_key} is an analyzed Python module in project inventory."
            ),
            summary=None,
            payload={
                "module_path": module_key,
                "role_kind": "inventory_module",
            },
            created_at_utc=timestamp,
            updated_at_utc=timestamp,
            last_verified_at_utc=timestamp,
            expires_at_utc=None,
            created_by="memory_init",
            verified_by=None,
            approved_by=None,
            approved_at_utc=None,
            report_digest=report_digest,
            code_fingerprint=fingerprint,
            stale_reason=None,
            created_on_branch=git.branch,
            created_at_commit=git.head,
            verified_on_branch=git.branch,
            verified_at_commit=git.head,
        )
        result.records.append(record)
        subject = MemorySubject(
            id=generate_memory_id(prefix="subj"),
            memory_id=memory_id,
            subject_kind="module",
            subject_key=module_key,
            relation="about",
        )
        result.subjects.append(subject)
    return result
def _snapshot_tool_names(snapshot_path: Path) -> list[str]:
    """Return the sorted keys of the snapshot's ``tools`` mapping.

    An unreadable or malformed snapshot, or one without a ``tools`` dict,
    yields an empty list.
    """
    try:
        document = orjson.loads(snapshot_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, orjson.JSONDecodeError):
        return []
    tools = document.get("tools") if isinstance(document, dict) else None
    return sorted(tools) if isinstance(tools, dict) else []


def extract_public_surfaces(
    *,
    project: MemoryProject,
    root_path: Path,
    report_document: Mapping[str, object],
    git: GitProvenance,
    report_digest: str | None,
    analysis_fingerprint: str | None,
    ingest: IngestConfig | None = None,
) -> RecordBatch:
    """Emit ``public_surface`` records for API symbols and MCP tools.

    API symbols are read from ``metrics.api_surface.items`` of the report;
    entries without a qualname/name are skipped. MCP tools are read from the
    tool-schema snapshot resolved for ``root_path``, when one exists.
    """
    batch, now, metrics = _new_metrics_batch(report_document)
    surface_items = as_sequence(as_mapping(metrics.get("api_surface")).get("items"))
    for raw_item in surface_items:
        entry = as_mapping(raw_item)
        symbol = str(entry.get("qualname") or entry.get("name") or "").strip()
        file_path = str(entry.get("file") or entry.get("path") or "").strip()
        if not symbol:
            continue
        symbol_identity = make_identity_key(
            type="public_surface",
            subject_kind="symbol",
            subject_key=symbol,
            discriminator="api_surface",
        )
        symbol_record_id = generate_memory_id()
        symbol_record = MemoryRecord(
            id=symbol_record_id,
            project_id=project.id,
            identity_key=symbol_identity,
            type="public_surface",
            status="active",
            confidence="supported",
            origin="system",
            ingest_source="analysis",
            statement=f"Public API surface includes symbol {symbol}.",
            summary=None,
            payload={
                "surface_kind": "api_symbol",
                "surface_name": symbol,
                "file_path": file_path,
            },
            created_at_utc=now,
            updated_at_utc=now,
            last_verified_at_utc=now,
            expires_at_utc=None,
            created_by="memory_init",
            verified_by=None,
            approved_by=None,
            approved_at_utc=None,
            report_digest=report_digest,
            code_fingerprint=analysis_fingerprint,
            stale_reason=None,
            created_on_branch=git.branch,
            created_at_commit=git.head,
            verified_on_branch=git.branch,
            verified_at_commit=git.head,
        )
        batch.records.append(symbol_record)
        batch.subjects.append(
            MemorySubject(
                id=generate_memory_id(prefix="subj"),
                memory_id=symbol_record_id,
                subject_kind="symbol",
                subject_key=symbol,
                relation="exports",
            )
        )

    snapshot_path = resolve_mcp_tool_schema_snapshot_path(
        root_path=root_path,
        ingest=ingest or IngestConfig(),
    )
    if snapshot_path is None:
        return batch
    for tool_name in _snapshot_tool_names(snapshot_path):
        tool_identity = make_identity_key(
            type="public_surface",
            subject_kind="mcp_tool",
            subject_key=tool_name,
            discriminator="mcp_tool_schema",
        )
        tool_record_id = generate_memory_id()
        tool_record = MemoryRecord(
            id=tool_record_id,
            project_id=project.id,
            identity_key=tool_identity,
            type="public_surface",
            status="active",
            confidence="verified",
            origin="system",
            ingest_source="snapshot",
            statement=(f"MCP tool {tool_name} is registered in contract snapshot."),
            summary=None,
            payload={
                "surface_kind": "mcp_tool",
                "surface_name": tool_name,
            },
            created_at_utc=now,
            updated_at_utc=now,
            last_verified_at_utc=now,
            expires_at_utc=None,
            created_by="memory_init",
            verified_by=None,
            approved_by=None,
            approved_at_utc=None,
            report_digest=report_digest,
            code_fingerprint=analysis_fingerprint,
            stale_reason=None,
            created_on_branch=git.branch,
            created_at_commit=git.head,
            verified_on_branch=git.branch,
            verified_at_commit=git.head,
        )
        batch.records.append(tool_record)
        batch.subjects.append(
            MemorySubject(
                id=generate_memory_id(prefix="subj"),
                memory_id=tool_record_id,
                subject_kind="mcp_tool",
                subject_key=tool_name,
                relation="about",
            )
        )
    return batch
+ ), + payload={ + "risk_kind": "high_complexity", + "metric_value": value, + "threshold": threshold, + "severity": "medium", + "interpretation": "Structural complexity hotspot from analysis.", + }, + confidence="verified", + ) + + security = as_mapping(metrics.get("security_surfaces")) + for mapping, path in _iter_mapping_paths( + as_sequence(security.get("items")), + "path", + ): + category = str(mapping.get("category") or "security_surface").strip() + _append_path_risk_note( + batch, + project=project, + root_path=root_path, + path=path, + now=now, + git=git, + report_digest=report_digest, + analysis_fingerprint=analysis_fingerprint, + discriminator="security_surface", + statement=( + f"{path} is in the security surface inventory ({category}). " + "Report-only inventory; not a vulnerability finding." + ), + payload={ + "risk_kind": "security_surface", + "category": category, + "interpretation": "report_only_inventory", + }, + confidence="supported", + ) + return batch + + +def extract_test_anchors( + *, + project: MemoryProject, + root_path: Path, + git: GitProvenance, + report_digest: str | None, + analysis_fingerprint: str | None, +) -> RecordBatch: + batch = RecordBatch() + now = current_report_timestamp_utc() + tests_dir = root_path / "tests" + if not tests_dir.is_dir(): + return batch + for test_file in sorted(tests_dir.rglob("test_*.py")): + rel = str(test_file.relative_to(root_path)).replace("\\", "/") + try: + tree = ast.parse(test_file.read_text("utf-8")) + except (OSError, SyntaxError, UnicodeDecodeError): + continue + symbols: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Constant) and isinstance(node.value, str): + if node.value.isidentifier() or "." 
in node.value: + symbols.add(node.value.split(".")[0]) + elif isinstance(node, ast.Name): + symbols.add(node.id) + for symbol in sorted(s for s in symbols if len(s) > 3)[:5]: + identity = make_identity_key( + type="test_anchor", + subject_kind="test", + subject_key=rel, + discriminator=f"symbol:{symbol}", + ) + record_id = generate_memory_id() + batch.records.append( + MemoryRecord( + id=record_id, + project_id=project.id, + identity_key=identity, + type="test_anchor", + status="active", + confidence="supported", + origin="system", + ingest_source="test", + statement=(f"{rel} contains tests referencing symbol {symbol}."), + summary=None, + payload={ + "test_file": rel, + "referenced_symbols": [symbol], + "reference_kind": "ast_name_or_string", + }, + created_at_utc=now, + updated_at_utc=now, + last_verified_at_utc=now, + expires_at_utc=None, + created_by="memory_init", + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest=report_digest, + code_fingerprint=code_fingerprint_for_memory_subject( + root_path, + subject_path=rel, + analysis_fingerprint=analysis_fingerprint, + ), + stale_reason=None, + created_on_branch=git.branch, + created_at_commit=git.head, + verified_on_branch=git.branch, + verified_at_commit=git.head, + ) + ) + batch.subjects.append( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record_id, + subject_kind="test", + subject_key=rel, + relation="tests", + ) + ) + return batch + + +def _resolve_doc_anchor_path( + anchored: str, + *, + root_path: Path, + registry_paths: frozenset[str], +) -> str | None: + normalized = anchored.replace("\\", "/").strip("/") + if not normalized: + return None + if normalized in registry_paths: + return normalized + if (root_path / normalized).is_file(): + return normalized + if "/" in normalized or "\\" in anchored: + return None + basename = PurePosixPath(normalized).name + matches = sorted( + path + for path in registry_paths + if path == basename or 
path.endswith(f"/{basename}") + ) + if len(matches) == 1: + return matches[0] + return None + + +def extract_document_links( + *, + project: MemoryProject, + root_path: Path, + git: GitProvenance, + report_digest: str | None, + analysis_fingerprint: str | None, + registry_paths: frozenset[str] | None = None, + ingest: IngestConfig | None = None, +) -> RecordBatch: + batch = RecordBatch() + now = current_report_timestamp_utc() + registry = registry_paths or frozenset() + ingest_config = ingest or IngestConfig() + doc_paths = resolve_document_link_paths( + root_path=root_path, + registry_paths=registry, + ingest=ingest_config, + ) + for doc_path in doc_paths: + rel = str(doc_path.relative_to(root_path)).replace("\\", "/") + text = doc_path.read_text("utf-8", errors="replace") + heading = "root" + for line in text.splitlines(): + if line.startswith("#"): + heading = line.lstrip("#").strip() or heading + for match in _CODE_PATH_RE.finditer(line): + anchored = match.group(1) + resolved_path = _resolve_doc_anchor_path( + anchored, + root_path=root_path, + registry_paths=registry, + ) + identity = make_identity_key( + type="document_link", + subject_kind="doc", + subject_key=rel, + discriminator=f"path:{anchored}", + ) + record_id = generate_memory_id() + batch.records.append( + MemoryRecord( + id=record_id, + project_id=project.id, + identity_key=identity, + type="document_link", + status="active", + confidence="supported", + origin="system", + ingest_source="doc", + statement=format_document_link_statement( + doc_file=rel, + heading=heading, + anchored_path=anchored, + ), + summary=None, + payload={ + "doc_file": rel, + "heading": heading, + "anchored_symbols": [anchored], + **( + {"resolved_path": resolved_path} + if resolved_path is not None + else {} + ), + }, + created_at_utc=now, + updated_at_utc=now, + last_verified_at_utc=now, + expires_at_utc=None, + created_by="memory_init", + verified_by=None, + approved_by=None, + approved_at_utc=None, + 
report_digest=report_digest, + code_fingerprint=code_fingerprint_for_memory_subject( + root_path, + subject_path=rel, + analysis_fingerprint=analysis_fingerprint, + ), + stale_reason=None, + created_on_branch=git.branch, + created_at_commit=git.head, + verified_on_branch=git.branch, + verified_at_commit=git.head, + ) + ) + batch.subjects.append( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record_id, + subject_kind="doc", + subject_key=rel, + relation="documents", + ) + ) + anchored_path = resolved_path + if anchored_path is not None and anchored_path.endswith(".py"): + batch.subjects.append( + MemorySubject( + id=generate_memory_id(prefix="subj"), + memory_id=record_id, + subject_kind="path", + subject_key=anchored_path, + relation="about", + ) + ) + return batch + + +def extract_git_hotspots( + *, + project: MemoryProject, + root_path: Path, + git: GitProvenance, + report_digest: str | None, + analysis_fingerprint: str | None, + period_days: int = 90, + min_changes: int = 10, +) -> RecordBatch: + batch = RecordBatch() + if not git.available: + return batch + try: + completed = subprocess.run( + [ + "git", + "log", + f"--since={period_days}.days", + "--name-only", + "--pretty=format:", + ], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=30.0, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return batch + counts = Counter( + line.strip().replace("\\", "/") + for line in completed.stdout.splitlines() + if line.strip().endswith(".py") + ) + now = current_report_timestamp_utc() + for path, count in sorted(counts.items(), key=lambda item: (-item[1], item[0])): + if count < min_changes: + continue + identity = make_identity_key( + type="risk_note", + subject_kind="path", + subject_key=path, + discriminator="change_hotspot", + ) + record_id = generate_memory_id() + batch.records.append( + MemoryRecord( + id=record_id, + project_id=project.id, + identity_key=identity, + 
def extract_contradictions(
    *,
    project: MemoryProject,
    root_path: Path,
    git: GitProvenance,
    report_digest: str | None,
    analysis_fingerprint: str | None,
    ingest: IngestConfig | None = None,
) -> RecordBatch:
    """Emit draft ``contradiction_note`` records for wrong MCP tool counts.

    The number of entries in the snapshot's ``tools`` mapping is compared with
    every "<N> tools" / "<N> MCP tools" mention in the configured documents.
    Each distinct differing count produces one record per document. Missing
    sources, an unreadable snapshot or an unreadable document are skipped.
    """
    batch = RecordBatch()
    sources = resolve_mcp_tool_contradiction_sources(
        root_path=root_path,
        ingest=ingest or IngestConfig(),
    )
    if sources is None:
        return batch
    snapshot_path, doc_paths = sources
    try:
        snapshot = orjson.loads(snapshot_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, orjson.JSONDecodeError):
        return batch
    tools = snapshot.get("tools") if isinstance(snapshot, dict) else None
    if not isinstance(tools, dict):
        return batch
    registered = len(tools)
    now = current_report_timestamp_utc()
    count_pattern = re.compile(r"(\d+)\s+(?:MCP\s+)?tools?", re.I)
    for docs_path in doc_paths:
        try:
            body = docs_path.read_text("utf-8", errors="replace")
        except OSError:
            continue
        mentioned = {int(hit.group(1)) for hit in count_pattern.finditer(body)}
        # Only counts that disagree with the snapshot are reported.
        for claimed in sorted(mentioned - {registered}):
            identity = make_identity_key(
                type="contradiction_note",
                subject_kind="doc",
                subject_key=str(docs_path.relative_to(root_path)),
                discriminator=f"tool_count:{claimed}_vs_{registered}",
            )
            record_id = generate_memory_id()
            rel_doc = str(docs_path.relative_to(root_path)).replace("\\", "/")
            rel_snapshot = str(snapshot_path.relative_to(root_path)).replace(
                "\\", "/"
            )
            record = MemoryRecord(
                id=record_id,
                project_id=project.id,
                identity_key=identity,
                type="contradiction_note",
                status="draft",
                confidence="supported",
                origin="system",
                ingest_source="doc",
                statement=(
                    f"{rel_doc} claims {claimed} MCP tools but contract snapshot "
                    f"registers {registered}."
                ),
                summary=None,
                payload={
                    "source_a": rel_doc,
                    "source_b": rel_snapshot,
                    "claim_a": str(claimed),
                    "claim_b": str(registered),
                },
                created_at_utc=now,
                updated_at_utc=now,
                last_verified_at_utc=now,
                expires_at_utc=None,
                created_by="memory_init",
                verified_by=None,
                approved_by=None,
                approved_at_utc=None,
                report_digest=report_digest,
                code_fingerprint=analysis_fingerprint,
                stale_reason=None,
                created_on_branch=git.branch,
                created_at_commit=git.head,
                verified_on_branch=git.branch,
                verified_at_commit=git.head,
            )
            batch.records.append(record)
            batch.subjects.append(
                MemorySubject(
                    id=generate_memory_id(prefix="subj"),
                    memory_id=record_id,
                    subject_kind="doc",
                    subject_key=rel_doc,
                    relation="about",
                )
            )
    return batch


def merge_batches(batches: Sequence[RecordBatch]) -> RecordBatch:
    """Fold ``batches`` into one new batch using in-place addition."""
    combined = RecordBatch()
    for part in batches:
        combined += part
    return combined
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from ...config.memory import MemoryConfig, resolve_memory_config +from ..project import report_digest_from_report, resolve_memory_db_path +from ..sqlite_store import SqliteEngineeringMemoryStore +from . import InitOptions, InitReport +from .runner import run_memory_init + +MemorySyncAction = Literal["bootstrap", "refresh", "none"] +MemorySyncStatus = Literal["completed", "skipped", "unchanged"] +MemorySyncTrigger = Literal["auto", "explicit"] + + +@dataclass(frozen=True, slots=True) +class MemorySyncDecision: + action: MemorySyncAction + reason: str + + +def read_stored_report_digest(db_path: Path) -> str | None: + if not db_path.exists(): + return None + store = SqliteEngineeringMemoryStore(db_path) + try: + return store.get_meta("last_report_digest") + finally: + store.close() + + +def decide_mcp_memory_sync( + *, + policy: str, + db_path: Path, + report_digest: str, + stored_digest: str | None, +) -> MemorySyncDecision: + if policy == "off": + return MemorySyncDecision(action="none", reason="policy_off") + if not db_path.exists(): + return MemorySyncDecision(action="bootstrap", reason="missing_db") + if policy == "refresh_when_stale": + if stored_digest != report_digest: + return MemorySyncDecision(action="refresh", reason="digest_changed") + return MemorySyncDecision(action="none", reason="digest_unchanged") + return MemorySyncDecision(action="none", reason="db_present") + + +def sync_report_document_to_memory( + *, + root_path: Path, + report_document: Mapping[str, object], + refresh: bool, +) -> InitReport: + return run_memory_init( + root_path=root_path, + report_document=report_document, + options=InitOptions( + refresh=refresh, + include_docs=True, + include_tests=True, + ), + ) + + +def memory_sync_result_payload( + 
def _complete_memory_sync(
    *,
    root_path: Path,
    report_document: Mapping[str, object],
    trigger: MemorySyncTrigger,
    run_id: str,
    report_digest: str,
    refresh: bool,
    reason: str,
) -> dict[str, object]:
    """Run the ingest and wrap its report in a ``"completed"`` payload."""
    init_report = sync_report_document_to_memory(
        root_path=root_path,
        report_document=report_document,
        refresh=refresh,
    )
    return memory_sync_result_payload(
        status="completed",
        trigger=trigger,
        run_id=run_id,
        report_digest=report_digest,
        init_report=init_report,
        reason=reason,
    )


def execute_mcp_memory_sync(
    *,
    root_path: Path,
    report_document: Mapping[str, object],
    config: MemoryConfig | None = None,
    trigger: MemorySyncTrigger,
    run_id: str,
    force: bool = False,
) -> dict[str, object]:
    """Synchronise a report into project memory and describe the outcome.

    Args:
        root_path: Project root; resolved before use.
        report_document: Report to ingest.
        config: Memory configuration; resolved from the root when omitted.
        trigger: ``"explicit"`` always syncs; ``"auto"`` follows the
            configured sync policy.
        run_id: Identifier echoed back in the payload.
        force: When true, sync regardless of trigger and policy.

    Returns:
        A result payload with status ``"skipped"`` (report has no digest),
        ``"unchanged"`` (policy decided nothing is to be done) or
        ``"completed"`` (an ingest was run).
    """
    resolved_root = root_path.resolve()
    resolved_config = config or resolve_memory_config(resolved_root)
    db_path = resolve_memory_db_path(resolved_root, resolved_config)
    report_digest = report_digest_from_report(dict(report_document))

    if report_digest is None:
        return memory_sync_result_payload(
            status="skipped",
            trigger=trigger,
            run_id=run_id,
            report_digest=None,
            init_report=None,
            reason="missing_report_digest",
        )

    if force or trigger == "explicit":
        return _complete_memory_sync(
            root_path=resolved_root,
            report_document=report_document,
            trigger=trigger,
            run_id=run_id,
            report_digest=report_digest,
            # An existing database is refreshed, a missing one bootstrapped.
            refresh=db_path.exists(),
            reason="forced" if force else "explicit_refresh",
        )

    # The stored digest is only consulted by the policy decision, so the
    # database is opened here rather than on the skipped/forced paths above.
    decision = decide_mcp_memory_sync(
        policy=resolved_config.mcp_sync_policy,
        db_path=db_path,
        report_digest=report_digest,
        stored_digest=read_stored_report_digest(db_path),
    )
    if decision.action == "none":
        return memory_sync_result_payload(
            status="unchanged",
            trigger=trigger,
            run_id=run_id,
            report_digest=report_digest,
            init_report=None,
            reason=decision.reason,
        )

    return _complete_memory_sync(
        root_path=resolved_root,
        report_document=report_document,
        trigger=trigger,
        run_id=run_id,
        report_digest=report_digest,
        refresh=decision.action == "refresh",
        reason=decision.reason,
    )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +from ...config.memory import IngestConfig + +CONTRACT_CONSTANTS_SUFFIX: str = "contracts/__init__.py" + +DEFAULT_DOCUMENT_LINK_ROOT_FILES: tuple[str, ...] = ( + "README.md", + "CONTRIBUTING.md", + "AGENTS.md", + "CLAUDE.md", +) + +DOCS_REGISTRY_PREFIX: str = "docs/" + + +def _normalize_repo_path(path: str) -> str: + return path.replace("\\", "/").strip("/") + + +def _existing_repo_file(root_path: Path, repo_relative: str) -> Path | None: + normalized = _normalize_repo_path(repo_relative) + if not normalized or ".." in normalized.split("/"): + return None + candidate = root_path / normalized + if candidate.is_file(): + return candidate + return None + + +def resolve_contract_constants_paths( + *, + root_path: Path, + registry_paths: frozenset[str], + ingest: IngestConfig, +) -> tuple[Path, ...]: + configured = ingest.contract_constants_paths + if configured: + resolved = [ + path + for item in configured + if (path := _existing_repo_file(root_path, item)) is not None + ] + return tuple(sorted(resolved, key=lambda item: str(item))) + + discovered = sorted( + { + _normalize_repo_path(path) + for path in registry_paths + if path.endswith(CONTRACT_CONSTANTS_SUFFIX) + } + ) + return tuple( + path + for item in discovered + if (path := _existing_repo_file(root_path, item)) is not None + ) + + +def resolve_document_link_paths( + *, + root_path: Path, + registry_paths: frozenset[str], + ingest: IngestConfig, +) -> tuple[Path, ...]: + configured = ingest.document_link_paths + if configured: + candidates = configured + else: + registry_docs = sorted( + path + for path in registry_paths + if path.startswith(DOCS_REGISTRY_PREFIX) and path.endswith(".md") + ) + root_docs = [ + name + for name in DEFAULT_DOCUMENT_LINK_ROOT_FILES + if _existing_repo_file(root_path, name) is not None + ] + candidates = tuple(dict.fromkeys([*root_docs, 
def resolve_mcp_tool_contradiction_sources(
    *,
    root_path: Path,
    ingest: IngestConfig,
) -> tuple[Path, tuple[Path, ...]] | None:
    """Locate the MCP tool schema snapshot and the docs to compare it against.

    Returns ``(snapshot, docs)`` only when the snapshot path is configured and
    exists, and at least one configured doc path exists. Returns ``None`` in
    every other case. Missing doc files are dropped; configured order is kept.
    """
    snapshot_setting = ingest.mcp_tool_schema_snapshot_path
    doc_settings = ingest.mcp_tool_count_doc_paths
    if snapshot_setting is None or not doc_settings:
        return None

    snapshot_file = _existing_repo_file(root_path, snapshot_setting)
    if snapshot_file is None:
        return None

    doc_files: list[Path] = []
    for setting in doc_settings:
        doc_file = _existing_repo_file(root_path, setting)
        if doc_file is not None:
            doc_files.append(doc_file)
    return (snapshot_file, tuple(doc_files)) if doc_files else None
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from contextlib import suppress + +from ...report.meta import current_report_timestamp_utc +from ..governance import record_candidate +from ..models import MemoryProject, generate_memory_id +from ..sqlite_store import SqliteEngineeringMemoryStore + + +def _try_append_text_candidate( + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + record_type: str, + text: object, + subject_path: str | None, + created_by: str, + max_candidates: int, + max_statement_chars: int, +) -> dict[str, object] | None: + if not isinstance(text, str) or not text.strip() or not subject_path: + return None + try: + record = record_candidate( + store, + project=project, + record_type=record_type, # type: ignore[arg-type] + statement=text.strip()[:max_statement_chars], + subject_path=subject_path, + created_by=created_by, + max_candidates=max_candidates, + max_statement_chars=max_statement_chars, + ) + except Exception: + # Best-effort draft proposal: a single failed candidate must not break + # the finish flow. Count the drop as observability telemetry (no-op when + # disabled) so silently skipped candidates stay visible. Never re-raises. 
def _count_dropped_candidate() -> None:
    """Best-effort telemetry for a candidate that could not be recorded."""
    with suppress(Exception):
        from ...observability import record_counter

        record_counter("memory.propose_candidate_dropped")


def _candidate_summary(record: object) -> dict[str, object]:
    """Project a recorded candidate onto the fields returned to the caller."""
    return {
        "id": record.id,  # type: ignore[attr-defined]
        "type": record.type,  # type: ignore[attr-defined]
        "status": record.status,  # type: ignore[attr-defined]
        "statement": record.statement,  # type: ignore[attr-defined]
    }


def propose_memory_from_finish_payload(
    store: SqliteEngineeringMemoryStore,
    *,
    project: MemoryProject,
    finish_payload: Mapping[str, object],
    max_candidates: int,
    max_statement_chars: int,
) -> list[dict[str, object]]:
    """Extract draft memory candidates from a neutral finish payload.

    Payload keys read (all optional):

    * ``scope_check.declared_scope`` - for each of the first ten entries that
      is a ``.py`` path, a ``module_role`` candidate is recorded. The first
      such path also becomes the subject of the text candidates below.
    * ``claims_text`` - recorded as a ``change_rationale`` candidate.
    * ``review_text`` - recorded as an ``architecture_decision`` candidate.
    * ``verification.verification_profile`` - when it is a string, a
      ``contract_note`` entry flagged ``proposal_only`` is appended to the
      result without being written to the store.

    Recording is best-effort: a candidate whose recording raises is skipped
    and counted under ``memory.propose_candidate_dropped``. The exception is
    never propagated.

    Returns:
        One summary dict per proposed candidate, in the order listed above.
    """
    candidates: list[dict[str, object]] = []
    primary_subject_path: str | None = None

    scope_check = finish_payload.get("scope_check")
    declared = (
        scope_check.get("declared_scope") if isinstance(scope_check, Mapping) else None
    )
    if isinstance(declared, list):
        for path in declared[:10]:
            if not isinstance(path, str) or not path.endswith(".py"):
                continue
            if primary_subject_path is None:
                # Chosen before recording, so it is set even if recording fails.
                primary_subject_path = path
            try:
                record = record_candidate(
                    store,
                    project=project,
                    record_type="module_role",
                    statement=(
                        f"Patch touched scope includes {path}; "
                        "review module role after change."
                    ),
                    subject_path=path,
                    created_by="finish_hook",
                    max_candidates=max_candidates,
                    max_statement_chars=max_statement_chars,
                )
            except Exception:
                # Same best-effort contract as the text candidates: skip this
                # path, but keep the drop visible in telemetry.
                _count_dropped_candidate()
                continue
            candidates.append(_candidate_summary(record))

    for record_type, payload_key in (
        ("change_rationale", "claims_text"),
        ("architecture_decision", "review_text"),
    ):
        text_candidate = _try_append_text_candidate(
            store,
            project=project,
            record_type=record_type,
            text=finish_payload.get(payload_key),
            subject_path=primary_subject_path,
            created_by="finish_hook",
            max_candidates=max_candidates,
            max_statement_chars=max_statement_chars,
        )
        if text_candidate is not None:
            candidates.append(text_candidate)

    verification = finish_payload.get("verification")
    if isinstance(verification, Mapping):
        profile = verification.get("verification_profile")
        if isinstance(profile, str):
            now = current_report_timestamp_utc()
            candidates.append(
                {
                    "id": generate_memory_id(prefix="mem-proposal"),
                    "type": "contract_note",
                    "status": "draft",
                    "statement": (f"Patch verified under profile {profile} at {now}."),
                    "proposal_only": True,
                }
            )

    return candidates
claims_text, + "review_text": review_text, + "verification": {"verification_profile": verification_profile}, + } + return propose_memory_from_finish_payload( + store, + project=project, + finish_payload=payload, + max_candidates=max_candidates, + max_statement_chars=max_statement_chars, + ) + + +__all__ = [ + "propose_memory_from_changed_paths", + "propose_memory_from_finish_payload", +] diff --git a/codeclone/memory/ingest/runner.py b/codeclone/memory/ingest/runner.py new file mode 100644 index 00000000..1976c1d2 --- /dev/null +++ b/codeclone/memory/ingest/runner.py @@ -0,0 +1,316 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import Counter +from collections.abc import Mapping +from pathlib import Path + +from ...config.memory import IngestConfig, resolve_memory_config +from ...report.meta import current_report_timestamp_utc +from ..models import IngestionRun, MemoryProject, RecordBatch, generate_memory_id +from ..project import ( + GitProvenance, + analysis_fingerprint_from_report, + git_head_evidence, + read_git_provenance, + report_digest_from_report, + resolve_memory_db_path, + resolve_project_identity, +) +from ..schema import create_schema_v1, open_memory_db +from ..sqlite_store import SqliteEngineeringMemoryStore +from ..staleness import apply_refresh_staleness +from ..vacuum import run_memory_vacuum +from . 
import InitOptions, InitReport +from .extractors import ( + extract_contract_notes, + extract_contradictions, + extract_document_links, + extract_git_hotspots, + extract_module_roles, + extract_public_surfaces, + extract_risk_notes, + extract_test_anchors, + merge_batches, +) + + +def enrich_batch_git_evidence(batch: RecordBatch, git: GitProvenance) -> None: + if not git.available: + return + now = current_report_timestamp_utc() + covered = { + evidence.memory_id + for evidence in batch.evidence + if evidence.evidence_kind == "git_commit" + } + for record in batch.records: + if record.id in covered: + continue + evidence = git_head_evidence( + memory_id=record.id, + git=git, + created_at_utc=now, + ) + if evidence is not None: + batch.evidence.append(evidence) + + +def _registry_paths(report_document: Mapping[str, object]) -> frozenset[str]: + inventory = report_document.get("inventory") + if not isinstance(inventory, dict): + return frozenset() + registry = inventory.get("file_registry") + if not isinstance(registry, dict): + return frozenset() + items = registry.get("items") + if not isinstance(items, list): + return frozenset() + return frozenset(str(item).replace("\\", "/").strip("/") for item in items) + + +def build_init_batch( + *, + root_path: Path, + project: object, + report_document: Mapping[str, object], + git: object, + report_digest: str | None, + analysis_fingerprint: str | None, + options: InitOptions, + git_hotspot_period_days: int = 90, + git_hotspot_min_changes: int = 5, + ingest: IngestConfig | None = None, +) -> RecordBatch: + if not isinstance(project, MemoryProject): + raise TypeError("project must be MemoryProject") + if not isinstance(git, GitProvenance): + raise TypeError("git must be GitProvenance") + + ingest_config = ingest or resolve_memory_config(root_path).ingest + registry = _registry_paths(report_document) + + batches = [ + extract_module_roles( + project=project, + root_path=root_path, + report_document=report_document, + git=git, 
def planned_type_counts(batch: RecordBatch) -> dict[str, int]:
    """Count the batch's records per record type.

    Types are stringified with ``str`` and the resulting dict is keyed in
    ascending type-name order.
    """
    tally = Counter(str(record.type) for record in batch.records)
    return {type_name: tally[type_name] for type_name in sorted(tally)}
InitOptions, +) -> InitReport: + resolved_root = root_path.resolve() + config = resolve_memory_config(resolved_root) + db_path = resolve_memory_db_path(resolved_root, config) + project = resolve_project_identity(resolved_root) + git = read_git_provenance(resolved_root) + analysis_fingerprint = analysis_fingerprint_from_report(dict(report_document)) + report_digest = report_digest_from_report(dict(report_document)) + + batch = build_init_batch( + root_path=resolved_root, + project=project, + report_document=report_document, + git=git, + report_digest=report_digest, + analysis_fingerprint=analysis_fingerprint, + options=options, + git_hotspot_period_days=config.git_hotspot_period_days, + git_hotspot_min_changes=config.git_hotspot_min_changes, + ) + planned = planned_type_counts(batch) + + if options.dry_run: + conn = open_memory_db(Path(":memory:")) + try: + create_schema_v1(conn) + finally: + conn.close() + return InitReport( + project_id=project.id, + db_path=None, + dry_run=True, + analysis_fingerprint=analysis_fingerprint, + planned_counts=planned, + git=git, + ) + + store = SqliteEngineeringMemoryStore(db_path) + started = current_report_timestamp_utc() + ingestion_run = IngestionRun( + id=generate_memory_id(prefix="mem-init"), + project_id=project.id, + mode="refresh" if options.refresh else "init", + started_at_utc=started, + finished_at_utc=None, + status="running", + analysis_fingerprint=analysis_fingerprint, + report_digest=report_digest, + branch=git.branch, + commit=git.head, + ) + stats: dict[str, int] = {} + stale_marked = 0 + vacuum_deleted = 0 + try: + with store.exclusive_init_lock(): + store.initialize(project) + with store.transaction(): + stats = store.persist_batch(batch, commit=False) + store.prune_duplicate_subjects(commit=False) + if options.refresh: + stale_report = apply_refresh_staleness( + store, + project_id=project.id, + batch=batch, + report_document=report_document, + root_path=root_path, + report_digest=report_digest, + commit=False, 
+ ) + stale_marked = stale_report.records_marked_stale + vacuum_report = run_memory_vacuum( + store, + config, + commit=False, + ) + vacuum_deleted = vacuum_report.total_deleted + ingestion_run = IngestionRun( + id=ingestion_run.id, + project_id=ingestion_run.project_id, + mode=ingestion_run.mode, + started_at_utc=ingestion_run.started_at_utc, + finished_at_utc=current_report_timestamp_utc(), + status="completed", + analysis_fingerprint=ingestion_run.analysis_fingerprint, + report_digest=ingestion_run.report_digest, + branch=ingestion_run.branch, + commit=ingestion_run.commit, + records_created=stats.get("created", 0), + records_updated=stats.get("updated", 0), + records_marked_stale=stale_marked, + candidates_created=planned.get("contradiction_note", 0), + contradictions_found=planned.get("contradiction_note", 0), + message=( + f"vacuum_deleted={vacuum_deleted}" if vacuum_deleted else None + ), + ) + store.write_ingestion_run(ingestion_run) + store.rebuild_project_fts(project.id) + store.set_meta("last_analysis_fingerprint", analysis_fingerprint or "") + store.set_meta("last_report_digest", report_digest or "") + store.set_meta("last_init_run_id", ingestion_run.id) + store.set_meta("project_id", project.id) + store.set_meta("project_root", project.root) + finally: + store.close() + + return InitReport( + project_id=project.id, + db_path=db_path, + dry_run=False, + analysis_fingerprint=analysis_fingerprint, + stats=stats, + planned_counts=planned, + git=git, + ingestion_mode="refresh" if options.refresh else "init", + records_marked_stale=stale_marked, + vacuum_deleted=vacuum_deleted, + ) + + +__all__ = [ + "build_init_batch", + "enrich_batch_git_evidence", + "planned_type_counts", + "run_memory_init", +] diff --git a/codeclone/memory/jobs/__init__.py b/codeclone/memory/jobs/__init__.py new file mode 100644 index 00000000..7c16d77f --- /dev/null +++ b/codeclone/memory/jobs/__init__.py @@ -0,0 +1,49 @@ +# This Source Code Form is subject to the terms of the Mozilla 
Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .spawn import SpawnWorkerResult, spawn_projection_jobs_worker +from .staleness import ( + compute_projection_stimulus, + last_applied_stimulus, + projection_is_stale, + stimulus_digest, +) +from .store import ( + EnqueueProjectionJobResult, + enqueue_projection_job, + list_projection_jobs, + pending_projection_job, +) +from .worker import ProjectionWorkerResult, run_projection_jobs_once +from .workflow import ( + execute_enqueue_projection_rebuild, + execute_projection_rebuild_status, + execute_run_projection_jobs_once, + is_ci_environment, + maybe_auto_enqueue_projection_rebuild, +) + +__all__ = [ + "EnqueueProjectionJobResult", + "ProjectionWorkerResult", + "SpawnWorkerResult", + "compute_projection_stimulus", + "enqueue_projection_job", + "execute_enqueue_projection_rebuild", + "execute_projection_rebuild_status", + "execute_run_projection_jobs_once", + "is_ci_environment", + "last_applied_stimulus", + "list_projection_jobs", + "maybe_auto_enqueue_projection_rebuild", + "pending_projection_job", + "projection_is_stale", + "run_projection_jobs_once", + "spawn_projection_jobs_worker", + "stimulus_digest", +] diff --git a/codeclone/memory/jobs/models.py b/codeclone/memory/jobs/models.py new file mode 100644 index 00000000..80929be6 --- /dev/null +++ b/codeclone/memory/jobs/models.py @@ -0,0 +1,49 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
@dataclass(frozen=True, slots=True)
class ProjectionJobRecord:
    """Immutable snapshot of one ``memory_projection_jobs`` row.

    store.py builds instances from a ``sqlite3.Row`` whose column names match
    these fields one-to-one.
    """

    id: str
    project_id: str
    job_kind: ProjectionJobKind
    status: ProjectionJobStatus
    trigger: ProjectionJobTrigger
    # Timestamps are stored as strings; store.py parses ``started_at_utc`` as
    # ISO-8601 when checking for timed-out running jobs.
    requested_at_utc: str
    started_at_utc: str | None
    finished_at_utc: str | None
    # Claim token of the worker processing the job. store.py mints tokens as
    # ``<pid>@<hostname>`` and probes the PID part to detect dead workers.
    claimed_by: str | None
    # Inserted as 0 when the job is enqueued.
    attempt: int
    # Canonical (key-sorted) JSON of the stimulus the job was requested for.
    stimulus_json: str
    # JSON result; staleness.py looks for an "applied_stimulus" object in it.
    result_json: str | None
    # Set to "stale_running_reclaimed" when a stale running job is reclaimed.
    error_message: str | None
    # PID@host of the single scheduled delayed-flush worker holding the coalesce
    # slot for this pending job (None when no flush is scheduled). See
    # try_claim_flush_slot in store.py.
    flush_claimed_by: str | None = None
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + +from ...observability import current_operation_context + + +@dataclass(frozen=True, slots=True) +class SpawnWorkerResult: + spawned: bool + reason: str | None + pid: int | None + + +def _worker_env() -> dict[str, str] | None: + """Subprocess env carrying the observability correlation handoff, or ``None`` + to inherit the parent environment unchanged (no active operation). + """ + context = current_operation_context() + if context is None: + return None + operation_id, correlation_id = context + return { + **os.environ, + "CODECLONE_OBSERVABILITY_CORRELATION_ID": correlation_id, + "CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID": operation_id, + } + + +def _run_once_argv(root: Path, *, not_before_utc: str | None = None) -> list[str]: + """Argv for the ``memory jobs run-once`` worker subprocess. A non-empty + ``not_before_utc`` adds ``--not-before `` so the worker defers its model + load and corpus drain until that deadline (delayed single-shot flush). 
+ """ + argv = [ + sys.executable, + "-m", + "codeclone.main", + "memory", + "jobs", + "run-once", + "--root", + str(root), + ] + if not_before_utc: + argv += ["--not-before", not_before_utc] + return argv + + +def spawn_projection_jobs_worker( + *, root_path: Path, not_before_utc: str | None = None +) -> SpawnWorkerResult: + root = root_path.resolve() + argv = _run_once_argv(root, not_before_utc=not_before_utc) + try: + proc = subprocess.Popen( + argv, + cwd=root, + env=_worker_env(), + start_new_session=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except OSError as exc: + return SpawnWorkerResult(spawned=False, reason=str(exc), pid=None) + return SpawnWorkerResult(spawned=True, reason=None, pid=proc.pid) + + +def run_projection_jobs_worker_sync( + *, root_path: Path +) -> subprocess.CompletedProcess[str]: + root = root_path.resolve() + argv = _run_once_argv(root) + return subprocess.run( + argv, + cwd=root, + check=False, + capture_output=True, + text=True, + ) + + +__all__ = [ + "SpawnWorkerResult", + "run_projection_jobs_worker_sync", + "spawn_projection_jobs_worker", +] diff --git a/codeclone/memory/jobs/staleness.py b/codeclone/memory/jobs/staleness.py new file mode 100644 index 00000000..db99235a --- /dev/null +++ b/codeclone/memory/jobs/staleness.py @@ -0,0 +1,153 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import sqlite3 +from collections.abc import Mapping +from pathlib import Path + +import orjson + +from ...audit.events import repo_root_digest +from ...audit.reader import count_audit_event_core_gaps +from ...audit.schema import open_audit_db_readonly +from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from ...config.memory import MemoryConfig +from ..models import MemoryProject +from ..trajectory.models import TRAJECTORY_PROJECTION_VERSION +from .store import canonical_stimulus_json, latest_done_projection_job + + +def _audit_event_core_fingerprint( + *, + audit_db_path: Path, + root_digest: str, +) -> dict[str, int]: + if not audit_db_path.is_file(): + return { + "event_core_count": 0, + "event_core_max_id": 0, + "legacy_event_count": 0, + } + conn = open_audit_db_readonly(audit_db_path) + try: + row = conn.execute( + "SELECT COUNT(*), COALESCE(MAX(id), 0) FROM controller_events " + "WHERE repo_root_digest=? " + "AND workflow_id IS NOT NULL AND workflow_id != '' " + "AND event_core_json IS NOT NULL AND event_core_sha256 IS NOT NULL", + (root_digest,), + ).fetchone() + count = int(row[0]) if row is not None else 0 + max_id = int(row[1]) if row is not None else 0 + finally: + conn.close() + legacy = count_audit_event_core_gaps( + db_path=audit_db_path, + repo_root_digest=root_digest, + ) + return { + "event_core_count": count, + "event_core_max_id": max_id, + "legacy_event_count": legacy, + } + + +def _memory_records_fingerprint( + conn: sqlite3.Connection, + *, + project_id: str, +) -> dict[str, int | str]: + row = conn.execute( + "SELECT COUNT(*), COALESCE(MAX(updated_at_utc), '') " + "FROM memory_records WHERE project_id=? 
def compute_projection_stimulus(
    *,
    conn: sqlite3.Connection,
    project: MemoryProject,
    root_path: Path,
    config: MemoryConfig,
    audit_db_path: Path | None = None,
) -> dict[str, object]:
    """Assemble the inputs whose change makes the projection stale.

    The result combines the repository root digest, the trajectory projection
    version, the ``trajectories_enabled`` and ``semantic.enabled`` config
    flags, the audit event-core fingerprint and the active memory-record
    fingerprint. When ``audit_db_path`` is not given, the audit database is
    located from ``root_path`` and ``DEFAULT_AUDIT_PATH``.
    """
    root_digest = repo_root_digest(root_path.resolve())
    if audit_db_path:
        audit_location = audit_db_path
    else:
        audit_location = resolve_audit_path(
            root_path=root_path,
            value=DEFAULT_AUDIT_PATH,
        )
    audit_part = _audit_event_core_fingerprint(
        audit_db_path=audit_location,
        root_digest=root_digest,
    )
    memory_part = _memory_records_fingerprint(conn, project_id=project.id)

    stimulus: dict[str, object] = {
        "repo_root_digest": root_digest,
        "trajectory_projection_version": TRAJECTORY_PROJECTION_VERSION,
        "trajectories_enabled": config.trajectories_enabled,
        "semantic_enabled": config.semantic.enabled,
    }
    stimulus.update(audit_part)
    stimulus.update(memory_part)
    return stimulus
def worker_claim_token(*, pid: int | None = None) -> str:
    """Return the ``<pid>@<hostname>`` token identifying a worker process.

    ``pid`` defaults to the current process id; the hostname always comes
    from ``socket.gethostname()``.
    """
    if pid is None:
        pid = os.getpid()
    return "@".join((str(pid), socket.gethostname()))
def _use_row_factory(conn: sqlite3.Connection) -> None:
    """Make ``conn`` return ``sqlite3.Row`` rows so columns can be read by name.

    This mutates the caller's connection; nothing here restores the previous
    row factory afterwards.
    """
    conn.row_factory = sqlite3.Row
" + "WHERE id=?", + (trigger, now, stimulus_json, job_id), + ) + conn.commit() + return EnqueueProjectionJobResult( + job_id=job_id, + status="pending", + coalesced=True, + reason="coalesced_pending", + ) + job_id = _new_job_id() + conn.execute( + "INSERT INTO memory_projection_jobs(" + "id, project_id, job_kind, status, trigger, requested_at_utc, " + "attempt, stimulus_json" + ") VALUES (?, ?, ?, 'pending', ?, ?, 0, ?)", + (job_id, project.id, job_kind, trigger, now, stimulus_json), + ) + conn.commit() + return EnqueueProjectionJobResult( + job_id=job_id, + status="pending", + coalesced=False, + reason=None, + ) + + +def _pid_alive(token: str | None) -> bool: + if not token: + return False + head = token.split("@", 1)[0] + if not head.isdigit(): + return False + pid = int(head) + if pid <= 0: + return False + try: + os.kill(pid, 0) + except OSError: + return False + return True + + +def _reclaim_stale_running_jobs( + conn: sqlite3.Connection, + *, + project_id: str, + running_timeout_seconds: int, +) -> None: + rows = conn.execute( + "SELECT id, claimed_by, started_at_utc FROM memory_projection_jobs " + "WHERE project_id=? AND status='running'", + (project_id,), + ).fetchall() + if not rows: + return + now = current_report_timestamp_utc() + for row in rows: + job_id = str(row[0]) + claimed_by = row[1] + started_at = row[2] + stale = not _pid_alive(claimed_by) + if not stale and started_at: + # Timestamp ordering is ISO-8601 UTC; lexicographic compare is safe. + from datetime import datetime, timedelta, timezone + + try: + started = datetime.fromisoformat(str(started_at).replace("Z", "+00:00")) + except ValueError: + stale = True + else: + deadline = started + timedelta(seconds=max(1, running_timeout_seconds)) + stale = datetime.now(timezone.utc) >= deadline + if not stale: + continue + conn.execute( + "UPDATE memory_projection_jobs " + "SET status='failed', finished_at_utc=?, error_message=? 
" + "WHERE id=?", + (now, "stale_running_reclaimed", job_id), + ) + + +def has_live_running_job( + conn: sqlite3.Connection, + *, + project_id: str, + running_timeout_seconds: int, +) -> bool: + """True if a worker is actively processing a job for this project. + + Stale (dead-PID / timed-out) running jobs are reclaimed first, so a crashed + worker never blocks future spawns. Used by the spawn guard to avoid + launching a second worker while one is already running. + """ + _reclaim_stale_running_jobs( + conn, + project_id=project_id, + running_timeout_seconds=running_timeout_seconds, + ) + conn.commit() + row = conn.execute( + "SELECT 1 FROM memory_projection_jobs " + "WHERE project_id=? AND status='running' LIMIT 1", + (project_id,), + ).fetchone() + return row is not None + + +def try_claim_flush_slot( + conn: sqlite3.Connection, + *, + project_id: str, + claimant: str, + job_kind: ProjectionJobKind = PROJECTION_BUNDLE_KIND, +) -> str | None: + """Atomically reserve the single delayed-flush worker slot on the pending + job. Returns the pending job id when THIS caller reserved it (the slot was + free or held by a dead worker); None when a live flush worker is already + scheduled or there is no pending job. The BEGIN IMMEDIATE guards against two + concurrent enqueues each spawning a sleeper for the same job. + """ + _use_row_factory(conn) + conn.execute("BEGIN IMMEDIATE") + try: + row = conn.execute( + "SELECT id, flush_claimed_by FROM memory_projection_jobs " + "WHERE project_id=? AND job_kind=? AND status='pending'", + (project_id, job_kind), + ).fetchone() + if row is None or _pid_alive(row["flush_claimed_by"]): + conn.execute("COMMIT") + return None + job_id = str(row["id"]) + conn.execute( + "UPDATE memory_projection_jobs SET flush_claimed_by=? 
def claim_next_projection_job(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    claimed_by: str,
    running_timeout_seconds: int,
) -> ProjectionJobRecord | None:
    """Claim the oldest pending job of a project, if no job is running.

    Runs inside one ``BEGIN IMMEDIATE`` transaction: stale running jobs are
    reclaimed first, then the oldest pending job (by ``requested_at_utc``,
    then ``id``) is switched to ``running`` with an incremented ``attempt``.

    Args:
        conn: Open connection; its ``row_factory`` is set to ``sqlite3.Row``.
        project_id: Project whose queue is inspected.
        claimed_by: Claim token stored on the job.
        running_timeout_seconds: Timeout passed to the stale-job reclaim.

    Returns:
        The claimed job record, or None when a job is still running or nothing
        is pending.

    Raises:
        Whatever the underlying statements raise; the transaction is rolled
        back first.
    """
    _use_row_factory(conn)
    conn.execute("BEGIN IMMEDIATE")
    try:
        _reclaim_stale_running_jobs(
            conn,
            project_id=project_id,
            running_timeout_seconds=running_timeout_seconds,
        )
        running = conn.execute(
            "SELECT id FROM memory_projection_jobs "
            "WHERE project_id=? AND status='running' LIMIT 1",
            (project_id,),
        ).fetchone()
        row: sqlite3.Row | None = None
        if running is None:
            row = conn.execute(
                "SELECT * FROM memory_projection_jobs "
                "WHERE project_id=? AND status='pending' "
                "ORDER BY requested_at_utc ASC, id ASC LIMIT 1",
                (project_id,),
            ).fetchone()
        if running is not None or row is None:
            conn.execute("COMMIT")
            return None
        now = current_report_timestamp_utc()
        attempt = int(row["attempt"]) + 1
        conn.execute(
            "UPDATE memory_projection_jobs "
            "SET status='running', started_at_utc=?, claimed_by=?, attempt=? "
            "WHERE id=?",
            (now, claimed_by, attempt, row["id"]),
        )
        # Read the claimed row back before COMMIT so the returned record is
        # exactly the state this transaction wrote.
        updated = conn.execute(
            "SELECT * FROM memory_projection_jobs WHERE id=?",
            (row["id"],),
        ).fetchone()
        conn.execute("COMMIT")
    except BaseException:
        # Roll back on ANY failure, not only sqlite3.Error: otherwise e.g. a
        # TypeError from a NULL ``attempt`` would leave the IMMEDIATE write
        # transaction open on this connection.
        if conn.in_transaction:
            conn.execute("ROLLBACK")
        raise
    assert updated is not None
    return _row_to_record(updated)
def pending_projection_job(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    job_kind: ProjectionJobKind = PROJECTION_BUNDLE_KIND,
) -> ProjectionJobRecord | None:
    """Return the active job of a project: a running one, else a pending one.

    Despite the name, rows with status ``running`` are included and sorted
    ahead of ``pending`` rows. Within a status the most recently requested row
    wins; ``id`` breaks timestamp ties so the result is deterministic, matching
    the ordering used by the other job queries in this module.

    Returns:
        The selected job record, or None when no pending/running job exists.
    """
    return _fetch_projection_job(
        conn,
        "SELECT * FROM memory_projection_jobs "
        "WHERE project_id=? AND job_kind=? AND status IN ('pending', 'running') "
        "ORDER BY CASE status WHEN 'running' THEN 0 ELSE 1 END, "
        "requested_at_utc DESC, id DESC LIMIT 1",
        (project_id, job_kind),
    )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import sqlite3 +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING + +from ...config.memory import MemoryConfig +from ...observability import ( + is_observability_enabled, + operation, + record_elapsed_span, + span, +) +from ...observability.profile import worker_bootstrap_sample +from ..experience.distillation_workflow import execute_experience_distillation +from ..models import MemoryProject +from ..semantic.rebuild_workflow import execute_semantic_index_rebuild +from ..sqlite_store import SqliteEngineeringMemoryStore +from ..trajectory.rebuild_workflow import execute_trajectory_rebuild +from .models import ProjectionJobStatus +from .store import claim_next_projection_job as _claim_next +from .store import ( + complete_projection_job, + worker_claim_token, +) + +if TYPE_CHECKING: + from ...observability.reason_kind import ReasonKind + + +@dataclass(frozen=True, slots=True) +class ProjectionWorkerResult: + status: str + job_id: str | None + reason: str | None + trajectory_status: str | None + semantic_status: str | None + experience_status: str | None = None + + +def _block_status(result: Mapping[str, object], key: str) -> str | None: + block = result.get(key) + return str(block.get("status")) if isinstance(block, dict) else None + + +def _payload_int(payload: Mapping[str, object], key: str) -> int: + """Read a non-negative integer counter from a step payload, defaulting to 0. + + Tolerant of partial payloads (failed/skipped steps may omit the key). 
+ """ + value = payload.get(key, 0) + return value if isinstance(value, int) else 0 + + +def _trajectory_incremental_watermark( + conn: sqlite3.Connection, + *, + project_id: str, +) -> int | None: + """Event-core id to rebuild trajectories incrementally after, or None to + force a full rebuild (first run, projection-version change, or no watermark). + + The watermark is the ``event_core_max_id`` of the last applied stimulus; the + append-only audit trail guarantees workflows with no newer event are + byte-identical, so they need not be re-projected. + """ + from ..trajectory.models import TRAJECTORY_PROJECTION_VERSION + from .staleness import last_applied_stimulus + + applied = last_applied_stimulus(conn, project_id=project_id) + if applied is None: + return None + if applied.get("trajectory_projection_version") != TRAJECTORY_PROJECTION_VERSION: + return None + watermark = applied.get("event_core_max_id") + return watermark if isinstance(watermark, int) else None + + +def _trajectory_reason_kind( + conn: sqlite3.Connection, + *, + project_id: str, + watermark: int | None, +) -> ReasonKind: + """Classify *why* the trajectory rebuild runs (deterministic, spec §6.4). + + An integer watermark is an incremental content-change rebuild and needs no + extra query. A ``None`` watermark forces a full rebuild; one cheap lookup + then distinguishes a projection-version bump from a first index. 
+ """ + if watermark is not None: + return "content_changed" + from ..trajectory.models import TRAJECTORY_PROJECTION_VERSION + from .staleness import last_applied_stimulus + + applied = last_applied_stimulus(conn, project_id=project_id) + if applied is None: + return "first_index" + if applied.get("trajectory_projection_version") != TRAJECTORY_PROJECTION_VERSION: + return "schema_version_changed" + return "first_index" + + +def _correlation_handoff() -> tuple[str | None, str | None]: + """Read the cross-process observability handoff the spawner injected, so the + worker operation links under the finish operation that triggered it. Returns + ``(correlation_id, parent_operation_id)``, both None for a standalone run. + """ + return ( + os.environ.get("CODECLONE_OBSERVABILITY_CORRELATION_ID") or None, + os.environ.get("CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID") or None, + ) + + +def _emit_worker_bootstrap_span() -> None: + """Record the worker cold-start (process spawn -> first job instrumentation) + as a ``memory.projection.worker_bootstrap`` span, positioned at the process + creation time so the spawn->job gap in the waterfall is labelled rather than + left as empty space. No-op when disabled or psutil is unavailable. 
+ """ + if not is_observability_enabled(): + return + sample = worker_bootstrap_sample() + if sample is None: + return + started_at_utc, elapsed_ms = sample + record_elapsed_span( + "memory.projection.worker_bootstrap", + started_at_utc=started_at_utc, + duration_ms=elapsed_ms, + ) + + +def run_projection_job( + store: SqliteEngineeringMemoryStore, + *, + job_id: str, + root_path: Path, + config: MemoryConfig, + project: MemoryProject, + stimulus: Mapping[str, object], + emit_bootstrap_span: bool = True, +) -> tuple[ProjectionJobStatus, dict[str, object], str | None]: + conn = store.connection + correlation_id, parent_operation_id = _correlation_handoff() + with operation( + name="memory.projection.job", + surface="memory", + correlation_id=correlation_id, + parent_operation_id=parent_operation_id, + ): + # Only a spawned worker (one that carries the env handoff) has a real + # cold-start to attribute; an in-process run shares the caller's process. + # A delayed flush worker slept before this point, so its "bootstrap" gap + # is intentional idle time, not cold-start — suppress it to avoid a + # multi-second phantom span in the waterfall. 
def run_projection_jobs_once(
    store: SqliteEngineeringMemoryStore,
    *,
    root_path: Path,
    config: MemoryConfig,
    project: MemoryProject,
    running_timeout_seconds: int,
    emit_bootstrap_span: bool = True,
) -> ProjectionWorkerResult:
    """Claim at most one projection job, run it, and record its outcome.

    Args:
        store: Open memory store; its connection is used for all job updates.
        root_path: Project root forwarded to the rebuild steps.
        config: Memory configuration forwarded to the rebuild steps.
        project: Project whose queue is drained.
        running_timeout_seconds: Timeout used when reclaiming stale jobs.
        emit_bootstrap_span: Forwarded to ``run_projection_job``.

    Returns:
        ``status="nothing_to_do"`` when no job could be claimed; otherwise the
        final job status with the per-step statuses. Any exception raised while
        preparing or running the job is recorded on the job as ``failed`` and
        reported in ``reason`` instead of propagating.
    """
    conn = store.connection
    claimed = _claim_next(
        conn,
        project_id=project.id,
        claimed_by=worker_claim_token(),
        running_timeout_seconds=running_timeout_seconds,
    )
    if claimed is None:
        return ProjectionWorkerResult(
            status="nothing_to_do",
            job_id=None,
            reason="no_pending_job_or_running",
            trajectory_status=None,
            semantic_status=None,
        )
    from .staleness import parse_stimulus_json

    try:
        # Parsing happens inside the guard so that a failure here marks the
        # claimed job ``failed`` instead of leaving it stuck in ``running``.
        stimulus = parse_stimulus_json(claimed.stimulus_json)
        final_status, result, error = run_projection_job(
            store,
            job_id=claimed.id,
            root_path=root_path,
            config=config,
            project=project,
            stimulus=stimulus,
            emit_bootstrap_span=emit_bootstrap_span,
        )
    except Exception as exc:
        # Message-less exceptions (e.g. a bare ``assert``) stringify to "";
        # fall back to the class name so the failure stays diagnosable.
        failure_reason = str(exc) or type(exc).__name__
        complete_projection_job(
            conn,
            job_id=claimed.id,
            status="failed",
            error_message=failure_reason,
        )
        return ProjectionWorkerResult(
            status="failed",
            job_id=claimed.id,
            reason=failure_reason,
            trajectory_status=None,
            semantic_status=None,
        )
    complete_projection_job(
        conn,
        job_id=claimed.id,
        status=final_status,
        result=result,
        error_message=error,
    )
    return ProjectionWorkerResult(
        status=final_status,
        job_id=claimed.id,
        reason=error,
        trajectory_status=_block_status(result, "trajectory"),
        semantic_status=_block_status(result, "semantic"),
        experience_status=_block_status(result, "experience"),
    )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +import time +from collections.abc import Mapping +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Literal + +from ...config.memory import MemoryConfig, resolve_memory_config +from ...config.observability import resolve_observability_config +from ...observability import ( + bootstrap, + current_operation_context, + is_observability_enabled, + operation, + shutdown, +) +from ...utils.ci import is_ci_environment +from ..exceptions import MemoryContractError +from ..models import MemoryProject +from ..project import resolve_memory_db_path, resolve_project_identity +from ..sqlite_store import SqliteEngineeringMemoryStore +from .models import ProjectionJobRecord +from .spawn import SpawnWorkerResult, spawn_projection_jobs_worker +from .staleness import ( + compute_projection_stimulus, + last_applied_stimulus, + projection_is_stale, +) +from .store import ( + enqueue_projection_job, + has_live_running_job, + latest_done_projection_job, + list_projection_jobs, + pending_projection_job, + set_flush_claimed_by, + try_claim_flush_slot, + worker_claim_token, +) +from .worker import run_projection_jobs_once + +ProjectionRebuildPolicy = Literal["off", "enqueue_when_stale"] + + +def _require_memory_store_session( + root_path: Path, + config: MemoryConfig | None = None, +) -> tuple[Path, MemoryConfig, MemoryProject, SqliteEngineeringMemoryStore]: + resolved_root = root_path.resolve() + resolved_config = config or resolve_memory_config(resolved_root) + db_path = resolve_memory_db_path(resolved_root, resolved_config) + if not db_path.exists(): + raise MemoryContractError( + f"Engineering memory database not found: {db_path}. " + "Run memory init or refresh_from_run first." 
+ ) + project = resolve_project_identity(resolved_root) + store = SqliteEngineeringMemoryStore(db_path) + return resolved_root, resolved_config, project, store + + +def execute_projection_rebuild_status( + *, + root_path: Path, + config: MemoryConfig | None = None, + limit: int = 10, +) -> dict[str, object]: + resolved_root, resolved_config, project, store = _require_memory_store_session( + root_path, + config=config, + ) + conn = store.connection + try: + current = compute_projection_stimulus( + conn=conn, + project=project, + root_path=resolved_root, + config=resolved_config, + ) + applied = last_applied_stimulus(conn, project_id=project.id) + active = pending_projection_job(conn, project_id=project.id) + jobs = list_projection_jobs(conn, project_id=project.id, limit=limit) + finally: + store.close() + return { + "action": "projection_rebuild_status", + "policy": resolved_config.projection_rebuild_policy, + "ci_environment": is_ci_environment(), + "stale": projection_is_stale(current=current, last_applied=applied), + "current_stimulus": current, + "last_applied_stimulus": applied, + "active_job": _job_payload(active), + "jobs": [_job_payload(job) for job in jobs], + } + + +@dataclass(frozen=True, slots=True) +class _FlushDecision: + immediate: bool + deadline_utc: str | None + + +_FLUSH_WINDOW_MIN_SECONDS = 10 +_FLUSH_WINDOW_MAX_SECONDS = 600 + + +def _active_record_delta( + current: Mapping[str, object], applied: Mapping[str, object] | None +) -> int: + """Magnitude of the active-record count change since the last applied + stimulus — the projection-affecting delta that bypasses the coalesce window. + Audit-only events (no record change) deliberately count as 0 so a burst of + service events never forces an immediate model load. 
+ """ + if not applied: + return 0 + cur = current.get("active_record_count") + prev = applied.get("active_record_count") + if not isinstance(cur, int) or not isinstance(prev, int): + return 0 + return abs(cur - prev) + + +def _add_seconds_utc(iso_utc: str, seconds: int) -> str: + try: + base = datetime.fromisoformat(iso_utc.replace("Z", "+00:00")) + except ValueError: + base = datetime.now(timezone.utc) + if base.tzinfo is None: + base = base.replace(tzinfo=timezone.utc) + return (base + timedelta(seconds=max(1, seconds))).isoformat() + + +def _decide_flush( + conn: sqlite3.Connection, + *, + project: MemoryProject, + stimulus: Mapping[str, object], + config: MemoryConfig, + trigger: str, + force: bool, +) -> _FlushDecision: + """Spawn now, or defer behind a fixed last_reindex+window deadline. + + Deferral coalesces bursts of small mcp_finish/auto rebuilds into one model + load. A disabled window (<=0), an explicit/cli/forced trigger, the first + index, or a large active-record delta all spawn immediately. The deadline is + last_reindex+window (fixed, not sliding) so a steady low-rate stream still + drains every window rather than being deferred forever. 
+ """ + window = config.projection_rebuild_coalesce_window_seconds + if window <= 0 or force or trigger in {"explicit", "cli"}: + return _FlushDecision(immediate=True, deadline_utc=None) + last_done = latest_done_projection_job(conn, project_id=project.id) + if last_done is None or not last_done.finished_at_utc: + return _FlushDecision(immediate=True, deadline_utc=None) + applied = last_applied_stimulus(conn, project_id=project.id) + if ( + _active_record_delta(stimulus, applied) + >= config.projection_rebuild_coalesce_min_delta + ): + return _FlushDecision(immediate=True, deadline_utc=None) + window = max(_FLUSH_WINDOW_MIN_SECONDS, min(_FLUSH_WINDOW_MAX_SECONDS, window)) + deadline = _add_seconds_utc(last_done.finished_at_utc, window) + return _FlushDecision(immediate=False, deadline_utc=deadline) + + +def _spawn_with_operation( + root_path: Path, *, not_before_utc: str | None +) -> SpawnWorkerResult: + # Op B of the finish->spawn->worker chain (spec §4.3): the spawn decision + # becomes the active operation, inheriting the finish op (A) as parent + + # correlation so the env handoff in spawn.py parents the worker (C) under B. + # Inert when observability is disabled. + parent = current_operation_context() + with operation( + name="memory.projection.spawn", + surface="memory", + parent_operation_id=parent[0] if parent else None, + correlation_id=parent[1] if parent else None, + ): + return spawn_projection_jobs_worker( + root_path=root_path, not_before_utc=not_before_utc + ) + + +def _run_flush_spawn( + conn: sqlite3.Connection, + *, + project: MemoryProject, + root_path: Path, + decision: _FlushDecision, +) -> tuple[bool, int | None, str | None]: + """Spawn the rebuild worker per the flush decision. Immediate -> spawn now. + Deferred -> reserve the single flush slot (strict guard) and spawn exactly + one delayed worker, recording its PID so a concurrent enqueue coalesces + instead of spawning a second sleeper. Returns (spawned, worker_pid, reason). 
+ """ + if decision.immediate: + result = _spawn_with_operation(root_path, not_before_utc=None) + return result.spawned, result.pid, None + claimed_job_id = try_claim_flush_slot( + conn, project_id=project.id, claimant=worker_claim_token() + ) + if claimed_job_id is None: + return False, None, "flush_already_scheduled" + result = _spawn_with_operation(root_path, not_before_utc=decision.deadline_utc) + if result.spawned: + set_flush_claimed_by( + conn, job_id=claimed_job_id, claimant=worker_claim_token(pid=result.pid) + ) + return True, result.pid, None + # Spawn failed: release the slot so the next enqueue retries the flush. + set_flush_claimed_by(conn, job_id=claimed_job_id, claimant=None) + return False, None, result.reason + + +def execute_enqueue_projection_rebuild( + *, + root_path: Path, + config: MemoryConfig | None = None, + trigger: Literal["auto", "explicit", "mcp_finish", "cli"] = "explicit", + force: bool = False, + spawn_worker: bool | None = None, +) -> dict[str, object]: + if is_ci_environment(): + return { + "action": "enqueue_projection_rebuild", + "status": "skipped", + "reason": "ci_environment", + "job_id": None, + "spawned": False, + } + resolved_config = config or resolve_memory_config(root_path.resolve()) + if resolved_config.projection_rebuild_policy == "off" and not force: + return { + "action": "enqueue_projection_rebuild", + "status": "skipped", + "reason": "policy_off", + "job_id": None, + "spawned": False, + } + resolved_root, resolved_config, project, store = _require_memory_store_session( + root_path, + config=resolved_config, + ) + conn = store.connection + try: + stimulus = compute_projection_stimulus( + conn=conn, + project=project, + root_path=resolved_root, + config=resolved_config, + ) + if ( + not force + and resolved_config.projection_rebuild_policy == "enqueue_when_stale" + ): + applied = last_applied_stimulus(conn, project_id=project.id) + if not projection_is_stale(current=stimulus, last_applied=applied): + return { + 
"action": "enqueue_projection_rebuild", + "status": "unchanged", + "reason": "stimulus_unchanged", + "job_id": None, + "spawned": False, + } + enqueue_result = enqueue_projection_job( + conn, + project=project, + trigger=trigger, + stimulus=stimulus, + ) + worker_running = has_live_running_job( + conn, + project_id=project.id, + running_timeout_seconds=( + resolved_config.projection_rebuild_running_timeout_seconds + ), + ) + base_should_spawn = ( + resolved_config.projection_rebuild_spawn_worker + if spawn_worker is None + else spawn_worker + ) + spawned = False + worker_pid: int | None = None + spawn_skipped_reason: str | None = None + flush_deferred = False + if base_should_spawn and worker_running: + # A worker is already processing; the pending job it leaves behind is + # drained by the next spawn when none is running. Avoid the redundant + # overlapping process. + spawn_skipped_reason = "worker_already_running" + elif base_should_spawn: + decision = _decide_flush( + conn, + project=project, + stimulus=stimulus, + config=resolved_config, + trigger=trigger, + force=force, + ) + flush_deferred = not decision.immediate + spawned, worker_pid, spawn_skipped_reason = _run_flush_spawn( + conn, + project=project, + root_path=resolved_root, + decision=decision, + ) + return { + "action": "enqueue_projection_rebuild", + "status": "enqueued", + "reason": enqueue_result.reason, + "job_id": enqueue_result.job_id, + "coalesced": enqueue_result.coalesced, + "spawned": spawned, + "worker_pid": worker_pid, + "spawn_skipped_reason": spawn_skipped_reason, + "flush_deferred": flush_deferred, + } + finally: + store.close() + + +# Defensive ceiling on the trailing-flush sleep so a malformed/runaway deadline +# can never park a worker indefinitely; the real window is clamped far lower at +# the enqueue layer. 
def execute_run_projection_jobs_once(
    *,
    root_path: Path,
    config: MemoryConfig | None = None,
    not_before_utc: str | None = None,
) -> dict[str, object]:
    """Run one projection-worker pass, optionally after a delayed-flush wait.

    Args:
        root_path: Project root; resolved before use.
        config: Memory configuration; passed through to the store session
            helper, which resolves one when this is None.
        not_before_utc: ISO-8601 deadline to sleep until before doing any
            work; None (or a past/malformed value) means no wait.

    Returns:
        A payload with ``action``, the worker ``status``, ``job_id``,
        ``reason`` and the trajectory/semantic/experience step statuses.
    """
    # Delayed single-shot flush: a coalescing spawn parks the worker until its
    # deadline BEFORE any bootstrap/store-open or model load, so the wait holds
    # no DB lock and is not counted as observed work. Sleeping here (not in the
    # worker body) keeps the bootstrap-before-store-open order below intact.
    delay = _flush_sleep_seconds(not_before_utc)
    if delay > 0:
        time.sleep(delay)
    # Bootstrap observability BEFORE opening the store: open_memory_db attaches
    # the per-span DB-query counter only while observability is enabled, so a
    # store opened pre-bootstrap stays uninstrumented and the worker's whole
    # query stream is invisible to the cockpit. owns_observability guards against
    # a caller that already bootstrapped (e.g. an MCP session).
    resolved_root = root_path.resolve()
    owns_observability = not is_observability_enabled()
    if owns_observability:
        bootstrap(resolve_observability_config(), root=resolved_root)
    try:
        resolved_root, resolved_config, project, store = _require_memory_store_session(
            resolved_root,
            config=config,
        )
        try:
            worker_result = run_projection_jobs_once(
                store,
                root_path=resolved_root,
                config=resolved_config,
                project=project,
                running_timeout_seconds=(
                    resolved_config.projection_rebuild_running_timeout_seconds
                ),
                # A delayed worker slept above; its spawn->job gap is intentional,
                # so suppress the cold-start bootstrap span (see worker.py).
                emit_bootstrap_span=not_before_utc is None,
            )
        finally:
            store.close()
    finally:
        if owns_observability:
            shutdown()
    return {
        "action": "run_projection_jobs_once",
        "status": worker_result.status,
        "job_id": worker_result.job_id,
        "reason": worker_result.reason,
        "trajectory_status": worker_result.trajectory_status,
        "semantic_status": worker_result.semantic_status,
        # The worker result carries the experience step status as well; surface
        # it so all three projection steps are reported consistently.
        "experience_status": worker_result.experience_status,
    }
job.attempt, + "error_message": job.error_message, + } + + +__all__ = [ + "execute_enqueue_projection_rebuild", + "execute_projection_rebuild_status", + "execute_run_projection_jobs_once", + "is_ci_environment", + "maybe_auto_enqueue_projection_rebuild", +] diff --git a/codeclone/memory/locks.py b/codeclone/memory/locks.py new file mode 100644 index 00000000..b94a68b7 --- /dev/null +++ b/codeclone/memory/locks.py @@ -0,0 +1,38 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Cross-process advisory lock for memory init/refresh.""" + +from __future__ import annotations + +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path +from typing import Final + +from ..utils.file_lock import advisory_file_lock +from .exceptions import MemoryInitLockError + +DEFAULT_MEMORY_INIT_LOCK_TIMEOUT_SECONDS: Final[float] = 30.0 + + +@contextmanager +def memory_init_lock( + lock_path: Path, + *, + timeout_seconds: float = DEFAULT_MEMORY_INIT_LOCK_TIMEOUT_SECONDS, +) -> Iterator[None]: + with advisory_file_lock( + lock_path, + timeout_seconds=timeout_seconds, + timeout_error=lambda path: MemoryInitLockError( + f"Timed out acquiring memory init lock at {path}" + ), + ): + yield + + +__all__ = ["DEFAULT_MEMORY_INIT_LOCK_TIMEOUT_SECONDS", "memory_init_lock"] diff --git a/codeclone/memory/models.py b/codeclone/memory/models.py new file mode 100644 index 00000000..e17f667d --- /dev/null +++ b/codeclone/memory/models.py @@ -0,0 +1,222 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import uuid +from dataclasses import dataclass, field +from typing import Literal + +import orjson + +from ..contracts import ENGINEERING_MEMORY_SCHEMA_VERSION +from ..utils.json_io import json_text +from .enums import ( + EvidenceKind, + IngestionMode, + IngestionRunStatus, + LinkRelation, + MemoryConfidence, + MemoryIngestSource, + MemoryOrigin, + MemoryRecordType, + MemoryStatus, + SubjectKind, + SubjectRelation, +) +from .identity import make_identity_key + +UpsertAction = Literal["created", "updated", "unchanged", "skipped"] + + +def generate_memory_id(*, prefix: str = "mem") -> str: + return f"{prefix}-{uuid.uuid4().hex}" + + +@dataclass(frozen=True, slots=True) +class MemoryProject: + id: str + root: str + git_remote: str | None + git_branch: str | None + git_head: str | None + python_tag: str | None + created_at_utc: str + updated_at_utc: str + + +@dataclass(frozen=True, slots=True) +class MemoryRecord: + id: str + project_id: str + identity_key: str + type: MemoryRecordType + status: MemoryStatus + confidence: MemoryConfidence + origin: MemoryOrigin + ingest_source: MemoryIngestSource + statement: str + summary: str | None + payload: dict[str, object] | None + created_at_utc: str + updated_at_utc: str + last_verified_at_utc: str | None + expires_at_utc: str | None + created_by: str + verified_by: str | None + approved_by: str | None + approved_at_utc: str | None + report_digest: str | None + code_fingerprint: str | None + stale_reason: str | None + created_on_branch: str | None + created_at_commit: str | None + verified_on_branch: str | None + verified_at_commit: str | None + schema_version: str = ENGINEERING_MEMORY_SCHEMA_VERSION + + +@dataclass(frozen=True, slots=True) +class MemorySubject: + id: str + memory_id: str + subject_kind: SubjectKind + subject_key: str + relation: SubjectRelation = "about" + + +@dataclass(frozen=True, slots=True) 
+class MemoryEvidence: + id: str + memory_id: str + evidence_kind: EvidenceKind + ref: str + locator: str | None + quote: str | None + digest: str | None + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class MemoryLink: + id: str + project_id: str + from_memory_id: str + to_memory_id: str + relation: LinkRelation + created_by: str + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class IngestionRun: + id: str + project_id: str + mode: IngestionMode + started_at_utc: str + finished_at_utc: str | None + status: IngestionRunStatus + analysis_fingerprint: str | None + report_digest: str | None + branch: str | None + commit: str | None + records_created: int = 0 + records_updated: int = 0 + records_marked_stale: int = 0 + candidates_created: int = 0 + contradictions_found: int = 0 + message: str | None = None + + +@dataclass(frozen=True, slots=True) +class MemoryRevision: + id: str + memory_id: str + revision_number: int + previous_statement: str | None + new_statement: str + previous_payload: dict[str, object] | None + new_payload: dict[str, object] | None + reason: str | None + changed_by: str + changed_at_utc: str + branch: str | None + commit: str | None + + +@dataclass(frozen=True, slots=True) +class MemoryQuery: + project_id: str + types: tuple[MemoryRecordType, ...] = () + statuses: tuple[MemoryStatus, ...] 
@dataclass
class RecordBatch:
    """Mutable accumulator of records plus their subjects, evidence and links.

    ``batch += other`` appends every list of ``other`` onto the matching list
    of ``batch`` in place and yields ``batch`` itself.
    """

    records: list[MemoryRecord] = field(default_factory=list)
    subjects: list[MemorySubject] = field(default_factory=list)
    evidence: list[MemoryEvidence] = field(default_factory=list)
    links: list[MemoryLink] = field(default_factory=list)

    def __iadd__(self, other: RecordBatch) -> RecordBatch:
        # Same four buckets, same order as the field declarations above.
        for bucket in ("records", "subjects", "evidence", "links"):
            getattr(self, bucket).extend(getattr(other, bucket))
        return self
MEMORY_ROOT_SCOPE_ERROR = (
    "Engineering Memory requires a file, symbol, or declared intent scope. "
    "Project root is not a valid memory scope. Use status/search for project "
    "orientation."
)

MEMORY_RETRIEVAL_SCOPE_REQUIRED_ERROR = (
    "get_relevant_memory requires scope, intent_id, or symbols. "
    "Use query_engineering_memory(mode=status|search) for project orientation."
)

MEMORY_COVERAGE_SCOPE_REQUIRED_ERROR = (
    "mode=coverage requires one or more repo-relative scope paths."
)


def normalize_repo_path(raw_path: str) -> str:
    """Normalise a repo-relative path to POSIX form.

    Backslashes become ``/``, surrounding whitespace and one leading ``./``
    are dropped. An empty input normalises to ``"."``.

    Raises:
        ValueError: if the path is absolute (POSIX sense) or contains ``..``.
    """
    text = raw_path.replace("\\", "/").strip().removeprefix("./")
    path = PurePosixPath(text)
    if path.is_absolute() or ".." in path.parts:
        msg = "path must be repo-relative without traversal"
        raise ValueError(msg)
    return path.as_posix()


def is_root_scope_path(normalized_path: str) -> bool:
    """True when an already-normalised path denotes the project root."""
    return normalized_path in {"", "."}


def normalize_memory_scope_path(raw_path: str) -> str:
    """Normalise a scope path, rejecting the project root.

    Raises:
        MemoryContractError: if the path normalises to the project root.
        ValueError: propagated from ``normalize_repo_path``.
    """
    normalized = normalize_repo_path(raw_path)
    if is_root_scope_path(normalized):
        raise MemoryContractError(MEMORY_ROOT_SCOPE_ERROR)
    return normalized


def normalize_memory_scope_paths(raw_paths: Sequence[str]) -> tuple[str, ...]:
    """Normalise every scope path; an empty sequence is a contract error."""
    if not raw_paths:
        raise MemoryContractError(MEMORY_COVERAGE_SCOPE_REQUIRED_ERROR)
    return tuple(normalize_memory_scope_path(item) for item in raw_paths)


def repo_path_to_module_key(rel_path: str) -> str:
    """Map ``pkg/mod.py`` to ``pkg.mod`` and ``pkg/__init__.py`` to ``pkg``."""
    module_path = normalize_repo_path(rel_path).removesuffix(".py").replace("/", ".")
    if module_path.endswith(".__init__"):
        module_path = module_path[: -len(".__init__")]
    return module_path


def expand_scope_paths(scope_paths: frozenset[str]) -> frozenset[str]:
    """Return each normalised scope path together with its dotted module key."""
    expanded: set[str] = set()
    for raw_path in scope_paths:
        normalized = normalize_memory_scope_path(raw_path)
        expanded.add(normalized)
        expanded.add(repo_path_to_module_key(normalized))
    return frozenset(expanded)


def subject_matches_scope(
    subject_key: str,
    *,
    scope_paths: frozenset[str],
) -> float:
    """Score how well a subject key matches any of the scope paths.

    Returns ``1.0`` for an exact path or module-key match, ``0.8`` when one
    side is a path/module prefix of the other, and ``0.0`` otherwise.
    """
    key = subject_key.replace("\\", "/").strip("/")
    best = 0.0
    for scope_path in scope_paths:
        normalized = normalize_memory_scope_path(scope_path)
        module_key = repo_path_to_module_key(normalized)
        if key in {normalized, module_key}:
            return 1.0
        if not key:
            # An empty key would turn the prefix tests below into
            # startswith("/") / startswith("."), which spuriously matches
            # dot-directory scopes such as ".github/ci.py". The scope path is
            # still validated above, so invalid scopes keep raising.
            continue
        if key.startswith(f"{normalized}/") or normalized.startswith(f"{key}/"):
            best = max(best, 0.8)
        if key.startswith(f"{module_key}.") or module_key.startswith(f"{key}."):
            best = max(best, 0.8)
    return best
"normalize_memory_scope_paths", + "normalize_repo_path", + "repo_path_to_module_key", + "subject_matches_scope", +] diff --git a/codeclone/memory/project.py b/codeclone/memory/project.py new file mode 100644 index 00000000..792e578b --- /dev/null +++ b/codeclone/memory/project.py @@ -0,0 +1,204 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import subprocess +from dataclasses import dataclass +from pathlib import Path + +from ..baseline.trust import current_python_tag +from ..config.memory import MemoryConfig, resolve_memory_config +from ..report.meta import current_report_timestamp_utc +from ..utils.coerce import as_mapping +from ..utils.repo_paths import ( + PathOutsideRepoError, + RepoPathError, + RepoPathPolicy, + resolve_under_repo_root, +) +from .models import MemoryEvidence, MemoryProject, MemorySubject, generate_memory_id +from .paths import normalize_repo_path + + +@dataclass(frozen=True, slots=True) +class GitProvenance: + remote: str | None + branch: str | None + head: str | None + available: bool + + +def resolve_memory_db_path(root_path: Path, config: MemoryConfig | None = None) -> Path: + resolved = config or resolve_memory_config(root_path) + return resolved.db_path + + +def resolve_project_identity(root_path: Path) -> MemoryProject: + resolved_root = root_path.resolve() + git = read_git_provenance(resolved_root) + now = current_report_timestamp_utc() + project_id = compute_project_id(resolved_root) + return MemoryProject( + id=project_id, + root=str(resolved_root), + git_remote=git.remote, + git_branch=git.branch, + git_head=git.head, + python_tag=current_python_tag(), + created_at_utc=now, + updated_at_utc=now, + ) + + +def compute_project_id(root_path: Path) -> 
def read_git_provenance(root_path: Path) -> GitProvenance:
    """Read branch, HEAD and origin remote for ``root_path``.

    Reports ``available=False`` (all fields ``None``) when ``root_path/.git``
    does not exist or when branch/HEAD cannot be read.
    """
    if not (root_path / ".git").exists():
        return GitProvenance(remote=None, branch=None, head=None, available=False)
    branch = _git_output_optional(root_path, ["rev-parse", "--abbrev-ref", "HEAD"])
    head = _git_output_optional(root_path, ["rev-parse", "HEAD"])
    if not branch or not head:
        return GitProvenance(remote=None, branch=None, head=None, available=False)
    remote = _git_output_optional(root_path, ["remote", "get-url", "origin"])
    return GitProvenance(
        remote=remote or None,
        branch=branch or None,
        head=head or None,
        available=True,
    )


def git_head_evidence(
    *,
    memory_id: str,
    git: GitProvenance,
    created_at_utc: str,
) -> MemoryEvidence | None:
    """Build a ``git_commit`` evidence row for the current HEAD.

    Returns ``None`` when git provenance is unavailable or has no HEAD.
    """
    if not git.available or not git.head:
        return None
    return MemoryEvidence(
        id=generate_memory_id(prefix="evid"),
        memory_id=memory_id,
        evidence_kind="git_commit",
        ref=git.head,
        locator=git.branch,
        quote=git.remote,
        digest=None,
        created_at_utc=created_at_utc,
    )


def analysis_fingerprint_from_report(report_document: dict[str, object]) -> str:
    """Derive a 16-character analysis fingerprint from a report document.

    Prefers ``integrity.digest.value``; falls back to a SHA-256 of
    ``meta.report_generated_at_utc``; returns ``"unknown"`` when neither is
    present.
    """
    integrity = as_mapping(report_document.get("integrity"))
    digest = as_mapping(integrity.get("digest"))
    # A JSON null must count as "missing": str(None) would yield the literal
    # text "None" and be mistaken for a real digest.
    raw_value = digest.get("value")
    value = "" if raw_value is None else str(raw_value).strip()
    if value:
        return value[:16]
    meta = as_mapping(report_document.get("meta"))
    raw_generated = meta.get("report_generated_at_utc")
    generated = "" if raw_generated is None else str(raw_generated).strip()
    if generated:
        return hashlib.sha256(generated.encode("utf-8")).hexdigest()[:16]
    return "unknown"


def report_digest_from_report(report_document: dict[str, object]) -> str | None:
    """Return ``integrity.digest.value`` as text, or ``None`` when absent/empty."""
    integrity = as_mapping(report_document.get("integrity"))
    digest = as_mapping(integrity.get("digest"))
    raw_value = digest.get("value")
    # Same null handling as analysis_fingerprint_from_report.
    value = "" if raw_value is None else str(raw_value).strip()
    return value or None


def module_repo_path(module_key: str) -> str:
    """Map a dotted module key (``pkg.mod``) to ``pkg/mod.py``."""
    return module_key.replace(".", "/") + ".py"


def subject_path_fingerprint(root_path: Path, rel_path: str) -> str | None:
    """SHA-1 hex digest of the on-disk bytes of a repo-relative file.

    Returns ``None`` when the path is not a valid repo-relative path, does not
    resolve to an existing file under ``root_path``, or cannot be read.
    """

    try:
        normalized = normalize_repo_path(rel_path)
    except ValueError:
        return None
    try:
        file_path = resolve_under_repo_root(
            root_path,
            normalized,
            policy=RepoPathPolicy(must_exist=True, must_be_file=True),
        )
    except (PathOutsideRepoError, RepoPathError):
        return None
    try:
        content = file_path.read_bytes()
    except OSError:
        # The file can vanish or be unreadable between resolution and read;
        # treat that like any other "no fingerprint available" case.
        return None
    # Content fingerprint, not a security primitive: the flag keeps the call
    # usable on builds that restrict SHA-1 and does not change the digest.
    return hashlib.sha1(content, usedforsecurity=False).hexdigest()


def subject_fingerprint_for_subject(
    root_path: Path,
    subject: MemorySubject,
) -> str | None:
    """Fingerprint the file behind a subject, if the subject maps to a file.

    ``path`` / ``test`` / ``doc`` subjects use the subject key as the path;
    ``module`` subjects are mapped through ``module_repo_path``. Any other
    subject kind yields ``None``.
    """
    if subject.subject_kind in ("path", "test", "doc"):
        return subject_path_fingerprint(root_path, subject.subject_key)
    if subject.subject_kind == "module":
        return subject_path_fingerprint(
            root_path, module_repo_path(subject.subject_key)
        )
    return None


def code_fingerprint_for_memory_subject(
    root_path: Path,
    *,
    subject_path: str | None = None,
    module_key: str | None = None,
    analysis_fingerprint: str | None = None,
) -> str | None:
    """Pick the best available fingerprint for a memory subject.

    Tries the file at ``subject_path``, then the file derived from
    ``module_key``, and finally falls back to ``analysis_fingerprint``.
    """
    if subject_path is not None:
        file_fingerprint = subject_path_fingerprint(root_path, subject_path)
        if file_fingerprint is not None:
            return file_fingerprint
    if module_key is not None:
        file_fingerprint = subject_path_fingerprint(
            root_path,
            module_repo_path(module_key),
        )
        if file_fingerprint is not None:
            return file_fingerprint
    return analysis_fingerprint


def _git_output_optional(root_path: Path, args: list[str]) -> str | None:
    """Run ``git <args>`` in ``root_path`` and return stripped stdout.

    Returns ``None`` on launch failure, non-zero exit, a 5 second timeout, or
    empty output.
    """
    try:
        completed = subprocess.run(
            ["git", *args],
            cwd=root_path,
            check=True,
            capture_output=True,
            text=True,
            timeout=5.0,
        )
    except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired):
        return None
    text = completed.stdout.strip()
    return text or None
"read_git_provenance", + "report_digest_from_report", + "resolve_memory_db_path", + "resolve_project_identity", + "subject_fingerprint_for_subject", + "subject_path_fingerprint", +] diff --git a/codeclone/memory/report_trust.py b/codeclone/memory/report_trust.py new file mode 100644 index 00000000..2a726d9e --- /dev/null +++ b/codeclone/memory/report_trust.py @@ -0,0 +1,151 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import subprocess +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path + +from ..utils.coerce import as_mapping, as_sequence + + +@dataclass(frozen=True, slots=True) +class CachedReportTrust: + trusted: bool + reason: str | None = None + + +def _git_repo(root_path: Path) -> bool: + return (root_path / ".git").exists() + + +def cached_report_untrusted_reason( + *, + root_path: Path, + report_path: Path, + report_document: Mapping[str, object], +) -> str | None: + """Return a human-readable reason when a cached report must not be reused.""" + meta = as_mapping(report_document.get("meta")) + scan_root_raw = str(meta.get("scan_root", "")).strip() + if not scan_root_raw: + return "cached report missing meta.scan_root" + try: + scan_root = Path(scan_root_raw).expanduser().resolve() + except OSError: + return "cached report meta.scan_root is invalid" + if scan_root != root_path.resolve(): + return "cached report scan_root does not match init root" + + inventory = as_mapping(report_document.get("inventory")) + file_registry = as_mapping(inventory.get("file_registry")) + items = { + str(item).replace("\\", "/").strip("/") + for item in as_sequence(file_registry.get("items")) + if str(item).strip() + } + if not items: + return "cached report 
inventory.file_registry is empty" + + if _git_repo(root_path): + tracked = _git_tracked_py_paths(root_path) + if tracked is not None: + missing = tracked - items + if missing: + sample = ", ".join(sorted(missing)[:3]) + extra = len(missing) - 3 + suffix = f" (+{extra} more)" if extra > 0 else "" + return ( + "cached report missing " + f"{len(missing)} tracked Python files (e.g. {sample}{suffix})" + ) + + head = _git_head_commit(root_path) + if head and report_path.is_file(): + commit_ts = _git_head_commit_unix(root_path) + report_mtime = int(report_path.stat().st_mtime) + 1 + if commit_ts is not None and commit_ts > report_mtime: + return "cached report is older than current git HEAD commit" + + return None + + +def _git_head_commit(root_path: Path) -> str | None: + try: + completed = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=5.0, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return None + text = completed.stdout.strip() + return text or None + + +def assess_cached_report_trust( + *, + root_path: Path, + report_path: Path, + report_document: Mapping[str, object], +) -> CachedReportTrust: + reason = cached_report_untrusted_reason( + root_path=root_path, + report_path=report_path, + report_document=report_document, + ) + if reason is None: + return CachedReportTrust(trusted=True) + return CachedReportTrust(trusted=False, reason=reason) + + +def _git_tracked_py_paths(root_path: Path) -> set[str] | None: + try: + completed = subprocess.run( + ["git", "ls-files", "--", "*.py"], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=30.0, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return None + return { + line.strip().replace("\\", "/") + for line in completed.stdout.splitlines() + if line.strip() + } + + +def _git_head_commit_unix(root_path: Path) -> int | None: + try: + completed = 
subprocess.run( + ["git", "log", "-1", "--format=%ct", "HEAD"], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=5.0, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return None + text = completed.stdout.strip() + if not text: + return None + return int(text) + + +__all__ = [ + "CachedReportTrust", + "assess_cached_report_trust", + "cached_report_untrusted_reason", +] diff --git a/codeclone/memory/retrieval/__init__.py b/codeclone/memory/retrieval/__init__.py new file mode 100644 index 00000000..c6b7e953 --- /dev/null +++ b/codeclone/memory/retrieval/__init__.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .service import ( + QUERY_MODES, + get_relevant_memory, + path_has_memory, + query_engineering_memory, + query_records_for_repo_path, +) + +__all__ = [ + "QUERY_MODES", + "get_relevant_memory", + "path_has_memory", + "query_engineering_memory", + "query_records_for_repo_path", +] diff --git a/codeclone/memory/retrieval/context_coverage.py b/codeclone/memory/retrieval/context_coverage.py new file mode 100644 index 00000000..692691ca --- /dev/null +++ b/codeclone/memory/retrieval/context_coverage.py @@ -0,0 +1,173 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +from ..experience.models import Experience +from ..paths import normalize_memory_scope_path, repo_path_to_module_key +from ..trajectory.models import Trajectory + + +def _percent(covered: int, total: int) -> int | None: + return round(covered * 100 / total) if total else None + + +def _trajectory_coverage( + *, + scope_paths: Sequence[str], + trajectories: Sequence[Trajectory], +) -> tuple[dict[str, object], frozenset[str]]: + normalized = tuple(normalize_memory_scope_path(path) for path in scope_paths) + matched_paths: set[str] = set() + agent_labels: set[str] = set() + for trajectory in trajectories: + subject_pairs = { + (subject.subject_kind, subject.subject_key) + for subject in trajectory.subjects + } + for path in normalized: + if ("path", path) in subject_pairs or ( + "module", + repo_path_to_module_key(path), + ) in subject_pairs: + matched_paths.add(path) + agent_labels.update( + key.strip() + for kind, key in subject_pairs + if kind == "agent" and key.strip() + ) + total = len(normalized) + return ( + { + "scope_paths_with_trajectories": len(matched_paths), + "scope_paths_total": total, + "coverage_percent": _percent(len(matched_paths), total), + }, + frozenset(agent_labels), + ) + + +def _experience_coverage( + *, + scope_families: frozenset[str], + experiences: Sequence[Experience], +) -> tuple[dict[str, object], frozenset[str]]: + matched_families = { + experience.subject_family + for experience in experiences + if experience.subject_family in scope_families + } + agent_families = { + facet.facet_value + for experience in experiences + for facet in experience.facets + if facet.facet_kind == "agent_family" and facet.facet_value + } + total = len(scope_families) + return ( + { + "scope_families_with_experiences": len(matched_families), + "scope_families_total": total, + "coverage_percent": 
_percent(len(matched_families), total), + }, + frozenset(agent_families), + ) + + +def _count(coverage: Mapping[str, object], key: str) -> int: + value = coverage.get(key) + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _observation_confidence( + *, + record_coverage: Mapping[str, object], + trajectory_coverage: Mapping[str, object], + experience_coverage: Mapping[str, object], +) -> dict[str, object]: + basis = [ + lane + for lane, coverage, key in ( + ("records", record_coverage, "scope_paths_with_memory"), + ( + "trajectories", + trajectory_coverage, + "scope_paths_with_trajectories", + ), + ( + "experiences", + experience_coverage, + "scope_families_with_experiences", + ), + ) + if _count(coverage, key) > 0 + ] + path_total = _count(record_coverage, "scope_paths_total") + record_paths = _count(record_coverage, "scope_paths_with_memory") + trajectory_paths = _count( + trajectory_coverage, + "scope_paths_with_trajectories", + ) + level = "unknown" + if basis: + complete_path_evidence = ( + path_total > 0 + and record_paths >= path_total + and trajectory_paths >= path_total + ) + level = "supported" if complete_path_evidence else "partial" + return { + "level": level, + "basis": basis, + "note": ( + "Evidence availability only; not correctness, approval, or edit " + "authorization." 
+ ), + } + + +def build_context_coverage( + *, + record_coverage: Mapping[str, object], + scope_paths: Sequence[str], + scope_families: frozenset[str], + trajectories: Sequence[Trajectory], + experiences: Sequence[Experience], + detail_level: str = "compact", +) -> dict[str, object]: + trajectory_coverage, trajectory_agents = _trajectory_coverage( + scope_paths=scope_paths, + trajectories=trajectories, + ) + experience_coverage, experience_agents = _experience_coverage( + scope_families=scope_families, + experiences=experiences, + ) + record_payload = dict(record_coverage) + coverage: dict[str, object] = { + "record_coverage": record_payload, + "trajectory_coverage": trajectory_coverage, + "experience_coverage": experience_coverage, + "observation_confidence": _observation_confidence( + record_coverage=record_payload, + trajectory_coverage=trajectory_coverage, + experience_coverage=experience_coverage, + ), + } + # agent_diversity is analytics, not pre-edit signal — full/detail only. + if detail_level != "compact": + coverage["agent_diversity"] = { + "trajectory_agent_labels": sorted(trajectory_agents), + "trajectory_agent_label_count": len(trajectory_agents), + "experience_agent_families": sorted(experience_agents), + "experience_agent_family_count": len(experience_agents), + } + return coverage + + +__all__ = ["build_context_coverage"] diff --git a/codeclone/memory/retrieval/ranking.py b/codeclone/memory/retrieval/ranking.py new file mode 100644 index 00000000..fc738fd5 --- /dev/null +++ b/codeclone/memory/retrieval/ranking.py @@ -0,0 +1,176 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from ..models import MemoryRecord, MemorySubject +from ..paths import expand_scope_paths, subject_matches_scope + +_TYPE_BOOST: dict[str, float] = { + "contract_note": 0.25, + "document_link": 0.2, + "public_surface": 0.15, + "risk_note": 0.15, + "test_anchor": 0.15, + "contradiction_note": 0.1, + "module_role": 0.1, +} +_INGEST_BOOST: dict[str, float] = { + "git": 0.1, + "contract": 0.08, + "doc": 0.06, + "test": 0.05, + "analysis": 0.04, +} +# Semantic proximity re-ranks recall; the weight is deliberately small so it +# never dominates an exact subject match (1.0) or a scope match. It is applied +# only after the scoped short-circuit, so it cannot inject out-of-scope records. +_SEMANTIC_WEIGHT = 0.3 +# Git churn is useful review context, but it is not a durable architectural +# assertion. Keep exact-scope hotspots visible without letting their type and +# ingest boosts outrank richer memory by default. +_CHANGE_HOTSPOT_PENALTY = 0.35 +# Finish-hook module-role drafts are workflow reminders, not durable module +# descriptions. Keep them visible while placing substantive memory first. +_WORKFLOW_CONTEXT_PENALTY = 0.65 +# Reciprocal Rank Fusion damping constant. 60 is the widely used default; a +# larger K flattens the gap between adjacent ranks. Used by hybrid search to +# fuse the lexical (BM25) and vector rankings without letting metadata boosts +# override a strong retrieval-engine match. 
+_RRF_K = 60 + + +@dataclass(frozen=True, slots=True) +class RankingContext: + scope_paths: frozenset[str] + symbols: frozenset[str] + blast_dependents: frozenset[str] + + @classmethod + def from_scope( + cls, + *, + scope_paths: Sequence[str], + symbols: Sequence[str], + blast_dependents: Sequence[str], + ) -> RankingContext: + normalized_scope = frozenset(scope_paths) + return cls( + scope_paths=expand_scope_paths(normalized_scope), + symbols=frozenset(symbols), + blast_dependents=frozenset(blast_dependents), + ) + + +def is_git_change_hotspot(record: MemoryRecord) -> bool: + payload = record.payload + return ( + record.type == "risk_note" + and record.ingest_source == "git" + and isinstance(payload, dict) + and payload.get("risk_kind") == "change_hotspot" + ) + + +def retrieval_lane(record: MemoryRecord) -> str | None: + if is_git_change_hotspot(record): + return "hotspot_context" + if ( + record.type == "module_role" + and record.status == "draft" + and record.created_by == "finish_hook" + ): + return "workflow_context" + return None + + +def _context_signal_adjustment(record: MemoryRecord) -> float: + lane = retrieval_lane(record) + if lane == "hotspot_context": + return -_CHANGE_HOTSPOT_PENALTY + if lane == "workflow_context": + return -_WORKFLOW_CONTEXT_PENALTY + return 0.0 + + +def relevance_score( + *, + record: MemoryRecord, + subjects: Sequence[MemorySubject], + context: RankingContext, + evidence_count: int, + semantic_proximity: float = 0.0, +) -> float: + scoped = bool(context.scope_paths or context.symbols) + score = 0.0 + has_contextual_match = False + for subject in subjects: + key = subject.subject_key.replace("\\", "/").strip("/") + boost = 0.0 + if key in context.symbols: + boost = 1.0 + else: + scope_boost = subject_matches_scope(key, scope_paths=context.scope_paths) + if scope_boost > 0.0: + boost = scope_boost + elif key in context.blast_dependents: + boost = 0.7 + if boost > 0.0: + score += boost + has_contextual_match = True + + if scoped 
and not has_contextual_match: + return 0.0 + + score += _TYPE_BOOST.get(record.type, 0.0) + score += _INGEST_BOOST.get(record.ingest_source, 0.0) + if record.confidence == "verified": + score += 0.15 + elif record.confidence == "supported": + score += 0.1 + if record.approved_by: + score += 0.1 + if evidence_count > 0: + score += min(0.1, evidence_count * 0.02) + if record.status == "draft": + score += 0.3 + if record.origin == "agent": + score += 0.05 + if record.status == "stale": + score -= 0.5 + score += _context_signal_adjustment(record) + score += semantic_proximity * _SEMANTIC_WEIGHT + return round(score, 4) + + +def reciprocal_rank_fusion( + *, lexical_rank: int | None = None, vector_rank: int | None = None +) -> float: + """Fuse a record's lexical (BM25) and vector ranks into one score. + + Each present rank contributes ``1 / (_RRF_K + rank)`` (0-based); a record + missing from a list simply omits that term. Higher is better. This keeps the + retrieval engines' own rank order as the lead signal — the caller adds + curation metadata only as a tie-break — so a strong lexical or vector match + is never buried by metadata boosts. + """ + score = 0.0 + if lexical_rank is not None: + score += 1.0 / (_RRF_K + lexical_rank) + if vector_rank is not None: + score += 1.0 / (_RRF_K + vector_rank) + return score + + +__all__ = [ + "RankingContext", + "reciprocal_rank_fusion", + "relevance_score", + "retrieval_lane", +] diff --git a/codeclone/memory/retrieval/semantic.py b/codeclone/memory/retrieval/semantic.py new file mode 100644 index 00000000..ab4fb164 --- /dev/null +++ b/codeclone/memory/retrieval/semantic.py @@ -0,0 +1,191 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def semantic_search(
    *,
    index: SemanticIndex,
    provider: EmbeddingProvider,
    store: _RecordStore | None,
    audit_db_path: Path,
    query: str,
    limit: int,
    preview_chars: int,
) -> list[SemanticSearchResult]:
    """Run one embedded query against the index and hydrate every hit.

    The query is embedded once and searched separately for the ``memory``,
    ``audit`` and ``trajectory`` sources. Trajectory hits are over-fetched and
    then collapsed back down to ``limit``. Each hit is turned into a
    ``SemanticSearchResult`` by the hydrator for its source; hits whose
    hydrator returns ``None`` are left out. Memory and trajectory hits are
    also left out when ``store`` is ``None``.

    Results keep source order: memory hits, then audit hits, then trajectory
    hits.
    """
    query_vector = embed_query(provider, query)
    from_memory = list(index.search(query_vector, k=limit, source="memory"))
    from_audit = list(index.search(query_vector, k=limit, source="audit"))
    oversampled_k = max(limit, limit * TRAJECTORY_SEARCH_OVERSAMPLE)
    trajectory_candidates = list(
        index.search(query_vector, k=oversampled_k, source="trajectory")
    )
    from_trajectory = collapse_trajectory_hits(trajectory_candidates, k=limit)

    def hydrate(hit: SemanticHit) -> SemanticSearchResult | None:
        # Audit hits read the audit DB and never need the record store.
        if hit.source == "audit":
            return _hydrate_audit(hit, audit_db_path, preview_chars)
        if store is None:
            return None
        if hit.source == "memory":
            return _hydrate_memory(hit, store, preview_chars)
        return _hydrate_trajectory(hit, store, preview_chars)

    ordered_hits = [*from_memory, *from_audit, *from_trajectory]
    hydrated_hits = (hydrate(hit) for hit in ordered_hits)
    return [result for result in hydrated_hits if result is not None]
getattr(store, "find_trajectory", None) + if not callable(find_trajectory): + return None + typed_find = cast("Callable[[str], Trajectory | None]", find_trajectory) + trajectory_id = trajectory_parent_id(hit) + trajectory = typed_find(trajectory_id) + if trajectory is None: + return None + return SemanticSearchResult( + source="trajectory", + source_id=trajectory_id, + score=hit.score, + kind="trajectory", + status=trajectory.outcome, + confidence=None, + subject_path=_primary_trajectory_path(trajectory), + preview=_preview(trajectory.summary, preview_chars), + ) + + +def audit_event_row( + audit_db_path: Path, event_id: str +) -> tuple[str, str | None, str] | None: + if not audit_db_path.is_file(): + return None + try: + conn = open_audit_db_readonly(audit_db_path) + except (sqlite3.Error, AuditSchemaError, OSError): + return None + try: + row = conn.execute( + "SELECT event_type, status, summary FROM controller_events " + "WHERE event_id = ?", + (event_id,), + ).fetchone() + except (sqlite3.Error, AuditSchemaError): + return None + finally: + conn.close() + if row is None or not isinstance(row[2], str) or not row[2].strip(): + return None + status = str(row[1]) if row[1] is not None else None + return str(row[0]), status, row[2] + + +def _primary_path(subjects: Sequence[MemorySubject]) -> str | None: + for subject in subjects: + if subject.subject_kind == "path": + return subject.subject_key + return None + + +def _primary_trajectory_path(trajectory: Trajectory) -> str | None: + for subject in trajectory.subjects: + if subject.subject_kind == "path": + return subject.subject_key + return None + + +def _preview(text: str, preview_chars: int) -> str: + cleaned = " ".join(text.split()) + if len(cleaned) <= preview_chars: + return cleaned + return cleaned[: max(1, preview_chars - 1)].rstrip() + "…" + + +__all__ = ["audit_event_row", "semantic_search"] diff --git a/codeclone/memory/retrieval/service.py b/codeclone/memory/retrieval/service.py new file mode 100644 index 
00000000..3663e4d5 --- /dev/null +++ b/codeclone/memory/retrieval/service.py @@ -0,0 +1,1815 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from pathlib import PurePosixPath +from typing import TYPE_CHECKING, Literal, cast + +from ...config.memory_defaults import DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS +from ...contracts import SEMANTIC_INDEX_FORMAT_VERSION +from ...observability import is_observability_enabled, record_counter, span +from ..embedding import embed_query +from ..enums import LinkRelation, MemoryConfidence, MemoryRecordType, MemoryStatus +from ..exceptions import MemoryContractError, MemorySemanticUnavailableError +from ..experience.models import Experience +from ..models import MemoryEvidence, MemoryQuery, MemoryRecord, MemorySubject +from ..paths import ( + MEMORY_RETRIEVAL_SCOPE_REQUIRED_ERROR, + normalize_memory_scope_path, + normalize_memory_scope_paths, + normalize_repo_path, + repo_path_to_module_key, + subject_matches_scope, +) +from ..search_index import SearchMatchMode +from ..semantic.chunking import ( + TRAJECTORY_SEARCH_OVERSAMPLE, + collapse_trajectory_hits, + trajectory_parent_id, +) +from ..sqlite_store import SqliteEngineeringMemoryStore +from ..status_report import build_memory_status_report +from ..trajectory.analytics import ( + build_trajectory_agent_stats_payload, + build_trajectory_anomalies_payload, + build_trajectory_dashboard_payload, +) +from ..trajectory.retrieval import ( + DEFAULT_TRAJECTORY_PREVIEW_LIMIT, + filter_trajectories_for_default_retrieval, + rank_trajectories_for_query, + rank_trajectories_for_scope, + serialize_trajectory_detail, + serialize_trajectory_preview, + trajectory_status_payload, + 
def _statement_preview(
    statement: str,
    *,
    max_chars: int = DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS,
) -> str:
    """Return ``statement`` shortened to at most ``max_chars`` characters.

    A statement that already fits is returned unchanged. Otherwise the first
    ``max_chars - 1`` characters are kept, trailing whitespace is stripped,
    and an ellipsis is appended.

    Args:
        statement: Full statement text.
        max_chars: Character budget for the preview. A budget of zero or less
            leaves no room for text, so a non-empty statement yields just the
            ellipsis.

    Returns:
        The original statement, or its truncated preview ending in "…".
    """
    if len(statement) <= max_chars:
        return statement
    # Clamp the cut at zero. Without it a non-positive budget becomes a
    # negative slice bound, which trims from the END of the string and keeps
    # almost the whole statement instead of shortening it.
    cut = max(0, max_chars - 1)
    trimmed = statement[:cut].rstrip()
    return f"{trimmed}…"
= ("active", "historical", "stale"), +) -> tuple[MemoryRecord, ...]: + normalized = normalize_repo_path(rel_path) + records = store.query_records( + MemoryQuery( + project_id=project_id, + types=types, + statuses=statuses, + subject_kind="path", + subject_key_prefix=normalized, + limit=limit, + ) + ) + if len(records) >= limit: + return tuple(records[:limit]) + + module_key = repo_path_to_module_key(normalized) + module_records = store.query_records( + MemoryQuery( + project_id=project_id, + types=types, + statuses=statuses, + subject_kind="module", + subject_key_prefix=module_key, + limit=limit, + ) + ) + seen = {record.id for record in records} + merged = list(records) + for record in module_records: + if record.id in seen: + continue + merged.append(record) + seen.add(record.id) + if len(merged) >= limit: + break + return tuple(merged[:limit]) + + +def path_has_memory( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + rel_path: str, +) -> bool: + return bool( + query_records_for_repo_path( + store, + project_id=project_id, + rel_path=rel_path, + limit=1, + statuses=("active", "historical", "stale"), + ) + ) + + +def _default_statuses( + *, + include_stale: bool, + include_drafts: bool, +) -> tuple[MemoryStatus, ...]: + statuses: list[MemoryStatus] = ["active", "historical"] + if include_stale: + statuses.append("stale") + if include_drafts: + statuses.append("draft") + return tuple(statuses) + + +def _record_visible( + record: MemoryRecord, + *, + include_stale: bool, + include_drafts: bool, +) -> bool: + if record.status == "stale" and not include_stale: + return False + if record.status == "historical": + return True + if record.status == "draft": + return include_drafts + if record.confidence == "inferred" and not record.approved_by: + return False + return record.status in {"active", "stale", "draft"} + + +def _retrieval_policy(*, include_drafts: bool) -> dict[str, object]: + return { + "drafts_included": include_drafts, + 
def _scope_families(scope_paths: Sequence[str]) -> frozenset[str]:
    """Collect the distinct directory families of ``scope_paths``.

    Each path is mapped through ``_scope_family``; paths for which that
    returns ``None`` are left out.
    """
    resolved = (_scope_family(path) for path in scope_paths)
    return frozenset(family for family in resolved if family is not None)
for family, count in agent_facet_items + ] + payload.update( + _experience_detail_payload( + experience, + detail_level=detail_level, + statement_length=statement_length, + statement=statement, + agent_facets=agent_facets, + ) + ) + return payload + + +def _experience_detail_payload( + experience: Experience, + *, + detail_level: MemoryDetailLevel, + statement_length: int, + statement: str, + agent_facets: list[dict[str, object]], +) -> dict[str, object]: + if detail_level == "full": + return { + "agent_facets": agent_facets, + "evidence_trajectory_ids": [ + item.trajectory_id for item in experience.evidence + ], + } + payload: dict[str, object] = { + "statement_length": statement_length, + "evidence_count": len(experience.evidence), + "agent_family_count": len(agent_facets), + "multi_agent": len(agent_facets) > 1, + } + if agent_facets: + payload["dominant_agent_facet"] = agent_facets[0] + if statement_length > len(statement): + payload["statement_truncated"] = True + return payload + + +def _matching_experiences( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + families: frozenset[str], +) -> list[Experience]: + """Active advisory experiences matching the requested scope families.""" + if not families: + return [] + matched = [ + experience + for experience in store.list_experiences(project_id=project_id) + if experience.status == "active" and experience.subject_family in families + ] + matched.sort( + key=lambda experience: ( + -experience.support, + -experience.information_value, + experience.id, + ) + ) + return matched + + +def _serialize_relevant_experiences( + experiences: Sequence[Experience], + *, + max_results: int, + detail_level: MemoryDetailLevel, +) -> list[dict[str, object]]: + return [ + _serialize_experience(experience, detail_level=detail_level) + for experience in experiences[:max_results] + ] + + +def _serialize_subject(subject: MemorySubject) -> dict[str, object]: + return { + "subject_kind": subject.subject_kind, + 
"subject_key": subject.subject_key, + "relation": subject.relation, + } + + +def _memory_subject_priority( + subject: MemorySubject, + *, + context: RankingContext | None, +) -> tuple[object, ...]: + key = subject.subject_key.replace("\\", "/").strip("/") + context_group = 3 + context_score = 0.0 + if context is not None: + if key in context.symbols: + context_group = 0 + context_score = 1.0 + else: + scope_score = subject_matches_scope(key, scope_paths=context.scope_paths) + if scope_score > 0.0: + context_group = 1 + context_score = scope_score + elif key in context.blast_dependents: + context_group = 2 + context_score = 0.7 + return ( + context_group, + -context_score, + _MEMORY_SUBJECT_KIND_ORDER.get(subject.subject_kind, 99), + key, + subject.relation, + subject.id, + ) + + +def _preview_memory_subjects( + subjects: Sequence[MemorySubject], + *, + detail_level: MemoryDetailLevel, + context: RankingContext | None, +) -> list[MemorySubject]: + if detail_level == "full": + return list(subjects) + return sorted( + subjects, + key=lambda subject: _memory_subject_priority(subject, context=context), + )[:COMPACT_MEMORY_SUBJECT_LIMIT] + + +def _serialize_evidence(evidence: MemoryEvidence) -> dict[str, object]: + return { + "id": evidence.id, + "evidence_kind": evidence.evidence_kind, + "ref": evidence.ref, + "locator": evidence.locator, + "quote": evidence.quote, + "digest": evidence.digest, + "created_at_utc": evidence.created_at_utc, + } + + +def _retrieval_lane_payload(record: MemoryRecord) -> dict[str, object]: + lane = retrieval_lane(record) + return {"retrieval_lane": lane} if lane is not None else {} + + +def _serialize_record_summary( + *, + record: MemoryRecord, + subjects: Sequence[MemorySubject], + evidence_count: int, + relevance_score: float | None = None, + detail_level: MemoryDetailLevel = "compact", + context: RankingContext | None = None, +) -> dict[str, object]: + statement_length = len(record.statement) + if detail_level == "full": + 
statement_value: str = record.statement + else: + statement_value = _statement_preview(record.statement) + serialized_subjects = _preview_memory_subjects( + subjects, + detail_level=detail_level, + context=context, + ) + payload: dict[str, object] = { + "id": record.id, + "type": record.type, + "status": record.status, + "confidence": record.confidence, + "approved": record.approved_by is not None, + "statement": statement_value, + "statement_length": statement_length, + "subjects": [_serialize_subject(item) for item in serialized_subjects], + "evidence_count": evidence_count, + } + if detail_level == "full": + payload["payload"] = record.payload + else: + payload["subject_count"] = len(subjects) + payload["subjects_truncated"] = len(serialized_subjects) < len(subjects) + if statement_length > len(statement_value): + payload["statement_truncated"] = True + if record.stale_reason: + payload["stale_reason"] = record.stale_reason + payload.update(_retrieval_lane_payload(record)) + if relevance_score is not None: + payload["relevance_score"] = relevance_score + return payload + + +# Bounded down-rank for a record refuted by a 1-hop neighbour; mirrors the +# stale -0.5 lever. Truth is corrected, never silently returned. +_CONFLICT_PENALTY = 0.5 +_CONFLICT_RELATIONS: tuple[LinkRelation, ...] = ("contradicts", "supersedes") + + +def _record_relations( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + record_ids: Sequence[str], +) -> dict[str, dict[str, list[str]]]: + """1-hop contradicts/supersedes neighbours per record (deterministic). + + The other endpoint may be outside ``record_ids``; it is surfaced as a + relation id, never promoted into the result set. 
+ """ + links = store.list_links_for_records( + project_id=project_id, + record_ids=record_ids, + relations=_CONFLICT_RELATIONS, + ) + raw: dict[str, dict[str, set[str]]] = {} + + def bucket(record_id: str) -> dict[str, set[str]]: + return raw.setdefault( + record_id, + {"contradicted_by": set(), "superseded_by": set(), "supersedes": set()}, + ) + + for link in links: + if link.relation == "contradicts": + bucket(link.from_memory_id)["contradicted_by"].add(link.to_memory_id) + bucket(link.to_memory_id)["contradicted_by"].add(link.from_memory_id) + else: # supersedes + bucket(link.from_memory_id)["supersedes"].add(link.to_memory_id) + bucket(link.to_memory_id)["superseded_by"].add(link.from_memory_id) + + wanted = set(record_ids) + relations: dict[str, dict[str, list[str]]] = {} + for record_id, groups in raw.items(): + if record_id not in wanted: + continue + compact = {key: sorted(values) for key, values in groups.items() if values} + if compact: + relations[record_id] = compact + return relations + + +def _apply_conflict_penalty( + score: float, relations: dict[str, list[str]] | None +) -> float: + if relations is not None and ( + relations.get("contradicted_by") or relations.get("superseded_by") + ): + return round(score - _CONFLICT_PENALTY, 4) + return score + + +def _rank_records( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + candidates: Sequence[MemoryRecord], + context: RankingContext, + max_records: int, + detail_level: MemoryDetailLevel, + lexical_ranks: Mapping[str, int] | None = None, + vector_ranks: Mapping[str, int] | None = None, +) -> tuple[list[dict[str, object]], bool]: + # Fusion mode (hybrid search) supplies the lexical (BM25) and/or vector + # rankings. There the metadata relevance_score is only a deterministic + # tie-break, so the vector signal must NOT also be folded into it via + # semantic_proximity (avoid double-counting). Scoped retrieval supplies + # neither map and keeps relevance_score as the sole ordering signal. 
def _coverage_summary(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    scope_paths: Sequence[str],
) -> dict[str, object]:
    """Report how many scope paths have at least one memory record.

    The paths are normalized first; each normalized path counts as covered
    when ``path_has_memory`` is true for it. ``coverage_percent`` is the
    rounded share of covered paths, and is reported as 100 when the
    normalized scope is empty.
    """
    paths = normalize_memory_scope_paths(scope_paths)
    covered = sum(
        1
        for rel_path in paths
        if path_has_memory(store, project_id=project_id, rel_path=rel_path)
    )
    total = len(paths)
    if total:
        percent = round(covered * 100 / total)
    else:
        # Nothing was requested, so nothing is missing.
        percent = 100
    return {
        "scope_paths_with_memory": covered,
        "scope_paths_total": total,
        "coverage_percent": percent,
        "coverage_kind": "record_subject_coverage",
    }
max_records=max_records, + detail_level=normalized_detail, + ) + trajectory_candidates = store.list_trajectories_for_subjects( + project_id=project_id, + subjects=trajectory_subject_keys( + scope_paths=normalized_scope, + symbols=tuple(normalized_symbols), + ), + limit=max(DEFAULT_TRAJECTORY_PREVIEW_LIMIT * 3, max_records), + ) + patch_trails = _load_patch_trails_for_trajectories( + store, + trajectory_ids=tuple(item.id for item in trajectory_candidates), + ) + trajectories_payload, trajectories_truncated = rank_trajectories_for_scope( + trajectory_candidates, + scope_paths=normalized_scope, + symbols=tuple(normalized_symbols), + max_results=min(max_records, DEFAULT_TRAJECTORY_PREVIEW_LIMIT), + include_routine=include_routine, + patch_trails=patch_trails, + detail_level=normalized_detail, + ) + matching_experiences = _matching_experiences( + store, + project_id=project_id, + families=_scope_families(normalized_scope), + ) + experiences_payload = _serialize_relevant_experiences( + matching_experiences, + max_results=min(max_records, DEFAULT_EXPERIENCE_PREVIEW_LIMIT), + detail_level=normalized_detail, + ) + coverage: dict[str, object] + if normalized_scope: + coverage = build_context_coverage( + record_coverage=_coverage_summary( + store, + project_id=project_id, + scope_paths=normalized_scope, + ), + scope_paths=normalized_scope, + scope_families=_scope_families(normalized_scope), + trajectories=filter_trajectories_for_default_retrieval( + trajectory_candidates, + include_routine=include_routine, + ), + experiences=matching_experiences, + detail_level=normalized_detail, + ) + else: + coverage = { + "record_coverage": { + "scope_paths_with_memory": 0, + "scope_paths_total": 0, + "coverage_percent": None, + }, + "coverage_note": "symbol_scoped_retrieval", + } + payload: dict[str, object] = { + "project_id": project_id, + "scope_resolved_from": scope_resolved_from, + "records": records_payload, + "trajectories": trajectories_payload, + "experiences": 
experiences_payload, + "record_count": len(records_payload), + "trajectory_count": len(trajectories_payload), + "experience_count": len(experiences_payload), + "truncated": truncated, + "trajectories_truncated": trajectories_truncated, + "coverage": coverage, + "detail_level": normalized_detail, + "retrieval_policy": _retrieval_policy(include_drafts=effective_include_drafts), + } + return payload + + +def _load_patch_trails_for_trajectories( + store: SqliteEngineeringMemoryStore, + *, + trajectory_ids: Sequence[str], +) -> dict[str, dict[str, object]]: + return store.load_trajectory_patch_trails(trajectory_ids) + + +def _parse_filters( + filters: Mapping[str, object] | None, +) -> tuple[ + tuple[MemoryRecordType, ...], + tuple[MemoryStatus, ...], + tuple[MemoryConfidence, ...], + SearchMatchMode, + bool, +]: + types: list[MemoryRecordType] = [] + statuses: list[MemoryStatus] = [] + confidences: list[MemoryConfidence] = [] + match_mode: SearchMatchMode = "any" + include_routine = False + if filters is None: + return (), (), (), match_mode, include_routine + raw_types = filters.get("types") + if isinstance(raw_types, list): + types.extend(cast(MemoryRecordType, str(item)) for item in raw_types) + raw_statuses = filters.get("statuses") + if isinstance(raw_statuses, list): + statuses.extend(cast(MemoryStatus, str(item)) for item in raw_statuses) + raw_confidences = filters.get("confidences") + if isinstance(raw_confidences, list): + confidences.extend( + cast(MemoryConfidence, str(item)) for item in raw_confidences + ) + raw_match = filters.get("match_mode") + if raw_match in {"all", "any"}: + match_mode = cast(SearchMatchMode, raw_match) + if bool(filters.get("include_routine")): + include_routine = True + return ( + tuple(types), + tuple(statuses), + tuple(confidences), + match_mode, + include_routine, + ) + + +def _handle_status_mode( + *, + mode: str, + root_path: object, + db_path: object, + backend: str, +) -> dict[str, object]: + from pathlib import Path + + if 
not isinstance(root_path, Path) or not isinstance(db_path, Path): + raise TypeError("root_path and db_path must be Path instances") + report = build_memory_status_report( + root_path=root_path, + db_path=db_path, + backend=backend, + ) + payload = { + "schema_version": report.schema_version, + "project_id": report.project_id, + "project_root": report.project_root, + "backend": report.backend, + "db_path": str(report.db_path), + "db_exists": report.db_exists, + "record_count": report.record_count, + "records_by_type": report.records_by_type, + "records_by_status": report.records_by_status, + "last_analysis_fingerprint": report.last_analysis_fingerprint, + "last_init_run_id": report.last_init_run_id, + } + return {"mode": mode, "status": "ok", "payload": payload} + + +def _handle_get_mode( + store: SqliteEngineeringMemoryStore, + *, + mode: str, + project_id: str, + record_id: str | None, +) -> dict[str, object]: + if not record_id: + raise MemoryContractError("mode=get requires record_id.") + record = store.find_record(record_id) + if record is None or record.project_id != project_id: + return { + "mode": mode, + "status": "not_found", + "payload": {"record_id": record_id}, + } + subjects = store.list_subjects_for_memory(record.id) + evidence = store.list_evidence_for_memory(record.id) + return { + "mode": mode, + "status": "ok", + "detail_level": "full", + "payload": { + "record": _serialize_record_summary( + record=record, + subjects=subjects, + evidence_count=len(evidence), + detail_level="full", + ), + "evidence": [_serialize_evidence(item) for item in evidence], + }, + } + + +def _serialize_list_mode_records( + store: SqliteEngineeringMemoryStore, + *, + records: Sequence[MemoryRecord], + detail_level: MemoryDetailLevel, +) -> list[dict[str, object]]: + return [ + _serialize_record_summary( + record=record, + subjects=store.list_subjects_for_memory(record.id), + evidence_count=store.count_evidence_for_memory(record.id), + detail_level=detail_level, + ) + for 
def _handle_stale_mode(
    store: SqliteEngineeringMemoryStore,
    *,
    mode: str,
    project_id: str,
    max_results: int,
    detail_level: MemoryDetailLevel,
) -> dict[str, object]:
    """List the project's stale records, at most ``max_results`` of them.

    Args:
        store: Memory store to query.
        mode: Query mode name echoed back in the response.
        project_id: Project whose stale records are listed.
        max_results: Maximum number of records to return.
        detail_level: Serialization detail applied to each record.

    Returns:
        An ``ok`` response whose payload holds the serialized records, their
        count, and ``truncated``, which is true only when more stale records
        exist than were returned.
    """
    # Fetch one row beyond the page. Querying exactly max_results rows cannot
    # tell "exactly that many exist" apart from "more exist", and would flag a
    # complete answer as truncated.
    fetched = store.query_records(
        MemoryQuery(
            project_id=project_id,
            statuses=("stale",),
            limit=max_results + 1,
        )
    )
    truncated = len(fetched) > max_results
    payload_records = _serialize_list_mode_records(
        store,
        records=fetched[:max_results],
        detail_level=detail_level,
    )
    return {
        "mode": mode,
        "status": "ok",
        "detail_level": detail_level,
        "payload": {
            "records": payload_records,
            "record_count": len(payload_records),
            "truncated": truncated,
        },
    }
def _handle_trajectory_search_mode(
    store: SqliteEngineeringMemoryStore,
    *,
    mode: str,
    project_id: str,
    query: str | None,
    max_results: int,
    match_mode: SearchMatchMode,
    include_routine: bool = False,
    detail_level: MemoryDetailLevel = "compact",
) -> dict[str, object]:
    """Search trajectories by text and return the ranked hits.

    The store is asked for ``max_results + 1`` candidates; the ranking helper
    receives ``max_results`` and returns both the serialized trajectories and
    the truncation flag. Raises ``MemoryContractError`` (via
    ``_require_query_field``) when ``query`` is missing or blank.
    """
    needle = _require_query_field(query, mode=mode, field="query")
    ranked, was_truncated = rank_trajectories_for_query(
        store.search_trajectories(
            project_id=project_id,
            query=needle,
            limit=max_results + 1,
            match_mode=match_mode,
        ),
        query=needle,
        max_results=max_results,
        match_mode=match_mode,
        include_routine=include_routine,
        detail_level=detail_level,
    )
    payload: dict[str, object] = {
        "trajectories": ranked,
        "trajectory_count": len(ranked),
        "truncated": was_truncated,
        # Trajectory search never includes drafts in its retrieval policy.
        "retrieval_policy": _retrieval_policy(include_drafts=False),
    }
    return {
        "mode": mode,
        "status": "ok",
        "detail_level": detail_level,
        "payload": payload,
    }
include_routine=include_routine, + ), + } + + +def _handle_trajectory_dashboard_mode( + store: SqliteEngineeringMemoryStore, + *, + mode: str, + project_id: str, + max_results: int, + include_routine: bool = False, + detail_level: MemoryDetailLevel = "compact", +) -> dict[str, object]: + return { + "mode": mode, + "status": "ok", + "detail_level": detail_level, + "payload": build_trajectory_dashboard_payload( + store, + project_id=project_id, + max_results=max_results, + include_routine=include_routine, + detail_level=detail_level, + ), + } + + +def _search_statuses_for_mode( + mode: str, + *, + filter_statuses: tuple[MemoryStatus, ...], + include_stale: bool, + include_drafts: bool, +) -> tuple[MemoryStatus, ...]: + if mode != "search": + return filter_statuses or _default_statuses( + include_stale=include_stale, + include_drafts=include_drafts, + ) + if filter_statuses: + return filter_statuses + if include_stale: + return _default_statuses(include_stale=True, include_drafts=include_drafts) + return _default_statuses(include_stale=False, include_drafts=include_drafts) + + +def _require_query_field(value: str | None, *, mode: str, field: str) -> str: + text = value.strip() if value else "" + if not text: + raise MemoryContractError(f"mode={mode} requires {field}.") + return text + + +def _fetch_search_mode_records( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + query: str | None, + filter_types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + filter_confidences: tuple[MemoryConfidence, ...], + max_results: int, + match_mode: SearchMatchMode, +) -> tuple[MemoryRecord, ...]: + statement = _require_query_field(query, mode="search", field="query") + return tuple( + store.search_records( + project_id=project_id, + statement_query=statement, + types=filter_types, + statuses=statuses, + confidences=filter_confidences, + limit=max_results + 1, + match_mode=match_mode, + ) + ) + + +def _fetch_for_path_mode_records( + store: 
def _fetch_for_symbol_mode_records(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    symbol: str | None,
    filter_types: tuple[MemoryRecordType, ...],
    statuses: tuple[MemoryStatus, ...],
    max_results: int,
) -> tuple[MemoryRecord, ...]:
    """Fetch records attached to a symbol, falling back to its parent module.

    Records whose subject is the symbol itself are returned when any exist.
    Otherwise the part of the dotted name before the last ``.`` is looked up
    as a ``module`` subject. A name without a dot has no fallback and yields
    an empty tuple. Each lookup requests ``max_results + 1`` rows.
    """

    def _lookup(kind: str, key: str) -> tuple[MemoryRecord, ...]:
        # Same project/type/status filter for both lookups; only the subject
        # kind and key differ.
        return tuple(
            store.query_records(
                MemoryQuery(
                    project_id=project_id,
                    types=filter_types,
                    statuses=statuses,
                    subject_kind=kind,
                    subject_key=key,
                    limit=max_results + 1,
                )
            )
        )

    symbol_key = _require_query_field(symbol, mode="for_symbol", field="symbol")
    direct = _lookup("symbol", symbol_key)
    if direct:
        return direct

    parent_module, separator, _leaf = symbol_key.rpartition(".")
    if not separator:
        return ()
    return _lookup("module", parent_module)
max_results=max_results, + match_mode=match_mode, + ) + if mode == "for_path": + return _fetch_for_path_mode_records( + store, + project_id=project_id, + path=path, + filter_types=filter_types, + statuses=statuses, + max_results=max_results, + ) + if mode == "for_symbol": + return _fetch_for_symbol_mode_records( + store, + project_id=project_id, + symbol=symbol, + filter_types=filter_types, + statuses=statuses, + max_results=max_results, + ) + return () + + +def _search_payload_body( + payload_records: list[dict[str, object]], + *, + truncated: bool, + include_drafts: bool, + audit_events: list[dict[str, object]] | None = None, + trajectories: list[dict[str, object]] | None = None, +) -> dict[str, object]: + body: dict[str, object] = { + "records": payload_records, + "record_count": len(payload_records), + "truncated": truncated, + "retrieval_policy": _retrieval_policy(include_drafts=include_drafts), + } + if audit_events is not None: + body["audit_events"] = audit_events + if trajectories is not None: + body["trajectories"] = trajectories + return body + + +def _semantic_disabled_block() -> dict[str, object]: + return { + "used": False, + "backend": None, + "provider": None, + "model": None, + "index_version": None, + "reason": "disabled", + } + + +def _semantic_status_block( + status: SemanticIndexStatus, + *, + used: bool, + provider_label: str | None, + model: str | None, + reason: str | None = None, +) -> dict[str, object]: + return { + "used": used, + "backend": status.backend, + "provider": provider_label, + "model": model, + "index_version": SEMANTIC_INDEX_FORMAT_VERSION if used else None, + "reason": None if used else reason or status.reason, + } + + +def _semantic_hits( + *, + index: SemanticIndex, + provider: EmbeddingProvider, + query: str, + k: int, +) -> tuple[dict[str, float], list[SemanticHit], list[SemanticHit]]: + # Search each lane with its own top-k budget so a dense source (e.g. audit) + # cannot crowd memory hits out of one shared top-k (#3). 
The index applies + # the source filter, so results arrive already lane-scoped. + # The embed is the expensive step (lazy model load); give it its own span so + # embedding load time is observable separately from the vector search. + with span(name="retrieval.embed_query"): + vector = embed_query(provider, query) + proximity: dict[str, float] = {} + for hit in index.search(vector, k=k, source="memory"): + proximity.setdefault(hit.source_id, hit.score) + audit_hits = list(index.search(vector, k=k, source="audit")) + trajectory_raw = index.search( + vector, + k=max(k, k * TRAJECTORY_SEARCH_OVERSAMPLE), + source="trajectory", + ) + trajectory_hits = collapse_trajectory_hits(trajectory_raw, k=k) + return proximity, audit_hits, trajectory_hits + + +def _record_search_telemetry( + *, + fts_records: Sequence[MemoryRecord], + proximity: Mapping[str, float], + audit_hits: Sequence[SemanticHit], + trajectory_hits: Sequence[SemanticHit], + candidates: Sequence[MemoryRecord], +) -> None: + # Lane-hit telemetry for the hybrid search, attributed to the active span. + # No-op outside an observability span; the caller guards the call so the + # set-math below is skipped entirely when observability is disabled. 
+ candidate_ids = {record.id for record in candidates} + overlap = sum(1 for record in fts_records if record.id in proximity) + filtered = sum(1 for record_id in proximity if record_id not in candidate_ids) + record_counter("retrieval.fts_hits", len(fts_records)) + record_counter("retrieval.vector_memory_hits", len(proximity)) + record_counter("retrieval.vector_audit_hits", len(audit_hits)) + record_counter("retrieval.vector_trajectory_hits", len(trajectory_hits)) + record_counter("retrieval.fts_vector_overlap", overlap) + record_counter("retrieval.semantic_filtered", filtered) + + +def _hydrate_audit_events( + audit_db_path: Path | None, hits: Sequence[SemanticHit] +) -> list[dict[str, object]]: + if audit_db_path is None: + return [] + events: list[dict[str, object]] = [] + for hit in hits: + row = audit_event_row(audit_db_path, hit.source_id) + if row is None: + continue + event_type, status, summary = row + events.append( + { + "event_id": hit.source_id, + "event_type": event_type, + "status": status, + "summary": _statement_preview(summary), + "score": hit.score, + } + ) + return events + + +def _hydrate_trajectory_hits( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + hits: Sequence[SemanticHit], + detail_level: MemoryDetailLevel, +) -> list[dict[str, object]]: + trajectories: list[dict[str, object]] = [] + for hit in hits: + trajectory_id = trajectory_parent_id(hit) + trajectory = store.find_trajectory(trajectory_id) + if trajectory is None or trajectory.project_id != project_id: + continue + payload = ( + serialize_trajectory_detail(trajectory, max_steps=4) + if detail_level == "full" + else serialize_trajectory_preview( + trajectory, + detail_level="compact", + ) + ) + payload["semantic_score"] = hit.score + if hit.chunk_index is not None: + payload["matched_chunk_index"] = hit.chunk_index + if hit.chunk_count is not None: + payload["matched_chunk_count"] = hit.chunk_count + trajectories.append(payload) + return trajectories + + +def 
_record_passes_filters( + record: MemoryRecord, + *, + types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + confidences: tuple[MemoryConfidence, ...], +) -> bool: + # Mirror the types/statuses/confidences predicate the FTS branch applies in + # SQL (store.search_records), so semantic candidates cannot bypass the + # public query filter contract. An empty category tuple means "no filter". + return ( + (not types or record.type in types) + and (not statuses or record.status in statuses) + and (not confidences or record.confidence in confidences) + ) + + +def _semantic_search_candidates( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + fts_records: Sequence[MemoryRecord], + proximity: Mapping[str, float], + filter_types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + filter_confidences: tuple[MemoryConfidence, ...], +) -> list[MemoryRecord]: + seen = {record.id for record in fts_records} + candidates = list(fts_records) + for record_id in proximity: + if record_id in seen: + continue + record = store.find_record(record_id) + if ( + record is not None + and record.project_id == project_id + and _record_passes_filters( + record, + types=filter_types, + statuses=statuses, + confidences=filter_confidences, + ) + ): + candidates.append(record) + seen.add(record_id) + return candidates + + +def _handle_semantic_search_mode( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + query: str | None, + filter_types: tuple[MemoryRecordType, ...], + statuses: tuple[MemoryStatus, ...], + filter_confidences: tuple[MemoryConfidence, ...], + match_mode: SearchMatchMode, + max_results: int, + detail_level: MemoryDetailLevel, + include_stale: bool, + include_drafts: bool, + semantic_index: SemanticIndex | None, + embedding_provider: EmbeddingProvider | None, + provider_label: str | None, + semantic_reason: str | None, + audit_db_path: Path | None, +) -> dict[str, object]: + statement = _require_query_field(query, 
mode="search", field="query") + fts_records = _fetch_search_mode_records( + store, + project_id=project_id, + query=statement, + filter_types=filter_types, + statuses=statuses, + filter_confidences=filter_confidences, + max_results=max_results, + match_mode=match_mode, + ) + status = semantic_index.status() if semantic_index is not None else None + proximity: dict[str, float] = {} + audit_hits: list[SemanticHit] = [] + trajectory_hits: list[SemanticHit] = [] + used_block: dict[str, object] | None = None + if ( + semantic_index is not None + and embedding_provider is not None + and status is not None + and status.available + ): + try: + proximity, audit_hits, trajectory_hits = _semantic_hits( + index=semantic_index, + provider=embedding_provider, + query=statement, + k=max_results, + ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model (e.g. + # download disabled and not cached) first surfaces here. Degrade to + # FTS-only with the reason rather than failing the whole query. 
+ semantic_reason = str(exc) + else: + used_block = _semantic_status_block( + status, + used=True, + provider_label=provider_label, + model=embedding_provider.model_id, + ) + if used_block is not None: + candidates = _semantic_search_candidates( + store, + project_id=project_id, + fts_records=fts_records, + proximity=proximity, + filter_types=filter_types, + statuses=statuses, + filter_confidences=filter_confidences, + ) + audit_events = _hydrate_audit_events(audit_db_path, audit_hits) + trajectories = _hydrate_trajectory_hits( + store, + project_id=project_id, + hits=trajectory_hits, + detail_level=detail_level, + ) + semantic_block = used_block + else: + candidates = list(fts_records) + audit_events = [] + trajectories = [] + semantic_block = ( + _semantic_status_block( + status, + used=False, + provider_label=provider_label, + model=None, + reason=semantic_reason, + ) + if status is not None + else _semantic_disabled_block() + ) + if is_observability_enabled(): + _record_search_telemetry( + fts_records=fts_records, + proximity=proximity, + audit_hits=audit_hits, + trajectory_hits=trajectory_hits, + candidates=candidates, + ) + effective_stale = include_stale or "stale" in statuses + visible = [ + record + for record in candidates + if _record_visible( + record, + include_stale=effective_stale, + include_drafts=include_drafts, + ) + ] + context = RankingContext.from_scope(scope_paths=(), symbols=(), blast_dependents=()) + # Reciprocal-rank-fusion inputs: the FTS list is already BM25-ordered, and + # the vector hits are ranked by descending proximity (id breaks ties for + # determinism). _rank_records fuses these and uses metadata only to break + # ties, so a strong lexical/vector match is no longer re-sorted away by + # metadata boosts. 
def query_engineering_memory(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    root_path: object,
    backend: str,
    db_path: object,
    mode: str,
    record_id: str | None = None,
    path: str | None = None,
    symbol: str | None = None,
    query: str | None = None,
    scope: Sequence[str] | None = None,
    filters: Mapping[str, object] | None = None,
    max_results: int = 20,
    include_stale: bool = False,
    include_drafts: bool = False,
    detail_level: str = "compact",
    semantic: bool = False,
    semantic_index: SemanticIndex | None = None,
    embedding_provider: EmbeddingProvider | None = None,
    provider_label: str | None = None,
    semantic_reason: str | None = None,
    audit_db_path: Path | None = None,
) -> dict[str, object]:
    """Dispatch an engineering-memory query to the handler for ``mode``.

    Args:
        store: Engineering-memory store that is queried.
        project_id: Project the query is scoped to.
        root_path, db_path, backend: Forwarded to the ``status`` handler.
        mode: One of ``QUERY_MODES``.
        record_id: Record id for ``get``; trajectory id for ``trajectory_get``.
        path: Path for ``for_path``.
        symbol: Dotted symbol name for ``for_symbol``.
        query: Search text for ``search`` and ``trajectory_search``.
        scope: Scope paths for ``coverage``.
        filters: Raw filter mapping, parsed by ``_parse_filters``.
        max_results: Maximum number of items placed in the payload.
        include_stale: Allow stale records in list-style results.
        include_drafts: Allow draft records; always on for ``for_path`` and
            ``for_symbol``.
        detail_level: Requested detail level, normalized before use.
        semantic: Route ``search`` through the semantic (hybrid) handler.
        semantic_index, embedding_provider, provider_label, semantic_reason,
        audit_db_path: Forwarded to the semantic search handler.

    Returns:
        The response dict produced by the selected handler.

    Raises:
        MemoryContractError: If ``mode`` is not in ``QUERY_MODES``.
    """
    if mode not in QUERY_MODES:
        raise MemoryContractError(
            f"Unknown query mode {mode!r}. Allowed: {', '.join(QUERY_MODES)}."
        )

    normalized_detail = _normalize_detail_level(detail_level)
    effective_include_drafts = include_drafts or mode in {"for_path", "for_symbol"}

    if mode == "status":
        return _handle_status_mode(
            mode=mode,
            root_path=root_path,
            db_path=db_path,
            backend=backend,
        )
    if mode == "get":
        return _handle_get_mode(
            store,
            mode=mode,
            project_id=project_id,
            record_id=record_id,
        )
    if mode == "stale":
        return _handle_stale_mode(
            store,
            mode=mode,
            project_id=project_id,
            max_results=max_results,
            detail_level=normalized_detail,
        )
    if mode == "drafts":
        # Probe with one extra row so ``truncated`` is only set when more
        # drafts really exist. A limit-capped fetch compared with ``>=``
        # flagged truncation whenever exactly ``max_results`` drafts matched.
        draft_records = list(
            store.query_records(
                MemoryQuery(
                    project_id=project_id,
                    statuses=("draft",),
                    limit=max_results + 1,
                )
            )
        )
        payload_records = _serialize_list_mode_records(
            store,
            records=draft_records[:max_results],
            detail_level=normalized_detail,
        )
        return {
            "mode": mode,
            "status": "ok",
            "detail_level": normalized_detail,
            "payload": {
                "records": payload_records,
                "record_count": len(payload_records),
                "truncated": len(draft_records) > max_results,
            },
        }
    if mode == "coverage":
        return _handle_coverage_mode(
            store,
            mode=mode,
            project_id=project_id,
            scope=scope,
        )
    if mode == "trajectory_status":
        return _handle_trajectory_status_mode(
            store,
            mode=mode,
            project_id=project_id,
        )
    if mode == "trajectory_get":
        return _handle_trajectory_get_mode(
            store,
            mode=mode,
            project_id=project_id,
            record_id=record_id,
        )

    # Filters are parsed only after the modes above, which do not read them.
    filter_types, filter_statuses, filter_confidences, match_mode, include_routine = (
        _parse_filters(filters)
    )
    if mode == "trajectory_anomalies":
        return _handle_trajectory_anomalies_mode(
            store,
            mode=mode,
            project_id=project_id,
            max_results=max_results,
            include_routine=include_routine,
            detail_level=normalized_detail,
        )
    if mode == "trajectory_agents":
        return _handle_trajectory_agents_mode(
            store,
            mode=mode,
            project_id=project_id,
            include_routine=include_routine,
        )
    if mode == "trajectory_dashboard":
        return _handle_trajectory_dashboard_mode(
            store,
            mode=mode,
            project_id=project_id,
            max_results=max_results,
            include_routine=include_routine,
            detail_level=normalized_detail,
        )
    if mode == "trajectory_search":
        return _handle_trajectory_search_mode(
            store,
            mode=mode,
            project_id=project_id,
            query=query,
            max_results=max_results,
            match_mode=match_mode,
            include_routine=include_routine,
            detail_level=normalized_detail,
        )
    statuses = _search_statuses_for_mode(
        mode,
        filter_statuses=filter_statuses,
        include_stale=include_stale,
        include_drafts=effective_include_drafts,
    )
    if mode == "search" and semantic:
        return _handle_semantic_search_mode(
            store,
            project_id=project_id,
            query=query,
            filter_types=filter_types,
            statuses=statuses,
            filter_confidences=filter_confidences,
            match_mode=match_mode,
            max_results=max_results,
            detail_level=normalized_detail,
            include_stale=include_stale,
            include_drafts=effective_include_drafts,
            semantic_index=semantic_index,
            embedding_provider=embedding_provider,
            provider_label=provider_label,
            semantic_reason=semantic_reason,
            audit_db_path=audit_db_path,
        )
    records = _records_for_list_mode(
        store,
        mode=mode,
        project_id=project_id,
        path=path,
        symbol=symbol,
        query=query,
        filter_types=filter_types,
        statuses=statuses,
        filter_confidences=filter_confidences,
        max_results=max_results,
        match_mode=match_mode,
    )
    visible = [
        record
        for record in records
        if _record_visible(
            record,
            # In search mode, an explicit "stale" status also makes stale
            # records visible.
            include_stale=include_stale or (mode == "search" and "stale" in statuses),
            include_drafts=effective_include_drafts,
        )
    ]
    # The fetchers request max_results + 1 rows, so an overflow row marks
    # truncation.
    truncated = len(visible) > max_results
    selected = visible[:max_results]
    payload_records = _serialize_list_mode_records(
        store,
        records=selected,
        detail_level=normalized_detail,
    )
    return {
        "mode": mode,
        "status": "ok",
        "detail_level": normalized_detail,
        "payload": _search_payload_body(
            payload_records,
            truncated=truncated,
            include_drafts=effective_include_drafts,
        ),
    }
created_at_utc TEXT NOT NULL, + updated_at_utc TEXT NOT NULL +) +""" + +_CREATE_RECORDS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_records ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + identity_key TEXT NOT NULL, + type TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'active', + confidence TEXT NOT NULL DEFAULT 'supported', + origin TEXT NOT NULL DEFAULT 'system', + ingest_source TEXT NOT NULL, + statement TEXT NOT NULL, + summary TEXT, + payload_json TEXT, + created_at_utc TEXT NOT NULL, + updated_at_utc TEXT NOT NULL, + last_verified_at_utc TEXT, + expires_at_utc TEXT, + created_by TEXT NOT NULL, + verified_by TEXT, + approved_by TEXT, + approved_at_utc TEXT, + report_digest TEXT, + code_fingerprint TEXT, + stale_reason TEXT, + created_on_branch TEXT, + created_at_commit TEXT, + verified_on_branch TEXT, + verified_at_commit TEXT, + schema_version TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES memory_projects(id) +) +""" + +_CREATE_SUBJECTS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_subjects ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + subject_kind TEXT NOT NULL, + subject_key TEXT NOT NULL, + relation TEXT NOT NULL DEFAULT 'about', + FOREIGN KEY(memory_id) REFERENCES memory_records(id) ON DELETE CASCADE +) +""" + +_CREATE_EVIDENCE_SQL = """ +CREATE TABLE IF NOT EXISTS memory_evidence ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + evidence_kind TEXT NOT NULL, + ref TEXT NOT NULL, + locator TEXT, + quote TEXT, + digest TEXT, + created_at_utc TEXT NOT NULL, + FOREIGN KEY(memory_id) REFERENCES memory_records(id) ON DELETE CASCADE +) +""" + +_CREATE_LINKS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_links ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + from_memory_id TEXT NOT NULL, + to_memory_id TEXT NOT NULL, + relation TEXT NOT NULL, + created_by TEXT NOT NULL DEFAULT 'system', + created_at_utc TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES memory_projects(id), + FOREIGN KEY(from_memory_id) REFERENCES memory_records(id) 
ON DELETE CASCADE, + FOREIGN KEY(to_memory_id) REFERENCES memory_records(id) ON DELETE CASCADE +) +""" + +_CREATE_INGESTION_RUNS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_ingestion_runs ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + mode TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + finished_at_utc TEXT, + status TEXT NOT NULL DEFAULT 'running', + analysis_fingerprint TEXT, + report_digest TEXT, + branch TEXT, + "commit" TEXT, + records_created INTEGER NOT NULL DEFAULT 0, + records_updated INTEGER NOT NULL DEFAULT 0, + records_marked_stale INTEGER NOT NULL DEFAULT 0, + candidates_created INTEGER NOT NULL DEFAULT 0, + contradictions_found INTEGER NOT NULL DEFAULT 0, + message TEXT, + FOREIGN KEY(project_id) REFERENCES memory_projects(id) +) +""" + +_CREATE_REVISIONS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_revisions ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + revision_number INTEGER NOT NULL, + previous_statement TEXT, + new_statement TEXT NOT NULL, + previous_payload TEXT, + new_payload TEXT, + reason TEXT, + changed_by TEXT NOT NULL, + changed_at_utc TEXT NOT NULL, + branch TEXT, + "commit" TEXT, + FOREIGN KEY(memory_id) REFERENCES memory_records(id) ON DELETE CASCADE +) +""" + +_CREATE_BLAST_CACHE_SQL = """ +CREATE TABLE IF NOT EXISTS memory_blast_radius_cache ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + subject_key TEXT NOT NULL, + subject_kind TEXT NOT NULL, + depth TEXT NOT NULL DEFAULT 'direct', + payload_json TEXT NOT NULL, + analysis_fingerprint TEXT NOT NULL, + branch TEXT, + created_at_utc TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES memory_projects(id) +) +""" + +_DDL_STATEMENTS = ( + _CREATE_META_SQL, + _CREATE_MIGRATIONS_SQL, + _CREATE_PROJECTS_SQL, + _CREATE_RECORDS_SQL, + _CREATE_SUBJECTS_SQL, + _CREATE_EVIDENCE_SQL, + _CREATE_LINKS_SQL, + _CREATE_INGESTION_RUNS_SQL, + _CREATE_REVISIONS_SQL, + _CREATE_BLAST_CACHE_SQL, + *TRAJECTORY_DDL_STATEMENTS, + *PROJECTION_JOBS_DDL_STATEMENTS, + 
def open_memory_db_readonly(path: Path) -> sqlite3.Connection:
    """Open an existing engineering-memory database without allowing writes."""
    # NOTE(review): the validator passed below is ensure_schema, which calls
    # create_schema_v1 when no schema_version is stored and runs a migration
    # when the stored version differs. Both write; confirm how the read-only
    # opener is expected to behave in those two cases.
    # Imported at call time rather than at module import.
    from ..observability.sqlite_access import open_instrumented_sqlite_db_readonly

    return open_instrumented_sqlite_db_readonly(path, validate_schema=ensure_schema)
def create_schema_v1(conn: sqlite3.Connection) -> None:
    """Create the engineering-memory schema on ``conn`` and commit it.

    Runs the table and index statements through ``initialize_schema_v1``,
    seeds the meta table with the schema version, timestamps and the
    CodeClone version, records the schema version in
    ``memory_schema_migrations``, and creates the FTS table.
    """
    # One timestamp is shared by the seeded meta rows and the migration row.
    now = current_report_timestamp_utc()
    initialize_schema_v1(
        conn,
        ddl_statements=_DDL_STATEMENTS,
        index_statements=_INDEX_SQL,
        meta_table=MEMORY_META_TABLE,
        seed_meta={
            "schema_version": ENGINEERING_MEMORY_SCHEMA_VERSION,
            "created_at_utc": now,
            "updated_at_utc": now,
            "codeclone_version": __version__,
        },
    )
    # ``version`` is the primary key of memory_schema_migrations; OR IGNORE
    # keeps a repeated call from failing on an already-recorded version.
    conn.execute(
        "INSERT OR IGNORE INTO memory_schema_migrations(version, applied_at_utc) "
        "VALUES (?, ?)",
        (ENGINEERING_MEMORY_SCHEMA_VERSION, now),
    )
    # The FTS virtual table is not part of _DDL_STATEMENTS and is created here.
    conn.execute(CREATE_MEMORY_RECORDS_FTS_SQL)
    conn.commit()
def create_experience_schema(conn: sqlite3.Connection) -> None:
    """Create the experience tables, then their indexes, and commit.

    Every statement uses ``IF NOT EXISTS``, so running this against a database
    that already has the experience schema leaves the existing objects alone.
    """
    # Tables first: the index statements reference memory_experiences.
    for sql in (*EXPERIENCE_DDL_STATEMENTS, *EXPERIENCE_INDEX_SQL):
        conn.execute(sql)
    conn.commit()
def create_projection_jobs_schema(conn: sqlite3.Connection) -> None:
    """Create the ``memory_projection_jobs`` table and its indexes.

    Does not commit; the transaction is left to the caller.
    """
    # Table DDL runs before the index DDL that depends on it.
    for sql in PROJECTION_JOBS_DDL_STATEMENTS + PROJECTION_JOBS_INDEX_SQL:
        conn.execute(sql)
def set_meta(conn: sqlite3.Connection, key: str, value: str) -> None:
    """Insert ``key`` into the memory meta table, or overwrite its value.

    Does not commit; the caller owns the transaction.
    """
    upsert_sql = (
        f"INSERT INTO {MEMORY_META_TABLE}(key, value) VALUES (?, ?) "
        "ON CONFLICT(key) DO UPDATE SET value=excluded.value"
    )
    conn.execute(upsert_sql, (key, value))
def migrate_memory_schema(conn: sqlite3.Connection) -> None:
    """Upgrade an existing engineering-memory database to the current schema.

    Reads ``schema_version`` from the meta table and applies the
    single-version upgrade steps, in order, until the stored version equals
    ``ENGINEERING_MEMORY_SCHEMA_VERSION``. Each step writes its own version
    bump and commits.

    Returns without touching the database when no version is recorded or the
    database is already current.

    Raises:
        MemorySchemaError: if the stored version or the target version is not
            part of the known upgrade chain, or the target is older than the
            stored version. The check happens before any step runs, so the
            database is not modified in that case.
    """
    from ..contracts import ENGINEERING_MEMORY_SCHEMA_VERSION

    target = ENGINEERING_MEMORY_SCHEMA_VERSION
    current = get_meta(conn, "schema_version")
    if current is None or current == target:
        return

    # Single source of truth for the upgrade chain. Adding a schema version
    # means appending one row here; no per-step version sets to keep in sync.
    steps = (
        ("1.0", "1.1", _migrate_1_0_to_1_1),
        ("1.1", "1.2", _migrate_1_1_to_1_2),
        ("1.2", "1.3", _migrate_1_2_to_1_3),
        ("1.3", "1.4", _migrate_1_3_to_1_4),
        ("1.4", "1.5", _migrate_1_4_to_1_5),
        ("1.5", "1.6", _migrate_1_5_to_1_6),
        ("1.6", "1.7", _migrate_1_6_to_1_7),
    )
    # versions[i] -> versions[i + 1] is performed by steps[i].
    versions = [steps[0][0], *(reached for _, reached, _ in steps)]

    # Validate the whole path up front so an unsupported request never leaves
    # the database half-migrated or migrated past the requested target.
    if (
        current not in versions
        or target not in versions
        or versions.index(target) < versions.index(current)
    ):
        msg = (
            f"Unsupported engineering memory schema migration: {current!r} "
            f"→ {ENGINEERING_MEMORY_SCHEMA_VERSION!r}"
        )
        raise MemorySchemaError(msg)

    start = versions.index(current)
    stop = versions.index(target)
    for _, _, apply_step in steps[start:stop]:
        apply_step(conn)
sqlite3.Connection) -> None: + from .schema_trajectory import create_trajectory_schema + + create_trajectory_schema(conn) + now = current_report_timestamp_utc() + set_meta(conn, "schema_version", "1.2") + conn.execute( + "INSERT OR IGNORE INTO memory_schema_migrations(version, applied_at_utc) " + "VALUES (?, ?)", + ("1.2", now), + ) + conn.commit() + + +def _migrate_1_2_to_1_3(conn: sqlite3.Connection) -> None: + from .schema_jobs import create_projection_jobs_schema + + create_projection_jobs_schema(conn) + now = current_report_timestamp_utc() + set_meta(conn, "schema_version", "1.3") + conn.execute( + "INSERT OR IGNORE INTO memory_schema_migrations(version, applied_at_utc) " + "VALUES (?, ?)", + ("1.3", now), + ) + conn.commit() + + +def _migrate_1_3_to_1_4(conn: sqlite3.Connection) -> None: + from .schema_trajectory import create_patch_trails_schema + + create_patch_trails_schema(conn) + now = current_report_timestamp_utc() + set_meta(conn, "schema_version", "1.4") + conn.execute( + "INSERT OR IGNORE INTO memory_schema_migrations(version, applied_at_utc) " + "VALUES (?, ?)", + ("1.4", now), + ) + conn.commit() + + +def _add_column_if_missing( + conn: sqlite3.Connection, *, table: str, column: str, ddl_type: str +) -> None: + existing = { + str(row[1]) for row in conn.execute(f"PRAGMA table_info({table})").fetchall() + } + if column not in existing: + conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {ddl_type}") + + +def _record_schema_migration(conn: sqlite3.Connection, version: str) -> None: + now = current_report_timestamp_utc() + set_meta(conn, "schema_version", version) + conn.execute( + "INSERT OR IGNORE INTO memory_schema_migrations(version, applied_at_utc) " + "VALUES (?, ?)", + (version, now), + ) + conn.commit() + + +def _migrate_1_4_to_1_5(conn: sqlite3.Connection) -> None: + _add_column_if_missing( + conn, + table="memory_trajectories", + column="quality_score", + ddl_type="INTEGER NOT NULL DEFAULT 0", + ) + _record_schema_migration(conn, "1.5") 
def _migrate_1_5_to_1_6(conn: sqlite3.Connection) -> None:
    """Upgrade 1.5 -> 1.6: create the experience tables and indexes.

    The version bump, the ``memory_schema_migrations`` row and the commit are
    delegated to ``_record_schema_migration``, the same helper used by the
    1.4 -> 1.5 and 1.6 -> 1.7 steps, so the bookkeeping lives in one place.
    """
    # Imported lazily, matching the other schema-creating migration steps.
    from .schema_experience import create_experience_schema

    create_experience_schema(conn)
    _record_schema_migration(conn, "1.6")
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 + +_CREATE_TRAJECTORIES_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectories ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + repo_root_digest TEXT NOT NULL, + workflow_id TEXT NOT NULL, + intent_id TEXT, + primary_run_id TEXT, + first_run_id TEXT, + last_run_id TEXT, + report_digest TEXT, + outcome TEXT NOT NULL, + quality_tier TEXT NOT NULL, + quality_score INTEGER NOT NULL, + labels_json TEXT NOT NULL, + summary TEXT NOT NULL, + trajectory_digest TEXT NOT NULL, + source_event_stream_digest TEXT NOT NULL, + projection_version TEXT NOT NULL, + event_count INTEGER NOT NULL, + step_count INTEGER NOT NULL, + incident_count INTEGER NOT NULL, + started_at_utc TEXT NOT NULL, + finished_at_utc TEXT NOT NULL, + projected_at_utc TEXT NOT NULL, + updated_at_utc TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES memory_projects(id), + UNIQUE(project_id, workflow_id, projection_version) +) +""" + +_CREATE_TRAJECTORY_STEPS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectory_steps ( + trajectory_id TEXT NOT NULL, + step_index INTEGER NOT NULL, + audit_sequence INTEGER NOT NULL, + event_id TEXT NOT NULL, + event_type TEXT NOT NULL, + status TEXT, + run_id TEXT, + report_digest TEXT, + event_core_sha256 TEXT NOT NULL, + event_core_json TEXT NOT NULL, + summary TEXT, + created_at_utc TEXT NOT NULL, + PRIMARY KEY(trajectory_id, step_index), + FOREIGN KEY(trajectory_id) REFERENCES memory_trajectories(id) ON DELETE CASCADE +) +""" + +_CREATE_TRAJECTORY_SUBJECTS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectory_subjects ( + trajectory_id TEXT NOT NULL, + subject_kind TEXT NOT NULL, + subject_key TEXT NOT NULL, + relation TEXT NOT NULL DEFAULT 'about', + PRIMARY KEY(trajectory_id, subject_kind, subject_key, relation), + FOREIGN KEY(trajectory_id) REFERENCES memory_trajectories(id) ON DELETE CASCADE +) +""" + 
+_CREATE_TRAJECTORY_EVIDENCE_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectory_evidence ( + trajectory_id TEXT NOT NULL, + evidence_kind TEXT NOT NULL, + ref TEXT NOT NULL, + locator TEXT, + digest TEXT, + created_at_utc TEXT NOT NULL, + PRIMARY KEY(trajectory_id, evidence_kind, ref), + FOREIGN KEY(trajectory_id) REFERENCES memory_trajectories(id) ON DELETE CASCADE +) +""" + +_CREATE_TRAJECTORY_PATCH_TRAILS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectory_patch_trails ( + trajectory_id TEXT PRIMARY KEY, + patch_trail_digest TEXT NOT NULL, + patch_trail_json TEXT NOT NULL, + schema_version TEXT NOT NULL, + projected_at_utc TEXT NOT NULL, + FOREIGN KEY(trajectory_id) REFERENCES memory_trajectories(id) ON DELETE CASCADE +) +""" + +_CREATE_TRAJECTORY_PATCH_TRAILS_INDEX_SQL = """ +CREATE INDEX IF NOT EXISTS idx_trajectory_patch_trails_digest +ON memory_trajectory_patch_trails(patch_trail_digest) +""" + +_CREATE_TRAJECTORY_PROJECTION_RUNS_SQL = """ +CREATE TABLE IF NOT EXISTS memory_trajectory_projection_runs ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL, + repo_root_digest TEXT NOT NULL, + projection_version TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + finished_at_utc TEXT NOT NULL, + status TEXT NOT NULL, + workflows_seen INTEGER NOT NULL, + trajectories_created INTEGER NOT NULL, + trajectories_updated INTEGER NOT NULL, + trajectories_unchanged INTEGER NOT NULL, + legacy_event_count INTEGER NOT NULL, + message TEXT, + FOREIGN KEY(project_id) REFERENCES memory_projects(id) +) +""" + +TRAJECTORY_DDL_STATEMENTS = ( + _CREATE_TRAJECTORIES_SQL, + _CREATE_TRAJECTORY_STEPS_SQL, + _CREATE_TRAJECTORY_SUBJECTS_SQL, + _CREATE_TRAJECTORY_EVIDENCE_SQL, + _CREATE_TRAJECTORY_PATCH_TRAILS_SQL, + _CREATE_TRAJECTORY_PROJECTION_RUNS_SQL, +) + +TRAJECTORY_INDEX_SQL = ( + "CREATE INDEX IF NOT EXISTS idx_trajectories_project_workflow " + "ON memory_trajectories(project_id, workflow_id)", + "CREATE INDEX IF NOT EXISTS idx_trajectories_outcome " + "ON 
def create_trajectory_schema(conn: sqlite3.Connection) -> None:
    """Create every trajectory table, then every trajectory index, and commit.

    All statements use ``IF NOT EXISTS``, so existing objects are left as-is.
    """
    run = conn.execute
    for table_sql in TRAJECTORY_DDL_STATEMENTS:
        run(table_sql)
    # Indexes come second because they reference the tables created above.
    for index_sql in TRAJECTORY_INDEX_SQL:
        run(index_sql)
    conn.commit()
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from collections.abc import Sequence +from typing import Literal + +from .models import MemoryRecord, MemorySubject + +SearchMatchMode = Literal["all", "any"] + +_FTS_TOKEN_RE = re.compile(r"[^\w./-]+", re.UNICODE) + + +def build_search_text( + *, + record: MemoryRecord, + subjects: Sequence[MemorySubject], +) -> str: + parts: list[str] = [ + record.statement, + record.summary or "", + record.type, + record.ingest_source, + ] + for subject in subjects: + parts.append(subject.subject_kind) + parts.append(subject.subject_key.replace("\\", "/")) + if record.payload: + parts.append(_payload_search_text(record.payload)) + return " ".join(part.strip() for part in parts if part and part.strip()) + + +def _payload_search_text(payload: object) -> str: + if isinstance(payload, dict): + tokens: list[str] = [] + for key, value in sorted(payload.items()): + tokens.append(str(key)) + if isinstance(value, str): + tokens.append(value) + elif isinstance(value, list): + tokens.extend(str(item) for item in value) + return " ".join(tokens) + return str(payload) + + +def tokenize_query(query: str) -> tuple[str, ...]: + normalized = query.replace("\\", "/").strip() + if not normalized: + return () + raw_tokens = _FTS_TOKEN_RE.split(normalized.lower()) + seen: list[str] = [] + for token in raw_tokens: + text = token.strip(".") + if len(text) < 2: + continue + if text not in seen: + seen.append(text) + return tuple(seen) + + +def fts_match_expression( + query: str, + *, + match_mode: SearchMatchMode = "any", +) -> str | None: + tokens = tokenize_query(query) + if not tokens: + return None + escaped = [_escape_fts_token(token) for token in tokens] + joiner = " AND " if match_mode == "all" else " OR " + return joiner.join(escaped) + + +def _escape_fts_token(token: str) -> str: + return '"' + token.replace('"', '""') + '"' + + +def like_match_expression( + query: str, 
+ *, + match_mode: SearchMatchMode = "any", +) -> tuple[list[str], list[str]]: + """Return SQL LIKE clauses and params for fallback search.""" + tokens = tokenize_query(query) + if not tokens: + return [], [] + clauses: list[str] = [] + params: list[str] = [] + for token in tokens: + clauses.append("LOWER(search_blob) LIKE ? ESCAPE '\\'") + params.append(f"%{_escape_like(token.lower())}%") + if match_mode == "all": + return ["(" + " AND ".join(clauses) + ")"], params + return ["(" + " OR ".join(clauses) + ")"], params + + +def _escape_like(value: str) -> str: + return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + + +__all__ = [ + "SearchMatchMode", + "build_search_text", + "fts_match_expression", + "like_match_expression", + "tokenize_query", +] diff --git a/codeclone/memory/semantic/__init__.py b/codeclone/memory/semantic/__init__.py new file mode 100644 index 00000000..a01eb8f8 --- /dev/null +++ b/codeclone/memory/semantic/__init__.py @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
class SemanticIndex(Protocol):
    """Read surface of the semantic index (search + status).

    The retrieval layer talks to this Protocol; the degraded Null/Unavailable
    indexes implement it. Keeping mutation off the read surface lets the
    degraded indexes stay small and cohesive. The concrete backend is loaded
    lazily by the factory, so the memory package never imports a vector DB at
    module level.
    """

    def search(
        self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None
    ) -> list[SemanticHit]:
        """Return the top-``k`` nearest hits for ``vector``.

        ``source`` restricts the search to one lane (memory/audit/trajectory)
        so each lane gets its own budget; ``None`` searches every lane in a
        single shared top-k.
        """
        ...

    # Availability report. The degraded indexes in this module answer with
    # ``available=False`` plus a reason string.
    def status(self) -> SemanticIndexStatus: ...
class UnavailableSemanticIndex:
    """Stand-in used when the index is enabled but no backend could be opened.

    ``search`` always yields an empty list instead of raising; ``status``
    reports ``available=False`` together with the reason given at construction.
    """

    def __init__(self, *, reason: str) -> None:
        # Kept only so status() can report why the backend is missing.
        self._reason = reason

    def search(
        self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None
    ) -> list[SemanticHit]:
        no_hits: list[SemanticHit] = []
        return no_hits

    def status(self) -> SemanticIndexStatus:
        why = self._reason
        return SemanticIndexStatus(available=False, reason=why)
def resolve_semantic_index(config: SemanticConfig) -> SemanticIndex:
    """Pick the read-side semantic index for ``config``.

    * ``config.enabled`` is false -> ``NullSemanticIndex``.
    * enabled, but ``config.index_path`` does not exist ->
      ``UnavailableSemanticIndex(reason="not_built")``.
    * enabled and the path exists, but ``_resolve_backend`` returns ``None``
      -> ``UnavailableSemanticIndex(reason="lancedb_not_installed")``.
    * otherwise the backend returned by ``_resolve_backend(create=False)``.

    The path is checked before the backend is resolved, so a missing index is
    reported as ``not_built`` without attempting to load the backend.
    """
    if not config.enabled:
        return NullSemanticIndex()
    index_location = Path(config.index_path)
    if not index_location.exists():
        return UnavailableSemanticIndex(reason="not_built")
    opened = _resolve_backend(config, create=False)
    if opened is not None:
        return opened
    return UnavailableSemanticIndex(reason="lancedb_not_installed")
+ try: + from .lancedb_backend import LanceDbSemanticIndex + + return LanceDbSemanticIndex( + path=Path(config.index_path), + dimension=config.dimension, + create=create, + ) + except ImportError: + return None + + +__all__ = [ + "INDEXED_AUDIT_EVENTS", + "INDEXED_MEMORY_TYPES", + "SEMANTIC_CHUNK_STRATEGY_VERSION", + "AuditIndexSource", + "IndexSource", + "MemoryIndexSource", + "NullSemanticIndex", + "RebuildReport", + "SemanticHit", + "SemanticIndex", + "SemanticIndexStatus", + "SemanticIndexWriter", + "SemanticProjection", + "SemanticRow", + "SemanticRowFingerprint", + "SemanticSource", + "TrajectoryIndexSource", + "UnavailableSemanticIndex", + "build_semantic_index_sources", + "close_semantic_index", + "collapse_trajectory_hits", + "execute_semantic_index_rebuild", + "expand_projection", + "is_indexed_audit_event", + "is_indexed_memory_type", + "project_audit_event", + "project_memory_record", + "project_trajectory", + "rebuild_semantic_index", + "resolve_passage_chunker", + "resolve_semantic_index", + "resolve_semantic_index_writer", + "text_hash", + "trajectory_chunk_row_id", +] diff --git a/codeclone/memory/semantic/chunking.py b/codeclone/memory/semantic/chunking.py new file mode 100644 index 00000000..02c68fc1 --- /dev/null +++ b/codeclone/memory/semantic/chunking.py @@ -0,0 +1,143 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def trajectory_chunk_row_id(parent_id: str, chunk_index: int) -> str:
    """Build the index row id for one chunk of a trajectory.

    The chunk index is zero-padded to at least three digits.
    """
    padded_index = format(chunk_index, "03d")
    return f"trajectory:{parent_id}:chunk:{padded_index}"
def collapse_trajectory_hits(
    hits: Sequence[SemanticHit],
    *,
    k: int,
) -> list[SemanticHit]:
    """Keep the best-scoring hit per parent and return at most ``k`` of them.

    Hits are grouped by ``parent_id``, falling back to ``source_id`` when the
    parent id is empty or ``None``. Within a group the strictly highest score
    wins, so the earliest hit is kept on ties. Survivors are returned in
    descending score order; a non-positive ``k`` yields an empty list.
    """
    winners: dict[str, SemanticHit] = {}
    for candidate in hits:
        group_key = candidate.parent_id or candidate.source_id
        incumbent = winners.get(group_key)
        if incumbent is not None and not (candidate.score > incumbent.score):
            continue
        winners[group_key] = candidate
    ranked = sorted(winners.values(), key=lambda hit: hit.score, reverse=True)
    limit = k if k > 0 else 0
    return ranked[:limit]
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib +from collections.abc import Sequence +from pathlib import Path +from types import ModuleType +from typing import Protocol, cast + +from ...utils.iterutils import chunked +from .models import ( + SemanticHit, + SemanticIndexStatus, + SemanticRow, + SemanticRowFingerprint, + SemanticSource, +) + +# This module is importable in the base install. The optional vector DB packages +# are loaded only when a LanceDB backend instance is constructed. + +_TABLE_NAME = "semantic_index" +_SCHEMA_MISMATCH_REASON = "schema_mismatch" +# LanceDB `id IN (...)` filter batch — bounds the predicate size per query. +_ID_QUERY_BATCH = 500 + + +class _LanceSearchQuery(Protocol): + def select(self, columns: list[str]) -> _LanceSearchQuery: ... + + def where(self, predicate: str) -> _LanceSearchQuery: ... + + def limit(self, k: int) -> _LanceSearchQuery: ... + + def to_list(self) -> list[dict[str, object]]: ... + + def to_arrow(self) -> _ArrowTable: ... + + +class _LanceMergeInsert(Protocol): + def when_matched_update_all(self) -> _LanceMergeInsert: ... + + def when_not_matched_insert_all(self) -> _LanceMergeInsert: ... + + def execute(self, records: list[dict[str, object]]) -> None: ... + + +class _LanceField(Protocol): + type: object + + +class _LanceSchema(Protocol): + def field(self, name: str) -> _LanceField: ... + + +class _ArrowColumn(Protocol): + def to_pylist(self) -> list[object]: ... + + +class _ArrowTable(Protocol): + def column(self, name: str) -> _ArrowColumn: ... + + +class _LanceTable(Protocol): + schema: _LanceSchema + + def search(self, vector: list[float] | None = None) -> _LanceSearchQuery: ... + + def count_rows(self) -> int: ... + + def merge_insert(self, key: str) -> _LanceMergeInsert: ... 
+ + def delete(self, clause: str) -> None: ... + + +class _LanceConnection(Protocol): + def open_table(self, name: str) -> _LanceTable: ... + + def create_table( + self, name: str, schema: object, *, exist_ok: bool = False + ) -> _LanceTable: ... + + def drop_table(self, name: str) -> None: ... + + +def _schema(pa: ModuleType, dimension: int) -> object: + return pa.schema( + [ + pa.field("id", pa.string()), + pa.field("source", pa.string()), + pa.field("parent_id", pa.string()), + pa.field("chunk_index", pa.int32()), + pa.field("chunk_count", pa.int32()), + pa.field("project_id", pa.string()), + pa.field("subject_path", pa.string()), + pa.field("kind", pa.string()), + pa.field("status", pa.string()), + pa.field("text_hash", pa.string()), + pa.field("embedding_model", pa.string()), + pa.field("vector", pa.list_(pa.float32(), dimension)), + ] + ) + + +def _schema_matches(table: _LanceTable, *, dimension: int) -> bool: + try: + vector_type = table.schema.field("vector").type + parent_field = table.schema.field("parent_id") + chunk_index_field = table.schema.field("chunk_index") + chunk_count_field = table.schema.field("chunk_count") + except (AttributeError, KeyError, ValueError): + return False + return ( + getattr(vector_type, "list_size", None) == dimension + and parent_field is not None + and chunk_index_field is not None + and chunk_count_field is not None + ) + + +def _to_record(row: SemanticRow) -> dict[str, object]: + return { + "id": row.id, + "source": row.source, + "parent_id": row.parent_id, + "chunk_index": row.chunk_index, + "chunk_count": row.chunk_count, + "project_id": row.project_id, + "subject_path": row.subject_path, + "kind": row.kind, + "status": row.status, + "text_hash": row.text_hash, + "embedding_model": row.embedding_model, + "vector": list(row.vector), + } + + +def _optional_int(value: object) -> int | None: + if value is None: + return None + if isinstance(value, int): + return value + return int(str(value)) + + +def _optional_str(value: 
object) -> str | None: + if value is None: + return None + return str(value) + + +def _sql_quote(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + +def _as_float(value: object) -> float: + if isinstance(value, (int, float)): + return float(value) + return float(str(value)) + + +def _close_if_available(target: object | None) -> None: + if target is None: + return + close = getattr(target, "close", None) + if callable(close): + close() + + +class LanceDbSemanticIndex: + """LanceDB-backed semantic index (read + write); implements + SemanticIndexWriter. The table is keyed by ``id`` (merge-insert upsert) and + carries the projection metadata plus the embedding vector. + """ + + def __init__(self, *, path: Path, dimension: int, create: bool = False) -> None: + self._dimension = dimension + self._unavailable_reason: str | None = None + lancedb = importlib.import_module("lancedb") + self._pa = importlib.import_module("pyarrow") + self._db: _LanceConnection = lancedb.connect(str(path)) + self._table: _LanceTable | None = self._open_table(create=create) + + def _open_table(self, *, create: bool) -> _LanceTable | None: + table = self._open_existing_table() + if table is None: + if create: + return self._create_table() + return None + if _schema_matches(table, dimension=self._dimension): + return table + self._unavailable_reason = _SCHEMA_MISMATCH_REASON + if not create: + return None + self._db.drop_table(_TABLE_NAME) + self._unavailable_reason = None + return self._create_table() + + def _open_existing_table(self) -> _LanceTable | None: + try: + return self._db.open_table(_TABLE_NAME) + except ValueError as exc: + if f"Table '{_TABLE_NAME}' was not found" in str(exc): + return None + raise + + def _create_table(self) -> _LanceTable: + return self._db.create_table( + _TABLE_NAME, schema=_schema(self._pa, self._dimension), exist_ok=False + ) + + def _schema_matches(self, table: _LanceTable) -> bool: + return _schema_matches(table, dimension=self._dimension) 
+ + def search( + self, vector: Sequence[float], *, k: int, source: SemanticSource | None = None + ) -> list[SemanticHit]: + if self._table is None: + return [] + query = self._table.search(list(vector)) + if source is not None: + query = query.where(f"source = {_sql_quote(source)}") + rows = query.limit(k).to_list() + hits: list[SemanticHit] = [] + for row in rows: + distance = row.get("_distance", 0) + hits.append( + SemanticHit( + source_id=str(row["id"]), + source=cast(SemanticSource, str(row["source"])), + parent_id=_optional_str(row.get("parent_id")), + chunk_index=_optional_int(row.get("chunk_index")), + chunk_count=_optional_int(row.get("chunk_count")), + # lancedb returns L2 _distance (smaller is closer); map to a + # bounded proximity score where higher means more similar. + score=1.0 / (1.0 + _as_float(distance)), + ) + ) + return hits + + def status(self) -> SemanticIndexStatus: + if self._table is None: + return SemanticIndexStatus( + available=False, + backend="lancedb", + dimension=self._dimension, + reason=self._unavailable_reason or "not_built", + ) + return SemanticIndexStatus( + available=True, + backend="lancedb", + dimension=self._dimension, + indexed_count=self._table.count_rows(), + ) + + def upsert(self, rows: Sequence[SemanticRow]) -> None: + if not rows: + return + if self._table is None: + self._table = self._open_table(create=True) + assert self._table is not None + records = [_to_record(row) for row in rows] + ( + self._table.merge_insert("id") + .when_matched_update_all() + .when_not_matched_insert_all() + .execute(records) + ) + + def delete(self, ids: Sequence[str]) -> None: + if not ids: + return + if self._table is None: + return + clause = ", ".join(_sql_quote(value) for value in ids) + self._table.delete(f"id IN ({clause})") + + def known_ids(self) -> set[str]: + if self._table is None: + return set() + total = self._table.count_rows() + if total == 0: + return set() + # Metadata scan projecting only the id column: vectors are 
never read. + arrow = self._table.search().select(["id"]).limit(total).to_arrow() + return {str(value) for value in arrow.column("id").to_pylist()} + + def row_fingerprints(self, ids: Sequence[str]) -> dict[str, SemanticRowFingerprint]: + if self._table is None or not ids: + return {} + result: dict[str, SemanticRowFingerprint] = {} + for chunk in chunked(ids, _ID_QUERY_BATCH): + clause = ", ".join(_sql_quote(value) for value in chunk) + arrow = ( + self._table.search() + .select(["id", "text_hash", "embedding_model"]) + .where(f"id IN ({clause})") + .limit(len(chunk)) + .to_arrow() + ) + row_ids = arrow.column("id").to_pylist() + hashes = arrow.column("text_hash").to_pylist() + models = arrow.column("embedding_model").to_pylist() + for row_id, text_hash, model in zip(row_ids, hashes, models, strict=True): + result[str(row_id)] = SemanticRowFingerprint( + id=str(row_id), + text_hash=str(text_hash), + embedding_model=str(model), + ) + return result + + def close(self) -> None: + table = self._table + self._table = None + _close_if_available(table) + _close_if_available(self._db) + + +__all__ = ["LanceDbSemanticIndex"] diff --git a/codeclone/memory/semantic/models.py b/codeclone/memory/semantic/models.py new file mode 100644 index 00000000..3124dab9 --- /dev/null +++ b/codeclone/memory/semantic/models.py @@ -0,0 +1,130 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + +SemanticSource = Literal["memory", "audit", "trajectory"] + + +class SemanticProjection(BaseModel): + """Deterministic, embeddable projection of a memory record or audit event. 
+ + Pure data: the same source object always yields the same projection text + and the same ``text_hash`` (the idempotent upsert key). + """ + + model_config = ConfigDict(frozen=True, extra="forbid") + + source: SemanticSource + source_id: str = Field(min_length=1) + project_id: str | None = None + kind: str = Field(min_length=1) + subject_path: str | None = None + status: str | None = None + text: str = Field(min_length=1) + text_hash: str = Field(min_length=1) + + +class SemanticRow(BaseModel): + """A single indexed vector row — what the backend stores and returns. + + The final record/event is always re-loaded from SQLite / the audit DB; + this row only carries the vector and the filter/identity columns. + Trajectory rows may be chunked: ``parent_id`` points at the trajectory id + while ``id`` is the chunk row id. + """ + + model_config = ConfigDict(frozen=True, extra="forbid") + + id: str = Field(min_length=1) + source: SemanticSource + parent_id: str | None = None + chunk_index: int | None = None + chunk_count: int | None = None + project_id: str | None = None + subject_path: str | None = None + kind: str = Field(min_length=1) + status: str | None = None + text_hash: str = Field(min_length=1) + embedding_model: str = Field(min_length=1) + vector: tuple[float, ...] + + +class SemanticRowFingerprint(BaseModel): + """Identity of a stored row without its vector. + + The incremental rebuild fetches these (id + ``text_hash`` + model) to decide + what to re-embed, so it never loads vectors to check freshness. 
+ """ + + model_config = ConfigDict(frozen=True, extra="forbid") + + id: str = Field(min_length=1) + text_hash: str = Field(min_length=1) + embedding_model: str = Field(min_length=1) + + +class SemanticHit(BaseModel): + """A semantic search candidate: id + source + proximity score.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + source_id: str = Field(min_length=1) + source: SemanticSource + score: float + parent_id: str | None = None + chunk_index: int | None = None + chunk_count: int | None = None + + +class SemanticIndexStatus(BaseModel): + """Transparent status of the semantic index (the observability contract). + + ``available=false`` with a ``reason`` is the fail-clear signal; ``provider`` + surfaces ``diagnostic`` so callers know hits are not real-model recall. + """ + + model_config = ConfigDict(frozen=True, extra="forbid") + + available: bool + backend: str | None = None + provider: str | None = None + embedding_model: str | None = None + dimension: int | None = None + indexed_count: int = 0 + reason: str | None = None + + +class SemanticSearchResult(BaseModel): + """A hydrated semantic search hit: the proximity score plus record/event + metadata and a bounded preview, loaded from the source of truth. 
+ """ + + model_config = ConfigDict(frozen=True, extra="forbid") + + source: SemanticSource + source_id: str = Field(min_length=1) + score: float + kind: str = Field(min_length=1) + status: str | None = None + confidence: str | None = None + subject_path: str | None = None + preview: str = Field(min_length=1) + + +__all__ = [ + "SemanticHit", + "SemanticIndexStatus", + "SemanticProjection", + "SemanticRow", + "SemanticRowFingerprint", + "SemanticSearchResult", + "SemanticSource", +] diff --git a/codeclone/memory/semantic/projection.py b/codeclone/memory/semantic/projection.py new file mode 100644 index 00000000..5767cd57 --- /dev/null +++ b/codeclone/memory/semantic/projection.py @@ -0,0 +1,151 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +from collections.abc import Iterable + +from ..models import MemoryRecord +from ..trajectory.models import Trajectory +from ..trajectory.retrieval import trajectory_semantic_text_parts +from .models import SemanticProjection + +# Prose/decision subset only. Structural records (module_role, test_anchor, +# document_link, public_surface, stale_marker) are served by exact subject +# match and are NOT semantically indexed (Phase 20 spec §6.1). +INDEXED_MEMORY_TYPES: frozenset[str] = frozenset( + { + "contract_note", + "change_rationale", + "risk_note", + "architecture_decision", + "contradiction_note", + "protocol_rule", + "human_note", + } +) + +# Forensically useful audit incidents (Phase 20 spec §6.2). Projected from the +# bounded controller_events.summary column only — never payload_json. 
+INDEXED_AUDIT_EVENTS: frozenset[str] = frozenset( + { + "intent.declared", + "patch_contract.violated", + "workspace.conflict_detected", + "baseline_abuse.detected", + "claim_validation.violated", + "review_receipt.created", + } +) + + +def text_hash(text: str) -> str: + """Stable sha256 of the projected text — the idempotent upsert key.""" + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def is_indexed_memory_type(record_type: str) -> bool: + return record_type in INDEXED_MEMORY_TYPES + + +def is_indexed_audit_event(event_type: str) -> bool: + return event_type in INDEXED_AUDIT_EVENTS + + +def _join(parts: Iterable[str]) -> str: + return " | ".join(part.strip() for part in parts if part and part.strip()) + + +def project_memory_record( + record: MemoryRecord, + *, + subject_path: str | None = None, +) -> SemanticProjection: + """Build the deterministic projection for a memory record.""" + parts: list[str] = [record.type] + if subject_path: + parts.append(f"subject {subject_path}") + if record.summary: + parts.append(record.summary) + parts.append(record.statement) + text = _join(parts) + return SemanticProjection( + source="memory", + source_id=record.id, + project_id=record.project_id, + kind=record.type, + subject_path=subject_path, + status=record.status, + text=text, + text_hash=text_hash(text), + ) + + +def project_audit_event( + *, + event_id: str, + event_type: str, + summary: str, + project_id: str | None = None, +) -> SemanticProjection: + """Build the deterministic projection for an audit incident. + + ``summary`` is the bounded controller_events.summary column; callers must + skip events whose summary is empty (no human text to embed). 
+ """ + text = _join([event_type, summary]) + return SemanticProjection( + source="audit", + source_id=event_id, + project_id=project_id, + kind=event_type, + subject_path=None, + status=None, + text=text, + text_hash=text_hash(text), + ) + + +def project_trajectory( + trajectory: Trajectory, +) -> SemanticProjection: + """Build deterministic projection text for a stored trajectory. + + Only bounded trajectory projection fields are embedded: summary, outcome, + quality tier, labels, path subjects, and compact step summaries. Raw audit + payloads and event-core JSON stay out of the semantic sidecar. + """ + text = _join(trajectory_semantic_text_parts(trajectory)) + return SemanticProjection( + source="trajectory", + source_id=trajectory.id, + project_id=trajectory.project_id, + kind="trajectory", + subject_path=_primary_trajectory_path(trajectory), + status=trajectory.outcome, + text=text, + text_hash=text_hash(text), + ) + + +def _primary_trajectory_path(trajectory: Trajectory) -> str | None: + for subject in trajectory.subjects: + if subject.subject_kind == "path": + return subject.subject_key + return None + + +__all__ = [ + "INDEXED_AUDIT_EVENTS", + "INDEXED_MEMORY_TYPES", + "is_indexed_audit_event", + "is_indexed_memory_type", + "project_audit_event", + "project_memory_record", + "project_trajectory", + "text_hash", +] diff --git a/codeclone/memory/semantic/projection_probe.py b/codeclone/memory/semantic/projection_probe.py new file mode 100644 index 00000000..7da4a46f --- /dev/null +++ b/codeclone/memory/semantic/projection_probe.py @@ -0,0 +1,254 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Literal, TypedDict + +from ..embedding.length import ( + LengthDistribution, + ProjectionTokenProber, + length_distribution, + token_overflow_stats, + truncation_stats, +) +from .chunking import PassageChunker, expand_projection +from .sources import IndexSource + +SemanticLane = Literal["memory", "audit", "trajectory"] + + +class _DistributionPayload(TypedDict): + min: int + p50: int + p75: int + p95: int + p99: int + max: int + + +class _TokenOverflowPayload(TypedDict): + model_max_tokens: int | None + over_model_limit: int + max_overflow_tokens: int + + +class _TokenDistributionPayload(TypedDict): + raw: _DistributionPayload + effective: _DistributionPayload + + +class _TruncationPayload(TypedDict): + documents: int + max_dropped_tokens: int + + +class _TrajectoryChunkingPayload(TypedDict): + source_documents: int + index_units: int + multi_chunk_sources: int + + +class _OverflowExamplePayload(TypedDict): + id: str + parent_id: str | None + chunk_index: int | None + raw_tokens: int + overflow_tokens: int + + +class LaneProjectionProbePayload(TypedDict, total=False): + documents: int + chars: _DistributionPayload + tokens: _TokenDistributionPayload + truncation: _TruncationPayload + token_overflow: _TokenOverflowPayload + chunking: _TrajectoryChunkingPayload + overflow_examples: list[_OverflowExamplePayload] + + +class SemanticProjectionProbePayload(TypedDict): + action: Literal["probe_semantic_projections"] + lanes: dict[SemanticLane, LaneProjectionProbePayload] + estimator: str + model_max_tokens: int | None + + +@dataclass(slots=True) +class _ProbeUnitContext: + row_id: str | None = None + parent_id: str | None = None + chunk_index: int | None = None + + +@dataclass(slots=True) +class _LaneSamples: + char_counts: list[int] + raw_token_counts: list[int] + 
effective_token_counts: list[int] + unit_contexts: list[_ProbeUnitContext | None] + overflow_examples: list[_OverflowExamplePayload] + source_documents: int = 0 + multi_chunk_sources: int = 0 + + +def probe_semantic_projections( + *, + sources: Sequence[IndexSource], + token_prober: ProjectionTokenProber, + passage_chunker: PassageChunker | None = None, +) -> SemanticProjectionProbePayload: + model_max = token_prober.max_sequence_tokens() + by_lane: dict[SemanticLane, _LaneSamples] = { + "memory": _LaneSamples([], [], [], [], []), + "audit": _LaneSamples([], [], [], [], []), + "trajectory": _LaneSamples([], [], [], [], []), + } + for source in sources: + if not source.available(): + continue + lane = _lane_name(source.name()) + samples = by_lane[lane] + chunker = passage_chunker if lane == "trajectory" else None + for projection in source.iter_projections(): + if chunker is None: + _append_probe_sample(samples, token_prober, projection.text) + continue + samples.source_documents += 1 + units = expand_projection(projection, chunker) + if len(units) > 1: + samples.multi_chunk_sources += 1 + for unit in units: + _append_probe_sample( + samples, + token_prober, + unit.text, + unit=_ProbeUnitContext( + row_id=unit.row_id, + parent_id=unit.parent_id, + chunk_index=unit.chunk_index, + ), + ) + return { + "action": "probe_semantic_projections", + "estimator": token_prober.estimator_label, + "model_max_tokens": model_max, + "lanes": { + lane: _lane_payload( + samples, + model_max_tokens=model_max, + chunking=( + { + "source_documents": samples.source_documents, + "index_units": len(samples.char_counts), + "multi_chunk_sources": samples.multi_chunk_sources, + } + if lane == "trajectory" and passage_chunker is not None + else None + ), + ) + for lane, samples in by_lane.items() + }, + } + + +def _append_probe_sample( + samples: _LaneSamples, + token_prober: ProjectionTokenProber, + text: str, + *, + unit: _ProbeUnitContext | None = None, +) -> None: + (counts,) = 
token_prober.probe_passage_token_counts([text]) + samples.char_counts.append(len(text)) + samples.raw_token_counts.append(counts.raw) + samples.effective_token_counts.append(counts.effective) + samples.unit_contexts.append(unit) + model_max = token_prober.max_sequence_tokens() + if ( + unit is not None + and model_max is not None + and counts.raw > model_max + and samples.overflow_examples is not None + and len(samples.overflow_examples) < 5 + ): + samples.overflow_examples.append( + { + "id": unit.row_id or "", + "parent_id": unit.parent_id, + "chunk_index": unit.chunk_index, + "raw_tokens": counts.raw, + "overflow_tokens": counts.raw - model_max, + } + ) + + +def _lane_name(name: str) -> SemanticLane: + if name not in {"memory", "audit", "trajectory"}: + raise ValueError(f"unknown semantic lane: {name}") + return name # type: ignore[return-value] + + +def _lane_payload( + samples: _LaneSamples, + *, + model_max_tokens: int | None, + chunking: _TrajectoryChunkingPayload | None = None, +) -> LaneProjectionProbePayload: + char_dist = length_distribution(samples.char_counts) + raw_dist = length_distribution(samples.raw_token_counts) + effective_dist = length_distribution(samples.effective_token_counts) + overflow = token_overflow_stats( + samples.raw_token_counts, + model_max_tokens=model_max_tokens, + ) + truncation = truncation_stats( + samples.raw_token_counts, + samples.effective_token_counts, + ) + payload: LaneProjectionProbePayload = { + "documents": len(samples.char_counts), + "chars": _distribution_payload(char_dist), + "tokens": { + "raw": _distribution_payload(raw_dist), + "effective": _distribution_payload(effective_dist), + }, + "truncation": { + "documents": truncation.documents, + "max_dropped_tokens": truncation.max_dropped_tokens, + }, + "token_overflow": { + "model_max_tokens": overflow.model_max_tokens, + "over_model_limit": overflow.over_model_limit, + "max_overflow_tokens": overflow.max_overflow_tokens, + }, + } + if chunking is not None: + 
payload["chunking"] = chunking + if samples.overflow_examples: + payload["overflow_examples"] = list(samples.overflow_examples) + return payload + + +def _distribution_payload(distribution: LengthDistribution) -> _DistributionPayload: + return { + "min": distribution.min, + "p50": distribution.p50, + "p75": distribution.p75, + "p95": distribution.p95, + "p99": distribution.p99, + "max": distribution.max, + } + + +__all__ = [ + "LaneProjectionProbePayload", + "SemanticLane", + "SemanticProjectionProbePayload", + "probe_semantic_projections", +] diff --git a/codeclone/memory/semantic/rebuild.py b/codeclone/memory/semantic/rebuild.py new file mode 100644 index 00000000..12e6b311 --- /dev/null +++ b/codeclone/memory/semantic/rebuild.py @@ -0,0 +1,263 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from ...observability import is_observability_enabled, span +from ...utils.iterutils import chunked +from ..embedding import embed_documents +from ..embedding.batching import ( + EmbedBatchLimits, + EmbedBatchPlan, + LengthScoredItem, + pack_adaptive_batches, + score_lengths, +) +from ..embedding.length import estimate_char_counts, estimate_document_tokens +from .chunking import ( + IndexedSemanticUnit, + PassageChunker, + expand_projection, + resolve_passage_chunker, +) +from .models import SemanticRow, SemanticRowFingerprint + +if TYPE_CHECKING: + from ..embedding import EmbeddingProvider + from . 
import SemanticIndexWriter + from .sources import IndexSource + +# Source projections fingerprinted per round-trip before embedding the changed +# subset — bounds the changed/unchanged partition for very large corpora. +_FINGERPRINT_PAGE_SIZE = 256 + + +@dataclass(frozen=True, slots=True) +class RebuildReport: + """Outcome of a semantic rebuild: indexed total, deletions, and the + embedded vs hash-skipped split (per source).""" + + indexed: int + deleted: int = 0 + embedded: int = 0 + skipped_unchanged: int = 0 + by_source: dict[str, int] = field(default_factory=dict) + + +@dataclass(frozen=True, slots=True) +class _SourceIndexStats: + seen_ids: set[str] + embedded: int + skipped_unchanged: int + + +def rebuild_semantic_index( + *, + writer: SemanticIndexWriter, + provider: EmbeddingProvider, + sources: Sequence[IndexSource], + embed_batch_limits: EmbedBatchLimits | None = None, +) -> RebuildReport: + """Reconcile the semantic index against its sources by content hash. + + A row is re-embedded only when its projection ``text_hash`` (or the + embedding model) differs from the stored fingerprint; unchanged rows are + skipped without loading their vectors, so an unchanged corpus never loads + the embedding model. The index is a derived, rebuildable sidecar, never + updated on the write hot path. 
+ """ + limits = embed_batch_limits or EmbedBatchLimits() + chunker = resolve_passage_chunker(provider) + by_source: dict[str, int] = {} + seen_ids: set[str] = set() + embedded = 0 + skipped = 0 + for source in sources: + if not source.available(): + continue + with span(name=f"memory.semantic.source.{source.name()}"): + stats = _index_source( + source, + writer=writer, + provider=provider, + chunker=chunker, + embed_batch_limits=limits, + ) + if stats.seen_ids: + by_source[source.name()] = _count_source_documents(source) + seen_ids |= stats.seen_ids + embedded += stats.embedded + skipped += stats.skipped_unchanged + deleted = 0 + with span(name="memory.semantic.reconcile") as reconcile_span: + stale = writer.known_ids() - seen_ids + if stale: + writer.delete(sorted(stale)) + deleted = len(stale) + if is_observability_enabled(): + reconcile_span.set_counter("indexed", len(seen_ids)) + reconcile_span.set_counter("deleted", deleted) + return RebuildReport( + indexed=len(seen_ids), + deleted=deleted, + embedded=embedded, + skipped_unchanged=skipped, + by_source=by_source, + ) + + +def _count_source_documents(source: IndexSource) -> int: + return sum(1 for _ in source.iter_projections()) + + +def _index_source( + source: IndexSource, + *, + writer: SemanticIndexWriter, + provider: EmbeddingProvider, + chunker: PassageChunker, + embed_batch_limits: EmbedBatchLimits, +) -> _SourceIndexStats: + seen: set[str] = set() + embedded = 0 + skipped = 0 + for page in chunked(source.iter_projections(), _FINGERPRINT_PAGE_SIZE): + units: list[IndexedSemanticUnit] = [] + for projection in page: + units.extend(expand_projection(projection, chunker)) + row_ids = [unit.row_id for unit in units] + seen.update(row_ids) + fingerprints = writer.row_fingerprints(row_ids) + changed = [ + unit + for unit in units + if _needs_embed( + fingerprints.get(unit.row_id), + unit, + provider.model_id, + ) + ] + skipped += len(units) - len(changed) + embedded += _embed_and_upsert( + changed, + 
writer=writer, + provider=provider, + embed_batch_limits=embed_batch_limits, + ) + return _SourceIndexStats( + seen_ids=seen, + embedded=embedded, + skipped_unchanged=skipped, + ) + + +def _embed_and_upsert( + units: Sequence[IndexedSemanticUnit], + *, + writer: SemanticIndexWriter, + provider: EmbeddingProvider, + embed_batch_limits: EmbedBatchLimits, +) -> int: + if not units: + return 0 + texts = [unit.text for unit in units] + char_counts = estimate_char_counts(texts) + token_counts = estimate_document_tokens(provider, texts) + scored: tuple[LengthScoredItem[IndexedSemanticUnit], ...] = score_lengths( + list(units), + char_counts=char_counts, + token_counts=token_counts, + source_kinds=[unit.source for unit in units], + source_ids=[ + ( + f"{unit.parent_id or unit.row_id}:" + f"{unit.chunk_index if unit.chunk_index is not None else 0}" + ) + for unit in units + ], + ) + batches: list[EmbedBatchPlan[IndexedSemanticUnit]] = pack_adaptive_batches( + scored, limits=embed_batch_limits + ) + embedded = 0 + with span(name="memory.semantic.embed") as embed_span: + if is_observability_enabled(): + embed_span.set_counter("max_documents", embed_batch_limits.max_documents) + embed_span.set_counter( + "max_padded_tokens", embed_batch_limits.max_padded_tokens + ) + embed_span.set_counter("pending", len(units)) + embed_span.set_counter("batches", len(batches)) + for batch in batches: + batch_units = [item.item for item in batch.items] + infer_counters = { + "documents": len(batch.items), + "total_chars": batch.total_chars, + "max_chars": batch.max_chars, + "total_tokens": batch.total_tokens, + "max_tokens": batch.max_tokens, + "padded_tokens": batch.padded_tokens, + "padding_amplification_permille": batch.padding_amplification_permille, + } + vectors = embed_documents( + provider, + [unit.text for unit in batch_units], + infer_counters=infer_counters, + ) + writer.upsert( + [ + _row(unit, vector, provider.model_id) + for unit, vector in zip(batch_units, vectors, strict=True) 
+ ] + ) + embedded += len(batch_units) + if is_observability_enabled(): + embed_span.set_counter("embedded", embedded) + return embedded + + +def _needs_embed( + fingerprint: SemanticRowFingerprint | None, + unit: IndexedSemanticUnit, + model_id: str, +) -> bool: + if fingerprint is None: + return True + return ( + fingerprint.text_hash != unit.text_hash + or fingerprint.embedding_model != model_id + ) + + +def _row( + unit: IndexedSemanticUnit, + vector: Sequence[float], + model_id: str, +) -> SemanticRow: + return SemanticRow( + id=unit.row_id, + source=unit.source, + parent_id=unit.parent_id, + chunk_index=unit.chunk_index, + chunk_count=unit.chunk_count, + project_id=unit.project_id, + subject_path=unit.subject_path, + kind=unit.kind, + status=unit.status, + text_hash=unit.text_hash, + embedding_model=model_id, + vector=tuple(vector), + ) + + +__all__ = [ + "RebuildReport", + "rebuild_semantic_index", +] diff --git a/codeclone/memory/semantic/rebuild_workflow.py b/codeclone/memory/semantic/rebuild_workflow.py new file mode 100644 index 00000000..b8475519 --- /dev/null +++ b/codeclone/memory/semantic/rebuild_workflow.py @@ -0,0 +1,344 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from typing import Literal, TypedDict + +from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from ...config.memory import MemoryConfig, SemanticConfig +from ...observability import SpanHandle, is_observability_enabled, span +from ...observability.reason_kind import ReasonKind +from ..embedding import resolve_embedding_provider +from ..embedding.batching import EmbedBatchLimits +from ..embedding.length import ( + ProjectionTokenProber, + resolve_planning_token_estimator, +) +from ..exceptions import MemoryContractError, MemorySemanticUnavailableError +from ..models import MemoryProject +from ..project import resolve_memory_db_path, resolve_project_identity +from ..sqlite_store import SqliteEngineeringMemoryStore +from .chunking import PassageChunker, resolve_passage_chunker +from .projection_probe import SemanticProjectionProbePayload, probe_semantic_projections +from .rebuild import RebuildReport, rebuild_semantic_index +from .sources import ( + AuditIndexSource, + IndexSource, + MemoryIndexSource, + TrajectoryIndexSource, +) + + +class RebuildSemanticIndexMeta(TypedDict): + action: Literal["rebuild_semantic_index"] + index_path: str + embedding_provider: str + + +class RebuildSemanticIndexCounts(TypedDict): + indexed: int + deleted: int + embedded: int + skipped_unchanged: int + by_source: dict[str, int] + + +class RebuildSemanticIndexOkPayload( + RebuildSemanticIndexMeta, RebuildSemanticIndexCounts +): + status: Literal["ok"] + embedding_model: str + + +class RebuildSemanticIndexSkippedPayload( + RebuildSemanticIndexMeta, RebuildSemanticIndexCounts +): + status: Literal["skipped"] + reason: str + embedding_model: None + + +class RebuildSemanticIndexUnavailablePayload( + RebuildSemanticIndexMeta, RebuildSemanticIndexCounts +): + status: Literal["unavailable"] + reason: str + embedding_model: None + + 
+RebuildSemanticIndexPayload = ( + RebuildSemanticIndexOkPayload + | RebuildSemanticIndexSkippedPayload + | RebuildSemanticIndexUnavailablePayload +) + + +def build_semantic_index_sources( + *, + root_path: Path, + config: MemoryConfig, + store: SqliteEngineeringMemoryStore, + project: MemoryProject, +) -> list[IndexSource]: + audit_db_path = resolve_audit_path( + root_path=root_path, + value=DEFAULT_AUDIT_PATH, + ) + return [ + MemoryIndexSource(store, project_id=project.id), + AuditIndexSource( + enabled=config.semantic.index_audit, + db_path=audit_db_path, + ), + TrajectoryIndexSource(store, project_id=project.id), + ] + + +def _rebuild_base_payload(config: MemoryConfig) -> RebuildSemanticIndexMeta: + return { + "action": "rebuild_semantic_index", + "index_path": config.semantic.index_path, + "embedding_provider": config.semantic.embedding_provider, + } + + +def _rebuild_empty_counts() -> RebuildSemanticIndexCounts: + return { + "indexed": 0, + "deleted": 0, + "embedded": 0, + "skipped_unchanged": 0, + "by_source": {}, + } + + +def _rebuild_reason_kind(report: RebuildReport) -> ReasonKind: + if report.indexed == 0: + return "first_index" + if report.embedded > 0 or report.deleted > 0: + return "content_changed" + if report.skipped_unchanged > 0: + # Full reconcile with hash-skip only — operator or scheduler triggered + # rebuild but the index was already current (no embed/prune work). 
+ return "manual_rebuild" + return "manual_rebuild" + + +def _apply_rebuild_counters( + rebuild_span: SpanHandle, + report: RebuildReport, + *, + dimensions: int, + batch_size: int, + max_padded_tokens: int, +) -> None: + if not is_observability_enabled(): + return + rebuild_span.set_counter("indexed", report.indexed) + rebuild_span.set_counter("embedded", report.embedded) + rebuild_span.set_counter("skipped_unchanged", report.skipped_unchanged) + rebuild_span.set_counter("deleted", report.deleted) + rebuild_span.set_counter("embedding_dimensions", dimensions) + rebuild_span.set_counter("embedding_batch_size", batch_size) + rebuild_span.set_counter("embedding_max_padded_tokens", max_padded_tokens) + for lane, count in sorted(report.by_source.items()): + rebuild_span.set_counter(f"lane_{lane}", count) + + +def execute_semantic_index_rebuild( + *, + root_path: Path, + config: MemoryConfig, + store: SqliteEngineeringMemoryStore | None = None, + project: MemoryProject | None = None, +) -> RebuildSemanticIndexPayload: + """Rebuild the LanceDB semantic sidecar (MCP action + CLI rebuild). + + Returns a structured payload. Raises ``MemoryContractError`` when semantic + is enabled but the engineering-memory SQLite database is missing. + """ + base = _rebuild_base_payload(config) + empty = _rebuild_empty_counts() + with span(name="memory.semantic.rebuild") as rebuild_span: + if not config.semantic.enabled: + return { + **base, + **empty, + "status": "skipped", + "reason": "disabled", + "embedding_model": None, + } + with span(name="memory.semantic.bootstrap"): + try: + provider = resolve_embedding_provider(config.semantic) + except MemorySemanticUnavailableError as exc: + return { + **base, + **empty, + "status": "unavailable", + "reason": str(exc), + "embedding_model": None, + } + from . 
import close_semantic_index, resolve_semantic_index_writer + + writer = resolve_semantic_index_writer(config.semantic) + if writer is None: + return { + **base, + **empty, + "status": "unavailable", + "reason": "lancedb_not_installed", + "embedding_model": None, + } + owns_store = store is None + active_store = store + report: RebuildReport | None = None + try: + resolved_project = project or resolve_project_identity(root_path) + if active_store is None: + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + raise MemoryContractError( + f"Engineering memory database not found: {db_path}. " + "Run memory init or " + "manage_engineering_memory(action='refresh_from_run')." + ) + active_store = SqliteEngineeringMemoryStore(db_path) + report = rebuild_semantic_index( + writer=writer, + provider=provider, + sources=build_semantic_index_sources( + root_path=root_path, + config=config, + store=active_store, + project=resolved_project, + ), + embed_batch_limits=EmbedBatchLimits( + max_documents=config.semantic.embed_max_documents_per_batch, + max_padded_tokens=config.semantic.embed_max_padded_tokens_per_batch, + ), + ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model surfaces at + # the first embed here rather than at resolve. Report it the same way an + # unresolved provider does instead of letting the rebuild raise. 
+ return { + **base, + **empty, + "status": "unavailable", + "reason": str(exc), + "embedding_model": None, + } + finally: + close_semantic_index(writer) + if owns_store and active_store is not None: + active_store.close() + assert report is not None + rebuild_span.set_reason_kind(_rebuild_reason_kind(report)) + _apply_rebuild_counters( + rebuild_span, + report, + dimensions=config.semantic.dimension, + batch_size=config.semantic.embed_max_documents_per_batch, + max_padded_tokens=config.semantic.embed_max_padded_tokens_per_batch, + ) + return { + **base, + "status": "ok", + "indexed": report.indexed, + "deleted": report.deleted, + "embedded": report.embedded, + "skipped_unchanged": report.skipped_unchanged, + "by_source": dict(sorted(report.by_source.items())), + "embedding_model": provider.model_id, + } + + +def execute_semantic_projection_probe( + *, + root_path: Path, + config: MemoryConfig, + store: SqliteEngineeringMemoryStore | None = None, + project: MemoryProject | None = None, + exact_tokens: bool = False, +) -> SemanticProjectionProbePayload | dict[str, object]: + """Measure semantic projection length distribution per lane without embedding.""" + if not config.semantic.enabled: + return { + "action": "probe_semantic_projections", + "status": "skipped", + "reason": "disabled", + } + token_prober = _resolve_projection_token_prober( + config.semantic, + exact_tokens=exact_tokens, + ) + passage_chunker = _resolve_projection_passage_chunker( + config.semantic, + exact_tokens=exact_tokens, + ) + owns_store = store is None + active_store = store + try: + resolved_project = project or resolve_project_identity(root_path) + if active_store is None: + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + raise MemoryContractError( + f"Engineering memory database not found: {db_path}. " + "Run memory init or " + "manage_engineering_memory(action='refresh_from_run')." 
+ ) + active_store = SqliteEngineeringMemoryStore(db_path) + return probe_semantic_projections( + sources=build_semantic_index_sources( + root_path=root_path, + config=config, + store=active_store, + project=resolved_project, + ), + token_prober=token_prober, + passage_chunker=passage_chunker, + ) + finally: + if owns_store and active_store is not None: + active_store.close() + + +def _resolve_projection_token_prober( + config: SemanticConfig, + *, + exact_tokens: bool = False, +) -> ProjectionTokenProber: + if exact_tokens and config.embedding_provider == "fastembed": + provider = resolve_embedding_provider(config) + if isinstance(provider, ProjectionTokenProber): + return provider + return resolve_planning_token_estimator(config) + + +def _resolve_projection_passage_chunker( + config: SemanticConfig, + *, + exact_tokens: bool = False, +) -> PassageChunker | None: + if not exact_tokens or config.embedding_provider != "fastembed": + return None + provider = resolve_embedding_provider(config) + return resolve_passage_chunker(provider) + + +__all__ = [ + "RebuildSemanticIndexOkPayload", + "RebuildSemanticIndexPayload", + "RebuildSemanticIndexSkippedPayload", + "RebuildSemanticIndexUnavailablePayload", + "build_semantic_index_sources", + "execute_semantic_index_rebuild", + "execute_semantic_projection_probe", +] diff --git a/codeclone/memory/semantic/sources.py b/codeclone/memory/semantic/sources.py new file mode 100644 index 00000000..8d8b079b --- /dev/null +++ b/codeclone/memory/semantic/sources.py @@ -0,0 +1,214 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from collections.abc import Iterator, Sequence +from pathlib import Path +from typing import Protocol + +from ...audit.schema import open_audit_db_readonly +from ...audit.validation import AuditSchemaError +from ..models import MemoryQuery, MemoryRecord, MemorySubject +from ..trajectory.models import Trajectory, TrajectoryListItem +from .models import SemanticProjection +from .projection import ( + INDEXED_AUDIT_EVENTS, + is_indexed_memory_type, + project_audit_event, + project_memory_record, + project_trajectory, +) + +# Live, retrievable statuses. rejected/archived/superseded are not surfaced by +# retrieval, so they are not embedded. +_INDEXED_STATUSES: frozenset[str] = frozenset({"active", "draft", "stale"}) +_PAGE_SIZE = 200 + + +def _primary_path(subjects: Sequence[MemorySubject]) -> str | None: + for subject in subjects: + if subject.subject_kind == "path": + return subject.subject_key + return None + + +class IndexSource(Protocol): + """A source of deterministic projections to feed the semantic index. + + Each source reports availability and yields projections (or nothing); a + rebuild iterates the available sources. + """ + + def name(self) -> str: ... + + def available(self) -> bool: ... + + def iter_projections(self) -> Iterator[SemanticProjection]: ... + + +class _MemoryReadStore(Protocol): + """Minimal read surface MemoryIndexSource needs from the memory store.""" + + def query_records(self, query: MemoryQuery) -> Sequence[MemoryRecord]: ... + + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: ... + + +class _TrajectoryReadStore(Protocol): + def list_trajectories( + self, + *, + project_id: str, + limit: int = 20, + ) -> list[TrajectoryListItem]: ... + + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: ... 
+ + +class MemoryIndexSource: + """Engineering Memory as a semantic index source. + + Always available (SQLite is truth). Yields deterministic projections for + the prose/decision record subset only; structural records and + non-retrievable statuses are skipped. + """ + + def __init__(self, store: _MemoryReadStore, *, project_id: str) -> None: + self._store = store + self._project_id = project_id + + def name(self) -> str: + return "memory" + + def available(self) -> bool: + return True + + def iter_projections(self) -> Iterator[SemanticProjection]: + offset = 0 + while True: + records = self._store.query_records( + MemoryQuery( + project_id=self._project_id, + limit=_PAGE_SIZE, + offset=offset, + ) + ) + indexed = [ + record + for record in records + if is_indexed_memory_type(record.type) + and record.status in _INDEXED_STATUSES + ] + # One batched subject load per page instead of a query per record. + subjects_by_id = self._store.list_subjects_for_memories( + [record.id for record in indexed] + ) + for record in indexed: + yield project_memory_record( + record, + subject_path=_primary_path(subjects_by_id.get(record.id, [])), + ) + if len(records) < _PAGE_SIZE: + return + offset += _PAGE_SIZE + + +class TrajectoryIndexSource: + """Trajectory memory as a semantic source. + + Trajectories are derived projections over audit event core. The semantic + source embeds their deterministic, bounded projection text only. + """ + + def __init__(self, store: _TrajectoryReadStore, *, project_id: str) -> None: + self._store = store + self._project_id = project_id + + def name(self) -> str: + return "trajectory" + + def available(self) -> bool: + return True + + def iter_projections(self) -> Iterator[SemanticProjection]: + offset = 0 + while True: + items = self._store.list_trajectories( + project_id=self._project_id, + limit=_PAGE_SIZE + offset, + ) + page = items[offset : offset + _PAGE_SIZE] + # Batch-hydrate the page instead of one find_trajectory per item. 
+ for trajectory in self._store.find_trajectories([item.id for item in page]): + yield project_trajectory(trajectory) + if len(page) < _PAGE_SIZE: + return + offset += _PAGE_SIZE + + +class AuditIndexSource: + """Audit trail as an availability-gated semantic index source. + + Available only when audit is enabled and the DB file exists. Projects the + bounded ``controller_events.summary`` column for forensic incident types. + A missing DB, a pre-Bug-B schema without the ``summary`` column, or empty + summaries simply contribute nothing — this source never raises. + """ + + def __init__(self, *, enabled: bool, db_path: Path) -> None: + self._enabled = enabled + self._db_path = db_path + + def name(self) -> str: + return "audit" + + def available(self) -> bool: + return self._enabled and self._db_path.is_file() + + def iter_projections(self) -> Iterator[SemanticProjection]: + if not self.available(): + return + yield from self._read_projections() + + def _read_projections(self) -> Iterator[SemanticProjection]: + event_types = tuple(sorted(INDEXED_AUDIT_EVENTS)) + placeholders = ", ".join("?" 
for _ in event_types) + try: + conn = open_audit_db_readonly(self._db_path) + except (sqlite3.Error, AuditSchemaError, OSError): + return + try: + rows = conn.execute( + "SELECT event_id, event_type, summary FROM controller_events " + "WHERE summary IS NOT NULL AND summary != '' " + f"AND event_type IN ({placeholders}) " + "ORDER BY created_at_utc ASC, id ASC", + event_types, + ).fetchall() + except (sqlite3.Error, AuditSchemaError): + return + finally: + conn.close() + for event_id, event_type, summary in rows: + if not isinstance(summary, str) or not summary.strip(): + continue + yield project_audit_event( + event_id=str(event_id), + event_type=str(event_type), + summary=summary, + ) + + +__all__ = [ + "AuditIndexSource", + "IndexSource", + "MemoryIndexSource", + "TrajectoryIndexSource", +] diff --git a/codeclone/memory/sqlite_store.py b/codeclone/memory/sqlite_store.py new file mode 100644 index 00000000..0089721a --- /dev/null +++ b/codeclone/memory/sqlite_store.py @@ -0,0 +1,1331 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from collections.abc import Iterator, Mapping, Sequence +from contextlib import contextmanager, suppress +from pathlib import Path +from typing import cast + +from ..report.meta import current_report_timestamp_utc +from ..utils.iterutils import chunked +from .enums import LinkRelation +from .experience.models import Experience +from .locks import memory_init_lock +from .models import ( + IngestionRun, + MemoryEvidence, + MemoryLink, + MemoryProject, + MemoryQuery, + MemoryRecord, + MemoryRevision, + MemorySubject, + RecordBatch, + UpsertAction, + UpsertResult, + generate_memory_id, + parse_payload_json, + payload_json_text, +) +from .schema import get_meta, open_memory_db, set_meta +from .search_index import ( + SearchMatchMode, + build_search_text, + fts_match_expression, + tokenize_query, +) +from .trajectory.models import ( + Trajectory, + TrajectoryListItem, + TrajectoryProjectionResult, + TrajectoryProjectionRun, +) + +_SQLITE_IN_QUERY_BATCH = 500 + + +class SqliteEngineeringMemoryStore: + def __init__(self, db_path: Path) -> None: + self._db_path = db_path + self._closed = False + self._conn = open_memory_db(db_path) + self._conn.row_factory = sqlite3.Row + + @property + def db_path(self) -> Path: + return self._db_path + + def initialize(self, project: MemoryProject) -> None: + now = current_report_timestamp_utc() + self._conn.execute( + """ + INSERT INTO memory_projects( + id, root, git_remote, git_branch, git_head, python_tag, + created_at_utc, updated_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(id) DO UPDATE SET + root=excluded.root, + git_remote=excluded.git_remote, + git_branch=excluded.git_branch, + git_head=excluded.git_head, + python_tag=excluded.python_tag, + updated_at_utc=excluded.updated_at_utc + """, + ( + project.id, + project.root, + project.git_remote, + project.git_branch, + project.git_head, + project.python_tag, + project.created_at_utc, + project.updated_at_utc, + ), + ) + set_meta(self._conn, "project_id", project.id) + set_meta(self._conn, "project_root", project.root) + set_meta(self._conn, "updated_at_utc", now) + self._conn.commit() + + def get_meta(self, key: str) -> str | None: + return get_meta(self._conn, key) + + def set_meta(self, key: str, value: str) -> None: + set_meta(self._conn, key, value) + set_meta(self._conn, "updated_at_utc", current_report_timestamp_utc()) + self._conn.commit() + + def rebuild_trajectories_from_audit( + self, + *, + project: MemoryProject, + root_path: Path, + audit_db_path: Path, + ) -> TrajectoryProjectionResult: + from .trajectory.store import rebuild_trajectories_from_audit + + return rebuild_trajectories_from_audit( + conn=self._conn, + project=project, + root_path=root_path, + audit_db_path=audit_db_path, + ) + + def rebuild_trajectories_incremental( + self, + *, + project: MemoryProject, + root_path: Path, + audit_db_path: Path, + after_event_core_id: int, + ) -> TrajectoryProjectionResult: + from .trajectory.store import rebuild_trajectories_incremental + + return rebuild_trajectories_incremental( + conn=self._conn, + project=project, + root_path=root_path, + audit_db_path=audit_db_path, + after_event_core_id=after_event_core_id, + ) + + def count_trajectories(self, *, project_id: str) -> int: + from .trajectory.store import count_trajectories + + return count_trajectories(self._conn, project_id=project_id) + + def latest_trajectory_projection_run( + self, + *, + project_id: str, + ) -> TrajectoryProjectionRun | None: + from .trajectory.store import latest_projection_run + + 
return latest_projection_run(self._conn, project_id=project_id) + + def list_trajectories( + self, + *, + project_id: str, + limit: int = 20, + ) -> list[TrajectoryListItem]: + from .trajectory.store import list_trajectories + + return list_trajectories(self._conn, project_id=project_id, limit=limit) + + def list_trajectories_for_subjects( + self, + *, + project_id: str, + subjects: Mapping[str, Sequence[str]], + limit: int = 20, + ) -> list[Trajectory]: + from .trajectory.store import list_trajectories_for_subjects + + return list_trajectories_for_subjects( + self._conn, + project_id=project_id, + subjects=subjects, + limit=limit, + ) + + def search_trajectories( + self, + *, + project_id: str, + query: str, + limit: int = 20, + match_mode: SearchMatchMode = "any", + ) -> list[Trajectory]: + from .trajectory.store import search_trajectories + + return search_trajectories( + self._conn, + project_id=project_id, + query=query, + limit=limit, + match_mode=match_mode, + ) + + def find_trajectory(self, trajectory_id: str) -> Trajectory | None: + from .trajectory.store import find_trajectory + + return find_trajectory(self._conn, trajectory_id) + + def find_trajectories( + self, + trajectory_ids: Sequence[str], + ) -> list[Trajectory]: + from .trajectory.store import find_trajectories_by_ids + + return find_trajectories_by_ids(self._conn, trajectory_ids) + + def load_trajectory_patch_trail( + self, + trajectory_id: str, + ) -> dict[str, object] | None: + from .trajectory.store import load_trajectory_patch_trail + + return load_trajectory_patch_trail(self._conn, trajectory_id=trajectory_id) + + def load_trajectory_patch_trails( + self, + trajectory_ids: Sequence[str], + ) -> dict[str, dict[str, object]]: + from .trajectory.store import load_trajectory_patch_trails + + return load_trajectory_patch_trails( + self._conn, + trajectory_ids=trajectory_ids, + ) + + def list_canonical_trajectories_for_export( + self, + *, + project_id: str, + limit: int = 10_000, + ) -> 
list[Trajectory]: + from .trajectory.store import list_canonical_trajectories_for_export + + return list_canonical_trajectories_for_export( + self._conn, + project_id=project_id, + limit=limit, + ) + + def replace_experiences( + self, + *, + project_id: str, + experiences: Sequence[Experience], + ) -> int: + from .experience.store import replace_experiences + + return replace_experiences( + self._conn, project_id=project_id, experiences=experiences + ) + + def list_experiences(self, *, project_id: str) -> list[Experience]: + from .experience.store import list_experiences + + return list_experiences(self._conn, project_id=project_id) + + def count_experiences(self, *, project_id: str) -> int: + from .experience.store import count_experiences + + return count_experiences(self._conn, project_id=project_id) + + def find_experience(self, experience_id: str) -> Experience | None: + from .experience.store import find_experience + + return find_experience(self._conn, experience_id=experience_id) + + @property + def connection(self) -> sqlite3.Connection: + return self._conn + + def write_record(self, record: MemoryRecord) -> None: + self._insert_record(record) + self._conn.commit() + + def _commit_upsert_result( + self, + *, + action: UpsertAction, + record_id: str, + sync_fts: bool, + revision_written: bool = False, + commit: bool = True, + ) -> UpsertResult: + if commit: + self._conn.commit() + if sync_fts: + self.sync_fts_record(record_id) + return UpsertResult( + action=action, + record_id=record_id, + revision_written=revision_written, + ) + + def upsert_record( + self, record: MemoryRecord, *, commit: bool = True + ) -> UpsertResult: + existing = self.find_by_identity_key(record.project_id, record.identity_key) + now = current_report_timestamp_utc() + if existing is not None and ( + existing.origin == "human" or existing.approved_by + ): + self._conn.execute( + """ + UPDATE memory_records + SET last_verified_at_utc=?, updated_at_utc=?, + verified_on_branch=?, 
verified_at_commit=? + WHERE id=? + """, + ( + now, + now, + record.verified_on_branch, + record.verified_at_commit, + existing.id, + ), + ) + if commit: + self._conn.commit() + return UpsertResult(action="skipped", record_id=existing.id) + + revision_written = False + action: UpsertAction + if existing is None: + self._insert_record(record) + target_id = record.id + action = "created" + elif _record_content_equal(existing, record): + self._conn.execute( + """ + UPDATE memory_records SET + last_verified_at_utc=?, updated_at_utc=?, + verified_on_branch=?, verified_at_commit=?, + report_digest=?, code_fingerprint=?, + status='active', stale_reason=NULL + WHERE id=? + """, + ( + now, + now, + record.verified_on_branch, + record.verified_at_commit, + record.report_digest, + record.code_fingerprint, + existing.id, + ), + ) + target_id = existing.id + action = "unchanged" + else: + revision_number = self._next_revision_number(existing.id) + self.write_revision( + MemoryRevision( + id=generate_memory_id(prefix="rev"), + memory_id=existing.id, + revision_number=revision_number, + previous_statement=existing.statement, + new_statement=record.statement, + previous_payload=existing.payload, + new_payload=record.payload, + reason="upsert_content_changed", + changed_by=record.created_by, + changed_at_utc=now, + branch=record.verified_on_branch, + commit=record.verified_at_commit, + ) + ) + self._conn.execute( + """ + UPDATE memory_records SET + statement=?, summary=?, payload_json=?, status=?, confidence=?, + ingest_source=?, updated_at_utc=?, last_verified_at_utc=?, + report_digest=?, code_fingerprint=?, verified_on_branch=?, + verified_at_commit=?, schema_version=? + WHERE id=? 
+ """, + ( + record.statement, + record.summary, + payload_json_text(record.payload), + record.status, + record.confidence, + record.ingest_source, + now, + now, + record.report_digest, + record.code_fingerprint, + record.verified_on_branch, + record.verified_at_commit, + record.schema_version, + existing.id, + ), + ) + target_id = existing.id + action = "updated" + revision_written = True + + return self._commit_upsert_result( + action=action, + record_id=target_id, + sync_fts=True, + revision_written=revision_written, + commit=commit, + ) + + def find_record(self, record_id: str) -> MemoryRecord | None: + row = self._conn.execute( + "SELECT * FROM memory_records WHERE id=?", + (record_id,), + ).fetchone() + if row is None: + return None + return _record_from_row(row) + + def find_by_identity_key(self, project_id: str, key: str) -> MemoryRecord | None: + row = self._conn.execute( + "SELECT * FROM memory_records WHERE project_id=? AND identity_key=?", + (project_id, key), + ).fetchone() + if row is None: + return None + return _record_from_row(row) + + def query_records(self, query: MemoryQuery) -> Sequence[MemoryRecord]: + clauses = ["project_id=?"] + params: list[object] = [query.project_id] + if query.types: + placeholders = ", ".join("?" for _ in query.types) + clauses.append(f"type IN ({placeholders})") + params.extend(query.types) + if query.statuses: + placeholders = ", ".join("?" for _ in query.statuses) + clauses.append(f"status IN ({placeholders})") + params.extend(query.statuses) + if query.subject_kind and query.subject_key: + clauses.append( + "id IN (SELECT memory_id FROM memory_subjects " + "WHERE subject_kind=? AND subject_key=?)" + ) + params.extend([query.subject_kind, query.subject_key]) + elif query.subject_kind and query.subject_key_prefix: + clauses.append( + "id IN (SELECT memory_id FROM memory_subjects " + "WHERE subject_kind=? 
AND subject_key LIKE ?)" + ) + params.extend([query.subject_kind, f"{query.subject_key_prefix}%"]) + where = " AND ".join(clauses) + rows = self._conn.execute( + f"SELECT * FROM memory_records WHERE {where} " + "ORDER BY updated_at_utc DESC, id ASC LIMIT ? OFFSET ?", + (*params, query.limit, query.offset), + ).fetchall() + return [_record_from_row(row) for row in rows] + + def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: + rows = self._conn.execute( + """ + SELECT MIN(id) AS id, memory_id, subject_kind, subject_key, relation + FROM memory_subjects + WHERE memory_id=? + GROUP BY memory_id, subject_kind, subject_key, relation + ORDER BY subject_kind ASC, subject_key ASC, id ASC + """, + (memory_id,), + ).fetchall() + return [ + MemorySubject( + id=str(row["id"]), + memory_id=str(row["memory_id"]), + subject_kind=str(row["subject_kind"]), # type: ignore[arg-type] + subject_key=str(row["subject_key"]), + relation=str(row["relation"]), # type: ignore[arg-type] + ) + for row in rows + ] + + def list_subjects_for_memories( + self, + memory_ids: Sequence[str], + ) -> dict[str, list[MemorySubject]]: + normalized_ids = tuple(sorted(set(memory_ids))) + grouped: dict[str, list[MemorySubject]] = { + memory_id: [] for memory_id in normalized_ids + } + for batch in chunked(normalized_ids, _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" 
for _ in batch) + rows = self._conn.execute( + f""" + SELECT MIN(id) AS id, memory_id, subject_kind, subject_key, relation + FROM memory_subjects + WHERE memory_id IN ({placeholders}) + GROUP BY memory_id, subject_kind, subject_key, relation + ORDER BY memory_id ASC, subject_kind ASC, subject_key ASC, id ASC + """, + batch, + ).fetchall() + for row in rows: + memory_id = str(row["memory_id"]) + grouped[memory_id].append( + MemorySubject( + id=str(row["id"]), + memory_id=memory_id, + subject_kind=str(row["subject_kind"]), # type: ignore[arg-type] + subject_key=str(row["subject_key"]), + relation=str(row["relation"]), # type: ignore[arg-type] + ) + ) + return grouped + + def list_evidence_for_memory(self, memory_id: str) -> list[MemoryEvidence]: + rows = self._conn.execute( + """ + SELECT id, memory_id, evidence_kind, ref, locator, quote, digest, + created_at_utc + FROM memory_evidence + WHERE memory_id=? + ORDER BY created_at_utc ASC, id ASC + """, + (memory_id,), + ).fetchall() + return [ + MemoryEvidence( + id=str(row["id"]), + memory_id=str(row["memory_id"]), + evidence_kind=str(row["evidence_kind"]), # type: ignore[arg-type] + ref=str(row["ref"]), + locator=str(row["locator"]) if row["locator"] is not None else None, + quote=str(row["quote"]) if row["quote"] is not None else None, + digest=str(row["digest"]) if row["digest"] is not None else None, + created_at_utc=str(row["created_at_utc"]), + ) + for row in rows + ] + + def count_evidence_for_memory(self, memory_id: str) -> int: + row = self._conn.execute( + "SELECT COUNT(*) FROM memory_evidence WHERE memory_id=?", + (memory_id,), + ).fetchone() + return int(row[0]) if row is not None else 0 + + def count_evidence_for_memories( + self, + memory_ids: Sequence[str], + ) -> dict[str, int]: + normalized_ids = tuple(sorted(set(memory_ids))) + counts = dict.fromkeys(normalized_ids, 0) + for batch in chunked(normalized_ids, _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" 
for _ in batch) + rows = self._conn.execute( + f""" + SELECT memory_id, COUNT(*) AS evidence_count + FROM memory_evidence + WHERE memory_id IN ({placeholders}) + GROUP BY memory_id + ORDER BY memory_id ASC + """, + batch, + ).fetchall() + for row in rows: + counts[str(row["memory_id"])] = int(row["evidence_count"]) + return counts + + def search_records( + self, + *, + project_id: str, + statement_query: str, + types: Sequence[str] = (), + statuses: Sequence[str] = (), + confidences: Sequence[str] = (), + limit: int = 100, + match_mode: SearchMatchMode = "any", + ) -> list[MemoryRecord]: + if self._fts_available(): + ranked = self._search_records_fts( + project_id=project_id, + statement_query=statement_query, + types=types, + statuses=statuses, + confidences=confidences, + limit=limit, + match_mode=match_mode, + ) + if ranked is not None: + return ranked + return self._search_records_like( + project_id=project_id, + statement_query=statement_query, + types=types, + statuses=statuses, + confidences=confidences, + limit=limit, + match_mode=match_mode, + ) + + def sync_fts_record(self, memory_id: str) -> None: + if not self._fts_available(): + return + record = self.find_record(memory_id) + if record is None: + self._conn.execute( + "DELETE FROM memory_records_fts WHERE memory_id=?", + (memory_id,), + ) + return + subjects = self.list_subjects_for_memory(memory_id) + self._upsert_fts_record(record, subjects) + + def rebuild_project_fts(self, project_id: str) -> int: + if not self._fts_available(): + return 0 + self._conn.execute( + "DELETE FROM memory_records_fts WHERE project_id=?", + (project_id,), + ) + count = 0 + for record in self.list_records_for_project(project_id): + subjects = self.list_subjects_for_memory(record.id) + self._upsert_fts_record(record, subjects) + count += 1 + self._conn.commit() + return count + + def _fts_available(self) -> bool: + row = self._conn.execute( + "SELECT name FROM sqlite_master WHERE name='memory_records_fts'" + ).fetchone() + 
return row is not None + + def _upsert_fts_record( + self, + record: MemoryRecord, + subjects: Sequence[MemorySubject], + ) -> None: + search_text = build_search_text(record=record, subjects=subjects) + self._conn.execute( + "DELETE FROM memory_records_fts WHERE memory_id=?", + (record.id,), + ) + self._conn.execute( + """ + INSERT INTO memory_records_fts( + memory_id, project_id, record_type, ingest_source, status, search_text + ) VALUES (?, ?, ?, ?, ?, ?) + """, + ( + record.id, + record.project_id, + record.type, + record.ingest_source, + record.status, + search_text, + ), + ) + + def _search_records_fts( + self, + *, + project_id: str, + statement_query: str, + types: Sequence[str], + statuses: Sequence[str], + confidences: Sequence[str], + limit: int, + match_mode: SearchMatchMode, + ) -> list[MemoryRecord] | None: + match_expr = fts_match_expression(statement_query, match_mode=match_mode) + if match_expr is None: + return [] + clauses = [ + "memory_records_fts MATCH ?", + "memory_records_fts.project_id = ?", + ] + params: list[object] = [match_expr, project_id] + _append_search_filters( + clauses, + params, + types, + statuses, + confidences, + type_column="memory_records_fts.record_type", + status_column="memory_records_fts.status", + confidence_via_subquery=True, + ) + where = " AND ".join(clauses) + rows = self._conn.execute( + f""" + SELECT memory_records.* + FROM memory_records_fts + JOIN memory_records ON memory_records.id = memory_records_fts.memory_id + WHERE {where} + ORDER BY bm25(memory_records_fts), memory_records.updated_at_utc DESC, + memory_records.id ASC + LIMIT ? 
+ """, + (*params, limit), + ).fetchall() + return [_record_from_row(row) for row in rows] + + def _search_records_like( + self, + *, + project_id: str, + statement_query: str, + types: Sequence[str], + statuses: Sequence[str], + confidences: Sequence[str], + limit: int, + match_mode: SearchMatchMode, + ) -> list[MemoryRecord]: + tokens = tokenize_query(statement_query) + if not tokens: + return [] + clauses = ["project_id=?"] + params: list[object] = [project_id] + token_clauses: list[str] = [] + for token in tokens: + token_clauses.append( + "(LOWER(statement) LIKE ? ESCAPE '\\' OR LOWER(COALESCE(summary, '')) " + "LIKE ? ESCAPE '\\')" + ) + escaped = _escape_like(token) + params.extend([f"%{escaped}%", f"%{escaped}%"]) + joiner = " AND " if match_mode == "all" else " OR " + clauses.append(f"({joiner.join(token_clauses)})") + _append_search_filters( + clauses, + params, + types, + statuses, + confidences, + type_column="type", + status_column="status", + confidence_via_subquery=False, + ) + where = " AND ".join(clauses) + rows = self._conn.execute( + f"SELECT * FROM memory_records WHERE {where} " + "ORDER BY updated_at_utc DESC, id ASC LIMIT ?", + (*params, limit), + ).fetchall() + return [_record_from_row(row) for row in rows] + + def write_subject(self, subject: MemorySubject, *, commit: bool = True) -> None: + existing = self._conn.execute( + """ + SELECT id FROM memory_subjects + WHERE memory_id=? AND subject_kind=? AND subject_key=? AND relation=? + LIMIT 1 + """, + ( + subject.memory_id, + subject.subject_kind, + subject.subject_key, + subject.relation, + ), + ).fetchone() + if existing is not None: + return + self._conn.execute( + """ + INSERT INTO memory_subjects( + id, memory_id, subject_kind, subject_key, relation + ) VALUES (?, ?, ?, ?, ?) 
+ """, + ( + subject.id, + subject.memory_id, + subject.subject_kind, + subject.subject_key, + subject.relation, + ), + ) + if commit: + self._conn.commit() # standalone writes must survive store.close() + + def prune_duplicate_subjects(self, *, commit: bool = True) -> int: + before = self._conn.execute("SELECT COUNT(*) FROM memory_subjects").fetchone() + before_count = int(before[0]) if before is not None else 0 + self._conn.execute( + """ + DELETE FROM memory_subjects + WHERE id NOT IN ( + SELECT MIN(id) + FROM memory_subjects + GROUP BY memory_id, subject_kind, subject_key, relation + ) + """ + ) + after = self._conn.execute("SELECT COUNT(*) FROM memory_subjects").fetchone() + after_count = int(after[0]) if after is not None else 0 + removed = max(0, before_count - after_count) + if commit and removed: + self._conn.commit() + return removed + + def write_evidence(self, evidence: MemoryEvidence) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO memory_evidence( + id, memory_id, evidence_kind, ref, locator, quote, digest, + created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + evidence.id, + evidence.memory_id, + evidence.evidence_kind, + evidence.ref, + evidence.locator, + evidence.quote, + evidence.digest, + evidence.created_at_utc, + ), + ) + + def write_link(self, link: MemoryLink) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO memory_links( + id, project_id, from_memory_id, to_memory_id, relation, + created_by, created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + link.id, + link.project_id, + link.from_memory_id, + link.to_memory_id, + link.relation, + link.created_by, + link.created_at_utc, + ), + ) + + def list_links_for_records( + self, + *, + project_id: str, + record_ids: Sequence[str], + relations: Sequence[LinkRelation], + ) -> list[MemoryLink]: + """Typed links (in either direction) touching the given records. 
+ + The 1-hop neighbourhood for honest retrieval: deterministic order, the + other endpoint may be outside the queried set (surfaced as a relation, + never as a new scope hit). + """ + ids = list(record_ids) + rels = list(relations) + if not ids or not rels: + return [] + id_ph = ",".join("?" * len(ids)) + rel_ph = ",".join("?" * len(rels)) + rows = self._conn.execute( + "SELECT id, project_id, from_memory_id, to_memory_id, relation, " + "created_by, created_at_utc FROM memory_links " + f"WHERE project_id=? AND relation IN ({rel_ph}) " + f"AND (from_memory_id IN ({id_ph}) OR to_memory_id IN ({id_ph})) " + "ORDER BY from_memory_id ASC, to_memory_id ASC, relation ASC", + (project_id, *rels, *ids, *ids), + ).fetchall() + return [ + MemoryLink( + id=str(row["id"]), + project_id=str(row["project_id"]), + from_memory_id=str(row["from_memory_id"]), + to_memory_id=str(row["to_memory_id"]), + relation=cast(LinkRelation, str(row["relation"])), + created_by=str(row["created_by"]), + created_at_utc=str(row["created_at_utc"]), + ) + for row in rows + ] + + def write_ingestion_run(self, run: IngestionRun) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO memory_ingestion_runs( + id, project_id, mode, started_at_utc, finished_at_utc, status, + analysis_fingerprint, report_digest, branch, "commit", + records_created, records_updated, records_marked_stale, + candidates_created, contradictions_found, message + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + run.id, + run.project_id, + run.mode, + run.started_at_utc, + run.finished_at_utc, + run.status, + run.analysis_fingerprint, + run.report_digest, + run.branch, + run.commit, + run.records_created, + run.records_updated, + run.records_marked_stale, + run.candidates_created, + run.contradictions_found, + run.message, + ), + ) + self._conn.commit() + + def write_revision(self, revision: MemoryRevision) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO memory_revisions( + id, memory_id, revision_number, previous_statement, new_statement, + previous_payload, new_payload, reason, changed_by, changed_at_utc, + branch, "commit" + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + revision.id, + revision.memory_id, + revision.revision_number, + revision.previous_statement, + revision.new_statement, + payload_json_text(revision.previous_payload), + payload_json_text(revision.new_payload), + revision.reason, + revision.changed_by, + revision.changed_at_utc, + revision.branch, + revision.commit, + ), + ) + + def _update_lifecycle_status( + self, + record_id: str, + *, + status: str, + stale_reason: str | None, + commit: bool, + ) -> None: + now = current_report_timestamp_utc() + self._conn.execute( + """ + UPDATE memory_records + SET status=?, stale_reason=?, updated_at_utc=? + WHERE id=? 
+ """, + (status, stale_reason, now, record_id), + ) + if commit: + self._conn.commit() + self.sync_fts_record(record_id) + + def mark_stale(self, record_id: str, reason: str, *, commit: bool = True) -> None: + self._update_lifecycle_status( + record_id, + status="stale", + stale_reason=reason, + commit=commit, + ) + + def mark_historical(self, record_id: str, *, commit: bool = True) -> None: + self._update_lifecycle_status( + record_id, + status="historical", + stale_reason=None, + commit=commit, + ) + + def restore_anchor_active(self, record_id: str, *, commit: bool = True) -> None: + self._update_lifecycle_status( + record_id, + status="active", + stale_reason=None, + commit=commit, + ) + + def list_records_for_project( + self, + project_id: str, + *, + statuses: tuple[str, ...] = (), + limit: int = 10000, + ) -> list[MemoryRecord]: + if statuses: + placeholders = ", ".join("?" for _ in statuses) + rows = self._conn.execute( + f"SELECT * FROM memory_records WHERE project_id=? " + f"AND status IN ({placeholders}) " + "ORDER BY updated_at_utc DESC, id ASC LIMIT ?", + (project_id, *statuses, limit), + ).fetchall() + else: + rows = self._conn.execute( + "SELECT * FROM memory_records WHERE project_id=? " + "ORDER BY updated_at_utc DESC, id ASC LIMIT ?", + (project_id, limit), + ).fetchall() + return [_record_from_row(row) for row in rows] + + def update_record_status( + self, + record_id: str, + *, + status: str, + approved_by: str | None = None, + approved_at_utc: str | None = None, + stale_reason: str | None = None, + commit: bool = True, + ) -> None: + now = current_report_timestamp_utc() + self._conn.execute( + """ + UPDATE memory_records + SET status=?, approved_by=COALESCE(?, approved_by), + approved_at_utc=COALESCE(?, approved_at_utc), + stale_reason=?, updated_at_utc=? + WHERE id=? 
def delete_records_older_than(
    self,
    *,
    status: str,
    updated_before_utc: str,
    commit: bool = True,
) -> int:
    """Delete records in ``status`` whose ``updated_at_utc`` is before a cutoff.

    Matching ids are selected first and then deleted one by one. The cutoff is
    compared against the stored ``updated_at_utc`` values with SQL ``<``.

    Args:
        status: Only rows with this ``status`` are considered.
        updated_before_utc: Exclusive upper bound for ``updated_at_utc``.
        commit: Commit the connection afterwards (also when nothing matched).

    Returns:
        Number of ids selected for deletion.
    """
    doomed = tuple(
        str(found["id"])
        for found in self._conn.execute(
            "SELECT id FROM memory_records WHERE status=? AND updated_at_utc < ?",
            (status, updated_before_utc),
        ).fetchall()
    )
    deleted = 0
    for deleted, target_id in enumerate(doomed, start=1):
        self._conn.execute(
            "DELETE FROM memory_records WHERE id=?",
            (target_id,),
        )
    if commit:
        self._conn.commit()
    return deleted
def count_records_grouped(self, *, column: str) -> dict[str, int]:
    """Count ``memory_records`` rows per distinct value of ``column``.

    ``column`` is interpolated into the SQL text, so it is checked against a
    fixed allow-list first.

    Args:
        column: One of ``"type"``, ``"status"`` or ``"origin"``.

    Returns:
        Mapping of stringified column value to row count, in ascending
        column-value order.

    Raises:
        ValueError: If ``column`` is not in the allow-list.
    """
    groupable = frozenset({"type", "status", "origin"})
    if column in groupable:
        query = (
            f"SELECT {column}, COUNT(*) FROM memory_records "
            f"GROUP BY {column} ORDER BY {column}"
        )
        tally: dict[str, int] = {}
        for entry in self._conn.execute(query).fetchall():
            tally[str(entry[0])] = int(entry[1])
        return tally
    msg = f"unsupported count column: {column}"
    raise ValueError(msg)
+ ) + """, + ( + record.id, + record.project_id, + record.identity_key, + record.type, + record.status, + record.confidence, + record.origin, + record.ingest_source, + record.statement, + record.summary, + payload_json_text(record.payload), + record.created_at_utc, + record.updated_at_utc, + record.last_verified_at_utc, + record.expires_at_utc, + record.created_by, + record.verified_by, + record.approved_by, + record.approved_at_utc, + record.report_digest, + record.code_fingerprint, + record.stale_reason, + record.created_on_branch, + record.created_at_commit, + record.verified_on_branch, + record.verified_at_commit, + record.schema_version, + ), + ) + + def _next_revision_number(self, memory_id: str) -> int: + row = self._conn.execute( + "SELECT COALESCE(MAX(revision_number), 0) FROM memory_revisions " + "WHERE memory_id=?", + (memory_id,), + ).fetchone() + current = int(row[0]) if row is not None else 0 + return current + 1 + + +def _append_in_filter( + clauses: list[str], + params: list[object], + values: Sequence[str], + column: str, +) -> None: + if not values: + return + placeholders = ", ".join("?" for _ in values) + clauses.append(f"{column} IN ({placeholders})") + params.extend(values) + + +def _append_confidence_filter( + clauses: list[str], + params: list[object], + confidences: Sequence[str], + *, + via_subquery: bool, +) -> None: + if not confidences: + return + placeholders = ", ".join("?" 
for _ in confidences) + if via_subquery: + clauses.append( + "memory_records.id IN (" + f"SELECT id FROM memory_records WHERE confidence IN ({placeholders})" + ")" + ) + else: + clauses.append(f"confidence IN ({placeholders})") + params.extend(confidences) + + +def _append_search_filters( + clauses: list[str], + params: list[object], + types: Sequence[str], + statuses: Sequence[str], + confidences: Sequence[str], + *, + type_column: str, + status_column: str, + confidence_via_subquery: bool, +) -> None: + _append_in_filter(clauses, params, types, type_column) + _append_in_filter(clauses, params, statuses, status_column) + _append_confidence_filter( + clauses, + params, + confidences, + via_subquery=confidence_via_subquery, + ) + + +def _escape_like(value: str) -> str: + return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + + +def record_content_equal(left: MemoryRecord, right: MemoryRecord) -> bool: + return left.statement == right.statement and left.payload == right.payload + + +def _record_content_equal(left: MemoryRecord, right: MemoryRecord) -> bool: + return record_content_equal(left, right) + + +def _record_from_row(row: sqlite3.Row) -> MemoryRecord: + payload = parse_payload_json(row["payload_json"]) + return MemoryRecord( + id=str(row["id"]), + project_id=str(row["project_id"]), + identity_key=str(row["identity_key"]), + type=str(row["type"]), # type: ignore[arg-type] + status=str(row["status"]), # type: ignore[arg-type] + confidence=str(row["confidence"]), # type: ignore[arg-type] + origin=str(row["origin"]), # type: ignore[arg-type] + ingest_source=str(row["ingest_source"]), # type: ignore[arg-type] + statement=str(row["statement"]), + summary=str(row["summary"]) if row["summary"] is not None else None, + payload=payload, + created_at_utc=str(row["created_at_utc"]), + updated_at_utc=str(row["updated_at_utc"]), + last_verified_at_utc=( + str(row["last_verified_at_utc"]) + if row["last_verified_at_utc"] is not None + else None + ), + 
expires_at_utc=( + str(row["expires_at_utc"]) if row["expires_at_utc"] is not None else None + ), + created_by=str(row["created_by"]), + verified_by=str(row["verified_by"]) if row["verified_by"] is not None else None, + approved_by=str(row["approved_by"]) if row["approved_by"] is not None else None, + approved_at_utc=( + str(row["approved_at_utc"]) if row["approved_at_utc"] is not None else None + ), + report_digest=( + str(row["report_digest"]) if row["report_digest"] is not None else None + ), + code_fingerprint=( + str(row["code_fingerprint"]) + if row["code_fingerprint"] is not None + else None + ), + stale_reason=( + str(row["stale_reason"]) if row["stale_reason"] is not None else None + ), + created_on_branch=( + str(row["created_on_branch"]) + if row["created_on_branch"] is not None + else None + ), + created_at_commit=( + str(row["created_at_commit"]) + if row["created_at_commit"] is not None + else None + ), + verified_on_branch=( + str(row["verified_on_branch"]) + if row["verified_on_branch"] is not None + else None + ), + verified_at_commit=( + str(row["verified_at_commit"]) + if row["verified_at_commit"] is not None + else None + ), + schema_version=str(row["schema_version"]), + ) + + +__all__ = ["SqliteEngineeringMemoryStore"] diff --git a/codeclone/memory/staleness.py b/codeclone/memory/staleness.py new file mode 100644 index 00000000..d393d3dc --- /dev/null +++ b/codeclone/memory/staleness.py @@ -0,0 +1,430 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable, Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path + +from ..utils.coerce import as_mapping, as_sequence +from .enums import MemoryStatus +from .models import MemoryEvidence, MemoryRecord, MemorySubject, RecordBatch +from .project import subject_fingerprint_for_subject +from .sqlite_store import SqliteEngineeringMemoryStore, record_content_equal + +SUBJECT_FINGERPRINT_DRIFT = "subject_fingerprint_drift" +ANCHOR_SUBJECT_KINDS = ("path", "test", "doc", "module") + + +@dataclass(frozen=True, slots=True) +class StalenessReport: + records_marked_stale: int + records_marked_historical: int + records_reactivated: int + reasons: dict[str, int] + + +def inventory_paths_from_report( + report_document: Mapping[str, object], +) -> frozenset[str]: + inventory = as_mapping(report_document.get("inventory")) + file_registry = as_mapping(inventory.get("file_registry")) + file_items = as_sequence(file_registry.get("items")) + paths: set[str] = set() + for item in file_items: + file_path = str(item).replace("\\", "/").strip("/") + if file_path: + paths.add(file_path) + return frozenset(paths) + + +def _batch_evidence_index( + batch: RecordBatch, +) -> dict[tuple[str, str, str], str | None]: + record_identity: dict[str, str] = { + record.id: record.identity_key for record in batch.records + } + index: dict[tuple[str, str, str], str | None] = {} + for evidence in batch.evidence: + identity = record_identity.get(evidence.memory_id) + if identity is None: + continue + key = (identity, evidence.evidence_kind, evidence.ref) + index[key] = evidence.digest + return index + + +def _skip_refresh_candidate(record: MemoryRecord) -> bool: + # Drafts are unapproved agent candidates awaiting human governance. 
+ return record.status == "draft" + + +def _reactivate_on_fingerprint_match(record: MemoryRecord) -> bool: + return record.status == "historical" or ( + record.status == "stale" and record.stale_reason == SUBJECT_FINGERPRINT_DRIFT + ) + + +def _primary_anchor_subject( + subjects: Sequence[MemorySubject], +) -> MemorySubject | None: + for kind in ANCHOR_SUBJECT_KINDS: + for subject in subjects: + if subject.subject_kind == kind: + return subject + return None + + +def _evaluate_anchor_drift_status( + record: MemoryRecord, + *, + anchor_subject: MemorySubject, + root_path: Path, +) -> MemoryStatus | None: + if not record.created_at_commit or record.code_fingerprint is None: + return None + + current_fingerprint = subject_fingerprint_for_subject(root_path, anchor_subject) + anchored_fingerprint = record.code_fingerprint + + if current_fingerprint is None: + if record.status == "historical": + return None + return "historical" + + if current_fingerprint == anchored_fingerprint: + if _reactivate_on_fingerprint_match(record): + return "active" + return None + + return "stale" + + +@dataclass(frozen=True, slots=True) +class _AnchorDriftOutcome: + handled: bool + marked_stale: int = 0 + marked_historical: int = 0 + reactivated: int = 0 + counter_key: str | None = None + + +@dataclass(frozen=True, slots=True) +class _DriftTransitionSpec: + apply: Callable[[SqliteEngineeringMemoryStore, str], None] + marked_stale: int = 0 + marked_historical: int = 0 + reactivated: int = 0 + counter_key: str = "" + + +def _mark_historical_transition( + store: SqliteEngineeringMemoryStore, + record_id: str, +) -> None: + store.mark_historical(record_id, commit=False) + + +def _mark_active_transition( + store: SqliteEngineeringMemoryStore, + record_id: str, +) -> None: + store.restore_anchor_active(record_id, commit=False) + + +def _mark_stale_transition( + store: SqliteEngineeringMemoryStore, + record_id: str, +) -> None: + store.mark_stale(record_id, SUBJECT_FINGERPRINT_DRIFT, commit=False) 
+ + +_DRIFT_TRANSITIONS: dict[MemoryStatus, _DriftTransitionSpec] = { + "historical": _DriftTransitionSpec( + apply=_mark_historical_transition, + marked_historical=1, + counter_key="historical", + ), + "active": _DriftTransitionSpec( + apply=_mark_active_transition, + reactivated=1, + counter_key="reactivated", + ), + "stale": _DriftTransitionSpec( + apply=_mark_stale_transition, + marked_stale=1, + counter_key=SUBJECT_FINGERPRINT_DRIFT, + ), +} + + +def _commit_anchor_drift_transition( + store: SqliteEngineeringMemoryStore, + record_id: str, + drift_status: MemoryStatus, +) -> _AnchorDriftOutcome: + spec = _DRIFT_TRANSITIONS.get(drift_status, _DRIFT_TRANSITIONS["stale"]) + spec.apply(store, record_id) + return _AnchorDriftOutcome( + handled=True, + marked_stale=spec.marked_stale, + marked_historical=spec.marked_historical, + reactivated=spec.reactivated, + counter_key=spec.counter_key or None, + ) + + +def _apply_anchor_drift_for_record( + store: SqliteEngineeringMemoryStore, + record: MemoryRecord, + *, + anchor_subject: MemorySubject, + root_path: Path, +) -> _AnchorDriftOutcome: + drift_status = _evaluate_anchor_drift_status( + record, + anchor_subject=anchor_subject, + root_path=root_path, + ) + if drift_status is None: + return _AnchorDriftOutcome(handled=False) + if drift_status == record.status: + return _AnchorDriftOutcome(handled=True) + return _commit_anchor_drift_transition(store, record.id, drift_status) + + +def _evidence_stale_reasons( + record: MemoryRecord, + evidence_items: Sequence[MemoryEvidence], + batch_evidence: dict[tuple[str, str, str], str | None], +) -> list[str]: + for evidence in evidence_items: + key = (record.identity_key, evidence.evidence_kind, evidence.ref) + batch_digest = batch_evidence.get(key) + if batch_digest is None: + continue + if evidence.digest is not None and batch_digest != evidence.digest: + return ["evidence_digest_mismatch"] + return [] + + +def _collect_refresh_staleness_reasons( + record: MemoryRecord, + *, + 
batch_identity_keys: frozenset[str], + batch_by_identity: Mapping[str, MemoryRecord], + batch_evidence: dict[tuple[str, str, str], str | None], + report_digest: str | None, + evidence_items: Sequence[MemoryEvidence], +) -> list[str]: + reasons: list[str] = [] + if record.origin == "system" and record.identity_key not in batch_identity_keys: + reasons.append("missing_from_refresh") + + incoming = batch_by_identity.get(record.identity_key) + if ( + incoming is not None + and record.approved_by + and not record_content_equal(record, incoming) + ): + reasons.append("refresh_content_contradiction") + + reasons.extend(_evidence_stale_reasons(record, evidence_items, batch_evidence)) + + if ( + record.report_digest is not None + and record.identity_key not in batch_identity_keys + and report_digest is not None + and record.report_digest != report_digest + ): + reasons.append("report_digest_shift") + return reasons + + +def _refresh_stale_primary_reason( + store: SqliteEngineeringMemoryStore, + record: MemoryRecord, + *, + batch_identity_keys: frozenset[str], + batch_by_identity: Mapping[str, MemoryRecord], + batch_evidence: dict[tuple[str, str, str], str | None], + report_digest: str | None, +) -> str | None: + if _skip_refresh_candidate(record) or record.status in {"historical"}: + return None + if record.status == "stale": + return None + reasons = _collect_refresh_staleness_reasons( + record, + batch_identity_keys=batch_identity_keys, + batch_by_identity=batch_by_identity, + batch_evidence=batch_evidence, + report_digest=report_digest, + evidence_items=store.list_evidence_for_memory(record.id), + ) + return reasons[0] if reasons else None + + +@dataclass(frozen=True, slots=True) +class _RefreshStalenessDelta: + marked_stale: int = 0 + marked_historical: int = 0 + reactivated: int = 0 + reason_counts: tuple[tuple[str, int], ...] 
= () + + +def _refresh_staleness_for_record( + store: SqliteEngineeringMemoryStore, + record: MemoryRecord, + *, + resolved_root: Path, + batch_identity_keys: frozenset[str], + batch_by_identity: Mapping[str, MemoryRecord], + batch_evidence: dict[tuple[str, str, str], str | None], + report_digest: str | None, +) -> _RefreshStalenessDelta: + if _skip_refresh_candidate(record): + return _RefreshStalenessDelta() + + subjects = store.list_subjects_for_memory(record.id) + anchor_subject = _primary_anchor_subject(subjects) + if anchor_subject is not None: + drift_outcome = _apply_anchor_drift_for_record( + store, + record, + anchor_subject=anchor_subject, + root_path=resolved_root, + ) + if drift_outcome.handled: + reason_counts: tuple[tuple[str, int], ...] = () + if drift_outcome.counter_key is not None: + reason_counts = ((drift_outcome.counter_key, 1),) + return _RefreshStalenessDelta( + marked_stale=drift_outcome.marked_stale, + marked_historical=drift_outcome.marked_historical, + reactivated=drift_outcome.reactivated, + reason_counts=reason_counts, + ) + + primary = _refresh_stale_primary_reason( + store, + record, + batch_identity_keys=batch_identity_keys, + batch_by_identity=batch_by_identity, + batch_evidence=batch_evidence, + report_digest=report_digest, + ) + if primary is None: + return _RefreshStalenessDelta() + store.mark_stale(record.id, primary, commit=False) + return _RefreshStalenessDelta(marked_stale=1, reason_counts=((primary, 1),)) + + +def apply_refresh_staleness( + store: SqliteEngineeringMemoryStore, + *, + project_id: str, + batch: RecordBatch, + report_document: Mapping[str, object], + root_path: Path, + report_digest: str | None = None, + commit: bool = True, +) -> StalenessReport: + """Mark affected records stale/historical/active after a refresh ingest.""" + + del report_document # inventory membership no longer drives freshness. 
def _normalize_scope_path(path: str) -> str:
    """Return ``path`` with forward slashes and no leading/trailing ``/``."""
    return path.replace("\\", "/").strip("/")


def apply_scope_staleness(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    changed_paths: Sequence[str],
    commit: bool = True,
) -> StalenessReport:
    """Mark records stale when linked paths were touched in an accepted patch.

    Every active record of ``project_id`` is checked. A record is marked
    stale with reason ``"scope_files_changed"`` as soon as one of its subject
    keys equals a changed path or lies below a changed path treated as a
    directory. Each record is marked at most once.

    Changed paths are normalized (``\\`` -> ``/``, surrounding ``/`` stripped,
    leading ``./`` removed). Subject keys are compared in two forms: the same
    normalization without and with the ``./`` removal. Previously only the
    first form was used, so a subject stored as ``./src/a.py`` never matched
    the changed path ``src/a.py``.

    Args:
        store: Store that lists records/subjects and receives ``mark_stale``.
        project_id: Project whose active records are examined.
        changed_paths: Paths touched by the patch.
        commit: Commit the store once after all records were processed.

    Returns:
        A ``StalenessReport`` whose stale count and ``reasons`` reflect the
        records marked here; the historical and reactivated counts are 0.
    """
    changed = frozenset(
        _normalize_scope_path(path).removeprefix("./") for path in changed_paths
    )
    # Loop-invariant: "<scope>/" prefixes used for the directory check.
    # ``str.startswith`` accepts a tuple and returns False for an empty one.
    directory_prefixes = tuple(f"{scope}/" for scope in changed)

    reason_counts: dict[str, int] = {}
    marked = 0
    for record in store.list_records_for_project(project_id, statuses=("active",)):
        if record.status == "stale":
            continue
        for subject in store.list_subjects_for_memory(record.id):
            raw_key = _normalize_scope_path(subject.subject_key)
            # Keep the raw form so every match the old comparison produced
            # still matches; the "./"-stripped form adds the missing ones.
            candidates = (raw_key, raw_key.removeprefix("./"))
            if any(
                candidate in changed or candidate.startswith(directory_prefixes)
                for candidate in candidates
            ):
                store.mark_stale(
                    record.id,
                    "scope_files_changed",
                    commit=False,
                )
                marked += 1
                reason_counts["scope_files_changed"] = (
                    reason_counts.get("scope_files_changed", 0) + 1
                )
                break
    if commit:
        store.commit()
    return StalenessReport(
        records_marked_stale=marked,
        records_marked_historical=0,
        records_reactivated=0,
        reasons=dict(sorted(reason_counts.items())),
    )
+ if not db_path.exists(): + return MemoryStatusReport( + db_path=db_path, + schema_version=None, + project_id=project.id, + project_root=str(resolved_root), + backend=backend, + git_available=git.available, + git_branch=git.branch, + git_head=git.head, + last_analysis_fingerprint=None, + last_init_run_id=None, + record_count=0, + records_by_type={}, + records_by_status={}, + db_exists=False, + ) + + store = SqliteEngineeringMemoryStore(db_path) + try: + schema_version = store.get_meta("schema_version") + project_id = store.get_meta("project_id") or project.id + project_root = store.get_meta("project_root") or str(resolved_root) + last_analysis_fingerprint = store.get_meta("last_analysis_fingerprint") + last_init_run_id = store.get_meta("last_init_run_id") + record_count = store.count_records() + records_by_type = store.count_records_grouped(column="type") + records_by_status = store.count_records_grouped(column="status") + finally: + store.close() + + return MemoryStatusReport( + db_path=db_path, + schema_version=schema_version or ENGINEERING_MEMORY_SCHEMA_VERSION, + project_id=project_id, + project_root=project_root, + backend=backend, + git_available=git.available, + git_branch=git.branch, + git_head=git.head, + last_analysis_fingerprint=last_analysis_fingerprint, + last_init_run_id=last_init_run_id, + record_count=record_count, + records_by_type=records_by_type, + records_by_status=records_by_status, + db_exists=True, + ) + + +__all__ = ["MemoryStatusReport", "build_memory_status_report"] diff --git a/codeclone/memory/store.py b/codeclone/memory/store.py new file mode 100644 index 00000000..5a9b80a1 --- /dev/null +++ b/codeclone/memory/store.py @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
class EngineeringMemoryStore(Protocol):
    """Structural (``typing.Protocol``) interface of an engineering-memory store.

    The methods are signature-only stubs. ``transaction`` and
    ``exclusive_init_lock`` are declared as context managers.

    NOTE(review): ``SqliteEngineeringMemoryStore.write_subject`` and
    ``mark_stale`` accept an extra keyword-only ``commit`` flag that this
    protocol does not declare — confirm that is intended.
    """

    # --- setup and metadata ---
    def initialize(self, project: MemoryProject) -> None:
        ...

    def get_meta(self, key: str) -> str | None:
        ...

    def set_meta(self, key: str, value: str) -> None:
        ...

    # --- records ---
    def write_record(self, record: MemoryRecord) -> None:
        ...

    def upsert_record(self, record: MemoryRecord) -> UpsertResult:
        ...

    def find_record(self, record_id: str) -> MemoryRecord | None:
        ...

    def find_by_identity_key(self, project_id: str, key: str) -> MemoryRecord | None:
        ...

    def query_records(self, query: MemoryQuery) -> Sequence[MemoryRecord]:
        ...

    # --- rows attached to records ---
    def write_subject(self, subject: MemorySubject) -> None:
        ...

    def write_evidence(self, evidence: MemoryEvidence) -> None:
        ...

    def write_link(self, link: MemoryLink) -> None:
        ...

    def write_ingestion_run(self, run: IngestionRun) -> None:
        ...

    def write_revision(self, revision: MemoryRevision) -> None:
        ...

    # --- lifecycle and batches ---
    def mark_stale(self, record_id: str, reason: str) -> None:
        ...

    def persist_batch(self, batch: RecordBatch, *, commit: bool = True) -> dict[str, int]:
        ...

    def close(self) -> None:
        ...

    # --- context managers ---
    @contextmanager
    def transaction(self) -> Iterator[None]:
        ...

    @contextmanager
    def exclusive_init_lock(self) -> Iterator[None]:
        ...
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .models import ( + TRAJECTORY_PROJECTION_VERSION, + Trajectory, + TrajectoryProjectionResult, + TrajectoryProjectionRun, + TrajectoryStep, + TrajectorySubject, +) +from .projector import TrajectoryProjectionError, project_trajectory +from .retrieval import ( + rank_trajectories_for_query, + rank_trajectories_for_scope, + serialize_trajectory_detail, + serialize_trajectory_preview, +) + +__all__ = [ + "TRAJECTORY_PROJECTION_VERSION", + "Trajectory", + "TrajectoryProjectionError", + "TrajectoryProjectionResult", + "TrajectoryProjectionRun", + "TrajectoryStep", + "TrajectorySubject", + "project_trajectory", + "rank_trajectories_for_query", + "rank_trajectories_for_scope", + "serialize_trajectory_detail", + "serialize_trajectory_preview", +] diff --git a/codeclone/memory/trajectory/agents.py b/codeclone/memory/trajectory/agents.py new file mode 100644 index 00000000..391f3c27 --- /dev/null +++ b/codeclone/memory/trajectory/agents.py @@ -0,0 +1,102 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass + +from .anomalies import TrajectoryAnomaly, detect_trajectory_anomalies +from .models import Trajectory + + +@dataclass(frozen=True, slots=True) +class AgentTrajectoryRow: + agent_label: str + trajectory_count: int + intent_count: int + failed_outcome_count: int + incident_total: int + anomaly_count: int + + +def trajectory_agent_label(trajectory: Trajectory) -> str | None: + for subject in trajectory.subjects: + if subject.subject_kind == "agent" and subject.relation == "actor": + text = subject.subject_key.strip() + if text: + return text + return None + + +def aggregate_agent_rows( + trajectories: Sequence[Trajectory], + *, + anomaly_by_id: Mapping[str, tuple[TrajectoryAnomaly, ...]] | None = None, +) -> tuple[AgentTrajectoryRow, ...]: + buckets: dict[str, dict[str, int]] = {} + intent_ids: dict[str, set[str]] = {} + for trajectory in trajectories: + label = trajectory_agent_label(trajectory) + if not label: + continue + bucket = buckets.setdefault( + label, + { + "trajectory_count": 0, + "failed_outcome_count": 0, + "incident_total": 0, + "anomaly_count": 0, + }, + ) + bucket["trajectory_count"] += 1 + if trajectory.outcome in {"violated", "blocked", "abandoned"}: + bucket["failed_outcome_count"] += 1 + bucket["incident_total"] += trajectory.incident_count + anomalies = ( + anomaly_by_id.get(trajectory.id) + if anomaly_by_id is not None + else detect_trajectory_anomalies(trajectory) + ) + if anomalies: + bucket["anomaly_count"] += len(anomalies) + if trajectory.intent_id: + intent_ids.setdefault(label, set()).add(trajectory.intent_id) + + rows = [ + AgentTrajectoryRow( + agent_label=agent_label, + trajectory_count=counts["trajectory_count"], + intent_count=len(intent_ids.get(agent_label, set())), + failed_outcome_count=counts["failed_outcome_count"], + 
incident_total=counts["incident_total"], + anomaly_count=counts["anomaly_count"], + ) + for agent_label, counts in sorted( + buckets.items(), + key=lambda item: (-item[1]["trajectory_count"], item[0]), + ) + ] + return tuple(rows) + + +def serialize_agent_row(row: AgentTrajectoryRow) -> dict[str, object]: + return { + "agent_label": row.agent_label, + "trajectory_count": row.trajectory_count, + "intent_count": row.intent_count, + "failed_outcome_count": row.failed_outcome_count, + "incident_total": row.incident_total, + "anomaly_count": row.anomaly_count, + } + + +__all__ = [ + "AgentTrajectoryRow", + "aggregate_agent_rows", + "serialize_agent_row", + "trajectory_agent_label", +] diff --git a/codeclone/memory/trajectory/analytics.py b/codeclone/memory/trajectory/analytics.py new file mode 100644 index 00000000..6a20ef81 --- /dev/null +++ b/codeclone/memory/trajectory/analytics.py @@ -0,0 +1,187 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
DEFAULT_ANALYTICS_LIMIT = 5000
DEFAULT_ANOMALY_PREVIEW_LIMIT = 25


def _load_trajectories(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    limit: int,
    include_routine: bool,
) -> tuple[Trajectory, ...]:
    """Load up to ``limit`` canonical trajectories, then apply the default filter.

    The limit is applied by the store query; ``include_routine`` is forwarded
    to ``filter_trajectories_for_default_retrieval`` afterwards.
    """
    trajectories = list_canonical_trajectories_for_export(
        store.connection,
        project_id=project_id,
        limit=limit,
    )
    return filter_trajectories_for_default_retrieval(
        trajectories,
        include_routine=include_routine,
    )


def _anomaly_map(
    store: SqliteEngineeringMemoryStore,
    trajectories: Sequence[Trajectory],
) -> dict[str, tuple[TrajectoryAnomaly, ...]]:
    """Detect anomalies for each trajectory, keyed by trajectory id.

    Each trajectory's stored patch trail is loaded and passed to the detector.
    """
    mapped: dict[str, tuple[TrajectoryAnomaly, ...]] = {}
    for trajectory in trajectories:
        patch_trail = store.load_trajectory_patch_trail(trajectory.id)
        mapped[trajectory.id] = detect_trajectory_anomalies(
            trajectory,
            patch_trail_payload=patch_trail,
        )
    return mapped


def build_trajectory_agent_stats_payload(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    limit: int = DEFAULT_ANALYTICS_LIMIT,
    include_routine: bool = False,
) -> dict[str, object]:
    """Build the per-agent statistics payload for one project.

    Returns:
        A dict with ``agent_count``, ``trajectory_count`` (trajectories
        loaded after filtering), ``unlabeled_trajectory_count`` (those with
        no agent label) and ``agents`` (serialized per-agent rows).
    """
    trajectories = _load_trajectories(
        store,
        project_id=project_id,
        limit=limit,
        include_routine=include_routine,
    )
    anomaly_by_id = _anomaly_map(store, trajectories)
    rows = aggregate_agent_rows(trajectories, anomaly_by_id=anomaly_by_id)
    unlabeled = sum(
        1 for trajectory in trajectories if trajectory_agent_label(trajectory) is None
    )
    return {
        "agent_count": len(rows),
        "trajectory_count": len(trajectories),
        "unlabeled_trajectory_count": unlabeled,
        "agents": [serialize_agent_row(row) for row in rows],
    }


def build_trajectory_anomalies_payload(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    max_results: int = DEFAULT_ANOMALY_PREVIEW_LIMIT,
    limit: int = DEFAULT_ANALYTICS_LIMIT,
    include_routine: bool = False,
    detail_level: TrajectoryDetailLevel = "full",
) -> dict[str, object]:
    """Build the anomaly listing payload for one project.

    Args:
        store: Memory store used to load trajectories and patch trails.
        project_id: Project whose trajectories are inspected.
        max_results: Maximum previews to return; values below 1 are treated
            as 1.
        limit: Maximum trajectories to load before filtering.
        include_routine: Forwarded to the default retrieval filter.
        detail_level: Forwarded to ``serialize_trajectory_preview``.

    Returns:
        A dict with ``trajectories`` (previews carrying ``agent_label`` and
        ``anomalies``), ``trajectory_count`` (previews returned),
        ``truncated`` (whether hits were dropped by the cap) and ``summary``
        (computed over all hits, not only the returned ones).
    """
    trajectories = _load_trajectories(
        store,
        project_id=project_id,
        limit=limit,
        include_routine=include_routine,
    )
    hits: list[tuple[Trajectory, tuple[TrajectoryAnomaly, ...]]] = []
    for trajectory in trajectories:
        patch_trail = store.load_trajectory_patch_trail(trajectory.id)
        anomalies = detect_trajectory_anomalies(
            trajectory,
            patch_trail_payload=patch_trail,
        )
        if anomalies:
            hits.append((trajectory, anomalies))
    # Most severe first: error count, then total tags, then finish time and id
    # (all descending because of reverse=True).
    hits.sort(
        key=lambda item: (
            sum(1 for anomaly in item[1] if anomaly.severity == "error"),
            len(item[1]),
            item[0].finished_at_utc,
            item[0].id,
        ),
        reverse=True,
    )
    # Clamp once and use the same cap for both the slice and the flag, so
    # max_results <= 0 cannot report truncation while every hit is returned.
    result_cap = max(1, int(max_results))
    truncated = len(hits) > result_cap
    selected = hits[:result_cap]
    payload_items: list[dict[str, object]] = []
    for trajectory, anomalies in selected:
        preview = serialize_trajectory_preview(
            trajectory,
            detail_level=detail_level,
        )
        preview["agent_label"] = trajectory_agent_label(trajectory)
        preview["anomalies"] = [serialize_anomaly(item) for item in anomalies]
        payload_items.append(preview)
    return {
        "trajectories": payload_items,
        "trajectory_count": len(payload_items),
        "truncated": truncated,
        "summary": anomaly_summary(hits),
    }


def build_trajectory_dashboard_payload(
    store: SqliteEngineeringMemoryStore,
    *,
    project_id: str,
    max_results: int = DEFAULT_ANOMALY_PREVIEW_LIMIT,
    include_routine: bool = False,
    detail_level: TrajectoryDetailLevel = "full",
) -> dict[str, object]:
    """Combine status, agent stats, anomalies and recent trajectories.

    Returns:
        A dict with ``status``, ``agents``, ``anomalies`` and
        ``recent_trajectories`` (up to ``max_results`` store list items
        converted to previews).
    """
    status = trajectory_status_payload(
        count=store.count_trajectories(project_id=project_id),
        latest_run=store.latest_trajectory_projection_run(project_id=project_id),
    )
    agents = build_trajectory_agent_stats_payload(
        store,
        project_id=project_id,
        include_routine=include_routine,
    )
    anomalies = build_trajectory_anomalies_payload(
        store,
        project_id=project_id,
        max_results=max_results,
        include_routine=include_routine,
        detail_level=detail_level,
    )
    recent_items = store.list_trajectories(
        project_id=project_id,
        limit=max_results,
    )
    return {
        "status": status,
        "agents": agents,
        "anomalies": anomalies,
        "recent_trajectories": [
            trajectory_list_item_to_preview(item) for item in recent_items
        ],
    }
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Literal + +from ...audit.events import ( + EVENT_INTENT_CLEARED, + EVENT_PATCH_VERIFIED, +) +from .models import Trajectory, TrajectoryLabel +from .patch_trail import patch_trail_from_mapping + +TrajectoryAnomalySeverity = Literal["warn", "error"] + +INCIDENT_LABELS: frozenset[TrajectoryLabel] = frozenset( + { + "baseline_abuse_detected", + "claim_guard_failed", + "foreign_conflict_seen", + "hook_blocked", + "recovered", + } +) + +ELEVATED_INCIDENT_THRESHOLD = 2 + + +@dataclass(frozen=True, slots=True) +class TrajectoryAnomaly: + kind: str + severity: TrajectoryAnomalySeverity + message: str + + +def detect_trajectory_anomalies( + trajectory: Trajectory, + *, + patch_trail_payload: Mapping[str, object] | None = None, +) -> tuple[TrajectoryAnomaly, ...]: + """Return deterministic anomaly tags for one stored trajectory.""" + anomalies: list[TrajectoryAnomaly] = [] + label_set = set(trajectory.labels) + event_types = {step.event_type for step in trajectory.steps} + + if trajectory.outcome == "violated": + anomalies.append( + TrajectoryAnomaly( + kind="outcome_violated", + severity="error", + message="Trajectory outcome is violated.", + ) + ) + elif trajectory.outcome == "blocked": + anomalies.append( + TrajectoryAnomaly( + kind="outcome_blocked", + severity="error", + message="Trajectory outcome is blocked.", + ) + ) + elif trajectory.outcome == "abandoned": + anomalies.append( + TrajectoryAnomaly( + kind="outcome_abandoned", + severity="warn", + message="Trajectory outcome is abandoned.", + ) + ) + + if trajectory.quality_tier == "incident": + anomalies.append( + TrajectoryAnomaly( + kind="quality_incident", + severity="error", + message="Trajectory quality tier is incident.", + ) + ) + + if trajectory.incident_count >= ELEVATED_INCIDENT_THRESHOLD: + 
anomalies.append( + TrajectoryAnomaly( + kind="elevated_incidents", + severity="warn", + message=( + f"Trajectory recorded {trajectory.incident_count} audit incidents." + ), + ) + ) + + for label in sorted(label_set & INCIDENT_LABELS): + severity: TrajectoryAnomalySeverity = ( + "error" + if label in {"baseline_abuse_detected", "claim_guard_failed"} + else "warn" + ) + anomalies.append( + TrajectoryAnomaly( + kind=f"label_{label}", + severity=severity, + message=f"Incident label present: {label}.", + ) + ) + + if ( + "change_control_workflow" in label_set + and "verified_finish" not in label_set + and trajectory.outcome not in {"accepted", "accepted_with_external_changes"} + ): + anomalies.append( + TrajectoryAnomaly( + kind="incomplete_change_cycle", + severity="warn", + message="Change-control workflow did not reach verified finish.", + ) + ) + + if EVENT_INTENT_CLEARED not in event_types and EVENT_PATCH_VERIFIED in event_types: + anomalies.append( + TrajectoryAnomaly( + kind="missing_intent_clear", + severity="warn", + message="Patch verified without intent.cleared in the audit stream.", + ) + ) + + trail = ( + patch_trail_from_mapping(patch_trail_payload) + if patch_trail_payload is not None + else None + ) + if trail is not None: + if trail.scope_check_status == "violated": + anomalies.append( + TrajectoryAnomaly( + kind="scope_violation", + severity="error", + message="Patch trail scope check is violated.", + ) + ) + if trail.verification_status in {"violated", "not_reached"} and ( + trajectory.outcome in {"partial", "violated", "blocked"} + ): + anomalies.append( + TrajectoryAnomaly( + kind="verification_gap", + severity="warn", + message=( + f"Patch verification status is {trail.verification_status}." 
+ ), + ) + ) + + return tuple(anomalies) + + +def serialize_anomaly(anomaly: TrajectoryAnomaly) -> dict[str, str]: + return { + "kind": anomaly.kind, + "severity": anomaly.severity, + "message": anomaly.message, + } + + +def anomaly_summary( + items: Sequence[tuple[Trajectory, tuple[TrajectoryAnomaly, ...]]], +) -> dict[str, object]: + by_kind: dict[str, int] = {} + error_count = 0 + warn_count = 0 + for _trajectory, anomalies in items: + for anomaly in anomalies: + by_kind[anomaly.kind] = by_kind.get(anomaly.kind, 0) + 1 + if anomaly.severity == "error": + error_count += 1 + else: + warn_count += 1 + return { + "trajectories_with_anomalies": len(items), + "anomaly_count": error_count + warn_count, + "error_count": error_count, + "warn_count": warn_count, + "by_kind": dict(sorted(by_kind.items())), + } + + +__all__ = [ + "ELEVATED_INCIDENT_THRESHOLD", + "INCIDENT_LABELS", + "TrajectoryAnomaly", + "TrajectoryAnomalySeverity", + "anomaly_summary", + "detect_trajectory_anomalies", + "serialize_anomaly", +] diff --git a/codeclone/memory/trajectory/cli_render.py b/codeclone/memory/trajectory/cli_render.py new file mode 100644 index 00000000..772a0214 --- /dev/null +++ b/codeclone/memory/trajectory/cli_render.py @@ -0,0 +1,220 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import Protocol + +from .models import Trajectory, TrajectoryListItem, TrajectoryProjectionRun +from .step_labels import step_display_name + + +class PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... 
+ + +def render_trajectory_status( + *, + console: PrinterLike, + enabled: bool, + count: int, + latest_run: TrajectoryProjectionRun | None, +) -> None: + state = "enabled" if enabled else "disabled" + console.print(f"trajectory memory: {state}") + console.print(f" trajectories: {count}") + if latest_run is None: + console.print(" latest projection: none") + return + console.print( + " latest projection: " + f"{latest_run.finished_at_utc} " + f"({latest_run.workflows_seen} workflows, " + f"+{latest_run.trajectories_created}/" + f"~{latest_run.trajectories_updated}/" + f"={latest_run.trajectories_unchanged})", + markup=False, + ) + if latest_run.legacy_event_count: + console.print(f" legacy events without core: {latest_run.legacy_event_count}") + + +def render_projection_run( + *, + console: PrinterLike, + run: TrajectoryProjectionRun, +) -> None: + console.print( + "Rebuilt trajectories: " + f"{run.workflows_seen} workflows " + f"({run.trajectories_created} created, " + f"{run.trajectories_updated} updated, " + f"{run.trajectories_unchanged} unchanged).", + markup=False, + ) + if run.legacy_event_count: + console.print( + f"Skipped legacy audit events without event core: {run.legacy_event_count}", + markup=False, + ) + + +def render_trajectory_list( + *, + console: PrinterLike, + items: list[TrajectoryListItem], +) -> None: + if not items: + console.print("No trajectories found.") + return + for item in items: + console.print( + f"{item.id} {item.outcome}/{item.quality_tier} " + f"{item.event_count} events {item.workflow_id}", + markup=False, + ) + console.print(f" {item.summary}", markup=False) + + +def render_trajectory_search_results( + *, + console: PrinterLike, + query: str, + trajectories: list[dict[str, object]], +) -> None: + console.print(f"Trajectory matches for: {query}", markup=False) + if not trajectories: + console.print(" No matching trajectories.") + return + for item in trajectories: + trajectory_id = str(item.get("trajectory_id", "")) + outcome = 
str(item.get("outcome", "")) + tier = str(item.get("quality_tier", "")) + score = item.get("relevance_score") + score_text = f" score={score}" if score is not None else "" + console.print( + f" {trajectory_id} {outcome}/{tier}{score_text}", + markup=False, + ) + console.print(f" {item.get('summary', '')}", markup=False) + + +def render_trajectory_agents( + *, + console: PrinterLike, + payload: dict[str, object], +) -> None: + agents = payload.get("agents") + if not isinstance(agents, list) or not agents: + console.print("No agent-labeled trajectories found.") + return + console.print( + f"Agents: {payload.get('agent_count', 0)} · " + f"trajectories: {payload.get('trajectory_count', 0)} · " + f"unlabeled: {payload.get('unlabeled_trajectory_count', 0)}", + markup=False, + ) + for item in agents: + if not isinstance(item, dict): + continue + console.print( + f" {item.get('agent_label', '?')} " + f"trajectories={item.get('trajectory_count', 0)} " + f"intents={item.get('intent_count', 0)} " + f"failed={item.get('failed_outcome_count', 0)} " + f"anomalies={item.get('anomaly_count', 0)}", + markup=False, + ) + + +def render_trajectory_anomalies( + *, + console: PrinterLike, + payload: dict[str, object], +) -> None: + summary = payload.get("summary") + if isinstance(summary, dict): + console.print( + "Anomaly summary: " + f"{summary.get('trajectories_with_anomalies', 0)} trajectories · " + f"{summary.get('anomaly_count', 0)} tags " + f"({summary.get('error_count', 0)} error / " + f"{summary.get('warn_count', 0)} warn)", + markup=False, + ) + trajectories = payload.get("trajectories") + if not isinstance(trajectories, list) or not trajectories: + console.print("No trajectory anomalies detected.") + return + for item in trajectories: + if not isinstance(item, dict): + continue + trajectory_id = str(item.get("trajectory_id", "")) + agent = item.get("agent_label") + agent_text = f" agent={agent}" if agent else "" + console.print( + f" {trajectory_id}{agent_text} " + 
f"{item.get('outcome', '')}/{item.get('quality_tier', '')}", + markup=False, + ) + anomalies = item.get("anomalies") + if isinstance(anomalies, list): + for anomaly in anomalies: + if isinstance(anomaly, dict): + console.print( + f" [{anomaly.get('severity', '?')}] " + f"{anomaly.get('kind', '?')}: " + f"{anomaly.get('message', '')}", + markup=False, + ) + + +def render_trajectory_detail( + *, + console: PrinterLike, + trajectory: Trajectory, +) -> None: + console.print(f"trajectory: {trajectory.id}") + console.print(f" workflow: {trajectory.workflow_id}") + console.print(f" outcome: {trajectory.outcome}") + console.print( + f" quality: {trajectory.quality_tier} ({trajectory.quality_score}/100)" + ) + if trajectory.labels: + console.print(f" labels: {', '.join(trajectory.labels)}", markup=False) + console.print(f" digest: {trajectory.trajectory_digest}") + console.print(f" source stream: {trajectory.source_event_stream_digest}") + if trajectory.report_digest: + console.print(f" report digest: {trajectory.report_digest}") + console.print(f" summary: {trajectory.summary}", markup=False) + console.print(" steps:") + for step in trajectory.steps: + label = step_display_name(event_type=step.event_type, status=step.status) + console.print( + f" {step.step_index + 1}. 
#{step.audit_sequence} {label}", + markup=False, + ) + if step.summary: + console.print(f" {step.summary[:120]}", markup=False) + if trajectory.subjects: + console.print(" subjects:") + for subject in trajectory.subjects: + console.print( + f" {subject.subject_kind}:{subject.subject_key} " + f"({subject.relation})", + markup=False, + ) + + +__all__ = [ + "render_projection_run", + "render_trajectory_agents", + "render_trajectory_anomalies", + "render_trajectory_detail", + "render_trajectory_list", + "render_trajectory_search_results", + "render_trajectory_status", +] diff --git a/codeclone/memory/trajectory/dto.py b/codeclone/memory/trajectory/dto.py new file mode 100644 index 00000000..9f22f427 --- /dev/null +++ b/codeclone/memory/trajectory/dto.py @@ -0,0 +1,69 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class BlastRadiusSnapshot: + do_not_touch_declared: tuple[str, ...] + review_context_declared: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class VerifySnapshot: + verification_profile: str + verification_status: str + verification_skipped: tuple[str, ...] + verification_failed: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class HygieneSnapshot: + blocks_finish: bool + finish_block_reason: str | None + unacknowledged_dirty_in_scope: tuple[str, ...] + dirty_paths_outside_scope: tuple[str, ...] 
@dataclass(frozen=True, slots=True)
class PatchTrailEvidenceInput:
    """Frozen carrier for the digests and audit sequence numbers of a patch trail.

    Only the two digest fields are required; every ``*_audit_sequence`` field
    defaults to ``None``.
    """

    repo_root_digest: str
    report_digest: str | None
    # NOTE(review): ``None`` presumably means the corresponding audit event was
    # not observed -- confirm against the code that builds this object.
    intent_declared_audit_sequence: int | None = None
    scope_check_audit_sequence: int | None = None
    patch_verify_audit_sequence: int | None = None
    receipt_audit_sequence: int | None = None
    patch_trail_audit_sequence: int | None = None


@dataclass(frozen=True, slots=True)
class PatchTrailInputs:
    """Frozen bundle of the inputs from which a patch trail is built.

    Groups the intent, the declared and observed file sets, the scope-check
    status and the blast-radius, verification, hygiene and evidence objects.
    All fields are required.
    """

    intent_id: str | None
    intent_description: str
    # File sets are immutable tuples of strings.
    # NOTE(review): presumably repo-relative paths -- confirm.
    declared_files: tuple[str, ...]
    declared_related: tuple[str, ...]
    changed_files: tuple[str, ...]
    unexpected_files: tuple[str, ...]
    forbidden_touched: tuple[str, ...]
    expanded_related_files: tuple[str, ...]
    scope_check_status: str
    # Nested snapshot objects declared alongside this class.
    blast_radius: BlastRadiusSnapshot
    verify: VerifySnapshot
    hygiene: HygieneSnapshot
    evidence: PatchTrailEvidenceInput
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import TypedDict + +from ...audit.events import repo_root_digest +from ...config.memory import MemoryConfig +from ...report.meta import current_report_timestamp_utc +from ...utils.json_io import json_text +from ...utils.repo_paths import ( + PathOutsideRepoError, + RepoPathPolicy, + resolve_under_repo_root, +) +from ..exceptions import MemoryContractError +from ..models import MemoryProject +from ..sqlite_store import SqliteEngineeringMemoryStore +from .export_context import ( + build_export_context, + build_export_record, + resolve_export_scope_paths, + trajectory_index_for_export, + trajectory_path_subjects, +) +from .models import Trajectory +from .profiles import ( + TRAJECTORY_EXPORT_SCHEMA_VERSION, + resolve_export_profile, + trajectory_eligible_for_export, +) + + +class TrajectoryExportManifest(TypedDict): + schema_version: str + profile: str + profile_schema_version: str + exported_at_utc: str + project_id: str + repo_root_digest: str + record_count: int + bytes_written: int + truncated_records: int + skipped_ineligible: int + deduplicated_workflows: int + + +@dataclass(frozen=True, slots=True) +class TrajectoryExportResult: + output_path: Path + manifest: TrajectoryExportManifest + records_written: int + + +@dataclass +class _JsonlExportAccumulator: + bytes_written: int = 0 + truncated_records: int = 0 + records_written: int = 0 + lines: list[str] = field(default_factory=list) + + def try_append( + self, + line: str, + *, + record_limit: int, + file_limit: int, + ) -> bool: + encoded_len = len(line.encode("utf-8")) + if encoded_len > record_limit: + self.truncated_records += 1 + return False + projected = self.bytes_written + encoded_len + 1 + if projected > file_limit: + return False + self.lines.append(line) + self.bytes_written = projected + 
self.records_written += 1 + return True + + +def resolve_export_output_path( + *, + root_path: Path, + raw_path: str, + allow_external_out: bool, +) -> Path: + policy = RepoPathPolicy( + allow_absolute=True, + allow_external=allow_external_out, + ) + try: + return resolve_under_repo_root(root_path, raw_path, policy=policy) + except PathOutsideRepoError as exc: + msg = ( + f"Export output path escapes repository root: {raw_path}. " + "Pass --allow-external-out for an explicit external destination." + ) + raise MemoryContractError(msg) from exc + + +def export_trajectories_jsonl( + *, + store: SqliteEngineeringMemoryStore, + project: MemoryProject, + root_path: Path, + config: MemoryConfig, + profile_name: str, + output_path: Path, + include_payloads: bool | None = None, + max_record_bytes: int | None = None, + max_file_bytes: int | None = None, + force_enabled: bool = False, +) -> TrajectoryExportResult: + if not config.trajectory_export_enabled and not force_enabled: + raise MemoryContractError( + "Trajectory export is disabled. Set " + "[tool.codeclone.memory].trajectory_export_enabled=true or pass " + "--force on the CLI export command." 
+ ) + profile = resolve_export_profile(profile_name) + include = ( + config.trajectory_export_include_payloads + if include_payloads is None + else include_payloads + ) + record_limit = max_record_bytes or config.trajectory_export_max_record_bytes + file_limit = max_file_bytes or config.trajectory_export_max_file_bytes + loaded = store.list_canonical_trajectories_for_export( + project_id=project.id, + limit=10_000, + ) + deduplicated = len(loaded) + eligible = [ + trajectory + for trajectory in sorted(loaded, key=_trajectory_sort_key) + if trajectory_eligible_for_export(trajectory, profile=profile) + ] + skipped = deduplicated - len(eligible) + canonical_index = trajectory_index_for_export(eligible, profile=profile) + tmp_path = output_path.with_suffix(output_path.suffix + ".tmp") + accumulator = _JsonlExportAccumulator() + for trajectory in eligible: + patch_trail_payload = store.load_trajectory_patch_trail( + trajectory_id=trajectory.id + ) + scope_paths = trajectory_path_subjects( + trajectory, relations={"about", "touched"} + ) + enrichment = build_export_context( + store.connection, + project_id=project.id, + trajectory=trajectory, + scope_paths=scope_paths, + patch_trail_payload=patch_trail_payload, + canonical_by_workflow=canonical_index, + ) + record = build_export_record( + trajectory=trajectory, + profile=profile, + project=project, + include_payloads=include, + enrichment=enrichment, + scope_paths=resolve_export_scope_paths( + trajectory, + patch_trail_payload=patch_trail_payload, + ), + ) + accumulator.try_append( + _canonical_json_line(record), + record_limit=record_limit, + file_limit=file_limit, + ) + manifest: TrajectoryExportManifest = { + "schema_version": TRAJECTORY_EXPORT_SCHEMA_VERSION, + "profile": profile.name, + "profile_schema_version": profile.schema_version, + "exported_at_utc": current_report_timestamp_utc(), + "project_id": project.id, + "repo_root_digest": repo_root_digest(root_path.resolve()), + "record_count": 
accumulator.records_written, + "bytes_written": accumulator.bytes_written, + "truncated_records": accumulator.truncated_records, + "skipped_ineligible": skipped, + "deduplicated_workflows": deduplicated, + } + payload = "\n".join(accumulator.lines) + if payload: + payload += "\n" + tmp_path.parent.mkdir(parents=True, exist_ok=True) + tmp_path.write_text(payload, encoding="utf-8") + os.replace(tmp_path, output_path) + return TrajectoryExportResult( + output_path=output_path, + manifest=manifest, + records_written=accumulator.records_written, + ) + + +def _trajectory_sort_key(trajectory: Trajectory) -> tuple[str, str]: + return (trajectory.finished_at_utc, trajectory.id) + + +def _canonical_json_line(payload: dict[str, object]) -> str: + return json_text(payload, sort_keys=True) + + +__all__ = [ + "TrajectoryExportManifest", + "TrajectoryExportResult", + "export_trajectories_jsonl", + "resolve_export_output_path", +] diff --git a/codeclone/memory/trajectory/export_context.py b/codeclone/memory/trajectory/export_context.py new file mode 100644 index 00000000..eeb2638a --- /dev/null +++ b/codeclone/memory/trajectory/export_context.py @@ -0,0 +1,506 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
_REDACT_HOME = re.compile(r"(?i)(/Users/[^/\s]+|/home/[^/\s]+)")

MAX_MEMORY_PRECEDENTS = 8
MAX_TRAJECTORY_PRECEDENTS = 5
MAX_CITATIONS = 32
MAX_OVERLAP_PATHS = 12
MAX_STATEMENT_PREVIEW = 220
# Maximum number of trajectory steps emitted as "actions" in an export record.
MAX_EXPORT_ACTIONS = 12

_PROJECTION_VERSION_PREFIX = "trajectory-v"


def projection_version_rank(version: str) -> int:
    """Rank trajectory projection versions by numeric suffix so newer
    projections supersede older ones; unknown formats rank 0.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    also accepts characters such as superscript digits that ``int`` cannot
    parse, which would raise instead of ranking 0.
    """
    if version.startswith(_PROJECTION_VERSION_PREFIX):
        suffix = version[len(_PROJECTION_VERSION_PREFIX) :]
        if suffix.isdecimal():
            return int(suffix)
    return 0


def select_canonical_trajectories(
    trajectories: Sequence[Trajectory],
) -> list[Trajectory]:
    """Keep one trajectory per workflow id, ordered by finish time then id.

    When a workflow has several trajectories, the winner is chosen by
    ``_prefer_trajectory_projection``.
    """
    best: dict[str, Trajectory] = {}
    for trajectory in trajectories:
        current = best.get(trajectory.workflow_id)
        if current is None or _prefer_trajectory_projection(trajectory, current):
            best[trajectory.workflow_id] = trajectory
    return sorted(best.values(), key=lambda item: (item.finished_at_utc, item.id))


def build_export_context(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    trajectory: Trajectory,
    scope_paths: Sequence[str],
    patch_trail_payload: Mapping[str, object] | None,
    canonical_by_workflow: Mapping[str, Trajectory],
) -> dict[str, object]:
    """Build the enrichment payload attached to one exported trajectory.

    Returns:
        A dict with ``context`` (memory and trajectory precedents) and
        ``citations``, plus ``patch_trail_summary`` when a summary could be
        produced from ``patch_trail_payload``.
    """
    effective_scope = _effective_scope_paths(
        trajectory,
        scope_paths=scope_paths,
        patch_trail_payload=patch_trail_payload,
    )
    memory_precedents = _memory_precedents(
        conn,
        project_id=project_id,
        trajectory=trajectory,
        scope_paths=effective_scope,
    )
    trajectory_precedents = _trajectory_precedents(
        trajectory=trajectory,
        scope_paths=effective_scope,
        canonical_by_workflow=canonical_by_workflow,
    )
    citations = extract_trajectory_citations(trajectory)
    context: dict[str, object] = {
        "memory_precedents": memory_precedents,
        "trajectory_precedents": trajectory_precedents,
    }
    payload: dict[str, object] = {
        "context": context,
        "citations": citations,
    }
    summary = serialize_patch_trail_summary(patch_trail_payload)
    if summary is not None:
        payload["patch_trail_summary"] = summary
    return payload


def build_export_record(
    *,
    trajectory: Trajectory,
    profile: TrajectoryExportProfile,
    project: MemoryProject,
    include_payloads: bool,
    enrichment: Mapping[str, object],
    scope_paths: Sequence[str],
) -> dict[str, object]:
    """Assemble the JSON-ready export record for one trajectory.

    Free-text fields (summary, scope paths, step type/status/summary) pass
    through ``_redact_text``. At most ``MAX_EXPORT_ACTIONS`` steps are
    emitted as actions. Malformed ``context`` or ``citations`` entries in
    ``enrichment`` are replaced with empty defaults. ``steps`` is added only
    when ``include_payloads`` is true.
    """
    context = enrichment.get("context")
    if not isinstance(context, dict):
        context = {"memory_precedents": [], "trajectory_precedents": []}
    citations = enrichment.get("citations")
    if not isinstance(citations, list):
        citations = []
    record: dict[str, object] = {
        "schema_version": profile.schema_version,
        "profile": profile.name,
        "trajectory_id": trajectory.id,
        "project_fingerprint": project.id,
        "projection_version": trajectory.projection_version,
        "task": {
            "intent_summary": _redact_text(trajectory.summary),
            "scope": {
                "paths": [_redact_text(path) for path in scope_paths],
            },
        },
        "context": context,
        "actions": [
            {
                "type": _redact_text(step.event_type),
                "result": _redact_text(step.status or ""),
                "summary": _redact_text(step.summary or ""),
            }
            for step in trajectory.steps[:MAX_EXPORT_ACTIONS]
        ],
        "outcome": {
            "label": trajectory.outcome,
            "quality_tier": trajectory.quality_tier,
        },
        "lessons": list(trajectory.labels),
        "citations": citations,
        "digests": {
            "trajectory_digest": f"sha256:{trajectory.trajectory_digest}",
            "source_event_stream_digest": (
                f"sha256:{trajectory.source_event_stream_digest}"
            ),
        },
    }
    patch_trail_summary = enrichment.get("patch_trail_summary")
    if isinstance(patch_trail_summary, dict):
        record["patch_trail_summary"] = patch_trail_summary
    if include_payloads:
        record["steps"] = compact_step_text(trajectory)
    return record


def extract_trajectory_citations(trajectory: Trajectory) -> list[dict[str, object]]:
    """Collect citations from step event cores and report-digest subjects.

    Step citations are de-duplicated per ``(kind, cited_id, audit_sequence)``.
    Report-digest subjects are added with ``audit_sequence=None``. The result
    is sorted with sequenced citations first, then by sequence, kind and id,
    and capped at ``MAX_CITATIONS``.
    """
    citations: list[dict[str, object]] = []
    seen: set[tuple[str, str, int]] = set()
    for step in trajectory.steps:
        facts = _event_core_facts(step.event_core_json)
        if facts is not None:
            for item in _citation_items_from_facts(facts):
                _append_trajectory_citation(
                    citations,
                    seen,
                    kind=str(item.get("kind", "")).strip(),
                    cited_id=str(item.get("cited_id", "")).strip(),
                    valid=bool(item.get("valid", True)),
                    source_event_type=step.event_type,
                    audit_sequence=step.audit_sequence,
                    dedupe_sequence=step.audit_sequence,
                )
    for subject in trajectory.subjects:
        if subject.subject_kind == "report_digest":
            _append_trajectory_citation(
                citations,
                seen,
                kind="report_digest",
                cited_id=subject.subject_key,
                valid=True,
                source_event_type="trajectory.subject",
                audit_sequence=None,
                dedupe_sequence=0,
            )
    citations.sort(
        key=lambda item: (
            item["audit_sequence"] is None,
            item["audit_sequence"] or 0,
            str(item["kind"]),
            str(item["cited_id"]),
        )
    )
    return citations[:MAX_CITATIONS]


def _event_core_facts(event_core_json: str) -> Mapping[str, object] | None:
    """Return the ``facts`` mapping of a decoded event core, or ``None``."""
    core = _load_event_core(event_core_json)
    facts = core.get("facts")
    return facts if isinstance(facts, Mapping) else None


def _citation_items_from_facts(
    facts: Mapping[str, object],
) -> list[Mapping[str, object]]:
    """Return the mapping entries of ``facts["citations"]``; ``[]`` if not a list."""
    raw_citations = facts.get("citations")
    if not isinstance(raw_citations, list):
        return []
    return [item for item in raw_citations if isinstance(item, Mapping)]


def _append_trajectory_citation(
    citations: list[dict[str, object]],
    seen: set[tuple[str, str, int]],
    *,
    kind: str,
    cited_id: str,
    valid: bool,
    source_event_type: str,
    audit_sequence: int | None,
    dedupe_sequence: int,
) -> None:
    """Append one citation unless it is blank or already recorded.

    ``citations`` and ``seen`` are mutated in place. The de-duplication key is
    ``(kind, cited_id, dedupe_sequence)``.
    """
    if not kind or not cited_id:
        return
    key = (kind, cited_id, dedupe_sequence)
    if key in seen:
        return
    seen.add(key)
    citations.append(
        {
            "kind": kind,
            "cited_id": cited_id,
            "valid": valid,
            "source_event_type": source_event_type,
            "audit_sequence": audit_sequence,
        }
    )


def trajectory_path_subjects(
    trajectory: Trajectory,
    *,
    relations: set[str],
) -> tuple[str, ...]:
    """Return the sorted, unique keys of ``path`` subjects with a given relation."""
    paths = [
        subject.subject_key
        for subject in trajectory.subjects
        if subject.subject_kind == "path" and subject.relation in relations
    ]
    return tuple(sorted(set(paths)))


def resolve_export_scope_paths(
    trajectory: Trajectory,
    *,
    patch_trail_payload: Mapping[str, object] | None,
) -> tuple[str, ...]:
    """Return export scope paths: about/touched subjects, else patch-trail files."""
    scope_paths = trajectory_path_subjects(trajectory, relations={"about", "touched"})
    return _effective_scope_paths(
        trajectory,
        scope_paths=scope_paths,
        patch_trail_payload=patch_trail_payload,
    )


def _effective_scope_paths(
    trajectory: Trajectory,
    *,
    scope_paths: Sequence[str],
    patch_trail_payload: Mapping[str, object] | None,
) -> tuple[str, ...]:
    """Prefer explicit scope paths; otherwise use patch-trail files.

    The fallback is the sorted union of the trail's declared and changed
    files, or an empty tuple when no trail can be built.
    """
    if scope_paths:
        return tuple(sorted(set(scope_paths)))
    trail = patch_trail_from_mapping(patch_trail_payload or {})
    if trail is None:
        return ()
    merged = [*trail.declared_files, *trail.changed_files]
    return tuple(sorted(set(merged)))
+ AND e.evidence_kind = 'trajectory' + AND e.ref = ? + ORDER BY m.updated_at_utc DESC, m.id ASC + LIMIT ? + """, + (project_id, trajectory.id, MAX_MEMORY_PRECEDENTS), + ).fetchall() + for row in linked_rows: + _append_memory_precedent( + precedents, + seen=seen, + row=row, + link_kind="trajectory_evidence", + overlap_paths=(), + ) + + normalized_scope = [ + path + for path in (normalize_memory_scope_path(item) for item in scope_paths) + if path + ] + if not normalized_scope or len(precedents) >= MAX_MEMORY_PRECEDENTS: + return precedents + + placeholders = ", ".join("?" for _ in normalized_scope) + path_rows = conn.execute( + f""" + SELECT DISTINCT m.id, m.type, m.status, m.statement, s.subject_key + FROM memory_records m + JOIN memory_subjects s ON s.memory_id = m.id + WHERE m.project_id = ? + AND m.status = 'active' + AND s.subject_kind = 'path' + AND s.subject_key IN ({placeholders}) + ORDER BY m.updated_at_utc DESC, m.id ASC + LIMIT ? + """, + (project_id, *normalized_scope, MAX_MEMORY_PRECEDENTS), + ).fetchall() + overlap_by_memory: dict[str, list[str]] = {} + for row in path_rows: + memory_id = str(row["id"]) + overlap_by_memory.setdefault(memory_id, []).append(str(row["subject_key"])) + for memory_id in sorted(overlap_by_memory): + if len(precedents) >= MAX_MEMORY_PRECEDENTS: + break + row = conn.execute( + "SELECT id, type, status, statement FROM memory_records WHERE id=?", + (memory_id,), + ).fetchone() + if row is None: + continue + overlap = tuple(sorted(set(overlap_by_memory[memory_id])))[:MAX_OVERLAP_PATHS] + _append_memory_precedent( + precedents, + seen=seen, + row=row, + link_kind="path_overlap", + overlap_paths=overlap, + ) + return precedents + + +def _append_memory_precedent( + precedents: list[dict[str, object]], + *, + seen: set[str], + row: sqlite3.Row, + link_kind: str, + overlap_paths: Sequence[str], +) -> None: + memory_id = str(row["id"]) + if memory_id in seen: + return + seen.add(memory_id) + precedents.append( + _memory_precedent_row( + 
row, + link_kind=link_kind, + overlap_paths=overlap_paths, + ) + ) + + +def _trajectory_precedents( + *, + trajectory: Trajectory, + scope_paths: Sequence[str], + canonical_by_workflow: Mapping[str, Trajectory], +) -> list[dict[str, object]]: + if not scope_paths: + return [] + scope_set = set(scope_paths) + candidates: list[tuple[str, str, Trajectory, tuple[str, ...]]] = [] + for candidate in canonical_by_workflow.values(): + match = _trajectory_precedent_match( + candidate, + trajectory=trajectory, + scope_set=scope_set, + ) + if match is not None: + candidates.append(match) + candidates.sort(key=lambda item: (item[0], item[1]), reverse=True) + precedents: list[dict[str, object]] = [] + for _finished, _trajectory_id, candidate, overlap in candidates[ + :MAX_TRAJECTORY_PRECEDENTS + ]: + precedents.append( + { + "trajectory_id": candidate.id, + "workflow_id": candidate.workflow_id, + "outcome": candidate.outcome, + "quality_tier": candidate.quality_tier, + "finished_at_utc": candidate.finished_at_utc, + "overlap_paths": list(overlap), + "summary": _preview_text(candidate.summary), + } + ) + return precedents + + +def _trajectory_precedent_match( + candidate: Trajectory, + *, + trajectory: Trajectory, + scope_set: set[str], +) -> tuple[str, str, Trajectory, tuple[str, ...]] | None: + if ( + candidate.id == trajectory.id + or candidate.workflow_id == trajectory.workflow_id + or candidate.finished_at_utc >= trajectory.started_at_utc + ): + return None + candidate_paths = set( + trajectory_path_subjects(candidate, relations={"about", "touched", "untouched"}) + ) + overlap = tuple(sorted(scope_set & candidate_paths))[:MAX_OVERLAP_PATHS] + if not overlap: + return None + return (candidate.finished_at_utc, candidate.id, candidate, overlap) + + +def _memory_precedent_row( + row: sqlite3.Row, + *, + link_kind: str, + overlap_paths: Sequence[str], +) -> dict[str, object]: + payload: dict[str, object] = { + "memory_id": str(row["id"]), + "record_type": str(row["type"]), + 
"status": str(row["status"]), + "statement_preview": _preview_text(str(row["statement"])), + "link_kind": link_kind, + } + if overlap_paths: + payload["overlap_paths"] = list(overlap_paths) + return payload + + +def _prefer_trajectory_projection( + candidate: Trajectory, + incumbent: Trajectory, +) -> bool: + candidate_rank = projection_version_rank(candidate.projection_version) + incumbent_rank = projection_version_rank(incumbent.projection_version) + if candidate_rank != incumbent_rank: + return candidate_rank > incumbent_rank + if candidate.finished_at_utc != incumbent.finished_at_utc: + return candidate.finished_at_utc > incumbent.finished_at_utc + return candidate.id > incumbent.id + + +def _load_event_core(event_core_json: str) -> Mapping[str, object]: + try: + loaded = orjson.loads(event_core_json) + except orjson.JSONDecodeError: + return {} + return loaded if isinstance(loaded, Mapping) else {} + + +def _preview_text(value: str) -> str: + text = value.strip() + if len(text) <= MAX_STATEMENT_PREVIEW: + return text + return text[: MAX_STATEMENT_PREVIEW - 3] + "..." 
+ + +def _redact_text(value: str) -> str: + return _REDACT_HOME.sub("~", value) + + +def trajectory_index_for_export( + trajectories: Sequence[Trajectory], + *, + profile: TrajectoryExportProfile, +) -> dict[str, Trajectory]: + canonical = select_canonical_trajectories(trajectories) + eligible = [ + trajectory + for trajectory in canonical + if trajectory_eligible_for_export(trajectory, profile=profile) + ] + return {trajectory.workflow_id: trajectory for trajectory in eligible} + + +__all__ = [ + "build_export_context", + "build_export_record", + "extract_trajectory_citations", + "projection_version_rank", + "resolve_export_scope_paths", + "select_canonical_trajectories", + "trajectory_index_for_export", + "trajectory_path_subjects", +] diff --git a/codeclone/memory/trajectory/models.py b/codeclone/memory/trajectory/models.py new file mode 100644 index 00000000..e1b457c5 --- /dev/null +++ b/codeclone/memory/trajectory/models.py @@ -0,0 +1,163 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from ...contracts import ( + TRAJECTORY_PROJECTION_VERSION, + TRAJECTORY_PROJECTION_VERSION_V1, +) + +TrajectoryOutcome = Literal[ + "accepted", + "accepted_with_external_changes", + "violated", + "blocked", + "abandoned", + "partial", +] +TrajectoryQualityTier = Literal[ + "corrected", + "verified", + "incident", + "partial", + "routine", +] +TrajectoryLabel = Literal[ + "analysis_observed", + "baseline_abuse_detected", + "change_control_workflow", + "claim_guard_failed", + "claim_validated", + "external_changes_accepted", + "foreign_conflict_seen", + "hook_blocked", + "memory_used", + "patch_trail_recorded", + "queue_used", + "receipt_issued", + "recovered", + "scope_clean", + "scope_expanded", + "verified_finish", +] + + +@dataclass(frozen=True, slots=True) +class TrajectoryStep: + step_index: int + audit_sequence: int + event_id: str + event_type: str + status: str | None + run_id: str | None + report_digest: str | None + event_core_sha256: str + event_core_json: str + summary: str | None + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class TrajectorySubject: + subject_kind: str + subject_key: str + relation: str = "about" + + +@dataclass(frozen=True, slots=True) +class TrajectoryEvidence: + evidence_kind: str + ref: str + locator: str | None + digest: str | None + created_at_utc: str + + +@dataclass(frozen=True, slots=True) +class Trajectory: + id: str + project_id: str + repo_root_digest: str + workflow_id: str + intent_id: str | None + primary_run_id: str | None + first_run_id: str | None + last_run_id: str | None + report_digest: str | None + outcome: TrajectoryOutcome + quality_tier: TrajectoryQualityTier + quality_score: int + labels: tuple[TrajectoryLabel, ...] 
+ summary: str + trajectory_digest: str + source_event_stream_digest: str + projection_version: str + event_count: int + step_count: int + incident_count: int + started_at_utc: str + finished_at_utc: str + projected_at_utc: str + updated_at_utc: str + steps: tuple[TrajectoryStep, ...] + subjects: tuple[TrajectorySubject, ...] + evidence: tuple[TrajectoryEvidence, ...] + + +@dataclass(frozen=True, slots=True) +class TrajectoryProjectionRun: + id: str + project_id: str + repo_root_digest: str + projection_version: str + started_at_utc: str + finished_at_utc: str + status: str + workflows_seen: int + trajectories_created: int + trajectories_updated: int + trajectories_unchanged: int + legacy_event_count: int + message: str | None + + +@dataclass(frozen=True, slots=True) +class TrajectoryProjectionResult: + run: TrajectoryProjectionRun + trajectories: tuple[Trajectory, ...] + + +@dataclass(frozen=True, slots=True) +class TrajectoryListItem: + id: str + workflow_id: str + outcome: str + quality_tier: str + quality_score: int + event_count: int + started_at_utc: str + finished_at_utc: str + summary: str + + +__all__ = [ + "TRAJECTORY_PROJECTION_VERSION", + "TRAJECTORY_PROJECTION_VERSION_V1", + "Trajectory", + "TrajectoryEvidence", + "TrajectoryLabel", + "TrajectoryListItem", + "TrajectoryOutcome", + "TrajectoryProjectionResult", + "TrajectoryProjectionRun", + "TrajectoryQualityTier", + "TrajectoryStep", + "TrajectorySubject", +] diff --git a/codeclone/memory/trajectory/patch_trail.py b/codeclone/memory/trajectory/patch_trail.py new file mode 100644 index 00000000..68af6c20 --- /dev/null +++ b/codeclone/memory/trajectory/patch_trail.py @@ -0,0 +1,400 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +from collections.abc import Mapping, Sequence +from dataclasses import dataclass + +from ...contracts import PATCH_TRAIL_SCHEMA_VERSION +from ...utils.json_io import json_text +from ..paths import normalize_memory_scope_paths +from .dto import PatchTrailInputs + +MAX_DECLARED_FILES = 500 +MAX_DECLARED_RELATED = 200 +MAX_CHANGED_FILES = 500 +MAX_UNTOUCHED_IN_DECLARED = 500 +MAX_UNEXPECTED_FILES = 200 +MAX_FORBIDDEN_TOUCHED = 50 +MAX_EXPANDED_RELATED = 200 +MAX_BOUNDARY_PATHS = 200 +MAX_HYGIENE_PATHS = 50 +MAX_VERIFY_ITEMS = 32 +INTENT_DESCRIPTION_COMPACT = 500 + +_EXTERNAL_EXECUTION_STUB: dict[str, object] = { + "schema_version": "0", + "status": "not_collected", + "failed_commands": [], + "test_deltas": [], +} + + +@dataclass(frozen=True, slots=True) +class PatchTrail: + schema_version: str + intent_id: str | None + intent_description: str + declared_files: tuple[str, ...] + declared_related: tuple[str, ...] + changed_files: tuple[str, ...] + untouched_in_declared: tuple[str, ...] + unexpected_files: tuple[str, ...] + forbidden_touched: tuple[str, ...] + expanded_related_files: tuple[str, ...] + do_not_touch_declared: tuple[str, ...] + do_not_touch_held: tuple[str, ...] + review_context_declared: tuple[str, ...] + review_context_untouched: tuple[str, ...] + scope_check_status: str + verification_profile: str + verification_status: str + verification_skipped: tuple[str, ...] + verification_failed: tuple[str, ...] 
+ workspace_hygiene: Mapping[str, object] + external_execution: Mapping[str, object] + evidence: Mapping[str, object] + truncation: Mapping[str, bool] + patch_trail_digest: str + + def counts(self) -> dict[str, int]: + return { + "declared": len(self.declared_files), + "changed": len(self.changed_files), + "untouched_in_declared": len(self.untouched_in_declared), + "unexpected": len(self.unexpected_files), + "forbidden_touched": len(self.forbidden_touched), + "do_not_touch_declared": len(self.do_not_touch_declared), + "do_not_touch_held": len(self.do_not_touch_held), + "review_context_declared": len(self.review_context_declared), + "review_context_untouched": len(self.review_context_untouched), + } + + def to_payload(self, *, detail_level: str = "summary") -> dict[str, object]: + if detail_level == "full": + return self._full_payload() + return self._summary_payload() + + def _summary_payload(self) -> dict[str, object]: + return { + "schema_version": self.schema_version, + "intent_id": self.intent_id, + "intent_description": self.intent_description[:INTENT_DESCRIPTION_COMPACT], + "scope_check_status": self.scope_check_status, + "verification_status": self.verification_status, + "counts": self.counts(), + "truncation": dict(self.truncation), + "patch_trail_digest": self.patch_trail_digest, + "evidence": dict(self.evidence), + "retrieval_policy": { + "patch_trail_does_not_authorize_edits": True, + "patch_trail_does_not_override_findings": True, + }, + } + + def _full_payload(self) -> dict[str, object]: + payload = self._canonical_dict(include_digest=False) + payload["patch_trail_digest"] = self.patch_trail_digest + payload["retrieval_policy"] = { + "patch_trail_does_not_authorize_edits": True, + "patch_trail_does_not_override_findings": True, + } + return payload + + def _canonical_dict(self, *, include_digest: bool) -> dict[str, object]: + payload: dict[str, object] = { + "schema_version": self.schema_version, + "intent_id": self.intent_id, + "intent_description": 
self.intent_description, + "declared_files": list(self.declared_files), + "declared_related": list(self.declared_related), + "changed_files": list(self.changed_files), + "untouched_in_declared": list(self.untouched_in_declared), + "unexpected_files": list(self.unexpected_files), + "forbidden_touched": list(self.forbidden_touched), + "expanded_related_files": list(self.expanded_related_files), + "do_not_touch_declared": list(self.do_not_touch_declared), + "do_not_touch_held": list(self.do_not_touch_held), + "review_context_declared": list(self.review_context_declared), + "review_context_untouched": list(self.review_context_untouched), + "scope_check_status": self.scope_check_status, + "verification_profile": self.verification_profile, + "verification_status": self.verification_status, + "verification_skipped": list(self.verification_skipped), + "verification_failed": list(self.verification_failed), + "workspace_hygiene": dict(self.workspace_hygiene), + "external_execution": dict(self.external_execution), + "evidence": dict(self.evidence), + "truncation": dict(self.truncation), + } + if include_digest: + payload["patch_trail_digest"] = self.patch_trail_digest + return payload + + def audit_payload(self) -> dict[str, object]: + return self._canonical_dict(include_digest=True) + + +def compute_patch_trail(inputs: PatchTrailInputs) -> PatchTrail: + declared_files, declared_trunc = _bounded_paths( + inputs.declared_files, + limit=MAX_DECLARED_FILES, + ) + declared_related, related_trunc = _bounded_paths( + inputs.declared_related, + limit=MAX_DECLARED_RELATED, + ) + changed_files, changed_trunc = _bounded_paths( + inputs.changed_files, + limit=MAX_CHANGED_FILES, + ) + unexpected_files, unexpected_trunc = _bounded_paths( + inputs.unexpected_files, + limit=MAX_UNEXPECTED_FILES, + ) + forbidden_touched, forbidden_trunc = _bounded_paths( + inputs.forbidden_touched, + limit=MAX_FORBIDDEN_TOUCHED, + ) + expanded_related_files, expanded_trunc = _bounded_paths( + 
inputs.expanded_related_files, + limit=MAX_EXPANDED_RELATED, + ) + do_not_touch_declared, dnt_decl_trunc = _bounded_paths( + inputs.blast_radius.do_not_touch_declared, + limit=MAX_BOUNDARY_PATHS, + ) + review_context_declared, rc_decl_trunc = _bounded_paths( + inputs.blast_radius.review_context_declared, + limit=MAX_BOUNDARY_PATHS, + ) + changed_set = set(changed_files) + untouched_full = tuple(sorted(set(declared_files) - changed_set)) + untouched_in_declared, untouched_trunc = _bounded_paths( + untouched_full, + limit=MAX_UNTOUCHED_IN_DECLARED, + ) + do_not_touch_held, dnt_held_trunc = _bounded_paths( + tuple(sorted(set(do_not_touch_declared) - changed_set)), + limit=MAX_BOUNDARY_PATHS, + ) + review_context_untouched, rc_untouched_trunc = _bounded_paths( + tuple(sorted(set(review_context_declared) - changed_set)), + limit=MAX_BOUNDARY_PATHS, + ) + unack, unack_trunc = _bounded_paths( + inputs.hygiene.unacknowledged_dirty_in_scope, + limit=MAX_HYGIENE_PATHS, + ) + outside, outside_trunc = _bounded_paths( + inputs.hygiene.dirty_paths_outside_scope, + limit=MAX_HYGIENE_PATHS, + ) + verification_skipped, skipped_trunc = _bounded_strings( + inputs.verify.verification_skipped, + limit=MAX_VERIFY_ITEMS, + ) + verification_failed, failed_trunc = _bounded_strings( + inputs.verify.verification_failed, + limit=MAX_VERIFY_ITEMS, + ) + truncation: dict[str, bool] = { + "declared_files": declared_trunc, + "declared_related": related_trunc, + "changed_files": changed_trunc, + "untouched_in_declared": untouched_trunc, + "unexpected_files": unexpected_trunc, + "forbidden_touched": forbidden_trunc, + "expanded_related_files": expanded_trunc, + "do_not_touch_declared": dnt_decl_trunc, + "do_not_touch_held": dnt_held_trunc, + "review_context_declared": rc_decl_trunc, + "review_context_untouched": rc_untouched_trunc, + "hygiene_paths": unack_trunc or outside_trunc, + "verification_skipped": skipped_trunc, + "verification_failed": failed_trunc, + } + workspace_hygiene: dict[str, 
object] = { + "blocks_finish": inputs.hygiene.blocks_finish, + "finish_block_reason": inputs.hygiene.finish_block_reason, + "unacknowledged_dirty_in_scope": list(unack), + "dirty_paths_outside_scope": list(outside), + "attribution_counts": dict(inputs.hygiene.attribution_counts), + } + evidence: dict[str, object] = { + "repo_root_digest": inputs.evidence.repo_root_digest, + "report_digest": inputs.evidence.report_digest, + "intent_declared_audit_sequence": ( + inputs.evidence.intent_declared_audit_sequence + ), + "scope_check_audit_sequence": inputs.evidence.scope_check_audit_sequence, + "patch_verify_audit_sequence": inputs.evidence.patch_verify_audit_sequence, + "receipt_audit_sequence": inputs.evidence.receipt_audit_sequence, + "patch_trail_audit_sequence": inputs.evidence.patch_trail_audit_sequence, + } + trail = PatchTrail( + schema_version=PATCH_TRAIL_SCHEMA_VERSION, + intent_id=inputs.intent_id, + intent_description=inputs.intent_description, + declared_files=declared_files, + declared_related=declared_related, + changed_files=changed_files, + untouched_in_declared=untouched_in_declared, + unexpected_files=unexpected_files, + forbidden_touched=forbidden_touched, + expanded_related_files=expanded_related_files, + do_not_touch_declared=do_not_touch_declared, + do_not_touch_held=do_not_touch_held, + review_context_declared=review_context_declared, + review_context_untouched=review_context_untouched, + scope_check_status=inputs.scope_check_status, + verification_profile=inputs.verify.verification_profile, + verification_status=inputs.verify.verification_status, + verification_skipped=verification_skipped, + verification_failed=verification_failed, + workspace_hygiene=workspace_hygiene, + external_execution=dict(_EXTERNAL_EXECUTION_STUB), + evidence=evidence, + truncation=truncation, + patch_trail_digest="", + ) + digest = _patch_trail_digest(trail._canonical_dict(include_digest=False)) + return PatchTrail( + schema_version=trail.schema_version, + 
intent_id=trail.intent_id, + intent_description=trail.intent_description, + declared_files=trail.declared_files, + declared_related=trail.declared_related, + changed_files=trail.changed_files, + untouched_in_declared=trail.untouched_in_declared, + unexpected_files=trail.unexpected_files, + forbidden_touched=trail.forbidden_touched, + expanded_related_files=trail.expanded_related_files, + do_not_touch_declared=trail.do_not_touch_declared, + do_not_touch_held=trail.do_not_touch_held, + review_context_declared=trail.review_context_declared, + review_context_untouched=trail.review_context_untouched, + scope_check_status=trail.scope_check_status, + verification_profile=trail.verification_profile, + verification_status=trail.verification_status, + verification_skipped=trail.verification_skipped, + verification_failed=trail.verification_failed, + workspace_hygiene=trail.workspace_hygiene, + external_execution=trail.external_execution, + evidence=trail.evidence, + truncation=trail.truncation, + patch_trail_digest=digest, + ) + + +def patch_trail_summary_line(trail: PatchTrail) -> str: + counts = trail.counts() + return ( + f"declared={counts['declared']} changed={counts['changed']} " + f"untouched={counts['untouched_in_declared']} " + f"verify={trail.verification_status} tier={trail.scope_check_status}" + ) + + +def patch_trail_from_mapping(payload: Mapping[str, object]) -> PatchTrail | None: + if str(payload.get("schema_version", "")) != PATCH_TRAIL_SCHEMA_VERSION: + return None + return PatchTrail( + schema_version=PATCH_TRAIL_SCHEMA_VERSION, + intent_id=_optional_str(payload.get("intent_id")), + intent_description=str(payload.get("intent_description", "")), + declared_files=_path_tuple(payload.get("declared_files")), + declared_related=_path_tuple(payload.get("declared_related")), + changed_files=_path_tuple(payload.get("changed_files")), + untouched_in_declared=_path_tuple(payload.get("untouched_in_declared")), + 
unexpected_files=_path_tuple(payload.get("unexpected_files")), + forbidden_touched=_path_tuple(payload.get("forbidden_touched")), + expanded_related_files=_path_tuple(payload.get("expanded_related_files")), + do_not_touch_declared=_path_tuple(payload.get("do_not_touch_declared")), + do_not_touch_held=_path_tuple(payload.get("do_not_touch_held")), + review_context_declared=_path_tuple(payload.get("review_context_declared")), + review_context_untouched=_path_tuple(payload.get("review_context_untouched")), + scope_check_status=str(payload.get("scope_check_status", "")), + verification_profile=str(payload.get("verification_profile", "")), + verification_status=str(payload.get("verification_status", "")), + verification_skipped=_string_tuple(payload.get("verification_skipped")), + verification_failed=_string_tuple(payload.get("verification_failed")), + workspace_hygiene=_mapping(payload.get("workspace_hygiene")), + external_execution=_mapping(payload.get("external_execution")), + evidence=_mapping(payload.get("evidence")), + truncation={ + str(key): bool(value) + for key, value in _mapping(payload.get("truncation")).items() + }, + patch_trail_digest=str(payload.get("patch_trail_digest", "")), + ) + + +def _bounded_paths( + paths: Sequence[str], + *, + limit: int, +) -> tuple[tuple[str, ...], bool]: + if not paths: + return (), False + normalized = normalize_memory_scope_paths(paths) + unique = tuple(sorted(set(normalized))) + if len(unique) <= limit: + return unique, False + return unique[:limit], True + + +def _bounded_strings( + values: Sequence[str], + *, + limit: int, +) -> tuple[tuple[str, ...], bool]: + unique = tuple(sorted({str(item).strip() for item in values if str(item).strip()})) + if len(unique) <= limit: + return unique, False + return unique[:limit], True + + +def _patch_trail_digest(payload: Mapping[str, object]) -> str: + return hashlib.sha256(_canonical_json(payload).encode("utf-8")).hexdigest() + + +def _canonical_json(payload: Mapping[str, object]) 
-> str: + return json_text(payload, sort_keys=True) + + +def _path_tuple(value: object) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return () + items = [str(item) for item in value if str(item).strip()] + if not items: + return () + return normalize_memory_scope_paths(items) + + +def _string_tuple(value: object) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return () + return tuple(sorted({str(item).strip() for item in value if str(item).strip()})) + + +def _mapping(value: object) -> dict[str, object]: + return dict(value) if isinstance(value, Mapping) else {} + + +def _optional_str(value: object) -> str | None: + text = str(value or "").strip() + return text or None + + +__all__ = [ + "PatchTrail", + "compute_patch_trail", + "patch_trail_from_mapping", + "patch_trail_summary_line", +] diff --git a/codeclone/memory/trajectory/patch_trail_projector.py b/codeclone/memory/trajectory/patch_trail_projector.py new file mode 100644 index 00000000..3ba6142a --- /dev/null +++ b/codeclone/memory/trajectory/patch_trail_projector.py @@ -0,0 +1,203 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field + +import orjson + +from ...audit.events import ( + EVENT_INTENT_CHECKED, + EVENT_INTENT_DECLARED, + EVENT_INTENT_VIOLATED, + EVENT_PATCH_TRAIL_COMPUTED, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, +) +from ...audit.reader import AuditRecord +from .dto import ( + BlastRadiusSnapshot, + HygieneSnapshot, + PatchTrailEvidenceInput, + PatchTrailInputs, + VerifySnapshot, +) +from .patch_trail import PatchTrail, compute_patch_trail +from .projector import TrajectoryProjectionError + + +@dataclass +class _WorkflowAuditState: + intent_id: str | None = None + intent_description: str = "" + declared_files: tuple[str, ...] = () + declared_related: tuple[str, ...] = () + changed_files: tuple[str, ...] = () + unexpected_files: tuple[str, ...] = () + forbidden_touched: tuple[str, ...] 
= () + scope_check_status: str = "partial" + verify: VerifySnapshot = field( + default_factory=lambda: VerifySnapshot( + verification_profile="unknown", + verification_status="not_reached", + verification_skipped=(), + verification_failed=(), + ) + ) + intent_declared_seq: int | None = None + scope_check_seq: int | None = None + patch_verify_seq: int | None = None + patch_trail_seq: int | None = None + receipt_seq: int | None = None + report_digest: str | None = None + + +def project_patch_trail_from_audit( + *, + records: Sequence[AuditRecord], + repo_root_digest: str, +) -> PatchTrail | None: + if not records: + return None + workflow_id = records[0].workflow_id or "" + if not workflow_id.startswith("intent:"): + return None + ordered = tuple(sorted(records, key=_record_order_key)) + state = _WorkflowAuditState() + for record in ordered: + _apply_audit_record(state, record) + if not state.declared_files and not state.changed_files: + return None + inputs = PatchTrailInputs( + intent_id=state.intent_id, + intent_description=state.intent_description, + declared_files=state.declared_files, + declared_related=state.declared_related, + changed_files=state.changed_files, + unexpected_files=state.unexpected_files, + forbidden_touched=state.forbidden_touched, + expanded_related_files=tuple( + sorted( + path + for path in state.changed_files + if path in set(state.declared_related) + ) + ), + scope_check_status=state.scope_check_status, + blast_radius=BlastRadiusSnapshot( + do_not_touch_declared=(), + review_context_declared=(), + ), + verify=state.verify, + hygiene=HygieneSnapshot( + blocks_finish=False, + finish_block_reason=None, + unacknowledged_dirty_in_scope=(), + dirty_paths_outside_scope=(), + attribution_counts={}, + ), + evidence=PatchTrailEvidenceInput( + repo_root_digest=repo_root_digest, + report_digest=state.report_digest, + intent_declared_audit_sequence=state.intent_declared_seq, + scope_check_audit_sequence=state.scope_check_seq, + 
patch_verify_audit_sequence=state.patch_verify_seq, + receipt_audit_sequence=state.receipt_seq, + patch_trail_audit_sequence=state.patch_trail_seq, + ), + ) + return compute_patch_trail(inputs) + + +def _apply_audit_record(state: _WorkflowAuditState, record: AuditRecord) -> None: + if record.audit_sequence is None: + return + if record.report_digest: + state.report_digest = record.report_digest + if record.event_type == EVENT_INTENT_DECLARED: + state.intent_id = record.intent_id or state.intent_id + state.intent_declared_seq = record.audit_sequence + if record.summary: + state.intent_description = record.summary + core = _event_core(record) + state.declared_files = _facts_paths(core, "scope_paths") or state.declared_files + return + if record.event_type in {EVENT_INTENT_CHECKED, EVENT_INTENT_VIOLATED}: + state.scope_check_seq = record.audit_sequence + state.scope_check_status = ( + _clean_text(record.status) or state.scope_check_status + ) + core = _event_core(record) + state.declared_files = ( + _facts_paths(core, "declared_scope_paths") or state.declared_files + ) + state.changed_files = _facts_paths(core, "changed_files") + state.unexpected_files = _facts_paths(core, "unexpected_files_list") + state.forbidden_touched = _facts_paths(core, "forbidden_touched_list") + if not state.scope_check_status: + state.scope_check_status = _clean_text(core.get("status")) or "partial" + return + if record.event_type in {EVENT_PATCH_VERIFIED, EVENT_PATCH_VIOLATED}: + state.patch_verify_seq = record.audit_sequence + core = _event_core(record) + status = _clean_text(record.status) or _clean_text(core.get("status")) + state.verify = VerifySnapshot( + verification_profile="unknown", + verification_status=status or "not_reached", + verification_skipped=(), + verification_failed=(), + ) + return + if record.event_type == EVENT_PATCH_TRAIL_COMPUTED: + state.patch_trail_seq = record.audit_sequence + return + if record.event_type == "receipt.created": + state.receipt_seq = 
record.audit_sequence + + +def _record_order_key(record: AuditRecord) -> tuple[int, str]: + sequence = record.audit_sequence + if sequence is None: + raise TrajectoryProjectionError("audit event is missing audit_sequence") + return (sequence, record.event_id) + + +def _event_core(record: AuditRecord) -> Mapping[str, object]: + if not record.event_core_json or not record.event_core_sha256: + return {} + actual = hashlib.sha256(record.event_core_json.encode("utf-8")).hexdigest() + if actual != record.event_core_sha256: + raise TrajectoryProjectionError("event core digest mismatch") + loaded = orjson.loads(record.event_core_json) + return loaded if isinstance(loaded, dict) else {} + + +def _facts_paths(core: Mapping[str, object], key: str) -> tuple[str, ...]: + facts = core.get("facts") + if not isinstance(facts, Mapping): + return () + raw = facts.get(key) + if not isinstance(raw, list): + return () + paths = [ + text.strip().replace("\\", "/") + for item in raw + if isinstance(item, str) and (text := item.strip()) + ] + return tuple(sorted(set(paths))) + + +def _clean_text(value: object) -> str | None: + if not isinstance(value, str): + return None + stripped = value.strip() + return stripped or None + + +__all__ = ["project_patch_trail_from_audit"] diff --git a/codeclone/memory/trajectory/profiles.py b/codeclone/memory/trajectory/profiles.py new file mode 100644 index 00000000..8b84c58b --- /dev/null +++ b/codeclone/memory/trajectory/profiles.py @@ -0,0 +1,108 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
@dataclass(frozen=True, slots=True)
class TrajectoryExportProfile:
    """Immutable description of one trajectory export profile.

    Instances are registered in ``EXPORT_PROFILES`` and consulted by
    ``trajectory_eligible_for_export`` to decide whether a trajectory may be
    exported under the profile.
    """

    # Profile identifier; one of the ExportProfileName literals.
    name: ExportProfileName
    # Export schema version; the built-in profiles all use
    # TRAJECTORY_EXPORT_SCHEMA_VERSION.
    schema_version: str
    # Human-readable summary of what the profile is for.
    description: str
    # Quality tiers a trajectory must belong to in order to be exported.
    allowed_quality_tiers: frozenset[TrajectoryQualityTier]
    # When False, trajectories whose outcome is "partial" are rejected.
    allow_partial: bool
    # When False, trajectories in the "incident" tier are rejected even if
    # that tier is listed in allowed_quality_tiers.
    allow_incident: bool
def trajectory_eligible_for_export(
    trajectory: Trajectory,
    *,
    profile: TrajectoryExportProfile,
) -> bool:
    """Tell whether ``trajectory`` may be exported under ``profile``.

    A trajectory is rejected when either digest is missing, when its tier is
    "routine" or not listed by the profile, when its outcome is "partial" and
    the profile disallows partials, or when it is an "incident" and the
    profile disallows incidents.
    """
    has_digests = bool(trajectory.source_event_stream_digest) and bool(
        trajectory.trajectory_digest
    )
    if not has_digests:
        return False
    tier = trajectory.quality_tier
    if tier == "routine" or tier not in profile.allowed_quality_tiers:
        return False
    if trajectory.outcome == "partial" and not profile.allow_partial:
        return False
    if tier == "incident":
        return bool(profile.allow_incident)
    return True
def project_trajectory(
    *,
    project_id: str,
    repo_root_digest: str,
    workflow_id: str,
    records: Sequence[AuditRecord],
    projection_version: str = TRAJECTORY_PROJECTION_VERSION,
    projected_at_utc: str | None = None,
    patch_trail_digest: str | None = None,
) -> Trajectory:
    """Project the audit records of one workflow into a ``Trajectory``.

    Args:
        project_id: Stored on the resulting trajectory as-is.
        repo_root_digest: Feeds the trajectory id and the trajectory digest.
        workflow_id: Workflow every record must belong to.
        records: Audit records to project; must be non-empty.
        projection_version: Feeds the trajectory id and the trajectory digest.
        projected_at_utc: Timestamp to stamp on the projection; when falsy,
            ``current_report_timestamp_utc()`` is used.
        patch_trail_digest: Optional digest folded into the trajectory digest
            when truthy.

    Returns:
        The projected trajectory with ``quality_score`` set to 0.

    Raises:
        TrajectoryProjectionError: When ``records`` is empty, or when one of
            the helpers rejects a record (missing ``audit_sequence``, a
            different workflow id, or a missing/mismatching event core).
    """
    if not records:
        raise TrajectoryProjectionError("trajectory projection requires events")
    # Replay order is (audit_sequence, event_id), independent of input order.
    ordered = tuple(sorted(records, key=_record_order_key))
    _validate_single_workflow(workflow_id, ordered)
    cores = tuple(_validated_event_core(record) for record in ordered)
    steps = tuple(
        _step_from_record(index, record) for index, record in enumerate(ordered)
    )
    # Classification chain: outcome -> labels -> quality tier.
    outcome = _outcome(ordered, cores)
    labels = _labels(ordered, cores, outcome=outcome)
    quality_tier = _quality_tier(outcome=outcome, records=ordered, labels=labels)
    # Non-blank run ids in replay order (duplicates are kept).
    run_ids = tuple(
        value for value in (_clean_text(record.run_id) for record in ordered) if value
    )
    report_digests = tuple(
        value
        for value in (
            _canonical_report_digest(record.report_digest) for record in ordered
        )
        if value
    )
    intent_id = _first_text(record.intent_id for record in ordered)
    now = projected_at_utc or current_report_timestamp_utc()
    event_count = len(ordered)
    # An "incident" here is any record whose severity is warn or error.
    incident_count = sum(
        1 for record in ordered if record.severity in {"warn", "error"}
    )
    first_run_id = run_ids[0] if run_ids else None
    last_run_id = run_ids[-1] if run_ids else None
    # The most recent run is treated as the primary one.
    primary_run_id = last_run_id or first_run_id
    report_digest = report_digests[-1] if report_digests else None
    source_stream_digest = _source_event_stream_digest(ordered)
    summary = _summary(
        workflow_id=workflow_id,
        outcome=outcome,
        quality_tier=quality_tier,
        event_count=event_count,
        incident_count=incident_count,
        labels=labels,
        first_summary=_first_text(record.summary for record in ordered),
    )
    trajectory_id = _trajectory_id(
        projection_version=projection_version,
        repo_root_digest=repo_root_digest,
        workflow_id=workflow_id,
    )
    # Subjects are derived from the event cores plus any facts recovered
    # from the record payloads.
    subjects = _subjects(
        workflow_id=workflow_id,
        intent_id=intent_id,
        run_ids=run_ids,
        report_digests=report_digests,
        cores=_cores_with_payload_supplements(cores, ordered),
        agent_label=_primary_agent_label(ordered),
    )
    # Single evidence entry for the whole stream, located by the first
    # record's audit sequence.
    evidence = (
        TrajectoryEvidence(
            evidence_kind="audit_event_stream",
            ref=workflow_id,
            locator=str(ordered[0].audit_sequence),
            digest=source_stream_digest,
            created_at_utc=now,
        ),
    )
    # The digest is computed with quality_score=0; trajectory_digest_for can
    # recompute it for a different score.
    trajectory_digest = _trajectory_digest(
        projection_version=projection_version,
        repo_root_digest=repo_root_digest,
        workflow_id=workflow_id,
        outcome=outcome,
        quality_tier=quality_tier,
        quality_score=0,
        labels=labels,
        summary=summary,
        source_event_stream_digest=source_stream_digest,
        steps=steps,
        patch_trail_digest=patch_trail_digest,
    )
    return Trajectory(
        id=trajectory_id,
        project_id=project_id,
        repo_root_digest=repo_root_digest,
        workflow_id=workflow_id,
        intent_id=intent_id,
        primary_run_id=primary_run_id,
        first_run_id=first_run_id,
        last_run_id=last_run_id,
        report_digest=report_digest,
        outcome=outcome,
        quality_tier=quality_tier,
        quality_score=0,
        labels=labels,
        summary=summary,
        trajectory_digest=trajectory_digest,
        source_event_stream_digest=source_stream_digest,
        projection_version=projection_version,
        event_count=event_count,
        step_count=len(steps),
        incident_count=incident_count,
        # Start/finish come from the first and last record in replay order.
        started_at_utc=ordered[0].created_at_utc,
        finished_at_utc=ordered[-1].created_at_utc,
        projected_at_utc=now,
        updated_at_utc=now,
        steps=steps,
        subjects=subjects,
        evidence=evidence,
    )
def _outcome(
    records: Sequence[AuditRecord],
    cores: Sequence[Mapping[str, object]],
) -> TrajectoryOutcome:
    """Classify the workflow outcome from its records and event cores.

    Precedence, highest first: any violation event or ``baseline_abuse``
    fact -> "violated"; a verified patch with an accepted status ->
    "accepted_with_external_changes" or "accepted"; an expiry event ->
    "abandoned"; a queue block or workspace conflict -> "blocked";
    otherwise "partial".
    """
    seen_types = {record.event_type for record in records}
    # Statuses are pooled from the records and from the event cores.
    observed_statuses: set[str] = set()
    for candidate in (
        *(record.status for record in records),
        *(core.get("status") for core in cores),
    ):
        cleaned = _clean_text(candidate)
        if cleaned is not None:
            observed_statuses.add(cleaned)

    violation_types = {
        EVENT_BASELINE_ABUSE,
        EVENT_CLAIM_VIOLATED,
        EVENT_PATCH_VIOLATED,
        EVENT_INTENT_VIOLATED,
    }
    if not seen_types.isdisjoint(violation_types):
        return "violated"
    for core in cores:
        if _core_fact_bool(core, "baseline_abuse"):
            return "violated"

    if EVENT_PATCH_VERIFIED in seen_types:
        # The more specific accepted status wins when both are present.
        if "accepted_with_external_changes" in observed_statuses:
            return "accepted_with_external_changes"
        if "accepted" in observed_statuses:
            return "accepted"

    if not seen_types.isdisjoint({EVENT_PATCH_EXPIRED, EVENT_INTENT_EXPIRED}):
        return "abandoned"
    if not seen_types.isdisjoint(
        {EVENT_INTENT_QUEUE_BLOCKED, EVENT_WORKSPACE_CONFLICT}
    ):
        return "blocked"
    return "partial"
EVENT_CLAIM_COMPLETED, + EVENT_CLAIM_VIOLATED, + EVENT_RECEIPT_CREATED, + } +) + + +def _labels( + records: Sequence[AuditRecord], + cores: Sequence[Mapping[str, object]], + *, + outcome: TrajectoryOutcome, +) -> tuple[TrajectoryLabel, ...]: + labels: set[TrajectoryLabel] = set() + event_types = {record.event_type for record in records} + if event_types & _CHANGE_CONTROL_EVENT_TYPES: + labels.add("change_control_workflow") + elif EVENT_ANALYSIS_COMPLETED in event_types: + labels.add("analysis_observed") + if outcome in {"accepted", "accepted_with_external_changes"} and ( + EVENT_PATCH_VERIFIED in event_types + ): + labels.add("verified_finish") + if any( + record.event_type == EVENT_INTENT_CHECKED + and _clean_text(record.status) in {"clean", "expanded"} + for record in records + ): + labels.add("scope_clean") + if EVENT_INTENT_EXPANDED in event_types: + labels.add("scope_expanded") + if EVENT_INTENT_QUEUED in event_types: + labels.add("queue_used") + if EVENT_PATCH_TRAIL_COMPUTED in event_types: + labels.add("patch_trail_recorded") + if EVENT_RECEIPT_CREATED in event_types: + labels.add("receipt_issued") + if EVENT_CLAIM_COMPLETED in event_types: + labels.add("claim_validated") + if EVENT_BASELINE_ABUSE in event_types or any( + _core_fact_bool(core, "baseline_abuse") for core in cores + ): + labels.add("baseline_abuse_detected") + if EVENT_CLAIM_VIOLATED in event_types: + labels.add("claim_guard_failed") + if EVENT_WORKSPACE_CONFLICT in event_types: + labels.add("foreign_conflict_seen") + if EVENT_INTENT_PROMOTED in event_types: + labels.add("recovered") + if any( + record.surface == "hook" and record.severity in {"warn", "error"} + for record in records + ): + labels.add("hook_blocked") + if any( + (record.tool_name or "").startswith("manage_engineering_memory") + for record in records + ): + labels.add("memory_used") + if any(record.status == "accepted_with_external_changes" for record in records): + labels.add("external_changes_accepted") + return 
tuple(sorted(labels)) + + +def _quality_tier( + *, + outcome: TrajectoryOutcome, + records: Sequence[AuditRecord], + labels: Sequence[TrajectoryLabel], +) -> TrajectoryQualityTier: + if outcome == "violated" or any( + label in {"baseline_abuse_detected", "claim_guard_failed", "hook_blocked"} + for label in labels + ): + return "incident" + if outcome == "partial": + return "partial" + if outcome in {"accepted", "accepted_with_external_changes"}: + if any( + record.event_type in {EVENT_PATCH_VIOLATED, EVENT_INTENT_VIOLATED} + for record in records + ): + return "corrected" + return "verified" + return "routine" + + +def _cores_with_payload_supplements( + cores: Sequence[Mapping[str, object]], + records: Sequence[AuditRecord], +) -> tuple[Mapping[str, object], ...]: + if not any(record.payload_json for record in records): + return tuple(cores) + supplemented: list[Mapping[str, object]] = list(cores) + for record in records: + facts = projection_supplement_facts_from_payload( + record.event_type, + record.payload_json, + ) + if facts: + supplemented.append({"facts": facts}) + return tuple(supplemented) + + +def _primary_agent_label(records: Sequence[AuditRecord]) -> str | None: + for record in records: + if record.event_type == EVENT_INTENT_DECLARED: + label = _clean_text(record.agent_label) + if label: + return label + for record in records: + label = _clean_text(record.agent_label) + if label: + return label + return None + + +def _subjects( + *, + workflow_id: str, + intent_id: str | None, + run_ids: Sequence[str], + report_digests: Sequence[str], + cores: Sequence[Mapping[str, object]], + agent_label: str | None = None, +) -> tuple[TrajectorySubject, ...]: + path_about, path_touched, path_untouched = _path_subjects_from_cores(cores) + subjects = { + ("workflow", workflow_id, "about"), + *{("run", run_id, "observed") for run_id in run_ids}, + *{("report_digest", digest, "evidence") for digest in report_digests}, + *{("path", path, "about") for path in path_about}, 
+ *{("path", path, "touched") for path in path_touched}, + *{("path", path, "untouched") for path in path_untouched}, + } + if intent_id: + subjects.add(("intent", intent_id, "about")) + if agent_label: + subjects.add(("agent", agent_label, "actor")) + return tuple( + TrajectorySubject(subject_kind=kind, subject_key=key, relation=relation) + for kind, key, relation in sorted(subjects) + ) + + +def _path_subjects_from_cores( + cores: Sequence[Mapping[str, object]], +) -> tuple[tuple[str, ...], tuple[str, ...], tuple[str, ...]]: + about: set[str] = set() + touched: set[str] = set() + untouched: set[str] = set() + for core in cores: + facts = core.get("facts") + if not isinstance(facts, Mapping): + continue + about.update(_facts_path_list(facts, "scope_paths")) + about.update(_facts_path_list(facts, "declared_scope_paths")) + touched.update(_facts_path_list(facts, "changed_files")) + untouched.update(_facts_path_list(facts, "untouched_in_declared")) + return ( + tuple(sorted(about)), + tuple(sorted(touched)), + tuple(sorted(untouched)), + ) + + +def _facts_path_list(facts: Mapping[str, object], key: str) -> tuple[str, ...]: + raw_paths = facts.get(key) + if not isinstance(raw_paths, list): + return () + paths = [ + text.strip() + for item in raw_paths + if isinstance(item, str) and (text := item.strip()) + ] + return tuple(sorted(set(paths))) + + +def _summary( + *, + workflow_id: str, + outcome: TrajectoryOutcome, + quality_tier: TrajectoryQualityTier, + event_count: int, + incident_count: int, + labels: Sequence[TrajectoryLabel], + first_summary: str | None, +) -> str: + label_text = ",".join(labels) if labels else "none" + prefix = ( + f"{workflow_id}: outcome={outcome}; tier={quality_tier}; " + f"events={event_count}; incidents={incident_count}; labels={label_text}" + ) + if first_summary: + return f"{prefix}; first_summary={first_summary[:160]}" + return prefix + + +def _trajectory_id( + *, + projection_version: str, + repo_root_digest: str, + workflow_id: str, 
def _source_event_stream_digest(records: Sequence[AuditRecord]) -> str:
    """Hash the event stream as (audit_sequence, event_core_sha256) pairs.

    The pairs are wrapped in an ``{"events": [...]}`` object, serialized with
    ``_canonical_json`` and hashed with ``_sha256``. Records are taken in the
    order given.
    """
    events: list[dict[str, object]] = []
    for record in records:
        events.append(
            {
                "audit_sequence": record.audit_sequence,
                "event_core_sha256": record.event_core_sha256,
            }
        )
    canonical = _canonical_json({"events": events})
    return _sha256(canonical)
def _canonical_report_digest(value: str | None) -> str | None:
    """Normalize a report digest to the ``sha256:<64 lowercase hex>`` form.

    Blank or non-string input gives ``None``. A value that, once lowercased
    and stripped of an optional ``sha256:`` prefix, is exactly 64 hex
    characters is returned in canonical form; any other text is returned
    stripped but otherwise unchanged.
    """
    text = _clean_text(value)
    if text is None:
        return None
    candidate = text.lower()
    if candidate.startswith("sha256:"):
        candidate = candidate[len("sha256:") :]
    is_hex_digest = len(candidate) == 64 and not (
        set(candidate) - set("0123456789abcdef")
    )
    if is_hex_digest:
        return f"sha256:{candidate}"
    return text
@dataclass(frozen=True, slots=True)
class TrajectoryQualityContract:
    """Quality metrics for one trajectory plus their per-component breakdown.

    Built by ``compute_trajectory_quality_contract``.
    """

    # Minimum of the component scores, clamped to 0-100.
    quality_score: int
    # Result of compute_trajectory_complexity_score; reported separately
    # from quality.
    complexity_score: int
    # Score of the "scope" component.
    scope_accuracy: int
    # Whole seconds between trajectory start and finish (never negative).
    duration_seconds: int
    # Number of anomalies returned by detect_trajectory_anomalies.
    anomaly_count: int
    # Set to TRAJECTORY_QUALITY_SCORE_VERSION when the contract is computed.
    score_version: str
    # One entry per scored component: outcome, verification, scope,
    # incidents, anomalies, receipt.
    components: tuple[TrajectoryQualityComponent, ...]
+ + +def compute_trajectory_duration_seconds(trajectory: Trajectory) -> int: + """Return non-negative whole seconds between trajectory start and finish.""" + started = _parse_utc_timestamp(trajectory.started_at_utc) + finished = _parse_utc_timestamp(trajectory.finished_at_utc) + if started is None or finished is None: + return 0 + delta = finished - started + return max(0, int(delta.total_seconds())) + + +_COMPLEXITY_DECLARED_CAP = 40 +_COMPLEXITY_EVENT_CAP = 30 +_COMPLEXITY_STEP_CAP = 20 +_QUALITY_FORMULA = ( + "quality_score = min(outcome, verification, scope, incidents, anomalies, receipt)" +) +_COMPLEXITY_FORMULA = "complexity_score = min(100, declared*2 + events*3 + steps*2)" + + +def compute_trajectory_complexity_score( + trajectory: Trajectory, + *, + patch_trail_payload: Mapping[str, object] | None = None, +) -> int: + """Return a deterministic 0-100 complexity score (separate from quality).""" + score, *_rest = _complexity_factors( + trajectory, + patch_trail_payload=patch_trail_payload, + ) + return score + + +def _complexity_band_label(score: int) -> tuple[str, str]: + if score >= 70: + return "high", "High" + if score >= 35: + return "moderate", "Moderate" + return "low", "Low" + + +@overload +def _complexity_factors( + trajectory: Trajectory, + *, + patch_trail_payload: Mapping[str, object] | None = None, + include_band: Literal[False] = False, +) -> tuple[int, int, int, int, int, int, int]: ... + + +@overload +def _complexity_factors( + trajectory: Trajectory, + *, + patch_trail_payload: Mapping[str, object] | None, + include_band: Literal[True], +) -> tuple[int, int, int, int, int, int, int, str, str]: ... 
def _complexity_factors(
    trajectory: Trajectory,
    *,
    patch_trail_payload: Mapping[str, object] | None = None,
    include_band: bool = False,
) -> (
    tuple[int, int, int, int, int, int, int]
    | tuple[
        int,
        int,
        int,
        int,
        int,
        int,
        int,
        str,
        str,
    ]
):
    """Compute the complexity score together with its raw inputs and parts.

    Returns ``(score, declared, events, steps, declared_part, events_part,
    steps_part)``; with ``include_band=True`` the band id and band label are
    appended. Each part is capped individually and the total is capped at
    100. The declared-file count comes from the patch trail and is 0 when no
    trail payload is given.
    """
    trail = _trail_from_payload(patch_trail_payload)
    if trail is None:
        declared_count = 0
    else:
        declared_count = int(trail.counts().get("declared", 0))
    event_total = trajectory.event_count
    step_total = trajectory.step_count

    declared_points = min(_COMPLEXITY_DECLARED_CAP, declared_count * 2)
    event_points = min(_COMPLEXITY_EVENT_CAP, event_total * 3)
    step_points = min(_COMPLEXITY_STEP_CAP, step_total * 2)
    total = min(100, declared_points + event_points + step_points)

    factors = (
        total,
        declared_count,
        event_total,
        step_total,
        declared_points,
        event_points,
        step_points,
    )
    if not include_band:
        return factors
    band, band_label = _complexity_band_label(total)
    return (*factors, band, band_label)
_scope_accuracy(trajectory, trail=trail, label_set=label_set) + scope_pass = scope_accuracy == 100 + + if trajectory.incident_count == 0: + incident_score = 100 + incident_pass = True + else: + incident_score = max( + 0, + 100 - trajectory.incident_count * _INCIDENT_PENALTY_PER, + ) + incident_pass = False + + anomaly_score = 100 + for anomaly in anomalies: + anomaly_score -= ( + _ANOMALY_ERROR_PENALTY + if anomaly.severity == "error" + else _ANOMALY_WARN_PENALTY + ) + anomaly_score = max(0, anomaly_score) + anomaly_pass = anomaly_count == 0 + + if "change_control_workflow" in label_set: + if "receipt_issued" in label_set: + receipt_score = 100 + receipt_pass = True + else: + receipt_score = 85 + receipt_pass = False + else: + receipt_score = 100 + receipt_pass = True + + quality_score = min( + outcome_score, + verification_score, + scope_accuracy, + incident_score, + anomaly_score, + receipt_score, + ) + components = ( + TrajectoryQualityComponent( + "outcome", + outcome_score, + outcome_pass, + f"Outcome {trajectory.outcome}", + ), + TrajectoryQualityComponent( + "verification", + verification_score, + verification_pass, + f"Verification tier {trajectory.quality_tier}", + ), + TrajectoryQualityComponent( + "scope", + scope_accuracy, + scope_pass, + _scope_label(trail, label_set), + ), + TrajectoryQualityComponent( + "incidents", + incident_score, + incident_pass, + ( + "No audit incidents" + if incident_pass + else f"{trajectory.incident_count} audit incident(s)" + ), + ), + TrajectoryQualityComponent( + "anomalies", + anomaly_score, + anomaly_pass, + "No structural anomalies" + if anomaly_pass + else f"{anomaly_count} anomaly(ies)", + ), + TrajectoryQualityComponent( + "receipt", + receipt_score, + receipt_pass, + "Receipt issued" + if receipt_pass + else "Receipt missing for change-control cycle", + ), + ) + return TrajectoryQualityContract( + quality_score=max(0, min(100, quality_score)), + complexity_score=compute_trajectory_complexity_score( + trajectory, + 
def serialize_trajectory_quality_contract(
    contract: TrajectoryQualityContract,
    *,
    trajectory: Trajectory | None = None,
    patch_trail_payload: Mapping[str, object] | None = None,
) -> dict[str, object]:
    """Turn a quality contract into a plain dict of scores and explanations.

    The result carries the headline metrics, the component breakdown, a
    ``calculation`` section that flags which components limit the quality
    score, and a ``complexity_calculation`` section (detailed only when
    ``trajectory`` is supplied).
    """
    limiting = _limiting_component_ids(contract)
    breakdown: list[dict[str, object]] = []
    explained: list[dict[str, object]] = []
    for part in contract.components:
        explained.append(
            {
                "id": part.component_id,
                "label": part.label,
                "score": part.score,
                "pass": part.pass_gate,
                "limits_quality": part.component_id in limiting,
            }
        )
        breakdown.append(
            {
                "id": part.component_id,
                "score": part.score,
                "pass": part.pass_gate,
                "label": part.label,
            }
        )
    calculation: dict[str, object] = {
        "method": "contract_min",
        "formula": _QUALITY_FORMULA,
        "quality_score": contract.quality_score,
        "limiting_component_ids": list(limiting),
        "lines": explained,
    }
    complexity = _serialize_complexity_calculation(
        contract.complexity_score,
        trajectory=trajectory,
        patch_trail_payload=patch_trail_payload,
    )
    return {
        "score_version": contract.score_version,
        "quality_score": contract.quality_score,
        "complexity_score": contract.complexity_score,
        "scope_accuracy": contract.scope_accuracy,
        "duration_seconds": contract.duration_seconds,
        "anomaly_count": contract.anomaly_count,
        "components": breakdown,
        "calculation": calculation,
        "complexity_calculation": complexity,
    }
complexity_score: int, + *, + trajectory: Trajectory | None, + patch_trail_payload: Mapping[str, object] | None, +) -> dict[str, object]: + if trajectory is None: + return { + "method": "weighted_sum", + "formula": _COMPLEXITY_FORMULA, + "complexity_score": complexity_score, + "band": _complexity_band_label(complexity_score)[0], + "band_label": _complexity_band_label(complexity_score)[1], + "hint": "Higher = larger change surface (not a pass/fail grade).", + "lines": [], + } + ( + score, + declared_raw, + events_raw, + steps_raw, + declared_part, + events_part, + steps_part, + band, + band_label, + ) = _complexity_factors( + trajectory, + patch_trail_payload=patch_trail_payload, + include_band=True, + ) + del score + return { + "method": "weighted_sum", + "formula": _COMPLEXITY_FORMULA, + "complexity_score": complexity_score, + "band": band, + "band_label": band_label, + "hint": "Higher = larger change surface (not a pass/fail grade).", + "lines": [ + { + "id": "declared_files", + "label": "Declared files", + "raw": declared_raw, + "unit": "files", + "contribution": declared_part, + "cap": _COMPLEXITY_DECLARED_CAP, + }, + { + "id": "events", + "label": "Audit events", + "raw": events_raw, + "unit": "events", + "contribution": events_part, + "cap": _COMPLEXITY_EVENT_CAP, + }, + { + "id": "steps", + "label": "Trajectory steps", + "raw": steps_raw, + "unit": "steps", + "contribution": steps_part, + "cap": _COMPLEXITY_STEP_CAP, + }, + ], + } + + +def _limiting_component_ids(contract: TrajectoryQualityContract) -> tuple[str, ...]: + minimum = contract.quality_score + return tuple( + component.component_id + for component in contract.components + if component.score == minimum + ) + + +def apply_trajectory_quality_score( + trajectory: Trajectory, + *, + patch_trail_payload: Mapping[str, object] | None = None, + patch_trail_digest: str | None = None, +) -> Trajectory: + """Attach quality_score and refresh trajectory_digest for storage.""" + contract = 
compute_trajectory_quality_contract( + trajectory, + patch_trail_payload=patch_trail_payload, + ) + trajectory_digest = trajectory_digest_for( + trajectory, + quality_score=contract.quality_score, + patch_trail_digest=patch_trail_digest, + ) + return replace( + trajectory, + quality_score=contract.quality_score, + trajectory_digest=trajectory_digest, + ) + + +def _scope_accuracy( + trajectory: Trajectory, + *, + trail: PatchTrail | None, + label_set: set[str], +) -> int: + del trajectory + if trail is not None and trail.scope_check_status: + return _SCOPE_SCORES.get(trail.scope_check_status, 70) + if "scope_clean" in label_set: + return 100 + if "scope_expanded" in label_set: + return 85 + return 70 + + +def _scope_label(trail: PatchTrail | None, label_set: set[str]) -> str: + if trail is not None and trail.scope_check_status: + return f"Scope {trail.scope_check_status}" + if "scope_clean" in label_set: + return "Scope clean" + if "scope_expanded" in label_set: + return "Scope expanded" + return "Scope partial" + + +def _trail_from_payload( + patch_trail_payload: Mapping[str, object] | None, +) -> PatchTrail | None: + if patch_trail_payload is None: + return None + return patch_trail_from_mapping(patch_trail_payload) + + +def _parse_utc_timestamp(value: str) -> datetime | None: + text = value.strip() + if not text: + return None + try: + if text.endswith("Z"): + text = f"{text[:-1]}+00:00" + parsed = datetime.fromisoformat(text) + except ValueError: + return None + if parsed.tzinfo is None: + return parsed.replace(tzinfo=timezone.utc) + return parsed.astimezone(timezone.utc) + + +__all__ = [ + "TRAJECTORY_QUALITY_SCORE_VERSION", + "TrajectoryQualityComponent", + "TrajectoryQualityContract", + "apply_trajectory_quality_score", + "compute_trajectory_complexity_score", + "compute_trajectory_duration_seconds", + "compute_trajectory_quality_contract", + "compute_trajectory_quality_score", + "serialize_trajectory_quality_contract", +] diff --git 
a/codeclone/memory/trajectory/rebuild_workflow.py b/codeclone/memory/trajectory/rebuild_workflow.py new file mode 100644 index 00000000..36c92bda --- /dev/null +++ b/codeclone/memory/trajectory/rebuild_workflow.py @@ -0,0 +1,138 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from typing import Literal, TypedDict + +from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from ...config.memory import MemoryConfig +from ..exceptions import MemoryContractError +from ..models import MemoryProject +from ..project import resolve_memory_db_path, resolve_project_identity +from ..sqlite_store import SqliteEngineeringMemoryStore +from .models import TrajectoryProjectionResult + + +class RebuildTrajectoriesMeta(TypedDict): + action: Literal["rebuild_trajectories"] + projection_version: str + + +class RebuildTrajectoriesCounts(TypedDict): + workflows_seen: int + trajectories_created: int + trajectories_updated: int + trajectories_unchanged: int + legacy_event_count: int + + +class RebuildTrajectoriesOkPayload(RebuildTrajectoriesMeta, RebuildTrajectoriesCounts): + status: Literal["ok"] + run_id: str + mode: Literal["full", "incremental"] + + +class RebuildTrajectoriesSkippedPayload( + RebuildTrajectoriesMeta, RebuildTrajectoriesCounts +): + status: Literal["skipped"] + reason: str + run_id: None + + +RebuildTrajectoriesPayload = ( + RebuildTrajectoriesOkPayload | RebuildTrajectoriesSkippedPayload +) + + +def execute_trajectory_rebuild( + *, + root_path: Path, + config: MemoryConfig, + store: SqliteEngineeringMemoryStore | None = None, + project: MemoryProject | None = None, + incremental_after_event_core_id: int | None = None, +) -> RebuildTrajectoriesPayload: + from 
.models import TRAJECTORY_PROJECTION_VERSION + + base: RebuildTrajectoriesMeta = { + "action": "rebuild_trajectories", + "projection_version": TRAJECTORY_PROJECTION_VERSION, + } + empty: RebuildTrajectoriesCounts = { + "workflows_seen": 0, + "trajectories_created": 0, + "trajectories_updated": 0, + "trajectories_unchanged": 0, + "legacy_event_count": 0, + } + if not config.trajectories_enabled: + return { + **base, + **empty, + "status": "skipped", + "reason": "trajectories_disabled", + "run_id": None, + } + owns_store = store is None + active_store = store + try: + resolved_project = project or resolve_project_identity(root_path) + if active_store is None: + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + raise MemoryContractError( + f"Engineering memory database not found: {db_path}. " + "Run memory init or " + "manage_engineering_memory(action='refresh_from_run')." + ) + active_store = SqliteEngineeringMemoryStore(db_path) + audit_db_path = resolve_audit_path( + root_path=root_path, + value=DEFAULT_AUDIT_PATH, + ) + mode: Literal["full", "incremental"] + if incremental_after_event_core_id is None: + mode = "full" + result: TrajectoryProjectionResult = ( + active_store.rebuild_trajectories_from_audit( + project=resolved_project, + root_path=root_path, + audit_db_path=audit_db_path, + ) + ) + else: + mode = "incremental" + result = active_store.rebuild_trajectories_incremental( + project=resolved_project, + root_path=root_path, + audit_db_path=audit_db_path, + after_event_core_id=incremental_after_event_core_id, + ) + finally: + if owns_store and active_store is not None: + active_store.close() + return { + **base, + "status": "ok", + "mode": mode, + "run_id": result.run.id, + "workflows_seen": result.run.workflows_seen, + "trajectories_created": result.run.trajectories_created, + "trajectories_updated": result.run.trajectories_updated, + "trajectories_unchanged": result.run.trajectories_unchanged, + "legacy_event_count": 
result.run.legacy_event_count, + } + + +__all__ = [ + "RebuildTrajectoriesOkPayload", + "RebuildTrajectoriesPayload", + "RebuildTrajectoriesSkippedPayload", + "execute_trajectory_rebuild", +] diff --git a/codeclone/memory/trajectory/retrieval.py b/codeclone/memory/trajectory/retrieval.py new file mode 100644 index 00000000..f0533603 --- /dev/null +++ b/codeclone/memory/trajectory/retrieval.py @@ -0,0 +1,677 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Iterable, Mapping, Sequence +from dataclasses import dataclass +from typing import Literal + +from ..paths import normalize_memory_scope_path, repo_path_to_module_key +from ..search_index import SearchMatchMode, tokenize_query +from .agents import trajectory_agent_label +from .models import Trajectory, TrajectoryListItem +from .patch_trail import patch_trail_from_mapping, patch_trail_summary_line +from .quality import ( + compute_trajectory_quality_contract, + serialize_trajectory_quality_contract, +) +from .step_labels import step_display_name + +DEFAULT_TRAJECTORY_PREVIEW_LIMIT = 5 +DEFAULT_TRAJECTORY_STEP_LIMIT = 12 +COMPACT_TRAJECTORY_SUBJECT_LIMIT = 8 +COMPACT_TRAJECTORY_LABEL_LIMIT = 8 +TRAJECTORY_PREVIEW_CHARS = 220 +TrajectoryDetailLevel = Literal["compact", "full"] + + +def _preview_labels( + labels: Sequence[str], *, detail_level: TrajectoryDetailLevel +) -> list[str]: + # Full keeps the raw label list; compact sorts (deterministic) and caps so a + # trajectory that accumulated many labels cannot bloat the preview. 
+ if detail_level == "full": + return list(labels) + return sorted(labels)[:COMPACT_TRAJECTORY_LABEL_LIMIT] + + +def trajectory_excluded_from_default_retrieval( + trajectory: Trajectory, + *, + include_routine: bool, +) -> bool: + if include_routine: + return False + if trajectory.workflow_id.startswith("run:"): + return True + return trajectory.quality_tier == "routine" + + +def filter_trajectories_for_default_retrieval( + trajectories: Sequence[Trajectory], + *, + include_routine: bool, +) -> tuple[Trajectory, ...]: + return tuple( + trajectory + for trajectory in trajectories + if not trajectory_excluded_from_default_retrieval( + trajectory, + include_routine=include_routine, + ) + ) + + +@dataclass(frozen=True, slots=True) +class TrajectorySearchResult: + trajectory: Trajectory + relevance_score: float + + +def trajectory_status_payload( + *, + count: int, + latest_run: object | None, +) -> dict[str, object]: + payload: dict[str, object] = {"trajectory_count": count} + if latest_run is not None: + payload["latest_projection"] = { + "id": getattr(latest_run, "id", ""), + "projection_version": getattr(latest_run, "projection_version", ""), + "finished_at_utc": getattr(latest_run, "finished_at_utc", ""), + "status": getattr(latest_run, "status", ""), + "workflows_seen": getattr(latest_run, "workflows_seen", 0), + "created": getattr(latest_run, "trajectories_created", 0), + "updated": getattr(latest_run, "trajectories_updated", 0), + "unchanged": getattr(latest_run, "trajectories_unchanged", 0), + "legacy_event_count": getattr(latest_run, "legacy_event_count", 0), + } + else: + payload["latest_projection"] = None + return payload + + +def trajectory_list_item_to_preview(item: TrajectoryListItem) -> dict[str, object]: + return { + "type": "trajectory", + "trajectory_id": item.id, + "workflow_id": item.workflow_id, + "outcome": item.outcome, + "quality_tier": item.quality_tier, + "quality_score": item.quality_score, + "summary": _preview_text(item.summary), + 
"event_count": item.event_count, + "started_at_utc": item.started_at_utc, + "finished_at_utc": item.finished_at_utc, + } + + +def serialize_trajectory_preview( + trajectory: Trajectory, + *, + relevance_score: float | None = None, + patch_trail_payload: Mapping[str, object] | None = None, + detail_level: TrajectoryDetailLevel = "full", + preferred_subjects: frozenset[tuple[str, str]] = frozenset(), +) -> dict[str, object]: + subjects = trajectory.subjects + serialized_subjects, matched_subject_count = _preview_subjects( + subjects, + detail_level=detail_level, + preferred_subjects=preferred_subjects, + ) + payload: dict[str, object] = { + "type": "trajectory", + "trajectory_id": trajectory.id, + "workflow_id": trajectory.workflow_id, + "outcome": trajectory.outcome, + "quality_tier": trajectory.quality_tier, + "quality_score": trajectory.quality_score, + "summary": _preview_text(trajectory.summary), + "labels": _preview_labels(trajectory.labels, detail_level=detail_level), + "agent_label": trajectory_agent_label(trajectory), + "subjects": serialized_subjects, + "evidence_count": len(trajectory.evidence), + "event_count": trajectory.event_count, + "step_count": trajectory.step_count, + "incident_count": trajectory.incident_count, + "started_at_utc": trajectory.started_at_utc, + "finished_at_utc": trajectory.finished_at_utc, + } + if relevance_score is not None: + payload["relevance_score"] = round(relevance_score, 3) + summary = serialize_patch_trail_summary(patch_trail_payload) + if summary is not None: + payload["patch_trail_summary"] = summary + _add_quality_fields( + payload, + trajectory=trajectory, + patch_trail_payload=patch_trail_payload, + detail_level=detail_level, + subject_count=len(subjects), + matched_subject_count=matched_subject_count, + serialized_subject_count=len(serialized_subjects), + ) + return payload + + +def _preview_subjects( + subjects: Sequence[object], + *, + detail_level: TrajectoryDetailLevel, + preferred_subjects: frozenset[tuple[str, 
str]], +) -> tuple[list[dict[str, object]], int]: + matched_subject_count = sum( + ( + str(getattr(subject, "subject_kind", "")), + str(getattr(subject, "subject_key", "")), + ) + in preferred_subjects + for subject in subjects + ) + selected = ( + _compact_trajectory_subjects( + subjects, + preferred_subjects=preferred_subjects, + ) + if detail_level == "compact" + else tuple(subjects) + ) + return [_serialize_subject(subject) for subject in selected], matched_subject_count + + +def _add_quality_fields( + payload: dict[str, object], + *, + trajectory: Trajectory, + patch_trail_payload: Mapping[str, object] | None, + detail_level: TrajectoryDetailLevel, + subject_count: int, + matched_subject_count: int, + serialized_subject_count: int, +) -> None: + contract = compute_trajectory_quality_contract( + trajectory, + patch_trail_payload=patch_trail_payload, + ) + if detail_level == "full": + payload["quality_contract"] = serialize_trajectory_quality_contract( + contract, + trajectory=trajectory, + patch_trail_payload=patch_trail_payload, + ) + # Contract-component numbers are interpretable only alongside the full + # breakdown; compact keeps quality_score + anomaly_count as the headline. 
+ payload["complexity_score"] = contract.complexity_score + payload["scope_accuracy"] = contract.scope_accuracy + payload["duration_seconds"] = contract.duration_seconds + else: + payload["subject_count"] = subject_count + payload["matched_subject_count"] = matched_subject_count + payload["subjects_truncated"] = serialized_subject_count < subject_count + payload["anomaly_count"] = contract.anomaly_count + + +def serialize_patch_trail_summary( + payload: Mapping[str, object] | None, +) -> dict[str, object] | None: + if payload is None: + return None + trail = patch_trail_from_mapping(payload) + if trail is None: + return None + summary_payload = trail.to_payload(detail_level="summary") + return { + "summary_line": patch_trail_summary_line(trail), + "patch_trail_digest": trail.patch_trail_digest, + "counts": summary_payload.get("counts", {}), + "scope_check_status": trail.scope_check_status, + "verification_status": trail.verification_status, + } + + +def serialize_trajectory_detail( + trajectory: Trajectory, + *, + max_steps: int = DEFAULT_TRAJECTORY_STEP_LIMIT, + patch_trail_payload: Mapping[str, object] | None = None, +) -> dict[str, object]: + steps = trajectory.steps[: max(1, int(max_steps))] + detail = { + **serialize_trajectory_preview( + trajectory, + patch_trail_payload=patch_trail_payload, + ), + "trajectory_digest": trajectory.trajectory_digest, + "source_event_stream_digest": trajectory.source_event_stream_digest, + "projection_version": trajectory.projection_version, + "intent_id": trajectory.intent_id, + "primary_run_id": trajectory.primary_run_id, + "first_run_id": trajectory.first_run_id, + "last_run_id": trajectory.last_run_id, + "report_digest": trajectory.report_digest, + "steps": [ + { + "step_index": step.step_index, + "audit_sequence": step.audit_sequence, + "event_id": step.event_id, + "event_type": step.event_type, + "step_label": step_display_name( + event_type=step.event_type, + status=step.status, + ), + "status": step.status, + "run_id": 
step.run_id, + "report_digest": step.report_digest, + "summary": _preview_text(step.summary or ""), + "created_at_utc": step.created_at_utc, + } + for step in steps + ], + "steps_truncated": len(trajectory.steps) > len(steps), + "evidence": [ + { + "evidence_kind": item.evidence_kind, + "ref": item.ref, + "locator": item.locator, + "digest": item.digest, + "created_at_utc": item.created_at_utc, + } + for item in trajectory.evidence + ], + } + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + detail["patch_trail"] = trail.to_payload(detail_level="summary") + return detail + + +def rank_trajectories_for_scope( + trajectories: Sequence[Trajectory], + *, + scope_paths: Sequence[str], + symbols: Sequence[str], + max_results: int = DEFAULT_TRAJECTORY_PREVIEW_LIMIT, + include_routine: bool = False, + patch_trails: Mapping[str, Mapping[str, object]] | None = None, + detail_level: TrajectoryDetailLevel = "full", +) -> tuple[list[dict[str, object]], bool]: + normalized_scope = tuple(normalize_memory_scope_path(path) for path in scope_paths) + preferred_subjects = _preferred_subjects( + scope_paths=normalized_scope, + symbols=symbols, + ) + visible = filter_trajectories_for_default_retrieval( + trajectories, + include_routine=include_routine, + ) + scored = _score_trajectories( + visible, + scope_paths=normalized_scope, + symbols=symbols, + query_tokens=(), + patch_trails=patch_trails or {}, + ) + return _preview_results( + scored, + max_results=max_results, + patch_trails=patch_trails or {}, + detail_level=detail_level, + preferred_subjects=preferred_subjects, + diversify=True, + ) + + +def rank_trajectories_for_query( + trajectories: Sequence[Trajectory], + *, + query: str, + max_results: int, + match_mode: SearchMatchMode, + include_routine: bool = False, + detail_level: TrajectoryDetailLevel = "full", +) -> tuple[list[dict[str, object]], bool]: + tokens = tokenize_query(query) + if not tokens: + return 
[], False + visible = filter_trajectories_for_default_retrieval( + trajectories, + include_routine=include_routine, + ) + scored = _score_trajectories( + visible, + scope_paths=(), + symbols=(), + query_tokens=tokens, + match_mode=match_mode, + ) + return _preview_results( + scored, + max_results=max_results, + detail_level=detail_level, + ) + + +def filter_trajectories_for_query( + trajectories: Sequence[Trajectory], + *, + query: str, + match_mode: SearchMatchMode, + include_routine: bool = False, +) -> tuple[TrajectorySearchResult, ...]: + tokens = tokenize_query(query) + if not tokens: + return () + visible = filter_trajectories_for_default_retrieval( + trajectories, + include_routine=include_routine, + ) + return tuple( + _score_trajectories( + visible, + scope_paths=(), + symbols=(), + query_tokens=tokens, + match_mode=match_mode, + ) + ) + + +def trajectory_subject_keys( + *, + scope_paths: Sequence[str], + symbols: Sequence[str], +) -> dict[str, tuple[str, ...]]: + paths = tuple(normalize_memory_scope_path(path) for path in scope_paths) + modules = tuple(sorted({repo_path_to_module_key(path) for path in paths})) + return { + "path": paths, + "module": modules, + "symbol": tuple(sorted({symbol for symbol in symbols if symbol.strip()})), + } + + +def _score_trajectories( + trajectories: Sequence[Trajectory], + *, + scope_paths: Sequence[str], + symbols: Sequence[str], + query_tokens: Sequence[str], + match_mode: SearchMatchMode = "any", + patch_trails: Mapping[str, Mapping[str, object]] | None = None, +) -> list[TrajectorySearchResult]: + trails = patch_trails or {} + scored: list[TrajectorySearchResult] = [] + for trajectory in trajectories: + score = _trajectory_relevance( + trajectory, + scope_paths=scope_paths, + symbols=symbols, + query_tokens=query_tokens, + match_mode=match_mode, + patch_trail_payload=trails.get(trajectory.id), + ) + if score <= 0.0: + continue + scored.append( + TrajectorySearchResult( + trajectory=trajectory, + relevance_score=score, 
+ ) + ) + scored.sort(key=lambda item: (-item.relevance_score, item.trajectory.id)) + return scored + + +def _trajectory_relevance( + trajectory: Trajectory, + *, + scope_paths: Sequence[str], + symbols: Sequence[str], + query_tokens: Sequence[str], + match_mode: SearchMatchMode, + patch_trail_payload: Mapping[str, object] | None = None, +) -> float: + score = 0.0 + subjects = { + (item.subject_kind, item.subject_key, item.relation) + for item in trajectory.subjects + } + subject_pairs = {(kind, key) for kind, key, _relation in subjects} + for path in scope_paths: + if ("path", path) in subject_pairs: + score += 1.4 + if ("path", path, "untouched") in subjects: + score += 0.45 + module_key = repo_path_to_module_key(path) + if ("module", module_key) in subject_pairs: + score += 0.8 + untouched_overlap = _patch_trail_untouched_overlap( + scope_paths=scope_paths, + patch_trail_payload=patch_trail_payload, + subjects=subjects, + ) + if untouched_overlap: + score += 0.25 * untouched_overlap + for symbol in symbols: + if ("symbol", symbol) in subject_pairs: + score += 1.2 + if query_tokens: + haystack = _trajectory_search_text(trajectory) + matches = [token in haystack for token in query_tokens] + if match_mode == "all" and not all(matches): + return 0.0 + if match_mode == "any" and not any(matches): + return 0.0 + score += 0.4 + sum(0.15 for matched in matches if matched) + if trajectory.quality_tier in {"corrected", "incident"}: + score += 0.15 + return score + + +def _preview_results( + results: Sequence[TrajectorySearchResult], + *, + max_results: int, + patch_trails: Mapping[str, Mapping[str, object]] | None = None, + detail_level: TrajectoryDetailLevel = "full", + preferred_subjects: frozenset[tuple[str, str]] = frozenset(), + diversify: bool = False, +) -> tuple[list[dict[str, object]], bool]: + limit = max(1, int(max_results)) + truncated = len(results) > limit + selected = ( + _select_diverse_scope_results(results, limit=limit) + if diversify + else 
results[:limit] + ) + trails = patch_trails or {} + return [ + serialize_trajectory_preview( + item.trajectory, + relevance_score=item.relevance_score, + patch_trail_payload=trails.get(item.trajectory.id), + detail_level=detail_level, + preferred_subjects=preferred_subjects, + ) + for item in selected + ], truncated + + +def _select_diverse_scope_results( + results: Sequence[TrajectorySearchResult], + *, + limit: int, +) -> Sequence[TrajectorySearchResult]: + selected: list[TrajectorySearchResult] = [] + selected_ids: set[str] = set() + seen_examples: set[tuple[str, str, str]] = set() + for item in results: + trajectory = item.trajectory + example_key = ( + trajectory.outcome, + trajectory.quality_tier, + trajectory_agent_label(trajectory) or "", + ) + if example_key not in seen_examples: + selected.append(item) + selected_ids.add(trajectory.id) + seen_examples.add(example_key) + if len(selected) >= limit: + return selected + for item in results: + if item.trajectory.id not in selected_ids: + selected.append(item) + if len(selected) >= limit: + break + return selected + + +def _preferred_subjects( + *, + scope_paths: Sequence[str], + symbols: Sequence[str], +) -> frozenset[tuple[str, str]]: + subject_keys = trajectory_subject_keys( + scope_paths=scope_paths, + symbols=symbols, + ) + return frozenset((kind, key) for kind, keys in subject_keys.items() for key in keys) + + +def _compact_trajectory_subjects( + subjects: Sequence[object], + *, + preferred_subjects: frozenset[tuple[str, str]], +) -> tuple[object, ...]: + indexed = tuple(enumerate(subjects)) + ranked = sorted( + indexed, + key=lambda item: _compact_subject_sort_key( + item[1], + index=item[0], + preferred_subjects=preferred_subjects, + ), + ) + return tuple( + subject for _index, subject in ranked[:COMPACT_TRAJECTORY_SUBJECT_LIMIT] + ) + + +def _compact_subject_sort_key( + subject: object, + *, + index: int, + preferred_subjects: frozenset[tuple[str, str]], +) -> tuple[int, int, int, int]: + kind = 
str(getattr(subject, "subject_kind", "")) + key = str(getattr(subject, "subject_key", "")) + relation = str(getattr(subject, "relation", "")) + return ( + 0 if (kind, key) in preferred_subjects else 1, + {"touched": 0, "untouched": 1, "about": 2}.get(relation, 3), + {"path": 0, "symbol": 1, "module": 2}.get(kind, 3), + index, + ) + + +def _patch_trail_untouched_overlap( + *, + scope_paths: Sequence[str], + patch_trail_payload: Mapping[str, object] | None, + subjects: set[tuple[str, str, str]], +) -> int: + scope = set(scope_paths) + if patch_trail_payload is not None: + trail = patch_trail_from_mapping(patch_trail_payload) + if trail is not None: + return len(scope & set(trail.untouched_in_declared)) + untouched = { + key + for kind, key, relation in subjects + if kind == "path" and relation == "untouched" + } + return len(scope & untouched) + + +def _trajectory_search_text(trajectory: Trajectory) -> str: + parts: list[str] = [ + trajectory.id, + trajectory.workflow_id, + trajectory.outcome, + trajectory.quality_tier, + trajectory.summary, + *trajectory.labels, + ] + parts.extend(subject.subject_key for subject in trajectory.subjects) + parts.extend(step.event_type for step in trajectory.steps) + parts.extend(step.summary or "" for step in trajectory.steps) + return " ".join(part.lower() for part in parts if part) + + +def _serialize_subject(subject: object) -> dict[str, object]: + return { + "subject_kind": getattr(subject, "subject_kind", ""), + "subject_key": getattr(subject, "subject_key", ""), + "relation": getattr(subject, "relation", ""), + } + + +def _preview_text(text: str, *, max_chars: int = TRAJECTORY_PREVIEW_CHARS) -> str: + if len(text) <= max_chars: + return text + return f"{text[: max_chars - 1].rstrip()}…" + + +def compact_step_text(trajectory: Trajectory, *, max_steps: int = 12) -> str: + step_parts = [] + for step in trajectory.steps[: max(1, int(max_steps))]: + summary = f" {step.summary}" if step.summary else "" + status = f" 
status={step.status}" if step.status else "" + step_parts.append(f"{step.step_index + 1}:{step.event_type}{status}{summary}") + return " ; ".join(step_parts) + + +def trajectory_semantic_text_parts(trajectory: Trajectory) -> Iterable[str]: + yield "trajectory" + yield f"outcome {trajectory.outcome}" + yield f"quality {trajectory.quality_tier}" + yield trajectory.summary + if trajectory.labels: + yield f"labels {' '.join(trajectory.labels)}" + path_subjects = [ + subject.subject_key + for subject in trajectory.subjects + if subject.subject_kind == "path" + ] + if path_subjects: + yield f"paths {' '.join(sorted(path_subjects))}" + steps = compact_step_text(trajectory) + if steps: + yield f"steps {steps}" + + +__all__ = [ + "COMPACT_TRAJECTORY_SUBJECT_LIMIT", + "DEFAULT_TRAJECTORY_PREVIEW_LIMIT", + "DEFAULT_TRAJECTORY_STEP_LIMIT", + "TrajectorySearchResult", + "compact_step_text", + "filter_trajectories_for_default_retrieval", + "filter_trajectories_for_query", + "rank_trajectories_for_query", + "rank_trajectories_for_scope", + "serialize_patch_trail_summary", + "serialize_trajectory_detail", + "serialize_trajectory_preview", + "trajectory_excluded_from_default_retrieval", + "trajectory_list_item_to_preview", + "trajectory_semantic_text_parts", + "trajectory_status_payload", + "trajectory_subject_keys", +] diff --git a/codeclone/memory/trajectory/step_labels.py b/codeclone/memory/trajectory/step_labels.py new file mode 100644 index 00000000..8b6ab788 --- /dev/null +++ b/codeclone/memory/trajectory/step_labels.py @@ -0,0 +1,47 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +STEP_DISPLAY_NAMES: dict[str, str] = { + "analysis.completed": "Analysis run completed", + "baseline_abuse.detected": "Baseline abuse detected", + "blast_radius.computed": "Blast radius computed", + "claim_validation.completed": "Claim validation passed", + "claim_validation.violated": "Claim validation failed", + "intent.checked": "Scope checked", + "intent.cleared": "Intent cleared", + "intent.declared": "Change intent declared", + "intent.expanded": "Scope expanded", + "intent.expired": "Intent lease expired", + "intent.promoted": "Queued intent promoted", + "intent.queue_blocked": "Intent queue blocked", + "intent.queued": "Intent queued", + "intent.renewed": "Intent lease renewed", + "intent.violated": "Intent scope violated", + "patch_budget.computed": "Patch budget computed", + "patch_contract.expired": "Patch verification expired", + "patch_contract.verified": "Patch contract verified", + "patch_contract.violated": "Patch contract violated", + "patch_trail.computed": "Patch trail computed", + "review_receipt.created": "Review receipt created", + "workspace.conflict_detected": "Workspace conflict detected", + "workspace.gc_completed": "Workspace intent GC completed", +} + + +def step_display_name(*, event_type: str, status: str | None = None) -> str: + base = STEP_DISPLAY_NAMES.get( + event_type, + event_type.replace(".", " \u2192 ").replace("_", " "), + ) + cleaned = status.strip() if isinstance(status, str) else "" + if cleaned: + return f"{base} ({cleaned})" + return base + + +__all__ = ["STEP_DISPLAY_NAMES", "step_display_name"] diff --git a/codeclone/memory/trajectory/store.py b/codeclone/memory/trajectory/store.py new file mode 100644 index 00000000..49faf5f7 --- /dev/null +++ b/codeclone/memory/trajectory/store.py @@ -0,0 +1,1031 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import sqlite3 +import uuid +from collections import Counter, defaultdict +from collections.abc import Callable, Iterable, Mapping, Sequence +from pathlib import Path +from typing import TypeVar + +import orjson + +from ...audit.events import repo_root_digest +from ...audit.reader import ( + AuditRecord, + count_audit_event_core_gaps, + list_workflow_ids_with_events_after, + read_audit_event_core_records, +) +from ...report.meta import current_report_timestamp_utc +from ...utils.iterutils import chunked +from ...utils.json_io import json_text +from ..models import MemoryProject +from ..search_index import SearchMatchMode, tokenize_query +from .models import ( + TRAJECTORY_PROJECTION_VERSION, + Trajectory, + TrajectoryEvidence, + TrajectoryListItem, + TrajectoryProjectionResult, + TrajectoryProjectionRun, + TrajectoryStep, + TrajectorySubject, +) +from .patch_trail_projector import project_patch_trail_from_audit +from .projector import project_trajectory +from .quality import apply_trajectory_quality_score + +_SQLITE_IN_QUERY_BATCH = 500 + +_T = TypeVar("_T") + +# Batch child-table loads for trajectory hydration. Each is ORDER BY +# trajectory_id first (rows for one trajectory stay contiguous) then the same +# keys the per-trajectory query uses, so grouping preserves identical ordering. 
def _group_rows_by_trajectory_id(
    conn: sqlite3.Connection,
    *,
    ids: Sequence[str],
    sql: str,
    build: Callable[[sqlite3.Row], _T],
) -> dict[str, list[_T]]:
    """Run ``sql`` over ``ids`` in chunks and group built rows by ``trajectory_id``.

    Args:
        conn: Connection whose rows support lookup by column name.
        ids: Trajectory ids to load child rows for; duplicates are tolerated.
        sql: Query text with exactly one ``{placeholders}`` slot for the
            ``IN (...)`` list.
        build: Converts one result row into the grouped value.

    Returns:
        A mapping with an entry for every requested id (an empty list when the
        id has no rows), each list in the order the query returned its rows.
    """
    # Collapse duplicates before chunking. A repeated id that lands in a later
    # chunk than its first occurrence would otherwise re-fetch the same rows
    # and append them to the group a second time.
    unique_ids = tuple(dict.fromkeys(ids))
    grouped: dict[str, list[_T]] = {trajectory_id: [] for trajectory_id in unique_ids}
    for batch in chunked(unique_ids, _SQLITE_IN_QUERY_BATCH):
        placeholders = ", ".join("?" for _ in batch)
        rows = conn.execute(sql.format(placeholders=placeholders), batch).fetchall()
        for row in rows:
            grouped.setdefault(str(row["trajectory_id"]), []).append(build(row))
    return grouped
+ """ + patch_trail = project_patch_trail_from_audit( + records=records, + repo_root_digest=root_digest, + ) + patch_trail_digest = ( + patch_trail.patch_trail_digest if patch_trail is not None else None + ) + trajectory = project_trajectory( + project_id=project.id, + repo_root_digest=root_digest, + workflow_id=workflow_id, + records=records, + projection_version=projection_version, + projected_at_utc=projected_at_utc, + patch_trail_digest=patch_trail_digest, + ) + patch_payload = ( + patch_trail._canonical_dict(include_digest=True) + if patch_trail is not None + else None + ) + trajectory = apply_trajectory_quality_score( + trajectory, + patch_trail_payload=patch_payload, + patch_trail_digest=patch_trail_digest, + ) + action = upsert_trajectory(conn, trajectory) + if patch_trail is not None: + upsert_trajectory_patch_trail( + conn, + trajectory_id=trajectory.id, + patch_trail_json=_json_object( + patch_trail._canonical_dict(include_digest=True) + ), + patch_trail_digest=patch_trail.patch_trail_digest, + schema_version=patch_trail.schema_version, + projected_at_utc=projected_at_utc, + ) + supersede_stale_projection_trajectories( + conn, + project_id=project.id, + workflow_id=workflow_id, + keep_trajectory_id=trajectory.id, + keep_trajectory_digest=trajectory.trajectory_digest, + ) + return trajectory, action + + +def _finalize_projection_run( + conn: sqlite3.Connection, + *, + project: MemoryProject, + root_digest: str, + projection_version: str, + started_at_utc: str, + workflow_count: int, + actions: Counter[str], + trajectories: Sequence[Trajectory], + legacy_event_count: int, +) -> TrajectoryProjectionResult: + run = TrajectoryProjectionRun( + id=_projection_run_id( + project_id=project.id, + repo_root_digest=root_digest, + projection_version=projection_version, + started_at_utc=started_at_utc, + workflow_count=workflow_count, + ), + project_id=project.id, + repo_root_digest=root_digest, + projection_version=projection_version, + started_at_utc=started_at_utc, 
def rebuild_trajectories_from_audit(
    *,
    conn: sqlite3.Connection,
    project: MemoryProject,
    root_path: Path,
    audit_db_path: Path,
    projection_version: str = TRAJECTORY_PROJECTION_VERSION,
) -> TrajectoryProjectionResult:
    """Re-project every workflow found in the audit database.

    Reads all audit records for the repository digest, groups them by
    workflow, projects and upserts each workflow, then records the projection
    run through ``_finalize_projection_run``.
    """
    digest = repo_root_digest(root_path.resolve())
    # One timestamp is shared by every trajectory projected in this run.
    run_started_at = current_report_timestamp_utc()
    audit_events = read_audit_event_core_records(
        db_path=audit_db_path,
        repo_root_digest=digest,
    )
    gap_count = count_audit_event_core_gaps(
        db_path=audit_db_path,
        repo_root_digest=digest,
    )
    by_workflow = _group_by_workflow(audit_events)

    tally: Counter[str] = Counter()
    projected: list[Trajectory] = []
    for workflow_id, workflow_records in by_workflow.items():
        trajectory, action = _project_and_upsert_workflow(
            conn,
            project=project,
            root_digest=digest,
            workflow_id=workflow_id,
            records=workflow_records,
            projection_version=projection_version,
            projected_at_utc=run_started_at,
        )
        tally[action] += 1
        projected.append(trajectory)

    return _finalize_projection_run(
        conn,
        project=project,
        root_digest=digest,
        projection_version=projection_version,
        started_at_utc=run_started_at,
        workflow_count=len(by_workflow),
        actions=tally,
        trajectories=projected,
        legacy_event_count=gap_count,
    )
events newer than the watermark. + + The audit trail is append-only, so workflows untouched since + ``after_event_core_id`` are byte-identical and skipped entirely. Each + changed workflow is re-read in full (its complete event set) and projected + through the same path as the full rebuild, preserving digest-stable upserts. + """ + root_digest = repo_root_digest(root_path.resolve()) + started = current_report_timestamp_utc() + workflow_ids = list_workflow_ids_with_events_after( + db_path=audit_db_path, + repo_root_digest=root_digest, + after_id=after_event_core_id, + ) + legacy_event_count = count_audit_event_core_gaps( + db_path=audit_db_path, + repo_root_digest=root_digest, + ) + actions: Counter[str] = Counter() + trajectories: list[Trajectory] = [] + for workflow_id in workflow_ids: + records = read_audit_event_core_records( + db_path=audit_db_path, + repo_root_digest=root_digest, + workflow_id=workflow_id, + ) + if not records: + continue + trajectory, action = _project_and_upsert_workflow( + conn, + project=project, + root_digest=root_digest, + workflow_id=workflow_id, + records=records, + projection_version=projection_version, + projected_at_utc=started, + ) + actions[action] += 1 + trajectories.append(trajectory) + return _finalize_projection_run( + conn, + project=project, + root_digest=root_digest, + projection_version=projection_version, + started_at_utc=started, + workflow_count=len(workflow_ids), + actions=actions, + trajectories=trajectories, + legacy_event_count=legacy_event_count, + ) + + +def upsert_trajectory(conn: sqlite3.Connection, trajectory: Trajectory) -> str: + existing = conn.execute( + "SELECT trajectory_digest FROM memory_trajectories WHERE id=?", + (trajectory.id,), + ).fetchone() + action = ( + "created" + if existing is None + else "unchanged" + if str(existing[0]) == trajectory.trajectory_digest + else "updated" + ) + if action == "unchanged": + conn.execute( + "UPDATE memory_trajectories SET projected_at_utc=?, updated_at_utc=? 
" + "WHERE id=?", + (trajectory.projected_at_utc, trajectory.updated_at_utc, trajectory.id), + ) + return action + conn.execute( + """ + INSERT INTO memory_trajectories( + id, project_id, repo_root_digest, workflow_id, intent_id, + primary_run_id, first_run_id, last_run_id, report_digest, + outcome, quality_tier, quality_score, labels_json, summary, + trajectory_digest, + source_event_stream_digest, projection_version, event_count, + step_count, incident_count, started_at_utc, finished_at_utc, + projected_at_utc, updated_at_utc + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? + ) + ON CONFLICT(id) DO UPDATE SET + intent_id=excluded.intent_id, + primary_run_id=excluded.primary_run_id, + first_run_id=excluded.first_run_id, + last_run_id=excluded.last_run_id, + report_digest=excluded.report_digest, + outcome=excluded.outcome, + quality_tier=excluded.quality_tier, + quality_score=excluded.quality_score, + labels_json=excluded.labels_json, + summary=excluded.summary, + trajectory_digest=excluded.trajectory_digest, + source_event_stream_digest=excluded.source_event_stream_digest, + event_count=excluded.event_count, + step_count=excluded.step_count, + incident_count=excluded.incident_count, + started_at_utc=excluded.started_at_utc, + finished_at_utc=excluded.finished_at_utc, + projected_at_utc=excluded.projected_at_utc, + updated_at_utc=excluded.updated_at_utc + """, + ( + trajectory.id, + trajectory.project_id, + trajectory.repo_root_digest, + trajectory.workflow_id, + trajectory.intent_id, + trajectory.primary_run_id, + trajectory.first_run_id, + trajectory.last_run_id, + trajectory.report_digest, + trajectory.outcome, + trajectory.quality_tier, + trajectory.quality_score, + _json_array(trajectory.labels), + trajectory.summary, + trajectory.trajectory_digest, + trajectory.source_event_stream_digest, + trajectory.projection_version, + trajectory.event_count, + trajectory.step_count, + trajectory.incident_count, + 
def supersede_stale_projection_trajectories(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    workflow_id: str,
    keep_trajectory_id: str,
    keep_trajectory_digest: str,
) -> int:
    """Remove other trajectories of the same workflow and re-point evidence.

    Every ``memory_trajectories`` row for ``project_id``/``workflow_id`` whose
    id differs from ``keep_trajectory_id`` is treated as stale. For each one,
    ``memory_evidence`` rows of kind ``'trajectory'`` that referenced it are
    re-pointed at the kept trajectory (id and digest), its child rows are
    deleted, and finally the trajectory row itself is deleted.

    Args:
        conn: Open connection; the caller owns the transaction and commit.
        project_id: Project the trajectories belong to.
        workflow_id: Workflow whose stale projections should be removed.
        keep_trajectory_id: Id of the trajectory that must survive.
        keep_trajectory_digest: Digest written onto re-pointed evidence rows.

    Returns:
        The number of stale trajectory rows removed.
    """
    stale_rows = conn.execute(
        """
        SELECT id FROM memory_trajectories
        WHERE project_id=? AND workflow_id=? AND id != ?
        """,
        (project_id, workflow_id, keep_trajectory_id),
    ).fetchall()
    # SQLite only enforces foreign keys (and therefore cascades) when
    # ``PRAGMA foreign_keys`` is enabled on the connection, so child rows are
    # removed explicitly, mirroring what ``upsert_trajectory`` does, instead
    # of relying on the schema to cascade.
    child_tables = (
        "memory_trajectory_steps",
        "memory_trajectory_subjects",
        "memory_trajectory_evidence",
        "memory_trajectory_patch_trails",
    )
    removed = 0
    for row in stale_rows:
        # Positional access works for both tuple rows and ``sqlite3.Row``.
        old_id = str(row[0])
        conn.execute(
            """
            UPDATE memory_evidence
            SET ref=?, digest=?
            WHERE evidence_kind='trajectory' AND ref=?
            """,
            (keep_trajectory_id, keep_trajectory_digest, old_id),
        )
        for table in child_tables:
            # Table names come from the fixed tuple above, never from input.
            conn.execute(f"DELETE FROM {table} WHERE trajectory_id=?", (old_id,))
        conn.execute("DELETE FROM memory_trajectories WHERE id=?", (old_id,))
        removed += 1
    return removed
def write_projection_run(
    conn: sqlite3.Connection,
    run: TrajectoryProjectionRun,
) -> None:
    """Insert one projection-run record; the caller is responsible for commit."""
    # Values are listed in the same order as the column list in the INSERT.
    values = (
        run.id,
        run.project_id,
        run.repo_root_digest,
        run.projection_version,
        run.started_at_utc,
        run.finished_at_utc,
        run.status,
        run.workflows_seen,
        run.trajectories_created,
        run.trajectories_updated,
        run.trajectories_unchanged,
        run.legacy_event_count,
        run.message,
    )
    conn.execute(
        """
        INSERT INTO memory_trajectory_projection_runs(
            id, project_id, repo_root_digest, projection_version, started_at_utc,
            finished_at_utc, status, workflows_seen, trajectories_created,
            trajectories_updated, trajectories_unchanged, legacy_event_count, message
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
def list_trajectories_for_subjects(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    subjects: Mapping[str, Sequence[str]],
    limit: int = 20,
) -> list[Trajectory]:
    """Return trajectories of ``project_id`` linked to any given subject.

    ``subjects`` maps a subject kind to the keys of interest. Empty keys are
    ignored and duplicates collapse. Matches are ordered by
    ``finished_at_utc`` descending then id ascending, capped at
    ``max(1, limit)``, and hydrated through ``find_trajectories_by_ids``.
    """
    wanted = [
        (kind, key)
        for kind in sorted(subjects)
        for key in sorted(set(subjects[kind]))
        if key
    ]
    if not wanted:
        return []
    # One "(kind AND key)" predicate per pair; its two bind values follow in
    # the same order right after the project id.
    predicate = "(s.subject_kind=? AND s.subject_key=?)"
    clauses = " OR ".join(predicate for _ in wanted)
    bind_values: list[object] = [project_id]
    bind_values.extend(value for pair in wanted for value in pair)
    bind_values.append(max(1, int(limit)))
    matches = conn.execute(
        f"""
        SELECT DISTINCT t.id, t.finished_at_utc
        FROM memory_trajectories t
        JOIN memory_trajectory_subjects s ON s.trajectory_id = t.id
        WHERE t.project_id=? AND ({clauses})
        ORDER BY t.finished_at_utc DESC, t.id ASC
        LIMIT ?
        """,
        tuple(bind_values),
    ).fetchall()
    return find_trajectories_by_ids(conn, [str(match["id"]) for match in matches])
def search_trajectories(
    conn: sqlite3.Connection,
    *,
    project_id: str,
    query: str,
    limit: int = 20,
    match_mode: SearchMatchMode = "any",
) -> list[Trajectory]:
    """Search a project's trajectories by substring match on query tokens.

    ``query`` is split with ``tokenize_query``; an empty token list yields an
    empty result without touching the database. Each token is matched with
    ``LIKE '%token%'`` against the trajectory summary, workflow id and labels
    JSON, against subject keys, and against step event types and summaries.

    Args:
        conn: Connection whose rows support lookup by column name.
        project_id: Only trajectories of this project are considered.
        query: Free-text query.
        limit: Maximum number of results; values below 1 are raised to 1.
        match_mode: ``"all"`` requires every token to match; any other value
            requires at least one token to match.

    Returns:
        Hydrated trajectories ordered by ``finished_at_utc`` descending, then
        id ascending.
    """
    tokens = tokenize_query(query)
    if not tokens:
        return []
    token_clauses: list[str] = []
    params: list[object] = [project_id]
    for token in tokens:
        # ``_escape_like`` neutralises LIKE wildcards and lowercases the token
        # so it can be compared against the LOWER(...) column expressions.
        escaped = _escape_like(token)
        # The clause below contains exactly six ``?`` placeholders; keep that
        # count in sync with the ``params.extend`` call that follows.
        token_clauses.append(
            "("
            "LOWER(t.summary) LIKE ? ESCAPE '\\' OR "
            "LOWER(t.workflow_id) LIKE ? ESCAPE '\\' OR "
            "LOWER(t.labels_json) LIKE ? ESCAPE '\\' OR "
            "EXISTS ("
            "SELECT 1 FROM memory_trajectory_subjects s "
            "WHERE s.trajectory_id=t.id AND "
            "LOWER(s.subject_key) LIKE ? ESCAPE '\\'"
            ") OR "
            "EXISTS ("
            "SELECT 1 FROM memory_trajectory_steps st "
            "WHERE st.trajectory_id=t.id AND "
            "(LOWER(st.event_type) LIKE ? ESCAPE '\\' OR "
            "LOWER(COALESCE(st.summary, '')) LIKE ? ESCAPE '\\')"
            ")"
            ")"
        )
        needle = f"%{escaped}%"
        params.extend([needle, needle, needle, needle, needle, needle])
    # Per-token clauses are AND-ed for "all" and OR-ed otherwise.
    joiner = " AND " if match_mode == "all" else " OR "
    rows = conn.execute(
        f"""
        SELECT t.id
        FROM memory_trajectories t
        WHERE t.project_id=? AND ({joiner.join(token_clauses)})
        ORDER BY t.finished_at_utc DESC, t.id ASC
        LIMIT ?
        """,
        (*params, max(1, int(limit))),
    ).fetchall()
    return find_trajectories_by_ids(conn, [str(row["id"]) for row in rows])
def count_trajectories(conn: sqlite3.Connection, *, project_id: str) -> int:
    """Return how many trajectory rows exist for ``project_id``."""
    cursor = conn.execute(
        "SELECT COUNT(*) FROM memory_trajectories WHERE project_id=?",
        (project_id,),
    )
    result = cursor.fetchone()
    if result is None:
        return 0
    return int(result[0])
" + "ORDER BY finished_at_utc DESC, id DESC LIMIT 1", + (project_id,), + ).fetchone() + if row is None: + return None + return TrajectoryProjectionRun( + id=str(row["id"]), + project_id=str(row["project_id"]), + repo_root_digest=str(row["repo_root_digest"]), + projection_version=str(row["projection_version"]), + started_at_utc=str(row["started_at_utc"]), + finished_at_utc=str(row["finished_at_utc"]), + status=str(row["status"]), + workflows_seen=int(row["workflows_seen"]), + trajectories_created=int(row["trajectories_created"]), + trajectories_updated=int(row["trajectories_updated"]), + trajectories_unchanged=int(row["trajectories_unchanged"]), + legacy_event_count=int(row["legacy_event_count"]), + message=_optional_text(row["message"]), + ) + + +def _group_by_workflow( + events: Sequence[AuditRecord], +) -> dict[str, tuple[AuditRecord, ...]]: + grouped: defaultdict[str, list[AuditRecord]] = defaultdict(list) + for event in events: + if event.workflow_id: + grouped[event.workflow_id].append(event) + return { + workflow_id: tuple( + sorted(records, key=lambda item: (item.audit_sequence or 0, item.event_id)) + ) + for workflow_id, records in sorted(grouped.items()) + } + + +def _insert_steps(conn: sqlite3.Connection, trajectory: Trajectory) -> None: + conn.executemany( + """ + INSERT INTO memory_trajectory_steps( + trajectory_id, step_index, audit_sequence, event_id, event_type, status, + run_id, report_digest, event_core_sha256, event_core_json, summary, + created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + [ + ( + trajectory.id, + step.step_index, + step.audit_sequence, + step.event_id, + step.event_type, + step.status, + step.run_id, + step.report_digest, + step.event_core_sha256, + step.event_core_json, + step.summary, + step.created_at_utc, + ) + for step in trajectory.steps + ], + ) + + +def _insert_subjects( + conn: sqlite3.Connection, + trajectory_id: str, + subjects: Iterable[TrajectorySubject], +) -> None: + conn.executemany( + """ + INSERT OR IGNORE INTO memory_trajectory_subjects( + trajectory_id, subject_kind, subject_key, relation + ) VALUES (?, ?, ?, ?) + """, + [ + (trajectory_id, subject.subject_kind, subject.subject_key, subject.relation) + for subject in subjects + ], + ) + + +def _insert_evidence( + conn: sqlite3.Connection, + trajectory_id: str, + evidence: Iterable[TrajectoryEvidence], +) -> None: + conn.executemany( + """ + INSERT OR IGNORE INTO memory_trajectory_evidence( + trajectory_id, evidence_kind, ref, locator, digest, created_at_utc + ) VALUES (?, ?, ?, ?, ?, ?) 
+ """, + [ + ( + trajectory_id, + item.evidence_kind, + item.ref, + item.locator, + item.digest, + item.created_at_utc, + ) + for item in evidence + ], + ) + + +def _row_to_step(row: sqlite3.Row) -> TrajectoryStep: + return TrajectoryStep( + step_index=int(row["step_index"]), + audit_sequence=int(row["audit_sequence"]), + event_id=str(row["event_id"]), + event_type=str(row["event_type"]), + status=_optional_text(row["status"]), + run_id=_optional_text(row["run_id"]), + report_digest=_optional_text(row["report_digest"]), + event_core_sha256=str(row["event_core_sha256"]), + event_core_json=str(row["event_core_json"]), + summary=_optional_text(row["summary"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _row_to_subject(row: sqlite3.Row) -> TrajectorySubject: + return TrajectorySubject( + subject_kind=str(row["subject_kind"]), + subject_key=str(row["subject_key"]), + relation=str(row["relation"]), + ) + + +def _row_to_evidence(row: sqlite3.Row) -> TrajectoryEvidence: + return TrajectoryEvidence( + evidence_kind=str(row["evidence_kind"]), + ref=str(row["ref"]), + locator=_optional_text(row["locator"]), + digest=_optional_text(row["digest"]), + created_at_utc=str(row["created_at_utc"]), + ) + + +def _steps_for_trajectory( + conn: sqlite3.Connection, + trajectory_id: str, +) -> list[TrajectoryStep]: + rows = conn.execute( + "SELECT * FROM memory_trajectory_steps WHERE trajectory_id=? " + "ORDER BY step_index ASC", + (trajectory_id,), + ).fetchall() + return [_row_to_step(row) for row in rows] + + +def _subjects_for_trajectory( + conn: sqlite3.Connection, + trajectory_id: str, +) -> list[TrajectorySubject]: + rows = conn.execute( + "SELECT subject_kind, subject_key, relation FROM memory_trajectory_subjects " + "WHERE trajectory_id=? 
ORDER BY subject_kind ASC, subject_key ASC", + (trajectory_id,), + ).fetchall() + return [_row_to_subject(row) for row in rows] + + +def _evidence_for_trajectory( + conn: sqlite3.Connection, + trajectory_id: str, +) -> list[TrajectoryEvidence]: + rows = conn.execute( + "SELECT evidence_kind, ref, locator, digest, created_at_utc " + "FROM memory_trajectory_evidence WHERE trajectory_id=? " + "ORDER BY created_at_utc ASC, evidence_kind ASC, ref ASC", + (trajectory_id,), + ).fetchall() + return [_row_to_evidence(row) for row in rows] + + +def _projection_run_id( + *, + project_id: str, + repo_root_digest: str, + projection_version: str, + started_at_utc: str, + workflow_count: int, +) -> str: + payload = json_text( + { + "project_id": project_id, + "repo_root_digest": repo_root_digest, + "projection_version": projection_version, + "started_at_utc": started_at_utc, + "workflow_count": workflow_count, + "nonce": uuid.uuid4().hex, + }, + sort_keys=True, + ) + return f"trajrun-{hashlib.sha256(payload.encode('utf-8')).hexdigest()[:16]}" + + +def _json_array(values: Sequence[str]) -> str: + return json_text(list(values), sort_keys=True) + + +def _json_object(payload: Mapping[str, object]) -> str: + return json_text(payload, sort_keys=True) + + +def _optional_text(value: object) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None + + +def _escape_like(value: str) -> str: + return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_").lower() + + +def upsert_trajectory_patch_trail( + conn: sqlite3.Connection, + *, + trajectory_id: str, + patch_trail_json: str, + patch_trail_digest: str, + schema_version: str, + projected_at_utc: str, +) -> None: + conn.execute( + """ + INSERT INTO memory_trajectory_patch_trails( + trajectory_id, patch_trail_digest, patch_trail_json, + schema_version, projected_at_utc + ) VALUES (?, ?, ?, ?, ?) 
def load_trajectory_patch_trail(
    conn: sqlite3.Connection,
    *,
    trajectory_id: str,
) -> dict[str, object] | None:
    """Load the stored patch-trail payload for one trajectory.

    Returns ``None`` when no row exists for ``trajectory_id`` or when the
    stored JSON does not decode to an object.
    """
    cursor = conn.execute(
        """
        SELECT patch_trail_json
        FROM memory_trajectory_patch_trails
        WHERE trajectory_id=?
        """,
        (trajectory_id,),
    )
    stored = cursor.fetchone()
    if stored is None:
        return None
    payload = orjson.loads(str(stored["patch_trail_json"]))
    if isinstance(payload, dict):
        return payload
    return None
# Public API of the trajectory store, kept in alphabetical order. Every
# function in this module without a leading underscore is listed, including
# ``list_canonical_trajectories_for_export`` and
# ``supersede_stale_projection_trajectories``.
__all__ = [
    "count_trajectories",
    "find_trajectories_by_ids",
    "find_trajectory",
    "latest_projection_run",
    "list_canonical_trajectories_for_export",
    "list_trajectories",
    "list_trajectories_for_intent_id",
    "list_trajectories_for_subjects",
    "load_trajectory_patch_trail",
    "load_trajectory_patch_trails",
    "rebuild_trajectories_from_audit",
    "rebuild_trajectories_incremental",
    "search_trajectories",
    "supersede_stale_projection_trajectories",
    "upsert_trajectory",
    "upsert_trajectory_patch_trail",
    "write_projection_run",
]
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone + +from ..config.memory import MemoryConfig +from .enums import MemoryStatus +from .sqlite_store import SqliteEngineeringMemoryStore + + +@dataclass(frozen=True, slots=True) +class VacuumReport: + deleted_by_status: dict[str, int] + total_deleted: int + + +def _retention_days_for_status( + status: MemoryStatus, + config: MemoryConfig, +) -> int | None: + mapping: dict[MemoryStatus, int] = { + "draft": config.draft_retention_days, + "rejected": config.rejected_retention_days, + "archived": config.archived_retention_days, + } + days = mapping.get(status) + if days is None or days < 0: + return None + return days + + +def run_memory_vacuum( + store: SqliteEngineeringMemoryStore, + config: MemoryConfig, + *, + commit: bool = True, +) -> VacuumReport: + now = datetime.now(tz=timezone.utc) + deleted_by_status: dict[str, int] = {} + total = 0 + for status in ("draft", "rejected", "archived"): + days = _retention_days_for_status(status, config) + if days is None: + continue + cutoff = (now - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ") + count = store.delete_records_older_than( + status=status, + updated_before_utc=cutoff, + commit=False, + ) + if count: + deleted_by_status[status] = count + total += count + if commit: + store.commit() + return VacuumReport( + deleted_by_status=dict(sorted(deleted_by_status.items())), + total_deleted=total, + ) + + +__all__ = ["VacuumReport", "run_memory_vacuum"] diff --git a/codeclone/metrics/cohesion.py b/codeclone/metrics/cohesion.py index d60ccf4b..d8df91ac 100644 --- a/codeclone/metrics/cohesion.py +++ b/codeclone/metrics/cohesion.py @@ -7,6 +7,7 @@ from __future__ import annotations import ast +from collections.abc import Sequence from ..contracts import COHESION_RISK_MEDIUM_MAX from ._risk import RiskLevel, threshold_risk @@ 
-22,19 +23,22 @@ def _self_attribute_name(node: ast.AST) -> str | None: return None -def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: - methods: list[ast.FunctionDef | ast.AsyncFunctionDef] = [ +def _class_methods( + class_node: ast.ClassDef, +) -> list[ast.FunctionDef | ast.AsyncFunctionDef]: + return [ node for node in class_node.body if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) ] - method_names = tuple(method.name for method in methods) - if not methods: - return 1, 0, 0 + +def _collect_method_cohesion_facts( + methods: Sequence[ast.FunctionDef | ast.AsyncFunctionDef], + method_names: tuple[str, ...], +) -> tuple[dict[str, set[str]], dict[str, set[str]]]: method_to_attrs: dict[str, set[str]] = {name: set() for name in method_names} method_calls: dict[str, set[str]] = {name: set() for name in method_names} - for method in methods: for node in ast.walk(method): attr_name = _self_attribute_name(node) @@ -50,7 +54,14 @@ def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: callee = node.func.attr if callee in method_calls: method_calls[method.name].add(callee) + return method_to_attrs, method_calls + +def _build_adjacency( + method_names: tuple[str, ...], + method_to_attrs: dict[str, set[str]], + method_calls: dict[str, set[str]], +) -> dict[str, set[str]]: adjacency: dict[str, set[str]] = {name: set() for name in method_names} for name in method_names: adjacency[name].update(method_calls[name]) @@ -63,7 +74,13 @@ def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: if left_attrs & method_to_attrs[right]: adjacency[left].add(right) adjacency[right].add(left) + return adjacency + +def _count_connected_components( + method_names: tuple[str, ...], + adjacency: dict[str, set[str]], +) -> int: visited: set[str] = set() components = 0 @@ -76,9 +93,47 @@ def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: if current not in visited: visited.add(current) stack.extend(sorted(adjacency[current] 
- visited)) - - instance_vars = set().union(*method_to_attrs.values()) if method_to_attrs else set() - return components, len(method_names), len(instance_vars) + return components + + +def _instance_var_count(method_to_attrs: dict[str, set[str]]) -> int: + if not method_to_attrs: + return 0 + return len(set().union(*method_to_attrs.values())) + + +def compute_lcom4( + class_node: ast.ClassDef, + *, + ignored_methods: frozenset[str] = frozenset(), +) -> tuple[int, int, int]: + """Compute LCOM4 cohesion over behavior-carrying methods. + + ``ignored_methods`` are excluded from the cohesion graph (Protocol stub + methods and Pydantic validator/serializer hooks). They never carry + instance-level behavioral cohesion, so counting them inflates the + component count. The reported ``method_count`` still reflects all methods + so the class size stays honest; only the cohesion graph and component + count use the analyzed subset. When one or zero analyzed methods remain, + cohesion is not measurable and LCOM4 collapses to ``1`` (no penalty). 
+ """ + all_methods = _class_methods(class_node) + all_method_count = len(all_methods) + analyzed_methods = [ + method for method in all_methods if method.name not in ignored_methods + ] + method_names = tuple(method.name for method in analyzed_methods) + + method_to_attrs, method_calls = _collect_method_cohesion_facts( + analyzed_methods, + method_names, + ) + if len(analyzed_methods) <= 1: + return 1, all_method_count, _instance_var_count(method_to_attrs) + + adjacency = _build_adjacency(method_names, method_to_attrs, method_calls) + components = _count_connected_components(method_names, adjacency) + return components, all_method_count, _instance_var_count(method_to_attrs) def cohesion_risk(lcom4: int) -> RiskLevel: diff --git a/codeclone/metrics/coverage_join.py b/codeclone/metrics/coverage_join.py index 386b16b9..02f145c4 100644 --- a/codeclone/metrics/coverage_join.py +++ b/codeclone/metrics/coverage_join.py @@ -6,15 +6,17 @@ from __future__ import annotations +import importlib from collections import defaultdict from collections.abc import Sequence from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Literal, cast from xml.etree import ElementTree from ..models import CoverageJoinResult, GroupItemLike, UnitCoverageFact from ..utils.coerce import as_int, as_str +from ..utils.json_io import BoundedReadError, read_bounded_bytes __all__ = [ "CoverageJoinParseError", @@ -28,6 +30,7 @@ _MISSING_FROM_REPORT_STATUS: _CoverageStatus = "missing_from_report" _NO_EXECUTABLE_LINES_STATUS: _CoverageStatus = "no_executable_lines" _HOTSPOT_RISKS: frozenset[_Risk] = frozenset({"medium", "high"}) +MAX_COVERAGE_XML_BYTES = 25 * 1024 * 1024 class CoverageJoinParseError(ValueError): @@ -161,19 +164,35 @@ def _iter_cobertura_line_hits( return tuple(rows) +def _parse_xml_bytes(payload: bytes) -> ElementTree.Element: + try: + safe_element_tree = importlib.import_module("defusedxml.ElementTree") + except ImportError: + return 
ElementTree.fromstring(payload) + try: + return cast(ElementTree.Element, safe_element_tree.fromstring(payload)) + except Exception as exc: + if exc.__class__.__module__.startswith("defusedxml"): + raise ElementTree.ParseError(str(exc)) from exc + raise + + def _parse_coverage_report( *, coverage_xml: Path, root_path: Path, ) -> _CoverageReport: try: - tree = ElementTree.parse(coverage_xml) - except (ElementTree.ParseError, OSError) as exc: + payload = read_bounded_bytes( + coverage_xml, + max_bytes=MAX_COVERAGE_XML_BYTES, + ) + root_element = _parse_xml_bytes(payload) + except (BoundedReadError, ElementTree.ParseError, OSError) as exc: raise CoverageJoinParseError( f"Invalid Cobertura XML at {coverage_xml}: {exc}" ) from exc - root_element = tree.getroot() source_roots = _resolved_coverage_sources( root_element=root_element, root_path=root_path ) diff --git a/codeclone/models.py b/codeclone/models.py index 5475cb9a..dc5abb18 100644 --- a/codeclone/models.py +++ b/codeclone/models.py @@ -33,6 +33,30 @@ class Unit: side_effect_order_profile: str = "none" +RelationshipKind = Literal["call", "reference"] +RelationshipResolutionStatus = Literal["resolved", "unresolved"] +RelationshipOriginLane = Literal["production", "test"] + + +@dataclass(frozen=True, slots=True) +class RelationshipRecord: + relation_kind: RelationshipKind + resolution_status: RelationshipResolutionStatus + origin_lane: RelationshipOriginLane + source_qualname: str + target_qualname: str | None + path: str + line: int + expression: str | None = None + resolution_rule: str | None = None + + +@dataclass(frozen=True, slots=True) +class FunctionRelationshipFacts: + source_qualname: str + relationships: tuple[RelationshipRecord, ...] 
+ + @dataclass(frozen=True, slots=True) class BlockUnit: block_hash: str @@ -209,6 +233,7 @@ class FileMetrics: typing_coverage: ModuleTypingCoverage | None = None docstring_coverage: ModuleDocstringCoverage | None = None api_surface: ModuleApiSurface | None = None + function_relationship_facts: tuple[FunctionRelationshipFacts, ...] = () @dataclass(frozen=True, slots=True) diff --git a/codeclone/observability/__init__.py b/codeclone/observability/__init__.py new file mode 100644 index 00000000..f9f0b3a8 --- /dev/null +++ b/codeclone/observability/__init__.py @@ -0,0 +1,49 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Platform observability (Phase 29). + +A runtime-profiling plane separate from audit truth, the analysis report, and +the memory store: operations and stage spans for CLI / MCP / projection workers. +Default OFF, bounded, deterministic shape. ``bootstrap`` once per process, then +wrap work in ``operation`` / ``span``. 
+""" + +from __future__ import annotations + +from .runtime import ( + OperationHandle, + SpanHandle, + bind_root, + bootstrap, + current_operation_context, + instrument_db_connection, + is_observability_enabled, + operation, + payload_capture_enabled, + record_counter, + record_db_query, + record_elapsed_span, + shutdown, + span, +) + +__all__ = [ + "OperationHandle", + "SpanHandle", + "bind_root", + "bootstrap", + "current_operation_context", + "instrument_db_connection", + "is_observability_enabled", + "operation", + "payload_capture_enabled", + "record_counter", + "record_db_query", + "record_elapsed_span", + "shutdown", + "span", +] diff --git a/codeclone/observability/db_fingerprint.py b/codeclone/observability/db_fingerprint.py new file mode 100644 index 00000000..b9ce496e --- /dev/null +++ b/codeclone/observability/db_fingerprint.py @@ -0,0 +1,156 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""SQL statement fingerprinting for DB observability (Phase 29.DB, Track B). + +Performance-truth only: reduce a SQL statement to its normalized *shape* so the +cockpit can turn "1892 queries" into "1200x SELECT evidence by trajectory_id". +The fingerprint is literal-free by construction — every string/number value is +replaced with ``?`` — so it is safe to persist without leaking row data. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +# Bound the persisted shape; pathological statements must not bloat the column. +_MAX_FINGERPRINT_CHARS = 200 + +_WHITESPACE_RE = re.compile(r"\s+") +# Single-quoted string literal with doubled-quote ('') escapes; unrolled so it +# stays linear-time (no nested quantifier to backtrack on). 
+_STRING_RE = re.compile(r"'[^']*(?:''[^']*)*'") +_HEX_RE = re.compile(r"\b0x[0-9a-f]+\b") +_NUMBER_RE = re.compile(r"\b\d+(?:\.\d+)?\b") +# ``( ?, ?, ? )`` / ``( ? )`` -> ``(?)`` so IN/VALUES arity does not fan out +# distinct shapes for the same statement. +_PLACEHOLDER_LIST_RE = re.compile(r"\(\s*\?(?:\s*,\s*\?)*\s*\)") +# First identifier after a table-introducing keyword. +_TABLE_HINT_RE = re.compile(r"\b(?:from|into|update|join)\s+([a-z_][a-z0-9_$]*)") + +_KINDS = frozenset({"select", "insert", "update", "delete"}) + +# Everything after the first WHERE — the predicate columns live here. +_WHERE_RE = re.compile(r"\bwhere\b(.*)") +# An identifier immediately left of a comparison operator — a filter column. +_WHERE_COLUMN_RE = re.compile( + r"([a-z_][a-z0-9_$.]*)\s*(?:<=|>=|!=|<>|=|<|>|\bin\b|\bis\b|\blike\b)" +) +# The projection list between SELECT and FROM (count(*) / distinct x / columns). +_PROJECTION_RE = re.compile(r"^select\s+(.*?)\s+from\b") +_MAX_WHERE_COLUMNS = 4 + + +@dataclass(frozen=True, slots=True) +class SqlFingerprint: + """Normalized shape of one SQL statement (literal-free).""" + + fingerprint: str + table_hint: str | None + kind: str # select | insert | update | delete | other + + +@dataclass(frozen=True, slots=True) +class SqlShape: + """Human-facing interpretation of a fingerprint for the cockpit. + + ``summary`` reads like "count by repo_root_digest, workflow_id" or + "by memory_id" — the predicate, not the raw SQL, so a query count decodes + into *what it filters on*. + """ + + kind: str + table: str | None + where_columns: tuple[str, ...] 
+ summary: str + + +def _normalize(sql: str) -> str: + normalized = _WHITESPACE_RE.sub(" ", sql.strip().lower()) + normalized = _STRING_RE.sub("?", normalized) + normalized = _HEX_RE.sub("?", normalized) + normalized = _NUMBER_RE.sub("?", normalized) + normalized = _PLACEHOLDER_LIST_RE.sub("(?)", normalized) + return normalized.strip() + + +def fingerprint_sql(sql: str) -> SqlFingerprint: + """Reduce a SQL statement to its literal-free shape, table hint, and kind. + + Idempotent on its own output: fingerprinting an already-normalized statement + returns the same shape, so a persisted fingerprint can be re-parsed for its + table hint and kind without storing them separately. + """ + normalized = _normalize(sql) + if not normalized: + return SqlFingerprint(fingerprint="", table_hint=None, kind="other") + head = normalized.split(" ", 1)[0] + kind = head if head in _KINDS else "other" + table_match = _TABLE_HINT_RE.search(normalized) + table_hint = table_match.group(1) if table_match else None + return SqlFingerprint( + fingerprint=normalized[:_MAX_FINGERPRINT_CHARS], + table_hint=table_hint, + kind=kind, + ) + + +def _where_columns(normalized: str) -> tuple[str, ...]: + match = _WHERE_RE.search(normalized) + if not match: + return () + seen: list[str] = [] + for raw in _WHERE_COLUMN_RE.findall(match.group(1)): + # Strip a table/alias prefix (t.id -> id); keep first-seen order. 
+ column = raw.split(".")[-1] + if column not in seen: + seen.append(column) + return tuple(seen) + + +def _projection(normalized: str) -> str | None: + match = _PROJECTION_RE.match(normalized) + if not match: + return None + columns = match.group(1).strip() + if columns.startswith("count("): + return "count" + if columns.startswith("distinct "): + target = columns[len("distinct ") :].split(",", 1)[0].strip() + return f"distinct {target}" + return None + + +def _summarize(kind: str, normalized: str, where_columns: tuple[str, ...]) -> str: + shown = ", ".join(where_columns[:_MAX_WHERE_COLUMNS]) + if len(where_columns) > _MAX_WHERE_COLUMNS: + shown += ", …" + head = _projection(normalized) or "" + if shown and head: + return f"{head} by {shown}" + if shown: + return f"by {shown}" + if head: + return head + return "all rows" if kind == "select" else "" + + +def describe_fingerprint(fingerprint: str) -> SqlShape: + """Interpret a (normalized or raw) statement into a cockpit-facing shape: + its kind, table, predicate columns, and a one-line ``summary``. + """ + fp = fingerprint_sql(fingerprint) + where_columns = _where_columns(fp.fingerprint) + return SqlShape( + kind=fp.kind, + table=fp.table_hint, + where_columns=where_columns, + summary=_summarize(fp.kind, fp.fingerprint, where_columns), + ) + + +__all__ = ["SqlFingerprint", "SqlShape", "describe_fingerprint", "fingerprint_sql"] diff --git a/codeclone/observability/models.py b/codeclone/observability/models.py new file mode 100644 index 00000000..7c1e395d --- /dev/null +++ b/codeclone/observability/models.py @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Persisted observability records — the shared shape across runtime (writer), +store, and read model. 
+ +Pure data, no clock and no DB: the runtime stamps timestamps/durations and the +writer/reader move these between memory and sqlite. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field + +from .reason_kind import ReasonKind + + +@dataclass(frozen=True, slots=True) +class ProfileSample: + """Optional resource snapshot (``codeclone[perf]`` / ``profile=true`` only).""" + + rss_mb: float | None = None + rss_delta_mb: float | None = None + peak_rss_mb: float | None = None + peak_rss_delta_mb: float | None = None + cpu_user_ms: float | None = None + cpu_system_ms: float | None = None + open_fds: int | None = None + thread_count: int | None = None + + +@dataclass(frozen=True, slots=True) +class SpanRecord: + span_id: str + operation_id: str + name: str + started_at_utc: str + duration_ms: float + status: str + parent_span_id: str | None = None + reason_kind: ReasonKind | None = None + reason: str | None = None + dedupe_key: str | None = None + counters: Mapping[str, int] = field(default_factory=dict) + # Top-N literal-free SQL shapes seen on this span -> occurrence count. + db_fingerprints: Mapping[str, int] = field(default_factory=dict) + profile: ProfileSample | None = None + + +@dataclass(frozen=True, slots=True) +class OperationRecord: + operation_id: str + correlation_id: str + surface: str + name: str + started_at_utc: str + duration_ms: float + status: str + parent_operation_id: str | None = None + error_kind: str | None = None + session_id: str | None = None + repo_root_digest: str | None = None + request_bytes: int | None = None + response_bytes: int | None = None + request_tokens: int | None = None + response_tokens: int | None = None + profile: ProfileSample | None = None + spans: tuple[SpanRecord, ...] 
= () + + +__all__ = ["OperationRecord", "ProfileSample", "SpanRecord"] diff --git a/codeclone/observability/profile.py b/codeclone/observability/profile.py new file mode 100644 index 00000000..8636bd26 --- /dev/null +++ b/codeclone/observability/profile.py @@ -0,0 +1,132 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""psutil resource sampling for observability profiling (Phase 29, profile=true). + +psutil is an optional dependency (``codeclone[perf]``) imported lazily inside the +capture functions, so a disabled or non-profiling process never loads it. Every +function degrades to ``None`` when psutil is unavailable — profiling is best +effort and must never break the work it measures. +""" + +from __future__ import annotations + +import sys +import time +from datetime import datetime, timezone + +from .models import ProfileSample + +_BYTES_PER_MB = 1024 * 1024 +ProfileBaseline = tuple[int, float, float, int | None] + + +def worker_bootstrap_sample() -> tuple[str, float] | None: + """Process cold-start as ``(creation_timestamp_iso, ms_elapsed_to_now)``. + + The elapsed time spans process spawn, interpreter startup, imports and setup + up to this call — the part of the spawn->job handoff a worker cannot wrap + with a normal span. Returns ``None`` when psutil is unavailable. + """ + try: + import psutil + except ImportError: + return None + created = psutil.Process().create_time() # epoch seconds + elapsed_ms = max(0.0, (time.time() - created) * 1000.0) + created_iso = datetime.fromtimestamp(created, tz=timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%S.%fZ" + ) + return created_iso, elapsed_ms + + +def capture_rss_cpu() -> tuple[int, float, float] | None: + """Snapshot ``(rss_bytes, cpu_user_s, cpu_system_s)`` for this process. 
+ + Returns ``None`` when psutil is not installed. + """ + try: + import psutil + except ImportError: + return None + process = psutil.Process() + memory = process.memory_info() + cpu = process.cpu_times() + return memory.rss, cpu.user, cpu.system + + +def capture_process_peak_rss() -> int | None: + """Process high-water RSS via ``getrusage`` (monotonic since process start). + + Returns ``None`` when ``resource.getrusage`` is unavailable. + """ + try: + import resource + except ImportError: + return None + usage = resource.getrusage(resource.RUSAGE_SELF) + peak = int(usage.ru_maxrss) + if sys.platform == "darwin": + return peak + return peak * 1024 + + +def capture_profile_baseline() -> ProfileBaseline | None: + """Capture RSS/CPU plus the process peak-RSS watermark at span/operation start.""" + snapshot = capture_rss_cpu() + if snapshot is None: + return None + base_rss, base_user, base_system = snapshot + return base_rss, base_user, base_system, capture_process_peak_rss() + + +def build_profile_sample(baseline: ProfileBaseline | None) -> ProfileSample | None: + """Build a ``ProfileSample`` as the delta from ``baseline`` to now. + + Returns ``None`` when no baseline was captured or psutil is unavailable. + """ + if baseline is None: + return None + try: + import psutil + except ImportError: + return None + base_rss, base_user, base_system, base_peak = baseline + process = psutil.Process() + memory = process.memory_info() + cpu = process.cpu_times() + end_peak = capture_process_peak_rss() + peak_rss_mb: float | None = None + peak_rss_delta_mb: float | None = None + if end_peak is not None: + peak_rss_mb = end_peak / _BYTES_PER_MB + if base_peak is not None: + peak_rss_delta_mb = max(0, end_peak - base_peak) / _BYTES_PER_MB + try: + open_fds: int | None = process.num_fds() + except (AttributeError, NotImplementedError, OSError): + # num_fds() is Unix-only; degrade gracefully elsewhere. 
+ open_fds = None + return ProfileSample( + rss_mb=memory.rss / _BYTES_PER_MB, + rss_delta_mb=(memory.rss - base_rss) / _BYTES_PER_MB, + peak_rss_mb=peak_rss_mb, + peak_rss_delta_mb=peak_rss_delta_mb, + cpu_user_ms=(cpu.user - base_user) * 1000.0, + cpu_system_ms=(cpu.system - base_system) * 1000.0, + open_fds=open_fds, + thread_count=process.num_threads(), + ) + + +__all__ = [ + "ProfileBaseline", + "build_profile_sample", + "capture_process_peak_rss", + "capture_profile_baseline", + "capture_rss_cpu", + "worker_bootstrap_sample", +] diff --git a/codeclone/observability/query.py b/codeclone/observability/query.py new file mode 100644 index 00000000..674c0e37 --- /dev/null +++ b/codeclone/observability/query.py @@ -0,0 +1,452 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""``query_platform_observability`` — a sectioned, read-only diagnostics slicer +over the Phase 29 runtime telemetry (RFC specs/rfc-29-observability-query-tool). + +A **slicer, not a trace export API**: each call returns one bounded *section* +projected from the already-computed ``AggregatesView``; no response embeds the +full trace. Dev-only telemetry about the runtime of *CodeClone itself* — it is +NOT a repository-quality signal and MUST NOT affect reports, gates, baselines, +memory facts, or edit authorization. Numeric metrics only: no raw SQL, no raw +payload bodies, no prompts. 
+""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path + +from ..config.observability import resolve_observability_config +from .store.reader import build_trace_view, open_observability_store_readonly +from .views import AggregatesView, OperationView, TraceView + +_DETAIL_LEVELS = ("compact", "normal", "full") +_LIMIT_MIN = 1 +_LIMIT_MAX = 50 +_LIMIT_DEFAULT = 10 +_COMPACT_ROWS = 5 +_CHAIN_CHILD_CAP = 12 +_MAX_DIAGNOSTICS = 3 + +# Heuristic thresholds — telemetry hints, NOT report findings. +_DB_CHATTY_QPC = 200 +_CONTEXT_HEAVY_PCT = 25 +_MEMORY_HEAVY_MB = 200.0 +_CONTEXT_PRESSURE_TOKENS = 8000 + +_AGGREGATE_SECTIONS = ( + "summary", + "slow_operations", + "memory_pipeline_cost", + "db_cost", + "agent_context", + "mcp_tool_matrix", + "correlated_chains", + "costly_noops", + "pipeline", +) + + +def _round1(value: float | None) -> float | None: + return round(value, 1) if value is not None else None + + +def _db_per_call(total_queries: int, span_count: int) -> int: + return round(total_queries / span_count) if span_count else 0 + + +def _envelope(section: str, detail_level: str, window: str) -> dict[str, object]: + return { + "surface": "platform_observability", + "audience": "codeclone_development", + "user_facing": False, + "affects_analysis_truth": False, + "affects_edit_permission": False, + "section": section, + "detail_level": detail_level, + "window": window, + } + + +def _resolve_detail(detail_level: str, warnings: list[str]) -> str: + if detail_level not in _DETAIL_LEVELS: + warnings.append(f"unknown detail_level {detail_level!r}; using compact") + return "compact" + if detail_level == "full": + # No aggregate section supports full; only operation_detail/span_detail + # (a future phase) do. Downgrade rather than error so an agent never + # stalls mid-diagnosis. 
+ warnings.append( + "full detail is only available for operation_detail/span_detail; " + "downgraded to normal" + ) + return "normal" + return detail_level + + +def _clamp_limit(limit: int, warnings: list[str]) -> int: + if not isinstance(limit, int) or isinstance(limit, bool) or limit < _LIMIT_MIN: + warnings.append(f"limit {limit!r} invalid; using {_LIMIT_DEFAULT}") + return _LIMIT_DEFAULT + if limit > _LIMIT_MAX: + warnings.append(f"limit {limit} clamped to {_LIMIT_MAX}") + return _LIMIT_MAX + return limit + + +def _ignored_parameters(operation_id: str | None, span_id: str | None) -> list[str]: + # P1 has only aggregate sections; the by-id selectors are not consumed yet. + ignored = [] + if operation_id is not None: + ignored.append("operation_id") + if span_id is not None: + ignored.append("span_id") + return ignored + + +def _absent_status() -> str: + # Two distinct diagnoses: observability is configured off ("disabled") vs. + # it could collect but no store exists for this root yet ("no_store"). 
+ return "no_store" if resolve_observability_config().enabled else "disabled" + + +def _build_trace(conn: object, window: str) -> TraceView: + if window == "latest": + return build_trace_view(conn) # type: ignore[arg-type] + return build_trace_view(conn, correlation_id=window) # type: ignore[arg-type] + + +def _slow_operations(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "operation": op.name, + "surface": op.surface, + "duration_ms": round(op.duration_ms, 1), + "rss_delta_mb": _round1(op.rss_delta_mb), + } + for op in agg.slowest[:cap] + ] + + +def _memory_pipeline_cost(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "span": s.name, + "operation": s.operation_name, + "duration_ms": round(s.duration_ms, 1), + "produced": s.produced, + "skipped": s.skipped, + "no_op": s.no_op, + } + for s in agg.semantic_costs[:cap] + ] + + +def _db_cost(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + rows: list[dict[str, object]] = [] + for r in agg.db_costs[:cap]: + per_call = _db_per_call(r.total_queries, r.span_count) + rows.append( + { + "span": r.span_name, + "calls": r.span_count, + "queries": r.total_queries, + "writes": r.total_writes, + "queries_per_call": per_call, + "verdict": "query_chatty" if per_call >= _DB_CHATTY_QPC else "ok", + } + ) + return rows + + +def _mcp_tool_matrix(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "tool": t.name, + "calls": t.count, + "p50_ms": round(t.p50_duration_ms, 1), + "p95_ms": round(t.p95_duration_ms, 1), + "p95_request_bytes": t.p95_request_bytes, + "p95_response_bytes": t.p95_response_bytes, + "p95_response_tokens": t.p95_response_tokens, + } + for t in agg.mcp_tools[:cap] + ] + + +def _costly_noops(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + noops = [s for s in agg.semantic_costs if s.no_op] + return [ + { + "span": s.name, + "operation": s.operation_name, + "duration_ms": round(s.duration_ms, 1), + "rss_delta_mb": 
_round1(s.rss_delta_mb), + } + for s in noops[:cap] + ] + + +def _pipeline(agg: AggregatesView, cap: int) -> list[dict[str, object]]: + return [ + { + "subsystem": g.name, + "operations": g.op_count, + "duration_ms": round(g.duration_ms, 1), + "cpu_ms": round(g.cpu_ms, 1), + } + for g in agg.pipeline[:cap] + ] + + +def _agent_context_body(agg: AggregatesView, cap: int) -> dict[str, object]: + agent = agg.agent + if agent is None: + return {"total_response_tokens": 0, "rows": []} + total = agent.response_tokens + rows = [ + { + "tool": c.name, + "calls": c.calls, + "response_tokens": c.response_tokens, + "context_percent": round(100 * c.response_tokens / total) if total else 0, + "verdict": ( + "context_heavy" + if total and 100 * c.response_tokens / total >= _CONTEXT_HEAVY_PCT + else "ok" + ), + } + for c in agent.consumers[:cap] + ] + return {"total_response_tokens": total, "rows": rows} + + +def _chain_descendant_names(op: OperationView) -> list[str]: + names: list[str] = [] + for child in op.children: + names.append(child.name) + names.extend(span.name for span in child.spans) + names.extend(_chain_descendant_names(child)) + return names + + +def _chain_peak_rss(op: OperationView) -> float | None: + values = [op.rss_delta_mb] if op.rss_delta_mb is not None else [] + values.extend(s.rss_delta_mb for s in op.spans if s.rss_delta_mb is not None) + for child in op.children: + child_peak = _chain_peak_rss(child) + if child_peak is not None: + values.append(child_peak) + return max(values) if values else None + + +def _chain_peak_rss_absolute(op: OperationView) -> float | None: + values: list[float] = [ + candidate for candidate in (op.peak_rss_mb, op.rss_mb) if candidate is not None + ] + for span in op.spans: + values.extend( + candidate + for candidate in (span.peak_rss_mb, span.rss_mb) + if candidate is not None + ) + for child in op.children: + child_peak = _chain_peak_rss_absolute(child) + if child_peak is not None: + values.append(child_peak) + return 
max(values) if values else None + + +def _correlated_chains(trace: TraceView, cap: int) -> list[dict[str, object]]: + return [ + { + "root": root.name, + "children": _chain_descendant_names(root)[:_CHAIN_CHILD_CAP], + "duration_ms": round(root.duration_ms, 1), + "peak_rss_delta_mb": _round1(_chain_peak_rss(root)), + "peak_rss_mb": _round1(_chain_peak_rss_absolute(root)), + } + for root in trace.operation_tree[:cap] + ] + + +def _memory_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + span = agg.peak_memory_span + if span is None: + return None + peak = span.peak_rss_mb or span.rss_mb + delta = span.peak_rss_delta_mb or span.rss_delta_mb + if peak is None and delta is None: + return None + if ( + delta is not None + and delta < _MEMORY_HEAVY_MB + and (peak is None or peak < _MEMORY_HEAVY_MB) + ): + return None + detail = [] + if peak is not None: + detail.append(f"peak {round(peak)} MB") + if delta is not None: + detail.append(f"Δ{round(delta)} MB") + return { + "kind": "memory", + "message": ( + f"{span.name} used {' · '.join(detail)} (produced {span.produced})." 
+ ), + } + + +def _db_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + if not agg.db_costs: + return None + top = agg.db_costs[0] + per_call = _db_per_call(top.total_queries, top.span_count) + if per_call < _DB_CHATTY_QPC: + return None + return { + "kind": "db", + "message": f"{top.span_name} executed {per_call} queries per call.", + } + + +def _context_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + agent = agg.agent + if agent is None or not agent.consumers or not agent.response_tokens: + return None + lead = agent.consumers[0] + pct = round(100 * lead.response_tokens / agent.response_tokens) + if pct < _CONTEXT_HEAVY_PCT: + return None + return { + "kind": "context", + "message": f"{lead.name} consumed {pct}% of returned tokens.", + } + + +def _top_diagnostics(agg: AggregatesView) -> list[dict[str, object]]: + candidates = ( + _memory_diagnostic(agg), + _db_diagnostic(agg), + _context_diagnostic(agg), + ) + return [d for d in candidates if d is not None][:_MAX_DIAGNOSTICS] + + +def _summary_body(trace: TraceView) -> dict[str, object]: + agg = trace.aggregates + return { + "operations": agg.operation_count, + "peak_rss_delta_mb": _round1(agg.max_rss_delta_mb), + "peak_rss_mb": _round1(agg.max_peak_rss_mb), + "context_pressure_tokens": agg.agent.response_tokens if agg.agent else 0, + "costly_noops": sum(1 for s in agg.semantic_costs if s.no_op), + "top_diagnostics": _top_diagnostics(agg), + } + + +def _recommended_next_sections( + section: str, agg: AggregatesView +) -> list[dict[str, object]]: + if section != "summary": + return [] + recs: list[dict[str, object]] = [] + if agg.db_costs: + top = agg.db_costs[0] + if _db_per_call(top.total_queries, top.span_count) >= _DB_CHATTY_QPC: + recs.append( + { + "section": "db_cost", + "reason": f"high query count in {top.span_name}.", + } + ) + if agg.agent and agg.agent.response_tokens >= _CONTEXT_PRESSURE_TOKENS: + recs.append( + {"section": "agent_context", "reason": "high context-token 
pressure."} + ) + if any(s.no_op for s in agg.semantic_costs): + recs.append( + {"section": "costly_noops", "reason": "a span ran but produced nothing."} + ) + return recs + + +_ROW_SECTIONS: dict[str, Callable[[AggregatesView, int], list[dict[str, object]]]] = { + "slow_operations": _slow_operations, + "memory_pipeline_cost": _memory_pipeline_cost, + "db_cost": _db_cost, + "mcp_tool_matrix": _mcp_tool_matrix, + "costly_noops": _costly_noops, + "pipeline": _pipeline, +} + + +def query_platform_observability( + *, + root: str | Path, + section: str, + detail_level: str = "compact", + limit: int = _LIMIT_DEFAULT, + window: str = "latest", + operation_id: str | None = None, + span_id: str | None = None, +) -> dict[str, object]: + """Return one bounded telemetry section. Read-only; never raises on missing + data — an absent store yields an inert ``disabled``/``no_store`` envelope. + """ + warnings: list[str] = [] + detail = _resolve_detail(detail_level, warnings) + clamped = _clamp_limit(limit, warnings) + row_cap = clamped if detail == "normal" else min(clamped, _COMPACT_ROWS) + + response = _envelope(section, detail, window) + if detail != detail_level: + response["requested_detail_level"] = detail_level + ignored = _ignored_parameters(operation_id, span_id) + if ignored: + response["ignored_parameters"] = ignored + + if section not in _AGGREGATE_SECTIONS: + response["status"] = "invalid_section" + response["error"] = f"unknown section {section!r}" + response["available_sections"] = list(_AGGREGATE_SECTIONS) + response["rows"] = [] + return _finalize(response, warnings) + + conn = open_observability_store_readonly(Path(root)) + if conn is None: + response["status"] = _absent_status() + response["rows"] = [] + return _finalize(response, warnings) + try: + trace = _build_trace(conn, window) + finally: + conn.close() + + agg = trace.aggregates + if section == "summary": + response.update(_summary_body(trace)) + elif section == "agent_context": + 
response.update(_agent_context_body(agg, row_cap)) + elif section == "correlated_chains": + response["rows"] = _correlated_chains(trace, row_cap) + else: + response["rows"] = _ROW_SECTIONS[section](agg, row_cap) + + recommended = _recommended_next_sections(section, agg) + if recommended: + response["recommended_next_sections"] = recommended + return _finalize(response, warnings) + + +def _finalize(response: dict[str, object], warnings: list[str]) -> dict[str, object]: + if warnings: + response["warnings"] = warnings + return response + + +__all__ = ["query_platform_observability"] diff --git a/codeclone/observability/reason_kind.py b/codeclone/observability/reason_kind.py new file mode 100644 index 00000000..86629509 --- /dev/null +++ b/codeclone/observability/reason_kind.py @@ -0,0 +1,46 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic classifier for why an expensive maintenance span ran. + +Free-text ``reason`` is optional; ``reason_kind`` is the closed, deterministic +vocabulary. ``unknown`` on an expensive semantic/trajectory span is a red flag +(aggregated by the read model as ``unknown_expensive_rebuild_count``). 
+""" + +from __future__ import annotations + +from typing import Literal + +ReasonKind = Literal[ + "content_changed", + "schema_version_changed", + "model_changed", + "manual_rebuild", + "first_index", + "unknown", +] + +REASON_KINDS: frozenset[str] = frozenset( + { + "content_changed", + "schema_version_changed", + "model_changed", + "manual_rebuild", + "first_index", + "unknown", + } +) + + +def validate_reason_kind(value: str | None) -> str | None: + if value is None or value in REASON_KINDS: + return value + msg = f"unknown reason_kind: {value!r}" + raise ValueError(msg) + + +__all__ = ["REASON_KINDS", "ReasonKind", "validate_reason_kind"] diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py new file mode 100644 index 00000000..2e09cbc3 --- /dev/null +++ b/codeclone/observability/render_html.py @@ -0,0 +1,852 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Branded HTML renderer for the observability ``TraceView`` (Phase 29 output). + +A single self-contained page rendered as a *runtime-diagnosis cockpit*, not a +data dump. It is laid out for a top-down reading trajectory that answers the +operator's questions in order: an executive summary that names where time and +memory went; the correlated finish->worker event chains (a horizontal causality +breadcrumb plus indented detail — nesting is shown with an indent rail, never a +card inside a card); a memory-pipeline cost table that flags spans that ran but +produced nothing; and an MCP tool matrix that surfaces payload noise. + +CodeClone brand mark + brand tokens (Inter / JetBrains Mono / oklch indigo, auto +dark-light), inline SVG bars, no JS, no external assets, no ``report`` import. 
+""" + +from __future__ import annotations + +from collections.abc import Mapping +from html import escape + +from .views import ( + AgentTokenRow, + AggregatesView, + DbCostRow, + DbFingerprintRow, + McpToolAggregate, + OperationView, + PipelineGroup, + SpanCostView, + SpanView, + TraceView, + WasteItem, + WaterfallGroup, + WaterfallRow, +) + +# A no-op span only deserves a "costly" warning once it has actually spent time. +_NOOP_COSTLY_MS = 50.0 +_KNOWN_SURFACES = frozenset({"mcp", "cli", "memory"}) + +# Reuse of the CodeClone brand mark (report/html/widgets/icons.py:BRAND_LOGO). +_LOGO = ( + '' +) + +_CSS = """ +*{box-sizing:border-box;margin:0;padding:0} +:root{ +--bg:oklch(15% 0.018 275);--surface:oklch(20% 0.022 275); +--surface-2:oklch(24% 0.026 275);--border:oklch(31% 0.034 275); +--text:oklch(96% 0.010 275);--dim:oklch(74% 0.028 275);--mute:oklch(56% 0.028 275); +--accent:#818cf8;--accent-soft:color-mix(in oklch,#818cf8 30%,transparent); +--track:oklch(28% 0.02 275);--warn:#f59e0b; +--warn-soft:color-mix(in oklch,#f59e0b 14%,transparent); +--mcp:#818cf8;--cli:#2dd4bf;--memory:#fbbf24; +--font:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI", +Roboto,sans-serif; +--mono:"JetBrains Mono",ui-monospace,SFMono-Regular,Menlo,Consolas,monospace; +} +@media (prefers-color-scheme:light){:root{ +--bg:oklch(98.5% 0.006 275);--surface:#fff;--surface-2:oklch(97.3% 0.006 275); +--border:oklch(89% 0.018 275);--text:oklch(24% 0.040 275); +--dim:oklch(44% 0.046 275);--mute:oklch(55% 0.040 275); +--accent:#4f46e5;--accent-soft:color-mix(in oklch,#4f46e5 26%,transparent); +--track:oklch(92% 0.012 275);--warn:#b45309; +--warn-soft:color-mix(in oklch,#b45309 12%,transparent); +--mcp:#4f46e5;--cli:#0d9488;--memory:#b45309; +}} +html{-webkit-text-size-adjust:100%} +body{background:var(--bg);color:var(--text);font-family:var(--font); +font-size:14px;line-height:1.5;-webkit-font-smoothing:antialiased; +padding:36px 22px 90px} +.wrap{max-width:1040px;margin:0 
auto} +.head{display:flex;align-items:center;gap:13px;margin-bottom:5px} +.logo{flex-shrink:0} +h1{font-size:20px;font-weight:600;letter-spacing:-0.01em} +.sub{color:var(--dim);font-size:12.5px;margin:0 0 30px 43px;font-family:var(--mono)} +.sub b{color:var(--text);font-weight:550} +section{margin-bottom:30px} +h2{font-size:11px;text-transform:uppercase;letter-spacing:0.09em; +color:var(--mute);font-weight:600;margin:0 0 4px 2px} +.shint{color:var(--mute);font-size:12px;margin:0 0 11px 2px} +.panel{background:var(--surface);border:1px solid var(--border); +border-radius:11px;overflow:hidden} +.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(148px,1fr)); +gap:10px;margin-bottom:12px} +.stats{display:grid;grid-template-columns:repeat(5,minmax(0,1fr));gap:10px; +margin-bottom:12px} +@media (max-width:760px){.stats{grid-template-columns:repeat(2,minmax(0,1fr))}} +.card{background:var(--surface);border:1px solid var(--border); +border-radius:11px;padding:14px 16px} +.card .v{font-size:24px;font-weight:600;letter-spacing:-0.02em; +font-family:var(--mono)} +.card .l{color:var(--mute);font-size:10.5px;text-transform:uppercase; +letter-spacing:0.07em;margin-top:4px} +.card.warn{border-color:var(--warn-soft)} +.card.warn .v{color:var(--warn)} +.card.accent .v{color:var(--accent)} +.hipanel{padding:6px 16px 10px} +.hirow{display:grid;grid-template-columns:minmax(128px,152px) minmax(0,1fr) auto; +align-items:start;gap:10px 16px;padding:12px 0;border-top:1px solid var(--border)} +.hirow:first-child{border-top:none} +.hilabel{color:var(--mute);font-size:11px;text-transform:uppercase; +letter-spacing:0.05em;padding-top:2px} +.hibody{min-width:0} +.hiprimary{display:flex;flex-wrap:wrap;align-items:center;gap:8px} +.hmono{font-family:var(--mono);font-size:13px;line-height:1.45} +.hctx{font-family:var(--mono);font-size:11.5px;color:var(--mute);margin-top:4px; +line-height:1.45} +.himetric{font-family:var(--mono);font-size:13px;font-weight:600;white-space:nowrap; 
+text-align:right;padding-top:2px;line-height:1.45} +.lead{padding:4px 16px} +.lrow{display:grid;grid-template-columns:158px minmax(0,1fr) auto;align-items:center; +gap:14px;padding:11px 0;border-top:1px solid var(--border)} +.lrow:first-child{border-top:none} +.llabel{color:var(--mute);font-size:11px;text-transform:uppercase; +letter-spacing:0.05em} +.lval{display:flex;align-items:center;gap:9px;min-width:0} +.lname{font-family:var(--mono);font-size:13px;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.lin{color:var(--mute);font-size:11.5px;font-family:var(--mono)} +.lmetric{font-family:var(--mono);font-size:14px;font-weight:600;white-space:nowrap} +.badge{font-size:10px;font-weight:600;font-family:var(--mono);padding:2px 7px; +border-radius:5px;text-transform:uppercase;letter-spacing:0.03em;flex-shrink:0; +background:color-mix(in oklch,var(--c,var(--accent)) 16%,transparent); +color:var(--c,var(--accent))} +.surf-mcp{--c:var(--mcp)}.surf-cli{--c:var(--cli)}.surf-memory{--c:var(--memory)} +.chip{font-size:10.5px;font-family:var(--mono);padding:1px 8px;border-radius:20px; +background:var(--surface-2);color:var(--dim);border:1px solid var(--border); +white-space:nowrap} +.chip.warn{color:var(--warn);border-color:transparent;background:var(--warn-soft); +font-weight:600} +.bar{display:block;width:100%;height:7px} +.dur{font-family:var(--mono);font-size:12.5px;text-align:right;white-space:nowrap; +color:var(--dim)} +.mem{font-family:var(--mono);font-size:11.5px;color:var(--warn);font-weight:550; +text-align:right;white-space:nowrap;overflow:hidden} +.extra{display:flex;align-items:center;justify-content:flex-end;gap:6px; +min-width:0;overflow:hidden} +.pay{font-family:var(--mono);font-size:11px;color:var(--mute);white-space:nowrap} +.chain{padding:6px 16px 12px} +.group{padding:13px 0;border-top:1px solid var(--border)} +.group:first-child{border-top:none} +.crumb{display:flex;align-items:center;flex-wrap:wrap;gap:9px;margin-bottom:10px} +.crumb 
.node{display:flex;align-items:center;gap:7px} +.crumb .cname{font-family:var(--mono);font-size:12px;color:var(--text)} +.crumb .arrow{color:var(--mute);font-size:13px} +.oprow,.spanrow{display:grid; +grid-template-columns:minmax(0,1fr) 140px 56px 70px 120px; +align-items:center;column-gap:13px;row-gap:2px;padding:5px 0} +.lead-cell{display:flex;align-items:center;gap:9px;min-width:0} +.opname{font-family:var(--mono);font-size:13px;font-weight:550;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.spanname{font-family:var(--mono);font-size:12px;color:var(--dim);overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.tick{color:var(--accent);opacity:0.6;font-size:11px;flex-shrink:0} +.spanrow .counters{grid-column:2/-1;font-family:var(--mono);font-size:10.5px; +color:var(--mute);display:flex;flex-wrap:wrap;gap:0 15px} +.counters b{color:var(--dim);font-weight:550;margin-right:4px} +.spans{padding-left:17px} +.kids{margin-left:13px;padding-left:17px;border-left:2px solid var(--accent-soft)} +.wf{padding:8px 16px 12px} +.wf-group{padding:13px 0;border-top:1px solid var(--border)} +.wf-group:first-child{border-top:none} +.wf-cap{display:flex;align-items:center;gap:8px;margin-bottom:9px; +font-family:var(--mono);font-size:11px;color:var(--mute)} +.wf-cap b{color:var(--dim);font-weight:600} +.wf-row{display:grid;grid-template-columns:minmax(150px,238px) minmax(0,1fr) 58px; +align-items:center;column-gap:12px;padding:2px 0} +.wf-label{font-family:var(--mono);font-size:11.5px;overflow:hidden; +text-overflow:ellipsis;white-space:nowrap} +.wf-label.op{color:var(--text);font-weight:550} +.wf-label.span{color:var(--dim)} +.wf-track{position:relative;height:14px;background:var(--track);border-radius:4px} +.wf-bar{position:absolute;top:2px;height:10px;border-radius:3px; +background:var(--c,var(--accent))} +.wf-bar.span{top:3px;height:8px;opacity:0.8} +.wf-dur{font-family:var(--mono);font-size:11px;color:var(--mute);text-align:right; +white-space:nowrap} 
+table{width:100%;border-collapse:collapse;font-size:12.5px} +th{text-align:left;padding:9px 16px;color:var(--mute);font-size:10.5px; +text-transform:uppercase;letter-spacing:0.05em; +border-bottom:1px solid var(--border);white-space:nowrap} +td{padding:9px 16px;border-top:1px solid var(--border);font-family:var(--mono); +white-space:nowrap} +td.t{font-family:var(--font)} +th.r,td.r{text-align:right} +.shape{font-family:var(--font);font-size:12.5px} +.sqlraw{font-family:var(--mono);font-size:11px;color:var(--mute);max-width:440px; +overflow:hidden;text-overflow:ellipsis;white-space:nowrap;margin-top:3px} +tr.flag td{background:var(--warn-soft)} +.muted{color:var(--mute)} +.empty{padding:30px;text-align:center;color:var(--mute);font-size:13px} +.foot{margin-top:38px;color:var(--mute);font-size:11px;text-align:center; +font-family:var(--mono)} +""" + + +def _esc(value: object) -> str: + return escape(str(value)) + + +def _ms(value: float) -> str: + return f"{value / 1000:.2f}s" if value >= 1000 else f"{value:.0f}ms" + + +def _mb(value: float | None) -> str: + if value is None: + return "—" + return f"{value / 1024:.1f} GB" if value >= 1024 else f"{value:.1f} MB" + + +def _bytes(value: int | None) -> str: + if value is None: + return "—" + if value >= 1024 * 1024: + return f"{value / 1024 / 1024:.1f} MB" + if value >= 1024: + return f"{value / 1024:.1f} KB" + return f"{value} B" + + +def _tokens(value: int | None) -> str: + if not value: + return "—" + return f"{value / 1000:.1f}k" if value >= 1000 else str(value) + + +def _bar(value: float, maximum: float, *, color: str = "var(--accent)") -> str: + frac = value / maximum if maximum > 0 else 0.0 + fill = max(1.5, round(frac * 100, 1)) + return ( + '' + '' + f'' + ) + + +def _surface_badge(surface: str) -> str: + cls = f"surf-{surface}" if surface in _KNOWN_SURFACES else "" + return f'{_esc(surface)}' + + +def _reason_chip(reason_kind: str | None) -> str: + if not reason_kind: + return "" + extra = " warn" if 
reason_kind == "unknown" else "" + return f'{_esc(reason_kind)}' + + +def _counters(counters: Mapping[str, int]) -> str: + if not counters: + return "" + items = "".join( + f"{_esc(key)}{value}" + for key, value in sorted(counters.items()) + ) + return f'{items}' + + +def _rss_text( + delta: float | None, + *, + end: float | None = None, + peak: float | None = None, + peak_delta: float | None = None, +) -> str: + parts: list[str] = [] + if end is not None and end >= 0.05: + parts.append(f"end {_mb(end)}") + if peak is not None and peak >= 0.05: + parts.append(f"peak {_mb(peak)}") + if peak_delta is not None and peak_delta >= 0.05: + parts.append(f"peakΔ{_mb(peak_delta)}") + elif delta is not None and delta >= 0.05: + parts.append(f"Δ{_mb(delta)}") + return " · ".join(parts) + + +def _view_rss_text(view: OperationView | SpanView | SpanCostView) -> str: + return _rss_text( + view.rss_delta_mb, + end=view.rss_mb, + peak=view.peak_rss_mb, + peak_delta=view.peak_rss_delta_mb, + ) + + +def _payload(op: OperationView) -> str: + parts = [] + if op.request_bytes is not None: + parts.append(f"↑{_bytes(op.request_bytes)}") + if op.response_bytes is not None: + parts.append(f"↓{_bytes(op.response_bytes)}") + return f'{" ".join(parts)}' if parts else "" + + +def _header(trace: TraceView) -> str: + agg = trace.aggregates + window = ( + f"{_esc(trace.window_started_at_utc)} → {_esc(trace.window_ended_at_utc)}" + if trace.window_started_at_utc + else "no operations recorded" + ) + digest = f" · repo {_esc(trace.repo_root_digest)}" if trace.repo_root_digest else "" + return ( + f'
{_LOGO}

Platform Observability

' + f'

{agg.operation_count} operations · ' + f"{window}{digest}

" + ) + + +def _stat(value: str, label: str, variant: str = "") -> str: + cls = f"card {variant}".strip() + return ( + f'
{value}
' + f'
{label}
' + ) + + +def _section(title: str, body: str, *, subtitle: str = "") -> str: + hint = f'

{_esc(subtitle)}

' if subtitle else "" + return f"

{_esc(title)}

{hint}{body}
" + + +def _table(headers: tuple[tuple[str, bool], ...], rows: str) -> str: + ths = "".join( + f'{_esc(label)}' if right else f"{_esc(label)}" + for label, right in headers + ) + return ( + f'
{ths}' + f"{rows}
" + ) + + +def _highlight_row( + label: str, + *, + badge_html: str, + primary: str, + metric_html: str, + context: str | None = None, + chips_html: str = "", +) -> str: + ctx = f'
in {_esc(context)}
' if context else "" + return ( + f'
{_esc(label)}' + f'
{badge_html}' + f'{_esc(primary)}{chips_html}
{ctx}
' + f'
{metric_html}
' + ) + + +def _highlights(agg: AggregatesView) -> str: + rows: list[str] = [] + if agg.slowest: + op = agg.slowest[0] + rows.append( + _highlight_row( + "Slowest operation", + badge_html=_surface_badge(op.surface), + primary=op.name, + metric_html=_esc(_ms(op.duration_ms)), + ) + ) + if agg.slowest_span is not None: + span = agg.slowest_span + rows.append( + _highlight_row( + "Hottest span", + badge_html=_surface_badge(span.surface), + primary=span.name, + context=span.operation_name, + chips_html=_reason_chip(span.reason_kind), + metric_html=_esc(_ms(span.duration_ms)), + ) + ) + if agg.peak_memory_span is not None and ( + agg.max_rss_delta_mb or agg.max_peak_rss_mb or agg.max_rss_absolute_mb + ): + peak = agg.peak_memory_span + metric = ( + peak.peak_rss_mb + or peak.rss_mb + or peak.peak_rss_delta_mb + or peak.rss_delta_mb + ) + denom = ( + agg.max_peak_rss_mb + or agg.max_rss_absolute_mb + or agg.max_rss_delta_mb + or 1.0 + ) + share = round((metric or 0.0) / denom * 100) + detail = _rss_text( + peak.rss_delta_mb, + end=peak.rss_mb, + peak=peak.peak_rss_mb, + peak_delta=peak.peak_rss_delta_mb, + ) + rows.append( + _highlight_row( + "Top memory consumer", + badge_html=_surface_badge(peak.surface), + primary=peak.name, + context=peak.operation_name, + metric_html=f"{_esc(detail)} · {share}%", + ) + ) + elif agg.max_peak_rss_mb is not None: + rows.append( + _highlight_row( + "Process peak RSS", + badge_html="", + primary="high-water resident set", + metric_html=_esc(_mb(agg.max_peak_rss_mb)), + ) + ) + elif agg.max_rss_delta_mb is not None: + rows.append( + _highlight_row( + "Peak memory Δ", + badge_html="", + primary="resident set growth", + metric_html=_esc(_mb(agg.max_rss_delta_mb)), + ) + ) + if agg.heaviest_cpu is not None: + op = agg.heaviest_cpu + cpu_ms = (op.cpu_user_ms or 0.0) + (op.cpu_system_ms or 0.0) + ratio = cpu_ms / op.duration_ms if op.duration_ms else 0.0 + rows.append( + _highlight_row( + "Heaviest CPU", + 
badge_html=_surface_badge(op.surface), + primary=op.name, + metric_html=f"{_esc(_ms(cpu_ms))} · {ratio:.1f}x wall", + ) + ) + return f'
{"".join(rows)}
' if rows else "" + + +def _summary(trace: TraceView) -> str: + agg = trace.aggregates + costly = sum( + 1 + for span in agg.semantic_costs + if span.no_op and span.duration_ms >= _NOOP_COSTLY_MS + ) + unknown = agg.unknown_expensive_rebuild_count + cards = ( + '
' + + _stat(str(agg.operation_count), "operations", "accent") + + _stat(_mb(agg.max_peak_rss_mb or agg.max_rss_absolute_mb), "peak rss") + + _stat(_mb(agg.max_rss_delta_mb), "peak rss Δ") + + _stat(str(costly), "costly no-ops", "warn" if costly else "") + + _stat(str(unknown), "unknown reason", "warn" if unknown else "") + + "
" + ) + highlights = _highlights(agg) + body = cards + highlights if highlights else cards + return _section( + "Runtime summary", + body, + subtitle="Headline counters, then where time and memory actually went.", + ) + + +def _waste_row(item: WasteItem) -> str: + return ( + '' + f'{_esc(item.kind)}' + f'{_surface_badge(item.surface)} {_esc(item.subject)}' + f'{_esc(item.detail)}' + ) + + +def _waste_section(agg: AggregatesView) -> str: + if not agg.waste: + return "" + rows = "".join(_waste_row(item) for item in agg.waste) + headers = (("Kind", False), ("What", False), ("Cost", False)) + return _section( + "Waste", + _table(headers, rows), + subtitle="Resources spent without payoff — no-op rebuilds and " + "payload-heavy calls. Ranked fix candidates.", + ) + + +def _op_lineage(op: OperationView) -> list[OperationView]: + flat = [op] + for child in op.children: + flat.extend(_op_lineage(child)) + return flat + + +def _breadcrumb(lineage: list[OperationView]) -> str: + if len(lineage) < 2: + return "" + nodes = ' '.join( + f'{_surface_badge(op.surface)}' + f'{_esc(op.name)}' + for op in lineage + ) + return f'
{nodes}
' + + +def _op_row(op: OperationView, group_max: float) -> str: + # Fixed metric columns: name | bar | dur | mem | extra. Splitting rss (mem) + # and payload (extra) into their own cells keeps every column right-anchored + # so bars and durations line up across nesting depths. + return ( + '
' + f'{_surface_badge(op.surface)}{_esc(op.name)}' + f"{_bar(op.duration_ms, group_max)}" + f'{_ms(op.duration_ms)}' + f'{_view_rss_text(op)}' + f'{_payload(op)}
' + ) + + +def _span_row(span: SpanView, op_duration: float) -> str: + color = "var(--warn)" if span.reason_kind == "unknown" else "var(--accent)" + return ( + '
' + f'' + f'{_esc(span.name)}' + f"{_bar(span.duration_ms, op_duration, color=color)}" + f'{_ms(span.duration_ms)}' + f'{_view_rss_text(span)}' + f'{_reason_chip(span.reason_kind)}' + f"{_counters(span.counters)}
" + ) + + +def _op_block(op: OperationView, group_max: float) -> str: + op_duration = op.duration_ms or 1.0 + spans = "".join(_span_row(span, op_duration) for span in op.spans) + spans_block = f'
{spans}
' if spans else "" + kids = "".join(_op_block(child, group_max) for child in op.children) + kids_block = f'
{kids}
' if kids else "" + return ( + f'
{_op_row(op, group_max)}{spans_block}
{kids_block}' + ) + + +def _chain_group(root: OperationView) -> str: + lineage = _op_lineage(root) + group_max = max((op.duration_ms for op in lineage), default=1.0) or 1.0 + return ( + f'
{_breadcrumb(lineage)}{_op_block(root, group_max)}
' + ) + + +def _chain(trace: TraceView) -> str: + if not trace.operation_tree: + body = ( + '
' + "No operations recorded yet.
" + ) + return _section("Correlated event chains", body) + groups = "".join(_chain_group(op) for op in trace.operation_tree) + return _section( + "Correlated event chains", + f'
{groups}
', + subtitle="What triggered what, across processes — finish → spawned worker.", + ) + + +def _semantic_row(span: SpanCostView) -> str: + costly = span.no_op and span.duration_ms >= _NOOP_COSTLY_MS + if costly: + verdict = 'no-op · costly' + elif span.no_op: + verdict = 'no-op' + else: + verdict = 'productive' + reason = ( + _esc(span.reason_kind) if span.reason_kind else '' + ) + return ( + f'' + f'{_esc(span.name)}' + f'{_esc(span.operation_name)}' + f"{reason}" + f'{span.produced}' + f'{span.skipped}' + f'{_ms(span.duration_ms)}' + f'{_view_rss_text(span)}' + f"{verdict}" + ) + + +def _semantic(agg: AggregatesView) -> str: + if not agg.semantic_costs: + return "" + rows = "".join(_semantic_row(span) for span in agg.semantic_costs) + headers = ( + ("Span", False), + ("Operation", False), + ("Reason", False), + ("Produced", True), + ("Skipped", True), + ("Duration", True), + ("Memory", True), + ("Verdict", False), + ) + return _section( + "Memory pipeline cost", + _table(headers, rows), + subtitle="Semantic and memory-product spans — flags work that ran but " + "produced nothing (including CLI-triggered rebuilds).", + ) + + +def _mcp_row(tool: McpToolAggregate) -> str: + return ( + f'{_esc(tool.name)}' + f'{tool.count}' + f'{_ms(tool.p50_duration_ms)}' + f'{_ms(tool.p95_duration_ms)}' + f'{_bytes(tool.p95_request_bytes)}' + f'{_bytes(tool.p95_response_bytes)}' + f'{_tokens(tool.p95_response_tokens)}' + ) + + +def _mcp(tools: tuple[McpToolAggregate, ...]) -> str: + if not tools: + return "" + rows = "".join(_mcp_row(tool) for tool in tools) + headers = ( + ("Tool", False), + ("Calls", True), + ("p50", True), + ("p95", True), + ("↑ req p95", True), + ("↓ resp p95", True), + ("resp tok p95", True), + ) + return _section( + "MCP tool matrix", + _table(headers, rows), + subtitle="Per-tool latency and payload — spot tools that flood request " + "or response bytes.", + ) + + +def _wf_bar(row: WaterfallRow, total_ms: float) -> str: + span = total_ms if total_ms > 0 else 
1.0 + left = round(min(row.offset_ms / span * 100, 99.0), 2) + width = max(0.6, round(row.duration_ms / span * 100, 2)) + kind = "op" if row.kind == "operation" else "span" + surf = f"surf-{row.surface}" if row.surface in _KNOWN_SURFACES else "" + tick = '' if kind == "span" else "" + return ( + '
' + f'' + f"{tick}{_esc(row.label)}" + f'
' + f'{_ms(row.duration_ms)}
' + ) + + +def _wf_group(group: WaterfallGroup) -> str: + rows = "".join(_wf_bar(row, group.duration_ms) for row in group.rows) + cid = group.correlation_id[:8] if group.correlation_id else "—" + return ( + f'
{_esc(cid)}' + f"{_esc(group.started_at_utc)}" + f"span {_ms(group.duration_ms)}
{rows}
" + ) + + +def _waterfall(trace: TraceView) -> str: + if not trace.waterfall: + return "" + groups = "".join(_wf_group(group) for group in trace.waterfall) + return _section( + "Timeline", + f'
{groups}
', + subtitle="Each causal chain on its own time axis — bars placed by start " + "offset, width by duration; a gap before a worker bar is the spawn handoff.", + ) + + +def _agent_row(row: AgentTokenRow, total_response: int) -> str: + share = round(row.response_tokens / total_response * 100) if total_response else 0 + return ( + f'{_esc(row.name)}' + f'{row.calls}' + f'{_tokens(row.request_tokens)}' + f'{_tokens(row.response_tokens)}' + f'{share}%' + ) + + +def _agent(agg: AggregatesView) -> str: + view = agg.agent + if view is None: + return "" + cards = ( + '
' + + _stat(_tokens(view.response_tokens), "context pressure (tok)", "accent") + + _stat(_tokens(view.request_tokens), "sent (tok)") + + _stat(str(view.mcp_calls), "mcp calls") + + _stat(str(len(view.consumers)), "tools") + + "
" + ) + rows = "".join(_agent_row(row, view.response_tokens) for row in view.consumers) + headers = ( + ("Tool", False), + ("Calls", True), + ("↑ tok", True), + ("↓ tok", True), + ("Context %", True), + ) + return _section( + "Agent context", + cards + _table(headers, rows), + subtitle="Tokens MCP tools push back into the agent's context — the real " + "per-call cost for an LLM. The top row is your biggest context consumer.", + ) + + +def _db_row(row: DbCostRow) -> str: + per_call = round(row.total_queries / row.span_count) if row.span_count else 0 + return ( + f'{_esc(row.span_name)}' + f'{row.span_count}' + f'{row.total_queries}' + f'{row.total_writes}' + f'{per_call}' + f'{row.max_queries}' + ) + + +def _db_cost(agg: AggregatesView) -> str: + if not agg.db_costs: + return "" + rows = "".join(_db_row(row) for row in agg.db_costs) + headers = ( + ("Span", False), + ("Spans", True), + ("Queries", True), + ("Writes", True), + ("Q / call", True), + ("Max", True), + ) + return _section( + "DB cost", + _table(headers, rows), + subtitle="SQLite work per span (performance-truth) — a high Q/call is " + "N+1-shaped: many reads for little produced.", + ) + + +def _db_fingerprint_row(row: DbFingerprintRow) -> str: + table = _esc(row.table_hint) if row.table_hint else "—" + shape = _esc(row.summary) if row.summary else "—" + raw = _esc(row.fingerprint) + return ( + f'{_esc(row.span_name)}' + f"{table}" + f'{_esc(row.kind.upper())}' + f'{row.count}' + f'
{shape}
' + f'
{raw}
' + ) + + +def _db_fingerprints(agg: AggregatesView) -> str: + if not agg.db_fingerprints: + return "" + rows = "".join(_db_fingerprint_row(row) for row in agg.db_fingerprints) + headers = ( + ("Span", False), + ("Table", False), + ("Kind", False), + ("Count", True), + ("Shape", False), + ) + return _section( + "DB query shapes", + _table(headers, rows), + subtitle="Each query count decoded into what it filters on — the high-count " + "rows name the N+1 to batch. Raw shape is the second line.", + ) + + +def _pipeline_row(group: PipelineGroup) -> str: + return ( + f'{_esc(group.name)}' + f'{group.op_count}' + f'{_ms(group.duration_ms)}' + f'{_ms(group.cpu_ms)}' + ) + + +def _pipeline_section(agg: AggregatesView) -> str: + if not agg.pipeline: + return "" + rows = "".join(_pipeline_row(group) for group in agg.pipeline) + headers = (("Subsystem", False), ("Ops", True), ("Wall", True), ("CPU", True)) + return _section( + "Pipeline", + _table(headers, rows), + subtitle="Where the run spends wall time and CPU, grouped by subsystem.", + ) + + +def render_trace_html(trace: TraceView) -> str: + """Render a ``TraceView`` as a self-contained, branded diagnosis cockpit.""" + foot = f"CodeClone · platform observability · schema {_esc(trace.schema_version)}" + return ( + '' + '' + "CodeClone · Platform Observability" + f'
' + + _header(trace) + + _summary(trace) + + _waste_section(trace.aggregates) + + _waterfall(trace) + + _chain(trace) + + _semantic(trace.aggregates) + + _db_cost(trace.aggregates) + + _db_fingerprints(trace.aggregates) + + _agent(trace.aggregates) + + _mcp(trace.aggregates.mcp_tools) + + _pipeline_section(trace.aggregates) + + f'

{foot}

' + + "
" + ) + + +__all__ = ["render_trace_html"] diff --git a/codeclone/observability/render_json.py b/codeclone/observability/render_json.py new file mode 100644 index 00000000..b00d3217 --- /dev/null +++ b/codeclone/observability/render_json.py @@ -0,0 +1,26 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""JSON renderer for the observability ``TraceView`` (Phase 29 output). + +Deterministic: sorted keys, stable indentation. The read model is the source of +truth; this is a faithful projection of it. +""" + +from __future__ import annotations + +import json +from dataclasses import asdict + +from .views import TraceView + + +def render_trace_json(trace: TraceView) -> str: + """Render a ``TraceView`` as canonical, human-readable JSON.""" + return json.dumps(asdict(trace), sort_keys=True, indent=2, ensure_ascii=False) + + +__all__ = ["render_trace_json"] diff --git a/codeclone/observability/runtime.py b/codeclone/observability/runtime.py new file mode 100644 index 00000000..2bb19fc4 --- /dev/null +++ b/codeclone/observability/runtime.py @@ -0,0 +1,520 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Observability write API (Phase 29 §4.3). + +``bootstrap`` freezes the enabled decision once per process. When disabled, +``operation``/``span`` yield a cheap inert handle and return immediately — no +clock, no id, no contextvar, no store import (the near-zero-overhead contract). +When enabled, spans accumulate on their operation and the whole operation is +flushed in a single transaction on exit. 
+""" + +from __future__ import annotations + +import sqlite3 +import time +import uuid +from collections.abc import Iterator +from contextlib import contextmanager +from contextvars import ContextVar +from datetime import datetime, timezone +from pathlib import Path + +from ..config.observability import ObservabilityConfig, resolve_observability_config +from .db_fingerprint import fingerprint_sql +from .models import OperationRecord, ProfileSample, SpanRecord +from .reason_kind import ReasonKind + +# Bound how many distinct SQL shapes a span persists; the diagnostic value is in +# the few high-count statements, not the long tail. +_DB_FINGERPRINT_TOP_N = 8 + +_ENABLED: bool = False +_RUNTIME: _ActiveRuntime | None = None +_CURRENT_OP: ContextVar[OperationHandle | None] = ContextVar("_obs_op", default=None) +_CURRENT_SPAN: ContextVar[SpanHandle | None] = ContextVar("_obs_span", default=None) + + +def _now_utc() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ") + + +def _new_id() -> str: + return uuid.uuid4().hex + + +class OperationHandle: + """Mutable accumulator for a surface operation and its spans.""" + + def __init__( + self, + *, + operation_id: str, + correlation_id: str, + surface: str, + name: str, + started_at_utc: str, + parent_operation_id: str | None, + session_id: str | None, + repo_root_digest: str | None, + ) -> None: + self.operation_id = operation_id + self.correlation_id = correlation_id + self._surface = surface + self._name = name + self._started_at_utc = started_at_utc + self._parent_operation_id = parent_operation_id + self._session_id = session_id + self._repo_root_digest = repo_root_digest + self._status = "ok" + self._error_kind: str | None = None + self._request_bytes: int | None = None + self._response_bytes: int | None = None + self._request_tokens: int | None = None + self._response_tokens: int | None = None + self._spans: list[SpanRecord] = [] + + # Wired by the 29.9 MCP registrar (per-tool request/response 
payload sizes). + def set_request( + self, *, request_bytes: int | None = None, request_tokens: int | None = None + ) -> None: + if request_bytes is not None: + self._request_bytes = request_bytes + if request_tokens is not None: + self._request_tokens = request_tokens + + def set_response( + self, *, response_bytes: int | None = None, response_tokens: int | None = None + ) -> None: + if response_bytes is not None: + self._response_bytes = response_bytes + if response_tokens is not None: + self._response_tokens = response_tokens + + def _to_record( + self, *, duration_ms: float, profile: ProfileSample | None = None + ) -> OperationRecord: + return OperationRecord( + operation_id=self.operation_id, + correlation_id=self.correlation_id, + surface=self._surface, + name=self._name, + started_at_utc=self._started_at_utc, + duration_ms=duration_ms, + status=self._status, + parent_operation_id=self._parent_operation_id, + error_kind=self._error_kind, + session_id=self._session_id, + repo_root_digest=self._repo_root_digest, + request_bytes=self._request_bytes, + response_bytes=self._response_bytes, + request_tokens=self._request_tokens, + response_tokens=self._response_tokens, + profile=profile, + spans=tuple(self._spans), + ) + + +class SpanHandle: + """Mutable accumulator for a single span (stage/subsystem).""" + + def __init__( + self, + *, + span_id: str, + operation_id: str, + name: str, + started_at_utc: str, + parent_span_id: str | None, + reason_kind: ReasonKind | None, + reason: str | None, + dedupe_key: str | None, + ) -> None: + self.span_id = span_id + self._operation_id = operation_id + self._name = name + self._started_at_utc = started_at_utc + self._parent_span_id = parent_span_id + self._reason_kind = reason_kind + self._reason = reason + self._dedupe_key = dedupe_key + self._status = "ok" + self._counters: dict[str, int] = {} + self._db_fingerprints: dict[str, int] = {} + + # set_counter is wired by the 29.10 worker instrumentation; add_counter by + # the 
29.DB query-trace hook (record_db_query). set_reason_kind stays + # forward-declared until a caller needs post-hoc reason classification. + def add_counter(self, key: str, value: int = 1) -> None: + self._counters[key] = self._counters.get(key, 0) + value + + def set_counter(self, key: str, value: int) -> None: + self._counters[key] = value + + # Wired by the 29.DB query-trace hook (record_db_query): accumulate the + # normalized SQL shape so _to_record can flush the top-N per span. + def add_db_fingerprint(self, fingerprint: str) -> None: + self._db_fingerprints[fingerprint] = ( + self._db_fingerprints.get(fingerprint, 0) + 1 + ) + + def _top_db_fingerprints(self) -> dict[str, int]: + if not self._db_fingerprints: + return {} + ranked = sorted(self._db_fingerprints.items(), key=lambda kv: (-kv[1], kv[0])) + return dict(ranked[:_DB_FINGERPRINT_TOP_N]) + + # codeclone: ignore[dead-code] + def set_reason_kind(self, reason_kind: ReasonKind) -> None: + self._reason_kind = reason_kind + + def _to_record( + self, *, duration_ms: float, profile: ProfileSample | None = None + ) -> SpanRecord: + return SpanRecord( + span_id=self.span_id, + operation_id=self._operation_id, + name=self._name, + started_at_utc=self._started_at_utc, + duration_ms=duration_ms, + status=self._status, + parent_span_id=self._parent_span_id, + reason_kind=self._reason_kind, + reason=self._reason, + dedupe_key=self._dedupe_key, + counters=dict(self._counters), + db_fingerprints=self._top_db_fingerprints(), + profile=profile, + ) + + +def _inert_operation() -> OperationHandle: + return OperationHandle( + operation_id="", + correlation_id="", + surface="", + name="", + started_at_utc="", + parent_operation_id=None, + session_id=None, + repo_root_digest=None, + ) + + +def _inert_span() -> SpanHandle: + return SpanHandle( + span_id="", + operation_id="", + name="", + started_at_utc="", + parent_span_id=None, + reason_kind=None, + reason=None, + dedupe_key=None, + ) + + +class _ActiveRuntime: + """Holds 
the config + lazily-opened store; flushes finished operations. + + The store modules are imported only here (never at module load), so a + disabled process never imports the observability store. + """ + + def __init__(self, config: ObservabilityConfig, *, root: Path | None) -> None: + self.config = config + self.session_id: str | None = None + self._root = root + self._conn: object | None = None + + def bind_root(self, root: Path) -> None: + # First rooted call wins: an MCP server bootstraps root-less, then binds + # the store to the root of the first tool that carries one. + if self._root is None: + self._root = root + + def persist(self, record: OperationRecord) -> None: + # Persisted to the per-root store; a root-less enabled session simply + # drops the record (no in-memory ring in the MVP). + if self.config.persist and self._root is not None: + self._write(record) + + def _write(self, record: OperationRecord) -> None: + from .store.schema import observability_store_path, open_observability_store + from .store.writer import write_operation + + if self._conn is None: + assert self._root is not None + self._conn = open_observability_store(observability_store_path(self._root)) + import sqlite3 + + assert isinstance(self._conn, sqlite3.Connection) + write_operation(self._conn, record) + + def close(self) -> None: + if self._conn is not None: + import sqlite3 + + if isinstance(self._conn, sqlite3.Connection): + self._conn.close() + self._conn = None + + +def bootstrap( + config: ObservabilityConfig | None = None, + *, + root: Path | None = None, + session_id: str | None = None, +) -> None: + """Freeze the enabled decision for this process and install the runtime.""" + global _ENABLED, _RUNTIME + cfg = config if config is not None else resolve_observability_config() + _ENABLED = cfg.enabled + if cfg.enabled: + runtime = _ActiveRuntime(cfg, root=root) + runtime.session_id = session_id + _RUNTIME = runtime + else: + _RUNTIME = None + + +def shutdown() -> None: + 
"""Close the store and reset process state (mainly for tests).""" + global _ENABLED, _RUNTIME + if _RUNTIME is not None: + _RUNTIME.close() + _ENABLED = False + _RUNTIME = None + + +def is_observability_enabled() -> bool: + return _ENABLED + + +def current_operation_context() -> tuple[str, str] | None: + """Return ``(operation_id, correlation_id)`` of the active operation for + cross-process handoff, or ``None`` when disabled or outside an operation. + """ + op = _CURRENT_OP.get() + if op is None or not op.operation_id: + return None + return op.operation_id, op.correlation_id + + +def bind_root(root: Path) -> None: + """Bind the store to ``root`` if the active runtime has none yet (no-op when + disabled). Lets a root-less MCP-server session open its store on the first + tool call that carries a ``root``. + """ + runtime = _RUNTIME + if _ENABLED and runtime is not None: + runtime.bind_root(root) + + +def payload_capture_enabled() -> bool: + """True when enabled and payload-size capture is configured on.""" + runtime = _RUNTIME + return bool( + _ENABLED and runtime is not None and runtime.config.capture_payload_sizes + ) + + +def _profile_baseline() -> tuple[int, float, float, int | None] | None: + """Capture an rss/cpu/peak baseline when profiling is on (else None, no psutil).""" + runtime = _RUNTIME + if _ENABLED and runtime is not None and runtime.config.profile: + from .profile import capture_profile_baseline + + return capture_profile_baseline() + return None + + +def _profile_sample( + baseline: tuple[int, float, float, int | None] | None, +) -> ProfileSample | None: + if baseline is None: + return None + from .profile import build_profile_sample + + return build_profile_sample(baseline) + + +@contextmanager +def operation( + *, + name: str, + surface: str, + correlation_id: str | None = None, + parent_operation_id: str | None = None, + session_id: str | None = None, + repo_root_digest: str | None = None, +) -> Iterator[OperationHandle]: + runtime = _RUNTIME + 
if not _ENABLED or runtime is None: + yield _inert_operation() + return + operation_id = _new_id() + handle = OperationHandle( + operation_id=operation_id, + correlation_id=correlation_id or operation_id, + surface=surface, + name=name, + started_at_utc=_now_utc(), + parent_operation_id=parent_operation_id, + session_id=session_id or runtime.session_id, + repo_root_digest=repo_root_digest, + ) + token = _CURRENT_OP.set(handle) + baseline = _profile_baseline() + start = time.perf_counter() + try: + yield handle + except Exception as exc: + handle._status = "error" + handle._error_kind = type(exc).__name__ + raise + finally: + duration_ms = (time.perf_counter() - start) * 1000.0 + _CURRENT_OP.reset(token) + runtime.persist( + handle._to_record( + duration_ms=duration_ms, profile=_profile_sample(baseline) + ) + ) + + +@contextmanager +def span( + *, + name: str, + reason: str | None = None, + reason_kind: ReasonKind | None = None, + dedupe_key: str | None = None, +) -> Iterator[SpanHandle]: + runtime = _RUNTIME + parent_op = _CURRENT_OP.get() + if not _ENABLED or runtime is None or parent_op is None: + yield _inert_span() + return + parent_span = _CURRENT_SPAN.get() + handle = SpanHandle( + span_id=_new_id(), + operation_id=parent_op.operation_id, + name=name, + started_at_utc=_now_utc(), + parent_span_id=parent_span.span_id if parent_span is not None else None, + reason_kind=reason_kind, + reason=reason, + dedupe_key=dedupe_key, + ) + token = _CURRENT_SPAN.set(handle) + baseline = _profile_baseline() + start = time.perf_counter() + try: + yield handle + except Exception: + handle._status = "error" + raise + finally: + duration_ms = (time.perf_counter() - start) * 1000.0 + _CURRENT_SPAN.reset(token) + parent_op._spans.append( + handle._to_record( + duration_ms=duration_ms, profile=_profile_sample(baseline) + ) + ) + + +def record_elapsed_span( + name: str, + *, + started_at_utc: str, + duration_ms: float, + reason_kind: ReasonKind | None = None, +) -> None: + 
"""Attach a span with explicit timing to the active operation, for work that + finished before instrumentation could wrap it (e.g. a worker's cold-start). + No-op when disabled or outside an operation. + """ + parent_op = _CURRENT_OP.get() + if not _ENABLED or parent_op is None: + return + handle = SpanHandle( + span_id=_new_id(), + operation_id=parent_op.operation_id, + name=name, + started_at_utc=started_at_utc, + parent_span_id=None, + reason_kind=reason_kind, + reason=None, + dedupe_key=None, + ) + parent_op._spans.append(handle._to_record(duration_ms=duration_ms)) + + +_DB_WRITE_KINDS = frozenset({"insert", "update", "delete", "replace"}) + + +def _classify_sql(sql: str) -> str: + stripped = sql.lstrip() + if not stripped: + return "" + return stripped.split(None, 1)[0].lower() + + +def record_db_query(sql: str) -> None: + """Trace-callback sink: attribute one SQL statement to the active span as a + ``db_queries`` counter (plus ``db_writes`` for mutations). No-op outside a + span. Performance telemetry only — never audit or contract truth. + """ + span_handle = _CURRENT_SPAN.get() + if span_handle is None: + return + span_handle.add_counter("db_queries", 1) + if _classify_sql(sql) in _DB_WRITE_KINDS: + span_handle.add_counter("db_writes", 1) + fingerprint = fingerprint_sql(sql).fingerprint + if fingerprint: + span_handle.add_db_fingerprint(fingerprint) + + +def record_counter(key: str, value: int = 1) -> None: + """Add ``value`` to the named counter on the active span. No-op outside a + span (or when disabled). Companion to ``record_db_query`` for non-SQL + counters — e.g. retrieval lane hits emitted by the memory query path. + Performance telemetry only — never audit or contract truth. + """ + span_handle = _CURRENT_SPAN.get() + if span_handle is None: + return + span_handle.add_counter(key, value) + + +def instrument_db_connection(conn: sqlite3.Connection) -> None: + """Attach the per-span DB-query counter to ``conn``. 
No-op (and no per-query + trace overhead) when observability is disabled for this process. + """ + if _ENABLED: + conn.set_trace_callback(record_db_query) + + +__all__ = [ + "OperationHandle", + "SpanHandle", + "bind_root", + "bootstrap", + "current_operation_context", + "instrument_db_connection", + "is_observability_enabled", + "operation", + "payload_capture_enabled", + "record_counter", + "record_db_query", + "record_elapsed_span", + "shutdown", + "span", +] diff --git a/codeclone/observability/sqlite_access.py b/codeclone/observability/sqlite_access.py new file mode 100644 index 00000000..372ea71b --- /dev/null +++ b/codeclone/observability/sqlite_access.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared SQLite open helpers with observability instrumentation.""" + +from __future__ import annotations + +import sqlite3 +from collections.abc import Callable +from pathlib import Path + +from ..utils.sqlite_store import open_sqlite_db, open_sqlite_db_readonly + + +def open_instrumented_sqlite_db( + path: Path, + *, + ensure_schema: Callable[[sqlite3.Connection], None], + foreign_keys: bool = False, + synchronous: str | None = None, +) -> sqlite3.Connection: + conn = open_sqlite_db( + path, + ensure_schema=ensure_schema, + foreign_keys=foreign_keys, + synchronous=synchronous, + ) + from codeclone.observability.runtime import instrument_db_connection + + instrument_db_connection(conn) + return conn + + +def open_instrumented_sqlite_db_readonly( + path: Path, + *, + validate_schema: Callable[[sqlite3.Connection], None], +) -> sqlite3.Connection: + conn = open_sqlite_db_readonly(path, validate_schema=validate_schema) + from codeclone.observability.runtime import instrument_db_connection + + 
instrument_db_connection(conn) + return conn + + +__all__ = [ + "open_instrumented_sqlite_db", + "open_instrumented_sqlite_db_readonly", +] diff --git a/codeclone/observability/store/__init__.py b/codeclone/observability/store/__init__.py new file mode 100644 index 00000000..3698f412 --- /dev/null +++ b/codeclone/observability/store/__init__.py @@ -0,0 +1,13 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Observability sqlite store: schema + bounded batched writer + (Cycle 3) reader. + +Imported only on the enabled write path or the read path — never when +observability is disabled (the near-zero-overhead contract). +""" + +from __future__ import annotations diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py new file mode 100644 index 00000000..5ef6f15d --- /dev/null +++ b/codeclone/observability/store/reader.py @@ -0,0 +1,680 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Read path: build the primary ``TraceView`` from the observability store. + +Read-only — never creates the store or its schema. Deterministic ordering and +deterministic percentiles (sorted-index, no numpy). 
+""" + +from __future__ import annotations + +import sqlite3 +from collections import defaultdict +from datetime import datetime +from pathlib import Path +from typing import cast + +import orjson + +from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION +from ..db_fingerprint import describe_fingerprint +from ..views import ( + AgentTokenRow, + AgentView, + AggregatesView, + DbCostRow, + DbFingerprintRow, + McpToolAggregate, + OperationView, + PipelineGroup, + SpanCostView, + SpanView, + TraceView, + WasteItem, + WaterfallGroup, + WaterfallRow, +) +from .schema import observability_store_path + +_DEFAULT_WINDOW = 20 + +# Counters whose presence marks a span as *meant* to do productive work; when +# they are all present-and-zero the span ran but touched nothing (a no-op). +_PRODUCTIVE_COUNTER_KEYS = ("embedded", "workflows_seen", "experiences_distilled") +_MEMORY_PIPELINE_PREFIX = "memory." +_SEMANTIC_COST_LIMIT = 8 +_DB_FINGERPRINT_ROW_LIMIT = 15 + +# Waste thresholds: a no-op span is only worth flagging once it has spent time; +# an MCP response is "heavy" past these payload sizes. 
+_WASTE_NOOP_MS = 50.0 +_HIGH_PAYLOAD_BYTES = 16 * 1024 +_HIGH_PAYLOAD_TOKENS = 4000 + + +def open_observability_store_readonly(root: Path) -> sqlite3.Connection | None: + """Open the store read-only, or None when it does not exist yet.""" + path = observability_store_path(root) + if not path.is_file(): + return None + conn = sqlite3.connect(str(path)) + conn.row_factory = sqlite3.Row + return conn + + +def _percentile(values: list[float], q: float) -> float: + if not values: + return 0.0 + ordered = sorted(values) + index = round(q * (len(ordered) - 1)) + return ordered[min(index, len(ordered) - 1)] + + +def _parse_counters(raw: object) -> dict[str, int]: + if not raw: + return {} + parsed = orjson.loads(cast("str", raw)) + return ( + {str(k): int(v) for k, v in parsed.items()} if isinstance(parsed, dict) else {} + ) + + +def _optional_float(row: sqlite3.Row, key: str) -> float | None: + columns = row.keys() + if key not in columns: + return None + value = row[key] + return float(value) if value is not None else None + + +def _span_view(row: sqlite3.Row) -> SpanView: + # sqlite3.Row membership (`x in row`) tests values, so probe column names via + # keys() to stay tolerant of stores written before db_fingerprints existed. 
+ columns = row.keys() + return SpanView( + span_id=str(row["span_id"]), + name=str(row["name"]), + duration_ms=float(row["duration_ms"]), + status=str(row["status"]), + parent_span_id=row["parent_span_id"], + reason_kind=row["reason_kind"], + reason=row["reason"], + dedupe_key=row["dedupe_key"], + counters=_parse_counters(row["counters_json"]), + rss_delta_mb=_optional_float(row, "rss_delta_mb"), + rss_mb=_optional_float(row, "rss_mb"), + peak_rss_mb=_optional_float(row, "peak_rss_mb"), + peak_rss_delta_mb=_optional_float(row, "peak_rss_delta_mb"), + started_at_utc=str(row["started_at_utc"]), + db_fingerprints=_parse_counters( + row["db_fingerprints"] if "db_fingerprints" in columns else None + ), + ) + + +def _span_views( + conn: sqlite3.Connection, operation_ids: list[str] +) -> dict[str, tuple[SpanView, ...]]: + if not operation_ids: + return {} + placeholders = ",".join("?" * len(operation_ids)) + rows = conn.execute( + f"SELECT * FROM platform_spans WHERE operation_id IN ({placeholders}) " + "ORDER BY started_at_utc ASC, span_id ASC", + tuple(operation_ids), + ).fetchall() + grouped: dict[str, list[SpanView]] = defaultdict(list) + for row in rows: + grouped[str(row["operation_id"])].append(_span_view(row)) + return {key: tuple(value) for key, value in grouped.items()} + + +def _operation_view( + row: sqlite3.Row, + spans: tuple[SpanView, ...], + children: tuple[OperationView, ...], +) -> OperationView: + return OperationView( + operation_id=str(row["operation_id"]), + correlation_id=str(row["correlation_id"]), + surface=str(row["surface"]), + name=str(row["name"]), + started_at_utc=str(row["started_at_utc"]), + duration_ms=float(row["duration_ms"]), + status=str(row["status"]), + parent_operation_id=row["parent_operation_id"], + error_kind=row["error_kind"], + request_bytes=row["request_bytes"], + response_bytes=row["response_bytes"], + request_tokens=row["request_tokens"], + response_tokens=row["response_tokens"], + rss_delta_mb=_optional_float(row, 
"rss_delta_mb"), + rss_mb=_optional_float(row, "rss_mb"), + peak_rss_mb=_optional_float(row, "peak_rss_mb"), + peak_rss_delta_mb=_optional_float(row, "peak_rss_delta_mb"), + spans=spans, + children=children, + cpu_user_ms=row["cpu_user_ms"], + cpu_system_ms=row["cpu_system_ms"], + ) + + +def _by_correlations( + conn: sqlite3.Connection, correlation_ids: list[str] +) -> list[sqlite3.Row]: + if not correlation_ids: + return [] + placeholders = ",".join("?" * len(correlation_ids)) + return list( + conn.execute( + "SELECT * FROM platform_operations " + f"WHERE correlation_id IN ({placeholders}) " + "ORDER BY started_at_utc ASC, operation_id ASC", + tuple(correlation_ids), + ).fetchall() + ) + + +def _select_operations( + conn: sqlite3.Connection, + *, + operation_id: str | None, + correlation_id: str | None, + session_id: str | None, + last: int | None, +) -> tuple[list[sqlite3.Row], str | None]: + if operation_id is not None: + row = conn.execute( + "SELECT correlation_id FROM platform_operations WHERE operation_id=?", + (operation_id,), + ).fetchone() + if row is None: + return [], None + return _by_correlations(conn, [str(row["correlation_id"])]), operation_id + if correlation_id is not None: + return _by_correlations(conn, [correlation_id]), None + if session_id is not None: + rows = conn.execute( + "SELECT * FROM platform_operations WHERE session_id=? 
" + "ORDER BY started_at_utc ASC, operation_id ASC", + (session_id,), + ).fetchall() + return list(rows), None + root_rows = conn.execute( + "SELECT operation_id, correlation_id FROM platform_operations " + "WHERE parent_operation_id IS NULL " + "ORDER BY started_at_utc DESC, operation_id DESC LIMIT ?", + (last if last is not None else _DEFAULT_WINDOW,), + ).fetchall() + if not root_rows: + return [], None + correlations = sorted({str(row["correlation_id"]) for row in root_rows}) + return _by_correlations(conn, correlations), str(root_rows[0]["operation_id"]) + + +def _build_forest( + rows: list[sqlite3.Row], spans_by_op: dict[str, tuple[SpanView, ...]] +) -> tuple[OperationView, ...]: + by_id = {str(row["operation_id"]): row for row in rows} + children_ids: dict[str | None, list[str]] = defaultdict(list) + for row in rows: + parent = row["parent_operation_id"] + parent_key = ( + str(parent) if parent is not None and str(parent) in by_id else None + ) + children_ids[parent_key].append(str(row["operation_id"])) + + def _order(operation_id: str) -> tuple[str, str]: + return (str(by_id[operation_id]["started_at_utc"]), operation_id) + + def build(operation_id: str) -> OperationView: + children = tuple( + build(child) for child in sorted(children_ids[operation_id], key=_order) + ) + return _operation_view( + by_id[operation_id], spans_by_op.get(operation_id, ()), children + ) + + return tuple(build(root) for root in sorted(children_ids[None], key=_order)) + + +def _is_memory_pipeline_span(name: str) -> bool: + """Memory-product spans may run under CLI/MCP operations — classify by name.""" + return name.startswith(_MEMORY_PIPELINE_PREFIX) + + +def _span_cost_view(op: OperationView, span: SpanView) -> SpanCostView: + """Flatten a span with its owning operation's identity and classify whether + it did productive work (see ``SpanCostView.no_op``).""" + productive = [ + span.counters[key] for key in _PRODUCTIVE_COUNTER_KEYS if key in span.counters + ] + produced = 
sum(productive) + return SpanCostView( + span_id=span.span_id, + name=span.name, + surface=op.surface, + operation_id=op.operation_id, + operation_name=op.name, + duration_ms=span.duration_ms, + reason_kind=span.reason_kind, + rss_delta_mb=span.rss_delta_mb, + rss_mb=span.rss_mb, + peak_rss_mb=span.peak_rss_mb, + peak_rss_delta_mb=span.peak_rss_delta_mb, + produced=produced, + skipped=int(span.counters.get("skipped_unchanged", 0)), + no_op=bool(productive) and produced == 0, + ) + + +def _mcp_tool_aggregates(flat: list[OperationView]) -> tuple[McpToolAggregate, ...]: + by_name: dict[str, list[OperationView]] = defaultdict(list) + for view in flat: + if view.surface == "mcp": + by_name[view.name].append(view) + aggregates: list[McpToolAggregate] = [] + for name in sorted(by_name): + ops = by_name[name] + durations = [op.duration_ms for op in ops] + requests = [ + float(op.request_bytes) for op in ops if op.request_bytes is not None + ] + responses = [ + float(op.response_bytes) for op in ops if op.response_bytes is not None + ] + response_tokens = [ + float(op.response_tokens) for op in ops if op.response_tokens is not None + ] + aggregates.append( + McpToolAggregate( + name=name, + count=len(ops), + p50_duration_ms=_percentile(durations, 0.5), + p95_duration_ms=_percentile(durations, 0.95), + p95_response_bytes=int(_percentile(responses, 0.95)), + p95_request_bytes=int(_percentile(requests, 0.95)), + p95_response_tokens=int(_percentile(response_tokens, 0.95)), + ) + ) + return tuple(aggregates) + + +def _waste( + semantic_costs: tuple[SpanCostView, ...], + mcp_tools: tuple[McpToolAggregate, ...], +) -> tuple[WasteItem, ...]: + items: list[WasteItem] = [] + for span in semantic_costs: + if span.no_op and span.duration_ms >= _WASTE_NOOP_MS: + rss = ( + f", +{span.rss_delta_mb:.0f} MB" + if span.rss_delta_mb and span.rss_delta_mb >= 1 + else "" + ) + items.append( + WasteItem( + kind="no-op", + subject=span.name, + surface=span.surface, + detail=f"ran 
{span.duration_ms:.0f}ms{rss}, skipped {span.skipped}", + severity=span.duration_ms, + ) + ) + items.extend( + WasteItem( + kind="high payload", + subject=tool.name, + surface="mcp", + detail=( + f"p95 {tool.p95_response_bytes / 1024:.0f} KB resp · " + f"{tool.p95_response_tokens} tok" + ), + severity=float(tool.p95_response_bytes), + ) + for tool in mcp_tools + if tool.p95_response_bytes >= _HIGH_PAYLOAD_BYTES + or tool.p95_response_tokens >= _HIGH_PAYLOAD_TOKENS + ) + items.sort(key=lambda w: (-w.severity, w.kind, w.subject)) + return tuple(items) + + +_CONTROLLER_TOOLS = frozenset( + { + "mcp.start_controlled_change", + "mcp.finish_controlled_change", + "mcp.manage_change_intent", + "mcp.check_patch_contract", + "mcp.create_review_receipt", + "mcp.validate_review_claims", + } +) + + +def _cpu_ms(op: OperationView) -> float: + return (op.cpu_user_ms or 0.0) + (op.cpu_system_ms or 0.0) + + +def _subsystem(op: OperationView) -> str: + if op.surface == "memory": + return "memory" + if "analyze" in op.name: + return "analysis" + if op.name in _CONTROLLER_TOOLS: + return "controller" + if op.surface == "mcp": + return "mcp query" + return op.surface or "other" + + +def _pipeline(flat: list[OperationView]) -> tuple[PipelineGroup, ...]: + grouped: dict[str, list[OperationView]] = defaultdict(list) + for op in flat: + grouped[_subsystem(op)].append(op) + rows = [ + PipelineGroup( + name=name, + op_count=len(ops), + duration_ms=sum(op.duration_ms for op in ops), + cpu_ms=sum(_cpu_ms(op) for op in ops), + ) + for name, ops in grouped.items() + ] + return tuple(sorted(rows, key=lambda g: (-g.duration_ms, g.name))) + + +def _agent_view(flat: list[OperationView]) -> AgentView | None: + mcp_ops = [op for op in flat if op.surface == "mcp"] + if not mcp_ops: + return None + grouped: dict[str, list[OperationView]] = defaultdict(list) + for op in mcp_ops: + grouped[op.name].append(op) + rows = [ + AgentTokenRow( + name=name, + calls=len(ops), + request_tokens=sum(op.request_tokens 
or 0 for op in ops), + response_tokens=sum(op.response_tokens or 0 for op in ops), + ) + for name, ops in grouped.items() + ] + rows.sort(key=lambda r: (-r.response_tokens, r.name)) + return AgentView( + mcp_calls=len(mcp_ops), + request_tokens=sum(row.request_tokens for row in rows), + response_tokens=sum(row.response_tokens for row in rows), + consumers=tuple(rows), + ) + + +def _db_costs(flat: list[OperationView]) -> tuple[DbCostRow, ...]: + grouped: dict[str, list[SpanView]] = defaultdict(list) + surface_of: dict[str, str] = {} + for op in flat: + for span in op.spans: + if "db_queries" in span.counters: + grouped[span.name].append(span) + surface_of.setdefault(span.name, op.surface) + rows = [ + DbCostRow( + span_name=name, + surface=surface_of[name], + span_count=len(spans), + total_queries=sum(s.counters.get("db_queries", 0) for s in spans), + total_writes=sum(s.counters.get("db_writes", 0) for s in spans), + max_queries=max(s.counters.get("db_queries", 0) for s in spans), + ) + for name, spans in grouped.items() + ] + return tuple(sorted(rows, key=lambda r: (-r.total_queries, r.span_name))) + + +def _fingerprint_row( + span_name: str, surface: str, fingerprint: str, count: int +) -> DbFingerprintRow: + shape = describe_fingerprint(fingerprint) + return DbFingerprintRow( + span_name=span_name, + surface=surface, + fingerprint=fingerprint, + table_hint=shape.table, + count=count, + kind=shape.kind, + summary=shape.summary, + ) + + +def _db_fingerprints(flat: list[OperationView]) -> tuple[DbFingerprintRow, ...]: + grouped: dict[tuple[str, str], int] = defaultdict(int) + surface_of: dict[str, str] = {} + for op in flat: + for span in op.spans: + for fingerprint, count in span.db_fingerprints.items(): + grouped[(span.name, fingerprint)] += count + surface_of.setdefault(span.name, op.surface) + rows = [ + _fingerprint_row(span_name, surface_of[span_name], fingerprint, count) + for (span_name, fingerprint), count in grouped.items() + ] + rows.sort(key=lambda r: 
(-r.count, r.span_name, r.fingerprint)) + return tuple(rows[:_DB_FINGERPRINT_ROW_LIMIT]) + + +def _aggregates( + flat: list[OperationView], spans_by_op: dict[str, tuple[SpanView, ...]] +) -> AggregatesView: + slowest = tuple(sorted(flat, key=lambda v: (-v.duration_ms, v.operation_id))[:5]) + with_response = [v for v in flat if v.response_bytes is not None] + largest = tuple( + sorted(with_response, key=lambda v: (-(v.response_bytes or 0), v.operation_id))[ + :5 + ] + ) + rss = [v.rss_delta_mb for v in flat if v.rss_delta_mb is not None] + rss.extend( + span.rss_delta_mb + for spans in spans_by_op.values() + for span in spans + if span.rss_delta_mb is not None + ) + unknown = sum( + 1 + for spans in spans_by_op.values() + for span in spans + if span.reason_kind == "unknown" + ) + span_costs = sorted( + (_span_cost_view(op, span) for op in flat for span in op.spans), + key=lambda s: (-s.duration_ms, s.operation_id, s.span_id), + ) + semantic_costs = tuple(s for s in span_costs if _is_memory_pipeline_span(s.name)) + memory_ranked = sorted( + ( + s + for s in span_costs + if any( + value is not None + for value in ( + s.peak_rss_mb, + s.peak_rss_delta_mb, + s.rss_mb, + s.rss_delta_mb, + ) + ) + ), + key=lambda s: ( + -(s.peak_rss_mb or s.rss_mb or 0.0), + -(s.peak_rss_delta_mb or s.rss_delta_mb or 0.0), + s.operation_id, + s.span_id, + ), + ) + rss_abs = [v.rss_mb for v in flat if v.rss_mb is not None] + rss_abs.extend( + span.rss_mb + for spans in spans_by_op.values() + for span in spans + if span.rss_mb is not None + ) + peak_rss = [v.peak_rss_mb for v in flat if v.peak_rss_mb is not None] + peak_rss.extend( + span.peak_rss_mb + for spans in spans_by_op.values() + for span in spans + if span.peak_rss_mb is not None + ) + mcp_tools = _mcp_tool_aggregates(flat) + cpu_ranked = sorted(flat, key=lambda v: (-_cpu_ms(v), v.operation_id)) + heaviest_cpu = cpu_ranked[0] if cpu_ranked and _cpu_ms(cpu_ranked[0]) > 0 else None + return AggregatesView( + 
operation_count=len(flat), + slowest=slowest, + largest_responses=largest, + max_rss_delta_mb=max(rss) if rss else None, + anomaly_count=0, + unknown_expensive_rebuild_count=unknown, + mcp_tools=mcp_tools, + slowest_span=span_costs[0] if span_costs else None, + semantic_costs=semantic_costs[:_SEMANTIC_COST_LIMIT], + peak_memory_span=memory_ranked[0] if memory_ranked else None, + max_rss_absolute_mb=max(rss_abs) if rss_abs else None, + max_peak_rss_mb=max(peak_rss) if peak_rss else None, + db_costs=_db_costs(flat), + agent=_agent_view(flat), + waste=_waste(semantic_costs, mcp_tools), + heaviest_cpu=heaviest_cpu, + pipeline=_pipeline(flat), + db_fingerprints=_db_fingerprints(flat), + ) + + +def _epoch_ms(iso: str) -> float: + """Parse a store timestamp to epoch milliseconds (0.0 when absent/unparsable).""" + if not iso: + return 0.0 + try: + return datetime.fromisoformat(iso.replace("Z", "+00:00")).timestamp() * 1000.0 + except ValueError: + return 0.0 + + +def _wf_row( + *, + label: str, + surface: str, + kind: str, + depth: int, + start_iso: str, + duration_ms: float, + base_ms: float, + reason_kind: str | None = None, + status: str = "ok", +) -> WaterfallRow: + return WaterfallRow( + label=label, + surface=surface, + kind=kind, + depth=depth, + offset_ms=max(0.0, _epoch_ms(start_iso) - base_ms), + duration_ms=duration_ms, + reason_kind=reason_kind, + status=status, + ) + + +def _waterfall_rows( + op: OperationView, depth: int, base_ms: float +) -> list[WaterfallRow]: + rows = [ + _wf_row( + label=op.name, + surface=op.surface, + kind="operation", + depth=depth, + start_iso=op.started_at_utc, + duration_ms=op.duration_ms, + base_ms=base_ms, + status=op.status, + ) + ] + rows.extend( + _wf_row( + label=span.name, + surface=op.surface, + kind="span", + depth=depth + 1, + start_iso=span.started_at_utc, + duration_ms=span.duration_ms, + base_ms=base_ms, + reason_kind=span.reason_kind, + status=span.status, + ) + for span in op.spans + ) + for child in op.children: + 
rows.extend(_waterfall_rows(child, depth + 1, base_ms)) + return rows + + +def _waterfall_groups(tree: tuple[OperationView, ...]) -> tuple[WaterfallGroup, ...]: + """One self-contained timeline per causal chain (tree root); offsets are + relative to that root's start so a long-idle window never crushes the bars.""" + groups: list[WaterfallGroup] = [] + for root in tree: + base_ms = _epoch_ms(root.started_at_utc) + rows = tuple(_waterfall_rows(root, 0, base_ms)) + span_ms = max((row.offset_ms + row.duration_ms for row in rows), default=0.0) + groups.append( + WaterfallGroup( + correlation_id=root.correlation_id, + started_at_utc=root.started_at_utc, + duration_ms=span_ms, + rows=rows, + ) + ) + return tuple(groups) + + +def build_trace_view( + conn: sqlite3.Connection, + *, + operation_id: str | None = None, + correlation_id: str | None = None, + session_id: str | None = None, + last: int | None = None, +) -> TraceView: + rows, focus_id = _select_operations( + conn, + operation_id=operation_id, + correlation_id=correlation_id, + session_id=session_id, + last=last, + ) + operation_ids = [str(row["operation_id"]) for row in rows] + spans_by_op = _span_views(conn, operation_ids) + flat = [ + _operation_view(row, spans_by_op.get(str(row["operation_id"]), ()), ()) + for row in rows + ] + by_id = {view.operation_id: view for view in flat} + starts = [str(row["started_at_utc"]) for row in rows] + operation_tree = _build_forest(rows, spans_by_op) + return TraceView( + schema_version=PLATFORM_OBSERVABILITY_SCHEMA_VERSION, + window_started_at_utc=min(starts) if starts else "", + window_ended_at_utc=max(starts) if starts else "", + aggregates=_aggregates(flat, spans_by_op), + focus_operation=by_id.get(focus_id) if focus_id is not None else None, + operation_tree=operation_tree, + correlated_operations=tuple(flat), + waterfall=_waterfall_groups(operation_tree), + ) + + +__all__ = ["build_trace_view", "open_observability_store_readonly"] diff --git 
a/codeclone/observability/store/schema.py b/codeclone/observability/store/schema.py new file mode 100644 index 00000000..3f0cf22f --- /dev/null +++ b/codeclone/observability/store/schema.py @@ -0,0 +1,152 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Observability sqlite schema (Phase 29 §4.5). + +Two tables — operations (surface-level) and spans (stage/subsystem) — plus a +meta row carrying the schema version. Profile columns are nullable +(populated only when ``profile=true`` with the ``codeclone[perf]`` extra). +""" + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION + +_OBSERVABILITY_DB_RELATIVE = ".codeclone/db/platform_observability.sqlite3" + +_SCHEMA = """ +CREATE TABLE IF NOT EXISTS platform_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS platform_operations ( + operation_id TEXT PRIMARY KEY, + parent_operation_id TEXT, + correlation_id TEXT NOT NULL, + surface TEXT NOT NULL, + name TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + duration_ms REAL NOT NULL, + status TEXT NOT NULL, + error_kind TEXT, + session_id TEXT, + repo_root_digest TEXT, + request_bytes INTEGER, + response_bytes INTEGER, + request_tokens INTEGER, + response_tokens INTEGER, + rss_mb REAL, + rss_delta_mb REAL, + peak_rss_mb REAL, + peak_rss_delta_mb REAL, + cpu_user_ms REAL, + cpu_system_ms REAL, + open_fds INTEGER, + thread_count INTEGER +); + +CREATE TABLE IF NOT EXISTS platform_spans ( + span_id TEXT PRIMARY KEY, + operation_id TEXT NOT NULL, + parent_span_id TEXT, + name TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + duration_ms REAL NOT NULL, + status TEXT NOT NULL, + reason_kind TEXT, + reason TEXT, + 
dedupe_key TEXT, + counters_json TEXT, + db_fingerprints TEXT, + rss_mb REAL, + rss_delta_mb REAL, + peak_rss_mb REAL, + peak_rss_delta_mb REAL, + cpu_user_ms REAL, + cpu_system_ms REAL, + open_fds INTEGER, + thread_count INTEGER +); + +CREATE INDEX IF NOT EXISTS idx_platform_operations_session + ON platform_operations (session_id, started_at_utc); +CREATE INDEX IF NOT EXISTS idx_platform_operations_correlation + ON platform_operations (correlation_id); +CREATE INDEX IF NOT EXISTS idx_platform_operations_parent + ON platform_operations (parent_operation_id); +CREATE INDEX IF NOT EXISTS idx_platform_spans_operation + ON platform_spans (operation_id); +""" + + +def observability_store_path(root: Path) -> Path: + return root.resolve() / _OBSERVABILITY_DB_RELATIVE + + +def _ensure_span_columns(conn: sqlite3.Connection) -> None: + """Additive migration for stores created before a span column existed. + + ``CREATE TABLE IF NOT EXISTS`` never alters an existing table, so a store + written by an older build keeps its old shape. This backfills the column + with ``ALTER TABLE ... ADD COLUMN`` (a no-op on fresh stores, which already + have it from ``_SCHEMA``) so writes/reads stay forward-compatible without a + destructive rebuild of disposable telemetry. 
+ """ + existing = {row[1] for row in conn.execute("PRAGMA table_info(platform_spans)")} + if "db_fingerprints" not in existing: + conn.execute("ALTER TABLE platform_spans ADD COLUMN db_fingerprints TEXT") + _ensure_peak_rss_columns(conn, table="platform_spans", existing=existing) + + +def _ensure_peak_rss_columns( + conn: sqlite3.Connection, *, table: str, existing: set[str] | None = None +) -> None: + columns = ( + existing + if existing is not None + else {row[1] for row in conn.execute(f"PRAGMA table_info({table})")} + ) + if "peak_rss_mb" not in columns: + conn.execute(f"ALTER TABLE {table} ADD COLUMN peak_rss_mb REAL") + if "peak_rss_delta_mb" not in columns: + conn.execute(f"ALTER TABLE {table} ADD COLUMN peak_rss_delta_mb REAL") + + +def _ensure_operation_columns(conn: sqlite3.Connection) -> None: + existing = { + row[1] for row in conn.execute("PRAGMA table_info(platform_operations)") + } + _ensure_peak_rss_columns(conn, table="platform_operations", existing=existing) + + +def create_observability_schema(conn: sqlite3.Connection) -> None: + conn.executescript(_SCHEMA) + _ensure_span_columns(conn) + _ensure_operation_columns(conn) + conn.execute( + "INSERT OR REPLACE INTO platform_meta(key, value) VALUES('schema_version', ?)", + (PLATFORM_OBSERVABILITY_SCHEMA_VERSION,), + ) + conn.commit() + + +def open_observability_store(path: Path) -> sqlite3.Connection: + """Open (creating the parent dir + schema) the observability store.""" + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(path)) + create_observability_schema(conn) + return conn + + +__all__ = [ + "create_observability_schema", + "observability_store_path", + "open_observability_store", +] diff --git a/codeclone/observability/store/writer.py b/codeclone/observability/store/writer.py new file mode 100644 index 00000000..c8cb53a5 --- /dev/null +++ b/codeclone/observability/store/writer.py @@ -0,0 +1,114 @@ +# This Source Code Form is subject to the terms of the Mozilla Public 
+# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Bounded, batched observability writer (Phase 29 §4.5). + +A whole operation — its row plus every span — is persisted in a single sqlite +transaction. We do NOT copy the audit per-emit commit-per-row pattern. +""" + +from __future__ import annotations + +import sqlite3 + +from ...utils.json_io import json_text +from ..models import OperationRecord, ProfileSample, SpanRecord + +_PROFILE_NULL: tuple[None, ...] = (None,) * 8 + +_OPERATION_SQL = ( + "INSERT OR REPLACE INTO platform_operations(" + "operation_id, parent_operation_id, correlation_id, surface, name, " + "started_at_utc, duration_ms, status, error_kind, session_id, " + "repo_root_digest, request_bytes, response_bytes, request_tokens, " + "response_tokens, rss_mb, rss_delta_mb, peak_rss_mb, peak_rss_delta_mb, " + "cpu_user_ms, cpu_system_ms, open_fds, thread_count) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" +) + +_SPAN_SQL = ( + "INSERT OR REPLACE INTO platform_spans(" + "span_id, operation_id, parent_span_id, name, started_at_utc, duration_ms, " + "status, reason_kind, reason, dedupe_key, counters_json, db_fingerprints, " + "rss_mb, rss_delta_mb, peak_rss_mb, peak_rss_delta_mb, cpu_user_ms, " + "cpu_system_ms, open_fds, thread_count) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" +) + + +def _profile_cols( + profile: ProfileSample | None, +) -> tuple[object, ...]: + if profile is None: + return _PROFILE_NULL + return ( + profile.rss_mb, + profile.rss_delta_mb, + profile.peak_rss_mb, + profile.peak_rss_delta_mb, + profile.cpu_user_ms, + profile.cpu_system_ms, + profile.open_fds, + profile.thread_count, + ) + + +def _operation_row(operation: OperationRecord) -> tuple[object, ...]: + return ( + operation.operation_id, + 
operation.parent_operation_id, + operation.correlation_id, + operation.surface, + operation.name, + operation.started_at_utc, + operation.duration_ms, + operation.status, + operation.error_kind, + operation.session_id, + operation.repo_root_digest, + operation.request_bytes, + operation.response_bytes, + operation.request_tokens, + operation.response_tokens, + *_profile_cols(operation.profile), + ) + + +def _span_row(span: SpanRecord) -> tuple[object, ...]: + counters_json = ( + json_text(dict(span.counters), sort_keys=True) if span.counters else None + ) + db_fingerprints_json = ( + json_text(dict(span.db_fingerprints), sort_keys=True) + if span.db_fingerprints + else None + ) + return ( + span.span_id, + span.operation_id, + span.parent_span_id, + span.name, + span.started_at_utc, + span.duration_ms, + span.status, + span.reason_kind, + span.reason, + span.dedupe_key, + counters_json, + db_fingerprints_json, + *_profile_cols(span.profile), + ) + + +def write_operation(conn: sqlite3.Connection, operation: OperationRecord) -> None: + """Persist the operation and all its spans in one transaction.""" + with conn: + conn.execute(_OPERATION_SQL, _operation_row(operation)) + if operation.spans: + conn.executemany(_SPAN_SQL, [_span_row(span) for span in operation.spans]) + + +__all__ = ["write_operation"] diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py new file mode 100644 index 00000000..66b9e5a9 --- /dev/null +++ b/codeclone/observability/views.py @@ -0,0 +1,258 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Read-model views (Phase 29 §4.6). + +``TraceView`` is the primary artifact; JSON/text/HTML renderers are projections +over it and must not drive the schema. Pure data, built by ``store/reader.py``. 
+""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field + + +@dataclass(frozen=True, slots=True) +class SpanView: + span_id: str + name: str + duration_ms: float + status: str + parent_span_id: str | None = None + reason_kind: str | None = None + reason: str | None = None + dedupe_key: str | None = None + counters: Mapping[str, int] = field(default_factory=dict) + rss_delta_mb: float | None = None + rss_mb: float | None = None + peak_rss_mb: float | None = None + peak_rss_delta_mb: float | None = None + started_at_utc: str = "" + # Top-N literal-free SQL shapes seen on this span -> occurrence count. + db_fingerprints: Mapping[str, int] = field(default_factory=dict) + + +@dataclass(frozen=True, slots=True) +class SpanCostView: + """A span flattened with its owning operation's identity, for the cockpit + cost views (slowest-span highlight, semantic/memory cost table). + + ``no_op`` is the deterministic answer to "did this span do productive work?": + true when the span declares productive counters and they sum to zero — a + rebuild/reindex that touched nothing yet still spent wall time and memory. 
+ """ + + span_id: str + name: str + surface: str + operation_id: str + operation_name: str + duration_ms: float + reason_kind: str | None = None + rss_delta_mb: float | None = None + rss_mb: float | None = None + peak_rss_mb: float | None = None + peak_rss_delta_mb: float | None = None + produced: int = 0 + skipped: int = 0 + no_op: bool = False + + +@dataclass(frozen=True, slots=True) +class OperationView: + operation_id: str + correlation_id: str + surface: str + name: str + started_at_utc: str + duration_ms: float + status: str + parent_operation_id: str | None = None + error_kind: str | None = None + request_bytes: int | None = None + response_bytes: int | None = None + request_tokens: int | None = None + response_tokens: int | None = None + rss_delta_mb: float | None = None + rss_mb: float | None = None + peak_rss_mb: float | None = None + peak_rss_delta_mb: float | None = None + spans: tuple[SpanView, ...] = () + children: tuple[OperationView, ...] = () + cpu_user_ms: float | None = None + cpu_system_ms: float | None = None + + +@dataclass(frozen=True, slots=True) +class McpToolAggregate: + name: str + count: int + p50_duration_ms: float + p95_duration_ms: float + p95_response_bytes: int + p95_request_bytes: int = 0 + p95_response_tokens: int = 0 + + +@dataclass(frozen=True, slots=True) +class DbCostRow: + """SQLite work attributed to a span class (performance-truth, not audit). 
+ + Aggregated from span db_queries/db_writes counters; ``max_queries`` is the + worst single instance and ``queries`` ÷ a per-row productive count exposes + N+1-shaped access (many reads, little produced).""" + + span_name: str + surface: str + span_count: int + total_queries: int + total_writes: int + max_queries: int + + +@dataclass(frozen=True, slots=True) +class DbFingerprintRow: + """One literal-free SQL shape attributed to a span class, with how often it + ran — the decomposition of a span's ``db_queries`` total into named + statements, so an N+1 reads as "1200x SELECT evidence by memory_id" instead + of a bare count. ``table_hint`` is re-derived from the stored shape.""" + + span_name: str + surface: str + fingerprint: str + table_hint: str | None + count: int + kind: str = "other" + # Human predicate summary, e.g. "count by repo_root_digest, workflow_id". + summary: str = "" + + +@dataclass(frozen=True, slots=True) +class AgentTokenRow: + """One MCP tool's cumulative token economics across the window.""" + + name: str + calls: int + request_tokens: int + response_tokens: int + + +@dataclass(frozen=True, slots=True) +class AgentView: + """Agentic context economics: how many tokens MCP tools pushed back into + the agent's context (``response_tokens`` = context pressure), ranked by the + biggest consumer. Built only when MCP operations are present.""" + + mcp_calls: int = 0 + request_tokens: int = 0 + response_tokens: int = 0 + consumers: tuple[AgentTokenRow, ...] = () + + +@dataclass(frozen=True, slots=True) +class WasteItem: + """One ranked "fix candidate": resources spent without payoff — a no-op + rebuild that ran but produced nothing, or a payload-heavy call. 
``severity`` + is the descending sort key (magnitude of the wasted cost).""" + + kind: str + subject: str + surface: str + detail: str + severity: float = 0.0 + + +@dataclass(frozen=True, slots=True) +class PipelineGroup: + """Operations rolled up by subsystem (memory / analysis / controller / …), + showing where the run spends wall time and CPU.""" + + name: str + op_count: int + duration_ms: float + cpu_ms: float + + +@dataclass(frozen=True, slots=True) +class AggregatesView: + operation_count: int + slowest: tuple[OperationView, ...] = () + largest_responses: tuple[OperationView, ...] = () + max_rss_delta_mb: float | None = None + anomaly_count: int = 0 + unknown_expensive_rebuild_count: int = 0 + mcp_tools: tuple[McpToolAggregate, ...] = () + slowest_span: SpanCostView | None = None + semantic_costs: tuple[SpanCostView, ...] = () + peak_memory_span: SpanCostView | None = None + max_rss_absolute_mb: float | None = None + max_peak_rss_mb: float | None = None + db_costs: tuple[DbCostRow, ...] = () + agent: AgentView | None = None + waste: tuple[WasteItem, ...] = () + heaviest_cpu: OperationView | None = None + pipeline: tuple[PipelineGroup, ...] = () + db_fingerprints: tuple[DbFingerprintRow, ...] = () + + +@dataclass(frozen=True, slots=True) +class WaterfallRow: + """One time-positioned bar in a waterfall: a span or operation placed at + ``offset_ms`` after its group's start, ``duration_ms`` wide. 
``depth`` nests + spans under their operation and child operations under their parent.""" + + label: str + surface: str + kind: str # "operation" | "span" + depth: int + offset_ms: float + duration_ms: float + reason_kind: str | None = None + status: str = "ok" + + +@dataclass(frozen=True, slots=True) +class WaterfallGroup: + """One correlated causal chain rendered as a self-contained timeline; every + row's ``offset_ms`` is relative to ``started_at_utc`` and bounded by + ``duration_ms`` (the group's own window, not the whole trace).""" + + correlation_id: str + started_at_utc: str + duration_ms: float + rows: tuple[WaterfallRow, ...] = () + + +@dataclass(frozen=True, slots=True) +class TraceView: + schema_version: str + window_started_at_utc: str + window_ended_at_utc: str + aggregates: AggregatesView + repo_root_digest: str | None = None + focus_operation: OperationView | None = None + operation_tree: tuple[OperationView, ...] = () + correlated_operations: tuple[OperationView, ...] = () + waterfall: tuple[WaterfallGroup, ...] = () + + +__all__ = [ + "AgentTokenRow", + "AgentView", + "AggregatesView", + "DbCostRow", + "DbFingerprintRow", + "McpToolAggregate", + "OperationView", + "PipelineGroup", + "SpanCostView", + "SpanView", + "TraceView", + "WasteItem", + "WaterfallGroup", + "WaterfallRow", +] diff --git a/codeclone/paths/gitignore.py b/codeclone/paths/gitignore.py new file mode 100644 index 00000000..78fec300 --- /dev/null +++ b/codeclone/paths/gitignore.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic .gitignore checks for CodeClone workspace hygiene.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Final + +from .workspace import WORKSPACE_DIR_NAME + +_COVERING_PATTERN_CORES: Final[frozenset[str]] = frozenset( + { + ".codeclone", + ".codeclone/**", + ".cache", + ".cache/**", + ".cache/codeclone", + ".cache/codeclone/**", + } +) + +GITIGNORE_CODECLONE_CACHE_TIP_ID: Final = "gitignore-codeclone-cache" +WORKSPACE_HYGIENE_CATEGORY: Final = "workspace_hygiene" +GITIGNORE_CODECLONE_CACHE_SUGGESTED_ENTRY: Final = f"{WORKSPACE_DIR_NAME}/" +GITIGNORE_CODECLONE_CACHE_MESSAGE: Final = ( + f"Add `{WORKSPACE_DIR_NAME}/` to `.gitignore` to keep CodeClone " + "coordination state, audit DB, and generated artifacts out of " + "version control." +) + + +def normalize_gitignore_pattern(line: str) -> str: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + return "" + if stripped.startswith("\\#"): + stripped = stripped[1:] + return stripped.lstrip("/").rstrip("/") + + +def gitignore_pattern_covers_codeclone_cache(pattern: str) -> bool: + """Return True when a single gitignore line covers the CodeClone workspace.""" + normalized = normalize_gitignore_pattern(pattern) + if not normalized or normalized.startswith("!"): + return False + core = normalized.lstrip("/").rstrip("/") + if core in _COVERING_PATTERN_CORES: + return True + return core.endswith( + ( + ".codeclone", + ".codeclone/**", + ".cache/codeclone", + ".cache/codeclone/**", + ) + ) + + +def repo_gitignore_covers_codeclone_cache(root: Path) -> bool: + """Return True when the repository root ``.gitignore`` covers CodeClone cache.""" + gitignore_path = root / ".gitignore" + if not gitignore_path.is_file(): + return False + try: + text = gitignore_path.read_text(encoding="utf-8") + except OSError: + return False + return any( + 
gitignore_pattern_covers_codeclone_cache(line) for line in text.splitlines() + ) + + +def gitignore_codeclone_cache_tip_payload() -> dict[str, object]: + return { + "id": GITIGNORE_CODECLONE_CACHE_TIP_ID, + "severity": "info", + "category": WORKSPACE_HYGIENE_CATEGORY, + "message": GITIGNORE_CODECLONE_CACHE_MESSAGE, + "suggested_entry": GITIGNORE_CODECLONE_CACHE_SUGGESTED_ENTRY, + } + + +__all__ = [ + "GITIGNORE_CODECLONE_CACHE_MESSAGE", + "GITIGNORE_CODECLONE_CACHE_SUGGESTED_ENTRY", + "GITIGNORE_CODECLONE_CACHE_TIP_ID", + "WORKSPACE_HYGIENE_CATEGORY", + "gitignore_codeclone_cache_tip_payload", + "gitignore_pattern_covers_codeclone_cache", + "normalize_gitignore_pattern", + "repo_gitignore_covers_codeclone_cache", +] diff --git a/codeclone/paths/workspace.py b/codeclone/paths/workspace.py new file mode 100644 index 00000000..e682a545 --- /dev/null +++ b/codeclone/paths/workspace.py @@ -0,0 +1,128 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Repo-local CodeClone workspace directories and default artifact paths.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Final, Protocol + +WORKSPACE_DIR_NAME: Final = ".codeclone" +LEGACY_WORKSPACE_DIR_PARTS: Final = (".cache", "codeclone") + +REL_CACHE_PATH: Final = f"{WORKSPACE_DIR_NAME}/cache.json" +REL_REPORT_HTML_PATH: Final = f"{WORKSPACE_DIR_NAME}/report.html" +REL_REPORT_JSON_PATH: Final = f"{WORKSPACE_DIR_NAME}/report.json" +REL_REPORT_MARKDOWN_PATH: Final = f"{WORKSPACE_DIR_NAME}/report.md" +REL_REPORT_SARIF_PATH: Final = f"{WORKSPACE_DIR_NAME}/report.sarif" +REL_REPORT_TEXT_PATH: Final = f"{WORKSPACE_DIR_NAME}/report.txt" +REL_AUDIT_DB_PATH: Final = f"{WORKSPACE_DIR_NAME}/db/audit.sqlite3" +REL_INTENT_REGISTRY_DB_PATH: Final = f"{WORKSPACE_DIR_NAME}/db/intents.sqlite3" +REL_MEMORY_DB_PATH: Final = f"{WORKSPACE_DIR_NAME}/memory/engineering_memory.sqlite3" +REL_SEMANTIC_INDEX_PATH: Final = f"{WORKSPACE_DIR_NAME}/memory/semantic_index.lance" +REL_SEMANTIC_EMBEDDING_CACHE_DIR: Final = f"{WORKSPACE_DIR_NAME}/memory/fastembed" + +FORBIDDEN_WORKSPACE_GLOBS: Final = ( + f"{WORKSPACE_DIR_NAME}/**", + ".cache/codeclone/**", +) + +REGISTRY_DIR_PARTS: Final = (WORKSPACE_DIR_NAME, "intents") +REPORT_JSON_PARTS: Final = (WORKSPACE_DIR_NAME, "report.json") + + +class _PrinterLike(Protocol): + def print(self, message: str) -> None: ... 
+ + +def repo_workspace_dir(root: Path) -> Path: + return root / WORKSPACE_DIR_NAME + + +def legacy_repo_workspace_dir(root: Path) -> Path: + return root.joinpath(*LEGACY_WORKSPACE_DIR_PARTS) + + +def legacy_home_cache_path() -> Path: + return Path("~/.cache/codeclone/cache.json").expanduser() + + +def default_cache_path(root: Path) -> Path: + return repo_workspace_dir(root) / "cache.json" + + +def legacy_repo_workspace_has_artifacts(root: Path) -> bool: + legacy_dir = legacy_repo_workspace_dir(root) + if not legacy_dir.is_dir(): + return False + try: + return any(legacy_dir.iterdir()) + except OSError: + return False + + +def emit_legacy_workspace_warnings( + *, + root_path: Path, + cache_path: Path, + legacy_home_cache_path: Path, + console: _PrinterLike, +) -> None: + """Warn when obsolete home or repo-local artifact locations still exist.""" + from .. import ui_messages as ui + + if legacy_home_cache_path.exists(): + try: + legacy_resolved = legacy_home_cache_path.resolve() + except OSError: + legacy_resolved = legacy_home_cache_path + if legacy_resolved != cache_path: + console.print( + ui.fmt_legacy_cache_warning( + legacy_path=legacy_resolved, + new_path=cache_path, + ) + ) + + if legacy_repo_workspace_has_artifacts(root_path): + console.print( + ui.fmt_legacy_repo_workspace_warning( + legacy_dir=legacy_repo_workspace_dir(root_path), + new_dir=repo_workspace_dir(root_path), + ) + ) + + +def workspace_glob_patterns() -> tuple[str, ...]: + return FORBIDDEN_WORKSPACE_GLOBS + + +__all__ = [ + "FORBIDDEN_WORKSPACE_GLOBS", + "LEGACY_WORKSPACE_DIR_PARTS", + "REGISTRY_DIR_PARTS", + "REL_AUDIT_DB_PATH", + "REL_CACHE_PATH", + "REL_INTENT_REGISTRY_DB_PATH", + "REL_MEMORY_DB_PATH", + "REL_REPORT_HTML_PATH", + "REL_REPORT_JSON_PATH", + "REL_REPORT_MARKDOWN_PATH", + "REL_REPORT_SARIF_PATH", + "REL_REPORT_TEXT_PATH", + "REL_SEMANTIC_EMBEDDING_CACHE_DIR", + "REL_SEMANTIC_INDEX_PATH", + "REPORT_JSON_PARTS", + "WORKSPACE_DIR_NAME", + "default_cache_path", + 
"emit_legacy_workspace_warnings", + "legacy_home_cache_path", + "legacy_repo_workspace_dir", + "legacy_repo_workspace_has_artifacts", + "repo_workspace_dir", + "workspace_glob_patterns", +] diff --git a/codeclone/report/gates/evaluator.py b/codeclone/report/gates/evaluator.py index 62310f24..c7ba2562 100644 --- a/codeclone/report/gates/evaluator.py +++ b/codeclone/report/gates/evaluator.py @@ -15,6 +15,7 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence +from ..messages import gates as gate_msgs if TYPE_CHECKING: from ...models import CoverageJoinResult, ProjectMetrics @@ -271,9 +272,9 @@ def _complexity_threshold_reason( ) -> tuple[str, ...]: return _reason_if( 0 <= config.fail_complexity < state.complexity_max, - "Complexity threshold exceeded: " - f"max CC={state.complexity_max}, " - f"threshold={config.fail_complexity}.", + gate_msgs.GATE_REASON_COMPLEXITY_THRESHOLD + + f"max CC={state.complexity_max}, " + + f"threshold={config.fail_complexity}.", ) @@ -284,9 +285,9 @@ def _coupling_threshold_reason( ) -> tuple[str, ...]: return _reason_if( 0 <= config.fail_coupling < state.coupling_max, - "Coupling threshold exceeded: " - f"max CBO={state.coupling_max}, " - f"threshold={config.fail_coupling}.", + gate_msgs.GATE_REASON_COUPLING_THRESHOLD + + f"max CBO={state.coupling_max}, " + + f"threshold={config.fail_coupling}.", ) @@ -297,9 +298,9 @@ def _cohesion_threshold_reason( ) -> tuple[str, ...]: return _reason_if( 0 <= config.fail_cohesion < state.cohesion_max, - "Cohesion threshold exceeded: " - f"max LCOM4={state.cohesion_max}, " - f"threshold={config.fail_cohesion}.", + gate_msgs.GATE_REASON_COHESION_THRESHOLD + + f"max LCOM4={state.cohesion_max}, " + + f"threshold={config.fail_cohesion}.", ) @@ -310,8 +311,8 @@ def _health_threshold_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_health >= 0 and state.health_score < config.fail_health, - 
"Health score below threshold: " - f"score={state.health_score}, threshold={config.fail_health}.", + gate_msgs.GATE_REASON_HEALTH_THRESHOLD + + f"score={state.health_score}, threshold={config.fail_health}.", ) @@ -322,7 +323,7 @@ def _dependency_cycles_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_cycles and state.dependency_cycles > 0, - f"Dependency cycles detected: {state.dependency_cycles} cycle(s).", + f"{gate_msgs.GATE_REASON_CYCLES_DETECTED}{state.dependency_cycles}{gate_msgs.GATE_SUFFIX_CYCLES}.", ) @@ -333,7 +334,7 @@ def _dead_code_high_confidence_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_dead_code and state.dead_high_confidence > 0, - f"Dead code detected (high confidence): {state.dead_high_confidence} item(s).", + f"{gate_msgs.GATE_REASON_DEAD_CODE_DETECTED}{state.dead_high_confidence}{gate_msgs.GATE_SUFFIX_ITEMS}.", ) @@ -344,8 +345,7 @@ def _new_high_risk_functions_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_on_new_metrics and state.diff_new_high_risk_functions > 0, - "New high-risk functions vs metrics baseline: " - f"{state.diff_new_high_risk_functions}.", + f"{gate_msgs.GATE_REASON_NEW_HIGH_RISK_FUNCTIONS}{state.diff_new_high_risk_functions}.", ) @@ -356,8 +356,7 @@ def _new_high_coupling_classes_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_on_new_metrics and state.diff_new_high_coupling_classes > 0, - "New high-coupling classes vs metrics baseline: " - f"{state.diff_new_high_coupling_classes}.", + f"{gate_msgs.GATE_REASON_NEW_HIGH_COUPLING}{state.diff_new_high_coupling_classes}.", ) @@ -368,7 +367,7 @@ def _new_dependency_cycles_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_on_new_metrics and state.diff_new_cycles > 0, - f"New dependency cycles vs metrics baseline: {state.diff_new_cycles}.", + f"{gate_msgs.GATE_REASON_NEW_CYCLES}{state.diff_new_cycles}.", ) @@ -379,7 +378,7 @@ def _new_dead_code_reason( ) -> tuple[str, ...]: return _reason_if( 
config.fail_on_new_metrics and state.diff_new_dead_code > 0, - f"New dead code items vs metrics baseline: {state.diff_new_dead_code}.", + f"{gate_msgs.GATE_REASON_NEW_DEAD_CODE}{state.diff_new_dead_code}.", ) @@ -390,7 +389,7 @@ def _health_regression_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_on_new_metrics and state.diff_health_delta < 0, - f"Health score regressed vs metrics baseline: delta={state.diff_health_delta}.", + f"{gate_msgs.GATE_REASON_HEALTH_REGRESSION}{state.diff_health_delta}.", ) @@ -403,8 +402,8 @@ def _typing_coverage_threshold_reason( return _reason_if( config.min_typing_coverage >= 0 and typing_percent < float(config.min_typing_coverage), - "Typing coverage below threshold: " - f"coverage={typing_percent:.1f}%, threshold={config.min_typing_coverage}%.", + gate_msgs.GATE_REASON_TYPING_THRESHOLD + + f"coverage={typing_percent:.1f}%, threshold={config.min_typing_coverage}%.", ) @@ -417,9 +416,9 @@ def _docstring_coverage_threshold_reason( return _reason_if( config.min_docstring_coverage >= 0 and docstring_percent < float(config.min_docstring_coverage), - "Docstring coverage below threshold: " - f"coverage={docstring_percent:.1f}%, " - f"threshold={config.min_docstring_coverage}%.", + gate_msgs.GATE_REASON_DOCSTRING_THRESHOLD + + f"coverage={docstring_percent:.1f}%, " + + f"threshold={config.min_docstring_coverage}%.", ) @@ -434,9 +433,9 @@ def _typing_regression_reason( state.diff_typing_param_permille_delta < 0 or state.diff_typing_return_permille_delta < 0 ), - "Typing coverage regressed vs metrics baseline: " - f"params_delta={state.diff_typing_param_permille_delta}, " - f"returns_delta={state.diff_typing_return_permille_delta}.", + gate_msgs.GATE_REASON_TYPING_REGRESSION + + f"params_delta={state.diff_typing_param_permille_delta}, " + + f"returns_delta={state.diff_typing_return_permille_delta}.", ) @@ -447,8 +446,7 @@ def _docstring_regression_reason( ) -> tuple[str, ...]: return _reason_if( 
config.fail_on_docstring_regression and state.diff_docstring_permille_delta < 0, - "Docstring coverage regressed vs metrics baseline: " - f"delta={state.diff_docstring_permille_delta}.", + f"{gate_msgs.GATE_REASON_DOCSTRING_REGRESSION}{state.diff_docstring_permille_delta}.", ) @@ -459,8 +457,7 @@ def _api_breaking_changes_reason( ) -> tuple[str, ...]: return _reason_if( config.fail_on_api_break and state.api_breaking_changes > 0, - "Public API breaking changes vs metrics baseline: " - f"{state.api_breaking_changes}.", + f"{gate_msgs.GATE_REASON_API_BREAKING}{state.api_breaking_changes}.", ) @@ -473,9 +470,9 @@ def _coverage_hotspots_reason( config.fail_on_untested_hotspots and state.coverage_join_status == "ok" and state.coverage_hotspots > 0, - "Coverage hotspots detected: " - f"hotspots={state.coverage_hotspots}, " - f"threshold={config.coverage_min}%.", + gate_msgs.GATE_REASON_COVERAGE_HOTSPOTS + + f"hotspots={state.coverage_hotspots}, " + + f"threshold={config.coverage_min}%.", ) diff --git a/codeclone/report/gates/reasons.py b/codeclone/report/gates/reasons.py index 20ec5380..b41892dc 100644 --- a/codeclone/report/gates/reasons.py +++ b/codeclone/report/gates/reasons.py @@ -8,6 +8,8 @@ from typing import Protocol +from ..messages import gates as gate_msgs + __all__ = [ "parse_metric_reason_entry", "policy_context", @@ -65,58 +67,53 @@ def tail(prefix: str) -> str: return trimmed[len(prefix) :] simple_prefixes: tuple[tuple[str, str], ...] 
= ( - ("New high-risk functions vs metrics baseline: ", "new_high_risk_functions"), - ( - "New high-coupling classes vs metrics baseline: ", - "new_high_coupling_classes", - ), - ("New dependency cycles vs metrics baseline: ", "new_dependency_cycles"), - ("New dead code items vs metrics baseline: ", "new_dead_code_items"), + (gate_msgs.GATE_REASON_NEW_HIGH_RISK_FUNCTIONS, "new_high_risk_functions"), + (gate_msgs.GATE_REASON_NEW_HIGH_COUPLING, "new_high_coupling_classes"), + (gate_msgs.GATE_REASON_NEW_CYCLES, "new_dependency_cycles"), + (gate_msgs.GATE_REASON_NEW_DEAD_CODE, "new_dead_code_items"), ) for prefix, kind in simple_prefixes: if trimmed.startswith(prefix): return kind, tail(prefix) - if trimmed.startswith("Health score regressed vs metrics baseline: delta="): + if trimmed.startswith(gate_msgs.GATE_REASON_HEALTH_REGRESSION): return "health_delta", trimmed.rsplit("=", maxsplit=1)[1] typing_detail = _parse_two_part_metric_detail( trimmed, - prefix="Typing coverage regressed vs metrics baseline: ", + prefix=gate_msgs.GATE_REASON_TYPING_REGRESSION, right_label="returns_delta", ) if typing_detail is not None: return "typing_coverage_delta", typing_detail - if trimmed.startswith("Docstring coverage regressed vs metrics baseline: delta="): + if trimmed.startswith(gate_msgs.GATE_REASON_DOCSTRING_REGRESSION): return "docstring_coverage_delta", trimmed.rsplit("=", maxsplit=1)[1] - if trimmed.startswith("Public API breaking changes vs metrics baseline: "): - return "api_breaking_changes", tail( - "Public API breaking changes vs metrics baseline: " - ) + if trimmed.startswith(gate_msgs.GATE_REASON_API_BREAKING): + return "api_breaking_changes", tail(gate_msgs.GATE_REASON_API_BREAKING) coverage_detail = _parse_two_part_metric_detail( trimmed, - prefix="Coverage hotspots detected: ", + prefix=gate_msgs.GATE_REASON_COVERAGE_HOTSPOTS, right_label="threshold", ) if coverage_detail is not None: return "coverage_hotspots", coverage_detail - if trimmed.startswith("Dependency 
cycles detected: "): - return "dependency_cycles", tail("Dependency cycles detected: ").replace( - " cycle(s)", "" + if trimmed.startswith(gate_msgs.GATE_REASON_CYCLES_DETECTED): + return "dependency_cycles", tail(gate_msgs.GATE_REASON_CYCLES_DETECTED).replace( + gate_msgs.GATE_SUFFIX_CYCLES, "" ) - if trimmed.startswith("Dead code detected (high confidence): "): + if trimmed.startswith(gate_msgs.GATE_REASON_DEAD_CODE_DETECTED): return "dead_code_items", tail( - "Dead code detected (high confidence): " - ).replace(" item(s)", "") + gate_msgs.GATE_REASON_DEAD_CODE_DETECTED + ).replace(gate_msgs.GATE_SUFFIX_ITEMS, "") threshold_prefixes: tuple[tuple[str, str], ...] = ( - ("Complexity threshold exceeded: ", "complexity_max"), - ("Coupling threshold exceeded: ", "coupling_max"), - ("Cohesion threshold exceeded: ", "cohesion_max"), - ("Health score below threshold: ", "health_score"), - ("Typing coverage below threshold: ", "typing_coverage"), - ("Docstring coverage below threshold: ", "docstring_coverage"), + (gate_msgs.GATE_REASON_COMPLEXITY_THRESHOLD, "complexity_max"), + (gate_msgs.GATE_REASON_COUPLING_THRESHOLD, "coupling_max"), + (gate_msgs.GATE_REASON_COHESION_THRESHOLD, "cohesion_max"), + (gate_msgs.GATE_REASON_HEALTH_THRESHOLD, "health_score"), + (gate_msgs.GATE_REASON_TYPING_THRESHOLD, "typing_coverage"), + (gate_msgs.GATE_REASON_DOCSTRING_THRESHOLD, "docstring_coverage"), ) for prefix, kind in threshold_prefixes: threshold_detail = _parse_two_part_metric_detail( @@ -203,7 +200,11 @@ def print_gating_failure_block( entries: tuple[tuple[str, object], ...] 
| list[tuple[str, object]], args: _GatingArgs, ) -> None: - console.print(f"\n\u2717 GATING FAILURE [{code}]", style="bold red", markup=False) + console.print( + f"\n\u2717 {gate_msgs.GATE_FAILURE_HEADER.format(code=code)}", + style="bold red", + markup=False, + ) normalized_entries = [("policy", policy_context(args=args, gate_kind=code))] normalized_entries.extend((key, str(value)) for key, value in entries) width = max(len(key) for key, _ in normalized_entries) diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index ed406207..dacfd710 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -16,6 +16,37 @@ from ...domain.quality import CONFIDENCE_HIGH from ...findings.structural.detectors import normalize_structural_findings from ...utils import coerce as _coerce +from ..messages.chrome import ( + BADGE_COPY, + BADGE_DISCLAIMER, + BADGE_FIELD_HTML, + BADGE_FIELD_MARKDOWN, + BADGE_TAB_FULL, + BADGE_TAB_GRADE, + FOOTER_BRAND, + FOOTER_DOCS, + FOOTER_REPORT_ISSUE, + FOOTER_SCHEMA_BASELINE, + FOOTER_SCHEMA_CACHE, + FOOTER_SCHEMA_REPORT, + IDE_PICKER_LABEL, + IDE_PICKER_TITLE, + MODAL_BADGE_TITLE, + MODAL_FINDING_CLOSE, + MODAL_FINDING_TITLE, + PROVENANCE_ARIA_LABEL, + PROVENANCE_TITLE_PREFIX, + TAB_CLONES, + TAB_DEAD_CODE, + TAB_DEPENDENCIES, + TAB_FINDINGS, + TAB_OVERVIEW, + TAB_QUALITY, + TAB_SUGGESTIONS, + TABLIST_ARIA_LABEL, + THEME_BUTTON_TEXT, + THEME_TOGGLE_LABEL, +) from ._context import _meta_pick, build_context from .assets.css import build_css from .assets.js import build_js @@ -144,20 +175,20 @@ def _tab_badge(count: int) -> str: "structural-findings": "structural-findings", } tab_defs = [ - ("overview", "Overview", overview_html, ""), - ("clones", "Clones", clones_html, _tab_badge(ctx.clone_groups_total)), - ("quality", "Quality", quality_html, _tab_badge(quality_issues)), - ("dependencies", "Dependencies", dependencies_html, _tab_badge(dep_cycles)), - ("dead-code", "Dead Code", 
dead_code_html, _tab_badge(dead_high_conf)), + ("overview", TAB_OVERVIEW, overview_html, ""), + ("clones", TAB_CLONES, clones_html, _tab_badge(ctx.clone_groups_total)), + ("quality", TAB_QUALITY, quality_html, _tab_badge(quality_issues)), + ("dependencies", TAB_DEPENDENCIES, dependencies_html, _tab_badge(dep_cycles)), + ("dead-code", TAB_DEAD_CODE, dead_code_html, _tab_badge(dead_high_conf)), ( "suggestions", - "Suggestions", + TAB_SUGGESTIONS, suggestions_html, _tab_badge(len(ctx.suggestions)), ), ( "structural-findings", - "Findings", + TAB_FINDINGS, structural_html, _tab_badge(structural_count), ), @@ -193,7 +224,7 @@ def _tab_badge(count: int) -> str: tabs_html = ( '
' - '
" ) @@ -232,13 +263,13 @@ def _tab_badge(count: int) -> str: '
' '
' '' + f'aria-haspopup="true" title="{IDE_PICKER_TITLE}">{ICONS["ide"]}' + f'{IDE_PICKER_LABEL}' f'
' f'" - f'" + f'" "
" ) @@ -266,11 +297,15 @@ def _tab_badge(count: int) -> str: ) _schema_parts: list[str] = [] if _report_schema: - _schema_parts.append(f"Report schema {_escape_html(str(_report_schema))}") + _schema_parts.append( + f"{FOOTER_SCHEMA_REPORT}{_escape_html(str(_report_schema))}" + ) if _baseline_schema: - _schema_parts.append(f"Baseline schema {_escape_html(str(_baseline_schema))}") + _schema_parts.append( + f"{FOOTER_SCHEMA_BASELINE}{_escape_html(str(_baseline_schema))}" + ) if _cache_schema: - _schema_parts.append(f"Cache schema {_escape_html(str(_cache_schema))}") + _schema_parts.append(f"{FOOTER_SCHEMA_CACHE}{_escape_html(str(_cache_schema))}") _schema_line = ( f'' if _schema_parts @@ -279,10 +314,11 @@ def _tab_badge(count: int) -> str: footer_html = ( '" @@ -291,11 +327,11 @@ def _tab_badge(count: int) -> str: cmd_palette_html = "" # removed finding_why_modal_html = ( '' + f'aria-label="{MODAL_FINDING_TITLE}">' '" '' "" @@ -303,35 +339,35 @@ def _tab_badge(count: int) -> str: help_modal_html = "" # removed badge_modal_html = ( - '' + '' '" '' + f'' '
' '' - '
' + f'' "
" ) diff --git a/codeclone/report/html/assets/js.py b/codeclone/report/html/assets/js.py index aec07d01..e4c948e5 100644 --- a/codeclone/report/html/assets/js.py +++ b/codeclone/report/html/assets/js.py @@ -346,8 +346,19 @@ const group=btn.closest('.group'); if(!group)return; const d=group.dataset; - const items=[]; - function add(label,val){if(val)items.push('
'+label+'
'+val+'
')} + const body=dlg.querySelector('#modal-body'); + body.replaceChildren(); + const list=document.createElement('dl'); + list.className='info-dl'; + function add(label,val){ + if(!val)return; + const dt=document.createElement('dt'); + dt.textContent=label; + const dd=document.createElement('dd'); + dd.textContent=val; + list.appendChild(dt); + list.appendChild(dd); + } add('Match rule',d.matchRule); add('Block size',d.blockSize); add('Signature',d.signatureKind); @@ -361,11 +372,18 @@ add('Group arity',d.groupArity); add('Clone type',d.cloneType); add('Source kind',d.sourceKind); - if(d.spreadFiles)add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + if(d.spreadFiles){ + add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + } + if(list.childNodes.length){ + body.appendChild(list); + }else{ + const empty=document.createElement('p'); + empty.className='muted'; + empty.textContent='No metadata available.'; + body.appendChild(empty); + } dlg.querySelector('#modal-title').textContent='Group: '+groupId; - dlg.querySelector('#modal-body').innerHTML=items.length - ?'
'+items.join('')+'
' - :'

No metadata available.

'; dlg.showModal(); }); })(); @@ -586,7 +604,14 @@ var tplId=btn.getAttribute('data-finding-why-btn'); var tpl=document.getElementById(tplId); if(!tpl)return; - body.innerHTML=tpl.innerHTML; + body.replaceChildren(); + if(tpl.content){ + body.appendChild(document.importNode(tpl.content,true)); + }else{ + Array.from(tpl.childNodes).forEach(function(node){ + body.appendChild(node.cloneNode(true)); + }); + } dlg.showModal(); }); })(); diff --git a/codeclone/report/html/sections/_overview.py b/codeclone/report/html/sections/_overview.py index c9ac47b0..26a78b23 100644 --- a/codeclone/report/html/sections/_overview.py +++ b/codeclone/report/html/sections/_overview.py @@ -14,6 +14,66 @@ from codeclone.utils import coerce as _coerce +from ...messages.overview import ( + ADOPTION_ADDED_SYMBOLS, + ADOPTION_API_DISABLED, + ADOPTION_API_SURFACE_LABEL, + ADOPTION_BREAKING_CHANGES, + ADOPTION_CLUSTER_DESC, + ADOPTION_CLUSTER_TITLE, + ADOPTION_COVERAGE_LABEL, + ADOPTION_DOCSTRINGS, + ADOPTION_ENABLE_VIA, + ADOPTION_ENABLE_VIA_FLAG, + ADOPTION_MODULES, + ADOPTION_PARAM_ANNOTATIONS, + ADOPTION_PUBLIC_SYMBOLS, + ADOPTION_RETURN_ANNOTATIONS, + ADOPTION_STRICT_MODE, + ADOPTION_STRICT_MODE_ENABLED, + ADOPTION_TYPED_AS_ANY, + CLUSTER_EXECUTIVE_SUMMARY, + CLUSTER_HEALTH_PROFILE, + CLUSTER_HEALTH_PROFILE_DESC, + CLUSTER_HEALTH_PROFILE_LABEL, + CLUSTER_HOTSPOTS_BY_DIRECTORY, + CLUSTER_HOTSPOTS_BY_DIRECTORY_DESC, + CLUSTER_ISSUE_BREAKDOWN, + CLUSTER_OVERLOADED_MODULES, + CLUSTER_OVERLOADED_MORE_CANDIDATES, + CLUSTER_OVERLOADED_TOP_CANDIDATES, + CLUSTER_RADAR_CAPTION, + CLUSTER_RADAR_CAPTION_SUFFIX, + CLUSTER_SOURCE_BREAKDOWN, + DIRECTORY_BUCKET_LABELS, + DIRECTORY_BUCKET_ORDER, + DIRECTORY_KIND_LABELS, + EXECUTIVE_HEALTH_SNAPSHOT_QUESTION, + EXECUTIVE_SCAN_SCOPE_DEFAULT, + EXECUTIVE_THRESHOLDS_PREFIX, + ISSUE_BREAKDOWN_EMPTY, + ISSUE_BREAKDOWN_ROW_LABELS, + KPI_CLONE_GROUPS, + KPI_DEAD_CODE, + KPI_DEP_CYCLES, + KPI_FINDINGS, + KPI_HEALTH, + KPI_HEALTH_NA, + KPI_HIGH_COMPLEXITY, + 
KPI_HIGH_COUPLING, + KPI_LOW_COHESION, + KPI_SUGGESTIONS, + KPI_TIP_CLONE_GROUPS, + KPI_TIP_DEAD_CODE, + KPI_TIP_DEP_CYCLES, + KPI_TIP_FINDINGS, + KPI_TIP_HIGH_COMPLEXITY, + KPI_TIP_HIGH_COUPLING, + KPI_TIP_LOW_COHESION, + KPI_TIP_SUGGESTIONS, + RADAR_DIMENSIONS, + RADAR_LABELS, +) from ..primitives.escape import _escape_html from ..widgets.badges import ( _inline_empty, @@ -38,36 +98,9 @@ _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence -_DIRECTORY_BUCKET_LABELS: dict[str, str] = { - "all": "All Findings", - "clones": "Clone Groups", - "structural": "Structural Findings", - "complexity": "High Complexity", - "cohesion": "Low Cohesion", - "coupling": "High Coupling", - "dead_code": "Dead Code", - "dependency": "Dependency Cycles", -} -_DIRECTORY_BUCKET_ORDER: tuple[str, ...] = ( - "all", - "clones", - "structural", - "complexity", - "cohesion", - "coupling", - "dead_code", - "dependency", -) -_DIRECTORY_KIND_LABELS: dict[str, str] = { - "clones": "clones", - "structural": "structural", - "complexity": "complexity", - "cohesion": "cohesion", - "coupling": "coupling", - "dead_code": "dead code", - "coverage": "coverage", - "dependency": "dependency", -} +_DIRECTORY_BUCKET_LABELS = DIRECTORY_BUCKET_LABELS +_DIRECTORY_BUCKET_ORDER = DIRECTORY_BUCKET_ORDER +_DIRECTORY_KIND_LABELS = DIRECTORY_KIND_LABELS def _health_gauge_html( @@ -76,8 +109,8 @@ def _health_gauge_html( """Render an SVG ring gauge for health score with optional baseline arc.""" if score < 0: return _stat_card( - "Health", - "n/a", + KPI_HEALTH, + KPI_HEALTH_NA, css_class="meta-item overview-health-card", glossary_tip_fn=glossary_tip, ) @@ -169,25 +202,8 @@ def _health_gauge_html( # Analytics: Health Radar (pure SVG) # --------------------------------------------------------------------------- -_RADAR_DIMENSIONS = ( - "clones", - "complexity", - "coupling", - "cohesion", - "dead_code", - "dependencies", - "coverage", -) - -_RADAR_LABELS = { - "clones": "Clones", - "complexity": 
"Complexity", - "coupling": "Coupling", - "cohesion": "Cohesion", - "dead_code": "Dead Code", - "dependencies": "Deps", - "coverage": "Coverage", -} +_RADAR_DIMENSIONS = RADAR_DIMENSIONS +_RADAR_LABELS = RADAR_LABELS _RADAR_CX, _RADAR_CY, _RADAR_R = 200.0, 200.0, 130.0 _RADAR_LABEL_R = 155.0 @@ -316,22 +332,24 @@ def _issue_breakdown_html( dep_cycles = len(_as_sequence(ctx.dependencies_map.get("cycles"))) structural = len(ctx.structural_findings) - # (key, label, count, color) raw_rows: list[tuple[str, str, int, str]] = [ - ("clones", "Clone Groups", ctx.clone_groups_total, "var(--error)"), - ("structural", "Structural", structural, "var(--warning)"), - ("complexity", "Complexity", complexity_high, "var(--warning)"), - ("cohesion", "Cohesion", cohesion_low, "var(--info)"), - ("coupling", "Coupling", coupling_high, "var(--info)"), - ("dead_code", "Dead Code", dead_total, "var(--text-muted)"), - ("dep_cycles", "Dep. Cycles", dep_cycles, "var(--text-muted)"), + (key, ISSUE_BREAKDOWN_ROW_LABELS[key], count, color) + for key, count, color in ( + ("clones", ctx.clone_groups_total, "var(--error)"), + ("structural", structural, "var(--warning)"), + ("complexity", complexity_high, "var(--warning)"), + ("cohesion", cohesion_low, "var(--info)"), + ("coupling", coupling_high, "var(--info)"), + ("dead_code", dead_total, "var(--text-muted)"), + ("dep_cycles", dep_cycles, "var(--text-muted)"), + ) ] # Filter out zeros — show only actual issues rows = [ (key, label, count, color) for key, label, count, color in raw_rows if count > 0 ] if not rows: - return _inline_empty("No issues detected", tone="good") + return _inline_empty(ISSUE_BREAKDOWN_EMPTY, tone="good") max_count = max(c for _, _, c, _ in rows) parts: list[str] = [] @@ -461,17 +479,17 @@ def _delta_or_none(key: str) -> str | None: rows = [ _fact_row( - "Param annotations", + ADOPTION_PARAM_ANNOTATIONS, _format_permille_pct(adoption_summary.get("param_permille")), delta=_delta_or_none("param_delta"), ), _fact_row( - 
"Return annotations", + ADOPTION_RETURN_ANNOTATIONS, _format_permille_pct(adoption_summary.get("return_permille")), delta=_delta_or_none("return_delta"), ), _fact_row( - "Docstrings", + ADOPTION_DOCSTRINGS, _format_permille_pct(adoption_summary.get("docstring_permille")), delta=_delta_or_none("docstring_delta"), ), @@ -480,7 +498,7 @@ def _delta_or_none(key: str) -> str | None: any_count = _as_int(adoption_summary.get("typing_any_count")) rows.append( _fact_row( - "Typed as Any", + ADOPTION_TYPED_AS_ANY, _format_count(any_count), value_cls="good" if any_count == 0 else "warn", ) @@ -492,17 +510,17 @@ def _delta_or_none(key: str) -> str | None: def _api_card_html(api_summary: Mapping[str, object]) -> str: if not bool(api_summary.get("enabled")): return ( - '
Disabled in this run.
' + f'
{ADOPTION_API_DISABLED}
' '
' - + _fact_row("Enable via", "--api-surface") + + _fact_row(ADOPTION_ENABLE_VIA, ADOPTION_ENABLE_VIA_FLAG) + "
" ) symbols = _as_int(api_summary.get("public_symbols")) modules = _as_int(api_summary.get("modules")) rows = [ - _fact_row("Public symbols", _format_count(symbols)), - _fact_row("Modules", _format_count(modules)), + _fact_row(ADOPTION_PUBLIC_SYMBOLS, _format_count(symbols)), + _fact_row(ADOPTION_MODULES, _format_count(modules)), ] if bool(api_summary.get("baseline_diff_available")): @@ -510,15 +528,19 @@ def _api_card_html(api_summary: Mapping[str, object]) -> str: added = _as_int(api_summary.get("added")) rows.append( _fact_row( - "Breaking changes", + ADOPTION_BREAKING_CHANGES, _format_count(breaking), value_cls="warn" if breaking > 0 else "good", ) ) - rows.append(_fact_row("Added symbols", _format_count(added))) + rows.append(_fact_row(ADOPTION_ADDED_SYMBOLS, _format_count(added))) if bool(api_summary.get("strict_types")): - rows.append(_fact_row("Strict mode", "enabled", value_cls="good")) + rows.append( + _fact_row( + ADOPTION_STRICT_MODE, ADOPTION_STRICT_MODE_ENABLED, value_cls="good" + ) + ) return '
' + "".join(rows) + "
" @@ -536,24 +558,21 @@ def _adoption_and_api_section(ctx: ReportContext) -> str: if adoption_summary: cards.append( overview_summary_item_html( - label="Adoption coverage", + label=ADOPTION_COVERAGE_LABEL, body_html=_adoption_card_html(adoption_summary), ) ) if api_summary: cards.append( overview_summary_item_html( - label="Public API surface", + label=ADOPTION_API_SURFACE_LABEL, body_html=_api_card_html(api_summary), ) ) return ( '
' - + overview_cluster_header( - "Adoption & API", - "Type/docstring adoption and public API surface are shown as facts, not style pressure.", - ) + + overview_cluster_header(ADOPTION_CLUSTER_TITLE, ADOPTION_CLUSTER_DESC) + '
' + "".join(cards) + "
" @@ -564,7 +583,7 @@ def _scan_scope_subtitle(ctx: ReportContext) -> str: """Build a subtitle string with scan-scope essentials for the Executive Summary header.""" inventory = _as_mapping(getattr(ctx, "inventory_map", {})) if not inventory: - return "Project-wide context derived from the full scanned root." + return EXECUTIVE_SCAN_SCOPE_DEFAULT files = _as_mapping(inventory.get("files")) code = _as_mapping(inventory.get("code")) @@ -586,7 +605,7 @@ def _scan_scope_subtitle(ctx: ReportContext) -> str: return scope_summary return ( f"{scope_summary}. " - "Thresholds: " + f"{EXECUTIVE_THRESHOLDS_PREFIX}" f"func {_as_int(analysis_profile.get('min_loc'))}/" f"{_as_int(analysis_profile.get('min_stmt'))} \u00b7 " f"block {_as_int(analysis_profile.get('block_min_loc'))}/" @@ -685,8 +704,8 @@ def _directory_hotspots_section(ctx: ReportContext) -> str: return ( '
' + overview_cluster_header( - "Hotspots by Directory", - "Directories with the highest concentration of findings by category.", + CLUSTER_HOTSPOTS_BY_DIRECTORY, + CLUSTER_HOTSPOTS_BY_DIRECTORY_DESC, ) + '
' + "".join(cards) @@ -747,15 +766,15 @@ def _overloaded_modules_section(ctx: ReportContext) -> str: right_html = "".join(rows_html[mid:]) return ( '
' - + overview_cluster_header("Overloaded Modules", subtitle) + + overview_cluster_header(CLUSTER_OVERLOADED_MODULES, subtitle) + '
' + overview_summary_item_html( - label="Top candidates", + label=CLUSTER_OVERLOADED_TOP_CANDIDATES, body_html='
' + left_html + "
", ) + ( overview_summary_item_html( - label="More candidates", + label=CLUSTER_OVERLOADED_MORE_CANDIDATES, body_html='
' + right_html + "
", @@ -913,71 +932,71 @@ def _baselined_detail( kpis = [ _stat_card( - "Clone Groups", + KPI_CLONE_GROUPS, ctx.clone_groups_total, detail=_clone_detail, - tip="Detected code clone groups by detection level", + tip=KPI_TIP_CLONE_GROUPS, delta_new=_new_clones, value_tone=_clone_tone, ), _stat_card( - "High Complexity", + KPI_HIGH_COMPLEXITY, complexity_high_risk, detail=_cx_detail, - tip="Functions with cyclomatic complexity above threshold", + tip=KPI_TIP_HIGH_COMPLEXITY, value_tone=_cx_tone, delta_new=_new_complexity, ), _stat_card( - "High Coupling", + KPI_HIGH_COUPLING, coupling_high_risk, detail=_cp_detail, - tip="Classes with high coupling between objects (CBO)", + tip=KPI_TIP_HIGH_COUPLING, value_tone=_cp_tone, delta_new=_new_coupling, ), _stat_card( - "Low Cohesion", + KPI_LOW_COHESION, cohesion_low, detail=_micro_badges( ("avg", cohesion_summary.get("average", "n/a")), ("max", cohesion_summary.get("max", "n/a")), ), - tip="Classes with low internal cohesion (high LCOM4)", + tip=KPI_TIP_LOW_COHESION, value_tone="good" if cohesion_low == 0 else "warn", ), _stat_card( - "Dep. 
Cycles", + KPI_DEP_CYCLES, dependency_cycle_count, detail=_cy_detail, - tip="Circular dependencies between project modules", + tip=KPI_TIP_DEP_CYCLES, value_tone=_cy_tone, delta_new=_new_cycles, ), _stat_card( - "Dead Code", + KPI_DEAD_CODE, dead_total, detail=_dc_detail, - tip="Potentially unused functions, classes, or imports", + tip=KPI_TIP_DEAD_CODE, value_tone=_dc_tone, delta_new=_new_dead, ), _stat_card( - "Findings", + KPI_FINDINGS, structural_count, detail=_micro_badges(("kinds", structural_kind_count)), - tip="Active structural findings reported in production code", + tip=KPI_TIP_FINDINGS, value_tone="good" if structural_count == 0 else "warn", ), _stat_card( - "Suggestions", + KPI_SUGGESTIONS, len(ctx.suggestions), detail=_micro_badges( ("clone", clone_suggestion_count), ("struct", structural_suggestion_count), ("metric", metrics_suggestion_count), ), - tip="Actionable recommendations derived from clones, findings, and metrics", + tip=KPI_TIP_SUGGESTIONS, value_tone="good" if not ctx.suggestions else "warn", ), ] @@ -998,14 +1017,14 @@ def _baselined_detail( scan_scope_subtitle = _scan_scope_subtitle(ctx) executive = ( '
' - + overview_cluster_header("Executive Summary", scan_scope_subtitle) + + overview_cluster_header(CLUSTER_EXECUTIVE_SUMMARY, scan_scope_subtitle) + '
' + overview_summary_item_html( - label="Issue breakdown", + label=CLUSTER_ISSUE_BREAKDOWN, body_html=_issue_breakdown_html(ctx, deltas=_issue_deltas), ) + overview_summary_item_html( - label="Source breakdown", + label=CLUSTER_SOURCE_BREAKDOWN, body_html=overview_source_breakdown_html( _as_mapping(ctx.overview_data.get("source_breakdown")) ), @@ -1019,7 +1038,7 @@ def _baselined_detail( return ( insight_block( - question="Current health snapshot", + question=EXECUTIVE_HEALTH_SNAPSHOT_QUESTION, answer=overview_answer, tone=overview_tone, ) @@ -1047,20 +1066,19 @@ def _analytics_section(ctx: ReportContext) -> str: radar_html = _health_radar_svg(dimensions) radar_legend = ( '
' - "Higher values indicate better code health." - " Red labels highlight dimensions below 60." + f"{CLUSTER_RADAR_CAPTION}{CLUSTER_RADAR_CAPTION_SUFFIX}" "
" ) return ( '
' + overview_cluster_header( - "Health Profile", - "Dimension scores across all quality axes.", + CLUSTER_HEALTH_PROFILE, + CLUSTER_HEALTH_PROFILE_DESC, ) + '
' + overview_summary_item_html( - label="Health profile", body_html=radar_html + radar_legend + label=CLUSTER_HEALTH_PROFILE_LABEL, body_html=radar_html + radar_legend ) + "
" ) diff --git a/codeclone/report/html/sections/_security_surfaces.py b/codeclone/report/html/sections/_security_surfaces.py index 0d853ece..18a8c608 100644 --- a/codeclone/report/html/sections/_security_surfaces.py +++ b/codeclone/report/html/sections/_security_surfaces.py @@ -12,6 +12,35 @@ from codeclone.utils import coerce as _coerce +from ...messages.security import ( + SECURITY_EMPTY_DETAIL, + SECURITY_EMPTY_TITLE, + SECURITY_REVIEW_BANNER_QUESTION, + SECURITY_REVIEW_COVERAGE_JOIN, + SECURITY_REVIEW_COVERAGE_UNAVAILABLE, + SECURITY_REVIEW_EVIDENCE, + SECURITY_REVIEW_EVIDENCE_VALUE, + SECURITY_REVIEW_HOW_TO_READ, + SECURITY_REVIEW_MEANING, + SECURITY_REVIEW_MEANING_VALUE, + SECURITY_REVIEW_NO_INVENTORY_ROWS, + SECURITY_REVIEW_NO_OVERLAP, + SECURITY_REVIEW_ORDER, + SECURITY_REVIEW_PRODUCTION_MODULE_ROWS, + SECURITY_REVIEW_SIGNAL, + SECURITY_REVIEW_SIGNAL_VALUE, + SECURITY_REVIEW_START_WITH, + SECURITY_REVIEW_THEN_REVIEW, + SECURITY_STAT_CATEGORIES, + SECURITY_STAT_EXACT_ITEMS, + SECURITY_STAT_PRODUCTION, + SECURITY_STAT_SURFACES, + SECURITY_TABLE_EMPTY, + SECURITY_TABLE_EMPTY_DESC, + SECURITY_TABLE_HEADERS, + SECURITY_TABLE_TITLE, + UNKNOWN_LABEL, +) from ..primitives.escape import _escape_html from ..primitives.location import location_file_target, relative_location_path from ..widgets.badges import _micro_badges, _stat_card, _tab_empty_info @@ -42,15 +71,12 @@ def render_security_surfaces_panel(ctx: ReportContext) -> str: ) if not items: return _tab_empty_info( - "No security-relevant capability surfaces matched the exact registry.", - detail_html=( - "This inventory is report-only and focuses on exact boundary " - "capabilities rather than vulnerability claims." 
- ), + SECURITY_EMPTY_TITLE, + detail_html=SECURITY_EMPTY_DETAIL, ) cards = [ _stat_card( - "Surfaces", + SECURITY_STAT_SURFACES, _as_int(summary.get("items")), detail=_micro_badges(("report", "only"), ("evidence", "exact")), value_tone="warn" if _as_int(summary.get("items")) > 0 else "muted", @@ -58,21 +84,21 @@ def render_security_surfaces_panel(ctx: ReportContext) -> str: glossary_tip_fn=glossary_tip, ), _stat_card( - "Categories", + SECURITY_STAT_CATEGORIES, _as_int(summary.get("category_count")), detail=_micro_badges(("modules", _as_int(summary.get("modules")))), css_class="meta-item", glossary_tip_fn=glossary_tip, ), _stat_card( - "Production", + SECURITY_STAT_PRODUCTION, _as_int(summary.get("production")), detail=_micro_badges(("tests", _as_int(summary.get("tests")))), css_class="meta-item", glossary_tip_fn=glossary_tip, ), _stat_card( - "Exact items", + SECURITY_STAT_EXACT_ITEMS, _as_int(summary.get("exact_items")), detail=_micro_badges(("fixtures", _as_int(summary.get("fixtures")))), css_class="meta-item", @@ -82,22 +108,12 @@ def render_security_surfaces_panel(ctx: ReportContext) -> str: return ( f'
{"".join(cards)}
' + _security_surfaces_context_html(ctx, items) - + '

Security-relevant capability inventory

' + + f'

{SECURITY_TABLE_TITLE}

' + render_rows_table( - headers=( - "Category", - "Capability", - "Evidence", - "Source", - "Location", - "Review", - ), + headers=SECURITY_TABLE_HEADERS, rows=_security_surface_rows(ctx, items), - empty_message="No exact security surfaces are available.", - empty_description=( - "CodeClone inventories trust-boundary capabilities but does not " - "claim vulnerabilities or exploitability." - ), + empty_message=SECURITY_TABLE_EMPTY, + empty_description=SECURITY_TABLE_EMPTY_DESC, raw_html_headers=("Location",), ctx=ctx, ) @@ -117,7 +133,7 @@ def _security_surface_rows( ( _humanize(str(item.get("category", ""))), _humanize(str(item.get("capability", ""))), - str(item.get("evidence_symbol", "")).strip() or "(unknown)", + str(item.get("evidence_symbol", "")).strip() or UNKNOWN_LABEL, _humanize(str(item.get("source_kind", ""))), _location_cell_html(ctx, item), _review_cell_text(ctx, item, coverage_index=coverage_index), @@ -155,21 +171,21 @@ def _security_surfaces_context_html( review_order_rows = _security_review_order_rows(ctx, items) return ( '
' - '
How should I review this inventory?
' + f'
{SECURITY_REVIEW_BANNER_QUESTION}
' '
' '
' + overview_summary_item_html( - label="How to read", + label=SECURITY_REVIEW_HOW_TO_READ, body_html=_fact_list_html( ( - ("Signal", "boundary inventory", None), - ("Evidence", "exact imports/calls/builtins", None), - ("Meaning", "inventory, not vulnerability proof", None), + (SECURITY_REVIEW_SIGNAL, SECURITY_REVIEW_SIGNAL_VALUE, None), + (SECURITY_REVIEW_EVIDENCE, SECURITY_REVIEW_EVIDENCE_VALUE, None), + (SECURITY_REVIEW_MEANING, SECURITY_REVIEW_MEANING_VALUE, None), ) ), ) + overview_summary_item_html( - label="Review order", + label=SECURITY_REVIEW_ORDER, body_html=_fact_list_html(review_order_rows), ) + "
" @@ -201,17 +217,17 @@ def _security_review_order_rows( return ( ( - "Start with", + SECURITY_REVIEW_START_WITH, ( f"{production_callable_count} " f"{_pluralize(production_callable_count, 'production callable')}" if production_callable_count > 0 - else "production module rows only" + else SECURITY_REVIEW_PRODUCTION_MODULE_ROWS ), "warn" if production_callable_count > 0 else None, ), ( - "Coverage join", + SECURITY_REVIEW_COVERAGE_JOIN, _coverage_join_review_text( ctx, overlap_total=coverage_overlap_total, @@ -221,12 +237,12 @@ def _security_review_order_rows( "warn" if coverage_overlap_total > 0 else None, ), ( - "Then review", + SECURITY_REVIEW_THEN_REVIEW, ( f"{non_callable_count} " f"{_pluralize(non_callable_count, 'module/class inventory row')}" if non_callable_count > 0 - else "no inventory-only rows" + else SECURITY_REVIEW_NO_INVENTORY_ROWS ), None, ), @@ -243,9 +259,9 @@ def _coverage_join_review_text( coverage_join = _as_mapping(_as_mapping(ctx.metrics_map).get("coverage_join")) coverage_summary = _as_mapping(coverage_join.get("summary")) if str(coverage_summary.get("status", "")).strip() != "ok": - return "unavailable for this run" + return SECURITY_REVIEW_COVERAGE_UNAVAILABLE if overlap_total <= 0: - return "no overlap in current review set" + return SECURITY_REVIEW_NO_OVERLAP parts = [f"{overlap_total} {_pluralize(overlap_total, 'overlap')}"] if scope_gaps > 0: parts.append(f"{scope_gaps} {_pluralize(scope_gaps, 'scope gap')}") diff --git a/codeclone/report/html/sections/_structural.py b/codeclone/report/html/sections/_structural.py index e2ba87d9..24b8db63 100644 --- a/codeclone/report/html/sections/_structural.py +++ b/codeclone/report/html/sections/_structural.py @@ -27,6 +27,7 @@ report_location_from_structural_occurrence, ) from ...findings import _dedupe_items, _finding_scope_text, _spread +from ...messages import explain as explain_msgs from ...suggestions import ( structural_action_steps, structural_has_separate_suggestion, @@ -48,11 +49,7 @@ 
"render_structural_panel", ] -_KIND_LABEL: dict[str, str] = { - STRUCTURAL_KIND_DUPLICATED_BRANCHES: "Duplicated branches", - STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: "Clone guard/exit divergence", - STRUCTURAL_KIND_CLONE_COHORT_DRIFT: "Clone cohort drift", -} +_KIND_LABEL: dict[str, str] = dict(explain_msgs.STRUCTURAL_KIND_LABELS) def _sort_key_group(g: StructuralFindingGroup) -> tuple[str, int, str]: @@ -154,35 +151,26 @@ def _finding_reason_list_html( spread = _spread(items) clone_cohort_reasons = { STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: [ - ( - f"{len(items)} divergent clone members were detected after " - "stable sorting and deduplication." + explain_msgs.GUARD_DIVERGENCE_MEMBERS.format(count=len(items)), + explain_msgs.GUARD_DIVERGENCE_COMPARE, + explain_msgs.GUARD_DIVERGENCE_COHORT.format( + cohort_id=group.signature.get("cohort_id", "unknown"), + majority_guard_count=group.signature.get("majority_guard_count", "0"), ), - ( - "Members were compared by entry-guard count/profile, terminal " - "kind, and side-effect-before-guard marker." - ), - ( - f"Cohort id: {group.signature.get('cohort_id', 'unknown')}; " - "majority guard count: " - f"{group.signature.get('majority_guard_count', '0')}." - ), - ( - f"Spread includes {spread['functions']} " - f"{'function' if spread['functions'] == 1 else 'functions'} in " - f"{spread['files']} {'file' if spread['files'] == 1 else 'files'}." - ), - "This is a report-only finding and does not affect clone gating.", + explain_msgs.fmt_spread(spread["functions"], spread["files"]), + explain_msgs.REPORT_ONLY_NO_GATING, ], STRUCTURAL_KIND_CLONE_COHORT_DRIFT: [ - f"{len(items)} clone members diverge from the cohort majority profile.", - f"Drift fields: {group.signature.get('drift_fields', 'n/a')}.", - ( - f"Cohort id: {group.signature.get('cohort_id', 'unknown')} with " - f"arity {group.signature.get('cohort_arity', 'n/a')}." 
+ explain_msgs.DRIFT_MEMBERS.format(count=len(items)), + explain_msgs.DRIFT_FIELDS.format( + drift_fields=group.signature.get("drift_fields", "n/a") ), - "Majority profile is compared deterministically with lexical tie-breaks.", - "This is a report-only finding and does not affect clone gating.", + explain_msgs.DRIFT_COHORT.format( + cohort_id=group.signature.get("cohort_id", "unknown"), + cohort_arity=group.signature.get("cohort_arity", "n/a"), + ), + explain_msgs.DRIFT_MAJORITY, + explain_msgs.REPORT_ONLY_NO_GATING, ], } if group.finding_kind in clone_cohort_reasons: @@ -191,27 +179,15 @@ def _finding_reason_list_html( stmt_seq = group.signature.get("stmt_seq", "n/a") terminal = group.signature.get("terminal", "n/a") reasons = [ - ( - f"{len(items)} non-overlapping branch bodies remained after " - "deduplication and overlap pruning." - ), - ( - f"All occurrences belong to {spread['functions']} " - f"{'function' if spread['functions'] == 1 else 'functions'} in " - f"{spread['files']} {'file' if spread['files'] == 1 else 'files'}." - ), - ( - f"The detector grouped them by structural signature: " - f"stmt seq: {stmt_seq}, terminal: {terminal}." - ), - ( - "Call/raise buckets and nested control-flow flags must also match " - "for branches to land in the same finding group." - ), - ( - "This is a local, report-only hint. It does not change clone groups " - "or CI verdicts." 
+ explain_msgs.BRANCH_BODIES_REMAINED.format(count=len(items)), + explain_msgs.fmt_spread( + spread["functions"], + spread["files"], + template=explain_msgs.SPREAD_ALL_OCCURRENCES, ), + explain_msgs.SIGNATURE_GROUPED.format(stmt_seq=stmt_seq, terminal=terminal), + explain_msgs.SIGNATURE_MATCH_RULE, + explain_msgs.REPORT_ONLY_LOCAL_HINT, ] return _render_reason_list_html(reasons) @@ -227,16 +203,8 @@ def _finding_matters_html( spread = _spread(items) count = len(items) special_messages = { - STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: ( - "Members of one function-clone cohort diverged in guard/exit behavior. " - "This often points to a partial fix where one path was updated and " - "other siblings were left unchanged." - ), - STRUCTURAL_KIND_CLONE_COHORT_DRIFT: ( - "Members of one function-clone cohort drifted from a stable majority " - "profile (terminal, guard, try/finally, side-effect order). Review " - "whether divergence is intentional." - ), + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: explain_msgs.IMPACT_GUARD_DIVERGENCE, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT: explain_msgs.IMPACT_COHORT_DRIFT, } if group.finding_kind in special_messages: return _finding_matters_paragraph(special_messages[group.finding_kind]) @@ -244,30 +212,20 @@ def _finding_matters_html( terminal = str(group.signature.get("terminal", "")).strip() stmt_seq = str(group.signature.get("stmt_seq", "")).strip() if spread["functions"] > 1 or spread["files"] > 1: - message = ( - f"This pattern repeats across {spread['functions']} functions and " - f"{spread['files']} files, so the same branch policy may be copied " - "between multiple code paths." + message = explain_msgs.IMPACT_CROSS_FUNCTION.format( + functions=spread["functions"], + files=spread["files"], ) else: terminal_messages = { - "raise": ( - "This group points to repeated guard or validation exits inside one " - "function. Consolidating the shared exit policy usually reduces " - "branch noise." 
- ), - "return": ( - "This group points to repeated return-path logic inside one function. " - "A helper can often keep the branch predicate local while sharing " - "the emitted behavior." - ), + "raise": explain_msgs.IMPACT_TERMINAL_RAISE, + "return": explain_msgs.IMPACT_TERMINAL_RETURN, } message = terminal_messages.get( terminal, - ( - f"This group reports {count} branches with the same local shape " - f"({stmt_seq or 'unknown signature'}). Review whether the local " - "branch logic should stay duplicated or be simplified in place." + explain_msgs.IMPACT_DEFAULT_BRANCHES.format( + count=count, + signature=stmt_seq or "unknown signature", ), ) return _finding_matters_paragraph(message) @@ -320,7 +278,8 @@ def _finding_inline_action_html( primary_action = action_steps[0] return ( '
' - 'Suggested action' + '' + f"{explain_msgs.STRUCTURAL_INLINE_ACTION_LABEL}" f'{_escape_html(primary_action)}' "
" ) @@ -346,47 +305,51 @@ def _finding_why_template_html( for idx, item in enumerate(preview_items) ) if group.finding_kind == STRUCTURAL_KIND_DUPLICATED_BRANCHES: - showing_note = ( - f"Showing the first {len(preview_items)} matching branches from " - f"{len(items)} total occurrences." + showing_note = explain_msgs.SHOWING_BRANCHES.format( + shown=len(preview_items), + total=len(items), ) - reported_subject = "structurally matching branch bodies" + reported_subject = explain_msgs.SUBJECT_BRANCH_BODIES elif group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - showing_note = ( - f"Showing the first {len(preview_items)} cohort members from " - f"{len(items)} divergent occurrences." + showing_note = explain_msgs.SHOWING_GUARD_DIVERGENCE.format( + shown=len(preview_items), + total=len(items), ) - reported_subject = "clone cohort members with guard/exit divergence" + reported_subject = explain_msgs.SUBJECT_GUARD_DIVERGENCE elif group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - showing_note = ( - f"Showing the first {len(preview_items)} cohort members from " - f"{len(items)} divergent occurrences." + showing_note = explain_msgs.SHOWING_COHORT_DRIFT.format( + shown=len(preview_items), + total=len(items), ) - reported_subject = "clone cohort members that drift from majority profile" + reported_subject = explain_msgs.SUBJECT_COHORT_DRIFT else: - showing_note = ( - f"Showing the first {len(preview_items)} matching branches from " - f"{len(items)} total occurrences." + showing_note = explain_msgs.SHOWING_BRANCHES.format( + shown=len(preview_items), + total=len(items), ) - reported_subject = "structurally matching branch bodies" + reported_subject = explain_msgs.SUBJECT_BRANCH_BODIES + rationale_intro = explain_msgs.DETECTION_RATIONALE_INTRO.format( + count=len(items), + subject=reported_subject, + scope=_finding_scope_text(items), + ) return ( '
' - '
Impact
' + f'
{explain_msgs.STRUCTURAL_SECTION_IMPACT}
' f"{_finding_matters_html(group, items)}" "
" '
' - '
Detection Rationale
' - f'

CodeClone reported this group because it found ' - f"{len(items)} {reported_subject} " - f"{_escape_html(_finding_scope_text(items))}.

" + f'
' + f"{explain_msgs.STRUCTURAL_SECTION_DETECTION_RATIONALE}
" + f'

{_escape_html(rationale_intro)}

' f"{_finding_reason_list_html(group, items)}" "
" '
' - '
Signature
' + f'
{explain_msgs.STRUCTURAL_SECTION_SIGNATURE}
' f'
{_signature_chips_html(group.signature)}
' "
" '
' - '
Examples
' + f'
{explain_msgs.STRUCTURAL_SECTION_EXAMPLES}
' f'
{_escape_html(showing_note)}
' f'
{examples_html}
' "
" @@ -463,7 +426,7 @@ def _render_finding_card( f"{spread['functions']} {func_word} \u00b7 {spread['files']} {file_word}" f'' + f'aria-haspopup="dialog">{explain_msgs.STRUCTURAL_WHY_BUTTON}' "
" '
' f'
{context_chips}
' @@ -491,15 +454,17 @@ def build_structural_findings_html_panel( ) -> str: intro = ( '
' - '
What are structural findings?
' - '
Repeated non-overlapping branch-body shapes ' - "detected inside individual functions. These are local, report-only " - "refactoring hints and do not affect clone detection or CI verdicts.
" + '
' + + explain_msgs.STRUCTURAL_INTRO_QUESTION + + "
" + '
' + + _escape_html(explain_msgs.STRUCTURAL_INTRO_ANSWER) + + "
" "
" ) normalized_groups = normalize_structural_findings(groups) if not normalized_groups: - return intro + _tab_empty("No structural findings detected.") + return intro + _tab_empty(explain_msgs.STRUCTURAL_EMPTY) resolved_file_cache = file_cache if file_cache is not None else _FileCache() why_templates: list[str] = [] @@ -522,7 +487,7 @@ def build_structural_findings_html_panel( sub_tabs: list[tuple[str, str, int, str]] = [ ( "all", - "All", + explain_msgs.TAB_LABEL_ALL, len(all_cards), f'
{"".join(all_cards)}
', ), diff --git a/codeclone/report/html/widgets/glossary.py b/codeclone/report/html/widgets/glossary.py index 19ab1728..70dc81dd 100644 --- a/codeclone/report/html/widgets/glossary.py +++ b/codeclone/report/html/widgets/glossary.py @@ -4,95 +4,13 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -"""Tooltip glossary for report table headers and stat cards.""" +"""HTML glossary tooltip helper.""" from __future__ import annotations +from ...messages.glossary import GLOSSARY from ..primitives.escape import _escape_html -GLOSSARY: dict[str, str] = { - # Complexity - "function": "Fully-qualified function or method name", - "class": "Fully-qualified class name", - "name": "Symbol name (function, class, or variable)", - "file": "Source file path relative to scan root", - "location": "File and line range where the symbol is defined", - "cc": "Cyclomatic complexity — number of independent execution paths", - "nesting": "Maximum nesting depth of control-flow statements", - "risk": "Risk level based on metric thresholds (low / medium / high)", - # Coupling / cohesion - "cbo": "Coupling Between Objects — number of classes this class depends on", - "coupled classes": "Resolved class dependencies used to compute CBO for this class", - "lcom4": "Lack of Cohesion of Methods — connected components in method/field graph", - "methods": "Number of methods defined in the class", - "fields": "Number of instance variables (attributes) in the class", - # Dead code - "line": "Source line number where the symbol starts", - "kind": "Symbol type: function, class, import, or variable", - "confidence": "Detection confidence (low / medium / high / critical)", - # Dependencies - "longest chain": "Longest transitive import chain between modules", - "length": "Number of modules in the dependency chain", - "cycle": "Circular import dependency between modules", - # Suggestions - "priority": "Computed priority score (higher = more urgent)", - "severity": "Issue severity: 
critical, warning, or info", - "category": ( - "Metric category: clone, complexity, coupling, cohesion, dead_code, dependency" - ), - "title": "Brief description of the suggested improvement", - "effort": "Estimated effort to fix: easy, moderate, or hard", - "steps": "Actionable steps to resolve the issue", - # Dependency stat cards - "modules": "Total number of Python modules analyzed", - "edges": "Total number of import relationships between modules", - "max depth": ( - "Longest internal transitive import chain; compare with avg and p95 depth" - ), - "cycles": "Number of circular import dependencies detected", - # Complexity stat cards - "high-risk functions": ( - "Functions with cyclomatic complexity above the high-risk threshold" - ), - "max cc": "Highest cyclomatic complexity value among all analyzed functions", - "avg cc": "Average cyclomatic complexity across all analyzed functions", - "deep nesting": ( - "Functions with nesting depth exceeding recommended threshold (> 4)" - ), - # Coupling stat cards - "high-coupling classes": "Classes with CBO above the high-risk threshold", - "max cbo": "Highest Coupling Between Objects value among all classes", - "avg cbo": "Average CBO across all analyzed classes", - "medium risk": "Items at medium risk level — worth reviewing but not critical", - # Cohesion stat cards - "low-cohesion classes": ( - "Classes with LCOM4 > 1, indicating multiple responsibilities" - ), - "max lcom4": "Highest Lack of Cohesion value among all classes", - "high risk": "Items at high risk level requiring attention", - # Overloaded module stat cards - "overloaded": ( - "Modules exceeding acceptable thresholds for size, complexity, or coupling" - ), - "critical": "Items with critical status requiring immediate attention", - "max score": "Highest overload score among all modules", - "avg loc": "Average lines of code per module", - # Dead code stat cards - "candidates": "Total dead code candidates detected by static analysis", - "high confidence": 
"Dead code items detected with high or critical confidence", - "suppressed": "Dead code candidates excluded by suppression rules", - "hit rate": "Percentage of high-confidence items among all candidates", - # Clone stat cards - "clone groups": "Distinct duplication patterns, each containing 2+ code fragments", - "instances": "Total duplicated code fragments across all groups", - "new groups": "Clone groups not present in the previous baseline", - "high spread": "Clone groups spanning multiple files", - # Suggestion stat cards - "total suggestions": "Total actionable improvement suggestions generated", - "warning": "Suggestions with warning severity worth reviewing", - "easy wins": "Actionable suggestions with low estimated effort", -} - def glossary_tip(label: str) -> str: """Return a tooltip ```` for *label*, or ``''`` if unknown.""" diff --git a/codeclone/report/messages/__init__.py b/codeclone/report/messages/__init__.py new file mode 100644 index 00000000..26318f3d --- /dev/null +++ b/codeclone/report/messages/__init__.py @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""User-facing report copy (HTML tooltips, explainability prose).""" + +from __future__ import annotations diff --git a/codeclone/report/messages/chrome.py b/codeclone/report/messages/chrome.py new file mode 100644 index 00000000..dc7d5c78 --- /dev/null +++ b/codeclone/report/messages/chrome.py @@ -0,0 +1,49 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""HTML report chrome: tabs, modals, footer.""" + +from __future__ import annotations + +from typing import Final + +REPORT_TITLE_DEFAULT: Final = "CodeClone Report" +BRAND_TITLE: Final = "CodeClone Report" + +TAB_OVERVIEW: Final = "Overview" +TAB_CLONES: Final = "Clones" +TAB_QUALITY: Final = "Quality" +TAB_DEPENDENCIES: Final = "Dependencies" +TAB_DEAD_CODE: Final = "Dead Code" +TAB_SUGGESTIONS: Final = "Suggestions" +TAB_FINDINGS: Final = "Findings" + +TABLIST_ARIA_LABEL: Final = "Report sections" +BADGE_BUTTON_LABEL: Final = "Get Badge" +MODAL_FINDING_TITLE: Final = "Finding Details" +MODAL_FINDING_CLOSE: Final = "Close" +MODAL_BADGE_TITLE: Final = "Get Badge" +THEME_TOGGLE_LABEL: Final = "Toggle theme" +THEME_BUTTON_TEXT: Final = "Theme" +FOOTER_DOCS: Final = "Docs" +FOOTER_REPORT_ISSUE: Final = "Report Issue" +FOOTER_BRAND: Final = "CodeClone" + +IDE_PICKER_LABEL: Final = "IDE" +IDE_PICKER_TITLE: Final = "Open in IDE" +PROVENANCE_ARIA_LABEL: Final = "Report Provenance" +PROVENANCE_TITLE_PREFIX: Final = "Report Provenance — " + +FOOTER_SCHEMA_REPORT: Final = "Report schema " +FOOTER_SCHEMA_BASELINE: Final = "Baseline schema " +FOOTER_SCHEMA_CACHE: Final = "Cache schema " + +BADGE_TAB_GRADE: Final = "Grade only" +BADGE_TAB_FULL: Final = "Score + Grade" +BADGE_DISCLAIMER: Final = "Badge reflects the current report snapshot." +BADGE_FIELD_MARKDOWN: Final = "Markdown" +BADGE_FIELD_HTML: Final = "HTML" +BADGE_COPY: Final = "Copy" diff --git a/codeclone/report/messages/explain.py b/codeclone/report/messages/explain.py new file mode 100644 index 00000000..2bf89a47 --- /dev/null +++ b/codeclone/report/messages/explain.py @@ -0,0 +1,147 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Structural finding explainability copy.""" + +from __future__ import annotations + +from typing import Final + +from ...domain.findings import ( + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, +) + +STRUCTURAL_KIND_LABELS: Final[dict[str, str]] = { + STRUCTURAL_KIND_DUPLICATED_BRANCHES: "Duplicated branches", + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: "Clone guard/exit divergence", + STRUCTURAL_KIND_CLONE_COHORT_DRIFT: "Clone cohort drift", +} + +STRUCTURAL_INTRO_QUESTION: Final = "What are structural findings?" +STRUCTURAL_INTRO_ANSWER: Final = ( + "Repeated non-overlapping branch-body shapes detected inside individual " + "functions. These are local, report-only refactoring hints and do not " + "affect clone detection or CI verdicts." +) +STRUCTURAL_EMPTY: Final = "No structural findings detected." +STRUCTURAL_WHY_BUTTON: Final = "Why?" +STRUCTURAL_INLINE_ACTION_LABEL: Final = "Suggested action" +STRUCTURAL_SECTION_IMPACT: Final = "Impact" +STRUCTURAL_SECTION_DETECTION_RATIONALE: Final = "Detection Rationale" +STRUCTURAL_SECTION_SIGNATURE: Final = "Signature" +STRUCTURAL_SECTION_EXAMPLES: Final = "Examples" +TAB_LABEL_ALL: Final = "All" + +REPORT_ONLY_NO_GATING: Final = ( + "This is a report-only finding and does not affect clone gating." +) +REPORT_ONLY_LOCAL_HINT: Final = ( + "This is a local, report-only hint. It does not change clone groups or CI verdicts." +) + +GUARD_DIVERGENCE_MEMBERS: Final = ( + "{count} divergent clone members were detected after " + "stable sorting and deduplication." +) +GUARD_DIVERGENCE_COMPARE: Final = ( + "Members were compared by entry-guard count/profile, terminal " + "kind, and side-effect-before-guard marker." +) +GUARD_DIVERGENCE_COHORT: Final = ( + "Cohort id: {cohort_id}; majority guard count: {majority_guard_count}." 
+) +DRIFT_MEMBERS: Final = "{count} clone members diverge from the cohort majority profile." +DRIFT_FIELDS: Final = "Drift fields: {drift_fields}." +DRIFT_COHORT: Final = "Cohort id: {cohort_id} with arity {cohort_arity}." +DRIFT_MAJORITY: Final = ( + "Majority profile is compared deterministically with lexical tie-breaks." +) + +BRANCH_BODIES_REMAINED: Final = ( + "{count} non-overlapping branch bodies remained after " + "deduplication and overlap pruning." +) +SIGNATURE_GROUPED: Final = ( + "The detector grouped them by structural signature: " + "stmt seq: {stmt_seq}, terminal: {terminal}." +) +SIGNATURE_MATCH_RULE: Final = ( + "Call/raise buckets and nested control-flow flags must also match " + "for branches to land in the same finding group." +) + +SPREAD_INCLUDES: Final = ( + "Spread includes {functions} {func_word} in {files} {file_word}." +) +SPREAD_ALL_OCCURRENCES: Final = ( + "All occurrences belong to {functions} {func_word} in {files} {file_word}." +) + +IMPACT_GUARD_DIVERGENCE: Final = ( + "Members of one function-clone cohort diverged in guard/exit behavior. " + "This often points to a partial fix where one path was updated and " + "other siblings were left unchanged." +) +IMPACT_COHORT_DRIFT: Final = ( + "Members of one function-clone cohort drifted from a stable majority " + "profile (terminal, guard, try/finally, side-effect order). Review " + "whether divergence is intentional." +) +IMPACT_CROSS_FUNCTION: Final = ( + "This pattern repeats across {functions} functions and " + "{files} files, so the same branch policy may be copied " + "between multiple code paths." +) +IMPACT_TERMINAL_RAISE: Final = ( + "This group points to repeated guard or validation exits inside one " + "function. Consolidating the shared exit policy usually reduces " + "branch noise." +) +IMPACT_TERMINAL_RETURN: Final = ( + "This group points to repeated return-path logic inside one function. 
" + "A helper can often keep the branch predicate local while sharing " + "the emitted behavior." +) +IMPACT_DEFAULT_BRANCHES: Final = ( + "This group reports {count} branches with the same local shape " + "({signature}). Review whether the local branch logic should stay " + "duplicated or be simplified in place." +) + +DETECTION_RATIONALE_INTRO: Final = ( + "CodeClone reported this group because it found {count} {subject} {scope}." +) + +SHOWING_BRANCHES: Final = ( + "Showing the first {shown} matching branches from {total} total occurrences." +) +SHOWING_GUARD_DIVERGENCE: Final = ( + "Showing the first {shown} cohort members from {total} divergent occurrences." +) +SHOWING_COHORT_DRIFT: Final = ( + "Showing the first {shown} cohort members from {total} divergent occurrences." +) + +SUBJECT_BRANCH_BODIES: Final = "structurally matching branch bodies" +SUBJECT_GUARD_DIVERGENCE: Final = "clone cohort members with guard/exit divergence" +SUBJECT_COHORT_DRIFT: Final = "clone cohort members that drift from majority profile" + +EXAMPLE_LABEL_AB: Final = ("A", "B") + + +def plural_word(count: int, singular: str, plural: str) -> str: + return singular if count == 1 else plural + + +def fmt_spread(functions: int, files: int, *, template: str = SPREAD_INCLUDES) -> str: + return template.format( + functions=functions, + func_word=plural_word(functions, "function", "functions"), + files=files, + file_word=plural_word(files, "file", "files"), + ) diff --git a/codeclone/report/messages/gates.py b/codeclone/report/messages/gates.py new file mode 100644 index 00000000..4efe30b6 --- /dev/null +++ b/codeclone/report/messages/gates.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Metric gate reason prefix strings (generation and parsing).""" + +from __future__ import annotations + +from typing import Final + +GATE_REASON_NEW_HIGH_RISK_FUNCTIONS: Final = ( + "New high-risk functions vs metrics baseline: " +) +GATE_REASON_NEW_HIGH_COUPLING: Final = "New high-coupling classes vs metrics baseline: " +GATE_REASON_NEW_CYCLES: Final = "New dependency cycles vs metrics baseline: " +GATE_REASON_NEW_DEAD_CODE: Final = "New dead code items vs metrics baseline: " +GATE_REASON_HEALTH_REGRESSION: Final = ( + "Health score regressed vs metrics baseline: delta=" +) +GATE_REASON_TYPING_REGRESSION: Final = "Typing coverage regressed vs metrics baseline: " +GATE_REASON_DOCSTRING_REGRESSION: Final = ( + "Docstring coverage regressed vs metrics baseline: delta=" +) +GATE_REASON_API_BREAKING: Final = "Public API breaking changes vs metrics baseline: " +GATE_REASON_COVERAGE_HOTSPOTS: Final = "Coverage hotspots detected: " +GATE_REASON_CYCLES_DETECTED: Final = "Dependency cycles detected: " +GATE_REASON_DEAD_CODE_DETECTED: Final = "Dead code detected (high confidence): " +GATE_REASON_COMPLEXITY_THRESHOLD: Final = "Complexity threshold exceeded: " +GATE_REASON_COUPLING_THRESHOLD: Final = "Coupling threshold exceeded: " +GATE_REASON_COHESION_THRESHOLD: Final = "Cohesion threshold exceeded: " +GATE_REASON_HEALTH_THRESHOLD: Final = "Health score below threshold: " +GATE_REASON_TYPING_THRESHOLD: Final = "Typing coverage below threshold: " +GATE_REASON_DOCSTRING_THRESHOLD: Final = "Docstring coverage below threshold: " + +GATE_SUFFIX_CYCLES: Final = " cycle(s)" +GATE_SUFFIX_ITEMS: Final = " item(s)" + +GATE_FAILURE_HEADER: Final = "GATING FAILURE [{code}]" diff --git a/codeclone/report/messages/glossary.py b/codeclone/report/messages/glossary.py new file mode 100644 index 00000000..3f0461c5 --- /dev/null +++ b/codeclone/report/messages/glossary.py @@ -0,0 +1,92 @@ +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""HTML glossary term definitions for report table headers and stat cards.""" + +from __future__ import annotations + +GLOSSARY: dict[str, str] = { + # Complexity + "function": "Fully-qualified function or method name", + "class": "Fully-qualified class name", + "name": "Symbol name (function, class, or variable)", + "file": "Source file path relative to scan root", + "location": "File and line range where the symbol is defined", + "cc": "Cyclomatic complexity — number of independent execution paths", + "nesting": "Maximum nesting depth of control-flow statements", + "risk": "Risk level based on metric thresholds (low / medium / high)", + # Coupling / cohesion + "cbo": "Coupling Between Objects — number of classes this class depends on", + "coupled classes": "Resolved class dependencies used to compute CBO for this class", + "lcom4": "Lack of Cohesion of Methods — connected components in method/field graph", + "methods": "Number of methods defined in the class", + "fields": "Number of instance variables (attributes) in the class", + # Dead code + "line": "Source line number where the symbol starts", + "kind": "Symbol type: function, class, import, or variable", + "confidence": "Detection confidence (low / medium / high / critical)", + # Dependencies + "longest chain": "Longest transitive import chain between modules", + "length": "Number of modules in the dependency chain", + "cycle": "Circular import dependency between modules", + # Suggestions + "priority": "Computed priority score (higher = more urgent)", + "severity": "Issue severity: critical, warning, or info", + "category": ( + "Metric category: clone, complexity, coupling, cohesion, dead_code, dependency" + ), + "title": "Brief description of the suggested 
improvement", + "effort": "Estimated effort to fix: easy, moderate, or hard", + "steps": "Actionable steps to resolve the issue", + # Dependency stat cards + "modules": "Total number of Python modules analyzed", + "edges": "Total number of import relationships between modules", + "max depth": ( + "Longest internal transitive import chain; compare with avg and p95 depth" + ), + "cycles": "Number of circular import dependencies detected", + # Complexity stat cards + "high-risk functions": ( + "Functions with cyclomatic complexity above the high-risk threshold" + ), + "max cc": "Highest cyclomatic complexity value among all analyzed functions", + "avg cc": "Average cyclomatic complexity across all analyzed functions", + "deep nesting": ( + "Functions with nesting depth exceeding recommended threshold (> 4)" + ), + # Coupling stat cards + "high-coupling classes": "Classes with CBO above the high-risk threshold", + "max cbo": "Highest Coupling Between Objects value among all classes", + "avg cbo": "Average CBO across all analyzed classes", + "medium risk": "Items at medium risk level — worth reviewing but not critical", + # Cohesion stat cards + "low-cohesion classes": ( + "Classes with LCOM4 > 1, indicating multiple responsibilities" + ), + "max lcom4": "Highest Lack of Cohesion value among all classes", + "high risk": "Items at high risk level requiring attention", + # Overloaded module stat cards + "overloaded": ( + "Modules exceeding acceptable thresholds for size, complexity, or coupling" + ), + "critical": "Items with critical status requiring immediate attention", + "max score": "Highest overload score among all modules", + "avg loc": "Average lines of code per module", + # Dead code stat cards + "candidates": "Total dead code candidates detected by static analysis", + "high confidence": "Dead code items detected with high or critical confidence", + "suppressed": "Dead code candidates excluded by suppression rules", + "hit rate": "Percentage of high-confidence 
items among all candidates", + # Clone stat cards + "clone groups": "Distinct duplication patterns, each containing 2+ code fragments", + "instances": "Total duplicated code fragments across all groups", + "new groups": "Clone groups not present in the previous baseline", + "high spread": "Clone groups spanning multiple files", + # Suggestion stat cards + "total suggestions": "Total actionable improvement suggestions generated", + "warning": "Suggestions with warning severity worth reviewing", + "easy wins": "Actionable suggestions with low estimated effort", +} diff --git a/codeclone/report/messages/markdown.py b/codeclone/report/messages/markdown.py new file mode 100644 index 00000000..b60a02d7 --- /dev/null +++ b/codeclone/report/messages/markdown.py @@ -0,0 +1,111 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Markdown report projection copy.""" + +from __future__ import annotations + +from typing import Final + +from ...domain.findings import ( + FAMILY_CLONE, + FAMILY_DEAD_CODE, + FAMILY_STRUCTURAL, +) +from .projections import MD_TITLE, PROJECTION_NONE + +MD_SCHEMA_LABEL: Final = "Markdown schema" +MD_SOURCE_SCHEMA_LABEL: Final = "Source report schema" +MD_PROJECT_LABEL: Final = "Project" +MD_ANALYSIS_MODE_LABEL: Final = "Analysis mode" +MD_REPORT_MODE_LABEL: Final = "Report mode" +MD_GENERATED_BY_LABEL: Final = "Generated by" +MD_PYTHON_LABEL: Final = "Python" +MD_REPORT_GENERATED_LABEL: Final = "Report generated (UTC)" + +MD_LABEL_HEALTH: Final = "Health" +MD_LABEL_TOTAL_FINDINGS: Final = "Total findings" +MD_LABEL_FAMILIES: Final = "Families" +MD_LABEL_STRONGEST_DIMENSION: Final = "Strongest dimension" +MD_LABEL_WEAKEST_DIMENSION: Final = "Weakest dimension" +MD_LABEL_FILES: Final = "Files" +MD_LABEL_CODE: Final = "Code" 
+MD_LABEL_TOTAL: Final = "Total" +MD_LABEL_BY_FAMILY: Final = "By family" +MD_LABEL_BY_SEVERITY: Final = "By severity" +MD_LABEL_BY_IMPACT_SCOPE: Final = "By impact scope" +MD_LABEL_SOURCE_SCOPE: Final = "Source scope breakdown" + +MD_FINDING_ID: Final = "Finding ID" +MD_FAMILY: Final = "Family" +MD_CATEGORY: Final = "Category" +MD_KIND: Final = "Kind" +MD_SEVERITY: Final = "Severity" +MD_CONFIDENCE: Final = "Confidence" +MD_PRIORITY: Final = "Priority" +MD_SCOPE: Final = "Scope" +MD_SPREAD: Final = "Spread" +MD_OCCURRENCES: Final = "Occurrences" +MD_FACTS: Final = "Facts" +MD_PRESENTATION_FACTS: Final = "Presentation facts" +MD_LOCATIONS: Final = "Locations" +MD_MORE_OCCURRENCES: Final = "... and {count} more occurrence(s)" + +MD_NONE: Final = "_None._" +MD_SUPPRESSED_CLONE_GROUP: Final = "Suppressed clone group" + +MD_HEADING_CLONE: Final = "{category} clone group{suffix}" +MD_HEADING_STRUCTURAL: Final = "Structural finding: {category}" +MD_HEADING_DEAD_CODE: Final = "Dead code: {category}" +MD_HEADING_DESIGN: Final = "Design finding: {category}" + +MD_ANCHORS: Final[tuple[tuple[str, str, int], ...]] = ( + ("overview", "Overview", 2), + ("inventory", "Inventory", 2), + ("findings-summary", "Findings Summary", 2), + ("top-risks", "Top Risks", 2), + ("suggestions", "Suggestions", 2), + ("findings", "Findings", 2), + ("clone-findings", "Clone Findings", 3), + ("structural-findings", "Structural Findings", 3), + ("dead-code-findings", "Dead Code Findings", 3), + ("design-findings", "Design Findings", 3), + ("metrics", "Metrics", 2), + ("health", "Health", 3), + ("complexity", "Complexity", 3), + ("coupling", "Coupling", 3), + ("cohesion", "Cohesion", 3), + ("coverage-join", "Coverage Join", 3), + ("overloaded-modules", "Overloaded Modules", 3), + ("dependencies", "Dependencies", 3), + ("dead-code-metrics", "Dead Code", 3), + ("dead-code-suppressed", "Suppressed Dead Code", 3), + ("integrity", "Integrity", 2), +) + + +def finding_heading(*, family: str, category: str, 
clone_type: str = "") -> str: + category_text = category.title() if category else category + if family == FAMILY_CLONE: + suffix = f" ({clone_type})" if clone_type else "" + return MD_HEADING_CLONE.format(category=category_text, suffix=suffix) + if family == FAMILY_STRUCTURAL: + return MD_HEADING_STRUCTURAL.format(category=category) + if family == FAMILY_DEAD_CODE: + return MD_HEADING_DEAD_CODE.format(category=category) + return MD_HEADING_DESIGN.format(category=category) + + +def projection_none() -> str: + return PROJECTION_NONE + + +__all__ = [ + "MD_ANCHORS", + "MD_TITLE", + "finding_heading", + "projection_none", +] diff --git a/codeclone/report/messages/overview.py b/codeclone/report/messages/overview.py new file mode 100644 index 00000000..49827072 --- /dev/null +++ b/codeclone/report/messages/overview.py @@ -0,0 +1,143 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Overview tab labels and directory hotspot copy.""" + +from __future__ import annotations + +from typing import Final + +DIRECTORY_BUCKET_LABELS: Final[dict[str, str]] = { + "all": "All Findings", + "clones": "Clone Groups", + "structural": "Structural Findings", + "complexity": "High Complexity", + "cohesion": "Low Cohesion", + "coupling": "High Coupling", + "dead_code": "Dead Code", + "dependency": "Dependency Cycles", +} + +DIRECTORY_BUCKET_ORDER: Final[tuple[str, ...]] = ( + "all", + "clones", + "structural", + "complexity", + "cohesion", + "coupling", + "dead_code", + "dependency", +) + +DIRECTORY_KIND_LABELS: Final[dict[str, str]] = { + "clones": "clones", + "structural": "structural", + "complexity": "complexity", + "cohesion": "cohesion", + "coupling": "coupling", + "dead_code": "dead code", + "coverage": "coverage", + "dependency": "dependency", +} + +RADAR_DIMENSIONS: Final[tuple[str, ...]] = ( + "clones", + "complexity", + "coupling", + "cohesion", + "dead_code", + "dependencies", + "coverage", +) + +RADAR_LABELS: Final[dict[str, str]] = { + "clones": "Clones", + "complexity": "Complexity", + "coupling": "Coupling", + "cohesion": "Cohesion", + "dead_code": "Dead Code", + "dependencies": "Deps", + "coverage": "Coverage", +} + +KPI_HEALTH: Final = "Health" +KPI_HEALTH_NA: Final = "n/a" +ISSUE_BREAKDOWN_EMPTY: Final = "No issues detected" + +ISSUE_BREAKDOWN_ROW_LABELS: Final[dict[str, str]] = { + "clones": "Clone Groups", + "structural": "Structural", + "complexity": "Complexity", + "cohesion": "Cohesion", + "coupling": "Coupling", + "dead_code": "Dead Code", + "dep_cycles": "Dep. Cycles", +} + +KPI_CLONE_GROUPS: Final = "Clone Groups" +KPI_HIGH_COMPLEXITY: Final = "High Complexity" +KPI_HIGH_COUPLING: Final = "High Coupling" +KPI_LOW_COHESION: Final = "Low Cohesion" +KPI_DEP_CYCLES: Final = "Dep. 
Cycles" +KPI_DEAD_CODE: Final = "Dead Code" +KPI_FINDINGS: Final = "Findings" +KPI_SUGGESTIONS: Final = "Suggestions" + +KPI_TIP_CLONE_GROUPS: Final = "Detected code clone groups by detection level" +KPI_TIP_HIGH_COMPLEXITY: Final = "Functions with cyclomatic complexity above threshold" +KPI_TIP_HIGH_COUPLING: Final = "Classes with high coupling between objects (CBO)" +KPI_TIP_LOW_COHESION: Final = "Classes with low internal cohesion (high LCOM4)" +KPI_TIP_DEP_CYCLES: Final = "Circular dependencies between project modules" +KPI_TIP_DEAD_CODE: Final = "Potentially unused functions, classes, or imports" +KPI_TIP_FINDINGS: Final = "Active structural findings reported in production code" +KPI_TIP_SUGGESTIONS: Final = ( + "Actionable recommendations derived from clones, findings, and metrics" +) + +CLUSTER_EXECUTIVE_SUMMARY: Final = "Executive Summary" +CLUSTER_ISSUE_BREAKDOWN: Final = "Issue breakdown" +CLUSTER_SOURCE_BREAKDOWN: Final = "Source breakdown" +CLUSTER_HOTSPOTS_BY_DIRECTORY: Final = "Hotspots by Directory" +CLUSTER_HOTSPOTS_BY_DIRECTORY_DESC: Final = ( + "Directories with the highest concentration of findings by category." +) +CLUSTER_OVERLOADED_MODULES: Final = "Overloaded Modules" +CLUSTER_OVERLOADED_TOP_CANDIDATES: Final = "Top candidates" +CLUSTER_OVERLOADED_MORE_CANDIDATES: Final = "More candidates" +CLUSTER_ANALYTICS: Final = "Analytics" +CLUSTER_HEALTH_PROFILE: Final = "Health Profile" +CLUSTER_HEALTH_PROFILE_DESC: Final = "Dimension scores across all quality axes." +CLUSTER_HEALTH_PROFILE_LABEL: Final = "Health profile" +CLUSTER_RADAR_CAPTION: Final = "Higher values indicate better code health." +CLUSTER_RADAR_CAPTION_SUFFIX: Final = " Red labels highlight dimensions below 60." + +EXECUTIVE_SCAN_SCOPE_DEFAULT: Final = ( + "Project-wide context derived from the full scanned root." 
+) +EXECUTIVE_HEALTH_SNAPSHOT_QUESTION: Final = "Current health snapshot" +EXECUTIVE_THRESHOLDS_PREFIX: Final = "Thresholds: " + +ADOPTION_API_DISABLED: Final = "Disabled in this run." + +ADOPTION_CLUSTER_TITLE: Final = "Adoption & API" +ADOPTION_CLUSTER_DESC: Final = ( + "Type/docstring adoption and public API surface are shown as facts, " + "not style pressure." +) +ADOPTION_COVERAGE_LABEL: Final = "Adoption coverage" +ADOPTION_API_SURFACE_LABEL: Final = "Public API surface" +ADOPTION_PARAM_ANNOTATIONS: Final = "Param annotations" +ADOPTION_RETURN_ANNOTATIONS: Final = "Return annotations" +ADOPTION_DOCSTRINGS: Final = "Docstrings" +ADOPTION_TYPED_AS_ANY: Final = "Typed as Any" +ADOPTION_ENABLE_VIA: Final = "Enable via" +ADOPTION_ENABLE_VIA_FLAG: Final = "--api-surface" +ADOPTION_PUBLIC_SYMBOLS: Final = "Public symbols" +ADOPTION_MODULES: Final = "Modules" +ADOPTION_BREAKING_CHANGES: Final = "Breaking changes" +ADOPTION_ADDED_SYMBOLS: Final = "Added symbols" +ADOPTION_STRICT_MODE: Final = "Strict mode" +ADOPTION_STRICT_MODE_ENABLED: Final = "enabled" diff --git a/codeclone/report/messages/projections.py b/codeclone/report/messages/projections.py new file mode 100644 index 00000000..fcc01fea --- /dev/null +++ b/codeclone/report/messages/projections.py @@ -0,0 +1,96 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Text and Markdown report projection headings.""" + +from __future__ import annotations + +from typing import Final + +PROJECTION_NONE: Final = "(none)" +MD_TITLE: Final = "# CodeClone Report" + +TEXT_SECTION_REPORT_METADATA: Final = "REPORT METADATA" +TEXT_SECTION_INVENTORY: Final = "INVENTORY" +TEXT_SECTION_FINDINGS_SUMMARY: Final = "FINDINGS SUMMARY" +TEXT_SECTION_METRICS_SUMMARY: Final = "METRICS SUMMARY" +TEXT_SECTION_DERIVED_OVERVIEW: Final = "DERIVED OVERVIEW" +TEXT_SECTION_INTEGRITY: Final = "INTEGRITY" +TEXT_SECTION_SUGGESTIONS: Final = "SUGGESTIONS" +TEXT_SECTION_FUNCTION_CLONES: Final = "FUNCTION CLONES" +TEXT_SECTION_BLOCK_CLONES: Final = "BLOCK CLONES" +TEXT_SECTION_SEGMENT_CLONES: Final = "SEGMENT CLONES" +TEXT_SECTION_SUPPRESSED_FUNCTION_CLONES: Final = "SUPPRESSED FUNCTION CLONES" +TEXT_SECTION_SUPPRESSED_BLOCK_CLONES: Final = "SUPPRESSED BLOCK CLONES" +TEXT_SECTION_SUPPRESSED_SEGMENT_CLONES: Final = "SUPPRESSED SEGMENT CLONES" +TEXT_SECTION_STRUCTURAL_FINDINGS: Final = "STRUCTURAL FINDINGS" +TEXT_SECTION_DEAD_CODE_FINDINGS: Final = "DEAD CODE FINDINGS" +TEXT_SECTION_DESIGN_FINDINGS: Final = "DESIGN FINDINGS" +TEXT_SECTION_SUPPRESSED_DEAD_CODE: Final = "SUPPRESSED DEAD CODE" +TEXT_SECTION_COVERAGE_JOIN: Final = "COVERAGE JOIN (top 10)" +TEXT_SECTION_OVERLOADED_MODULES: Final = "OVERLOADED MODULES (top 10)" +TEXT_SECTION_SECURITY_SURFACES: Final = "SECURITY SURFACES (top 10)" +TEXT_BASELINE_UNTRUSTED_NOTE: Final = ( + "Note: baseline is untrusted; all groups are treated as NEW." 
+) + +TEXT_OVERVIEW_FAMILIES: Final = "Families:" +TEXT_OVERVIEW_SOURCE_SCOPE: Final = "Source scope breakdown:" +TEXT_OVERVIEW_HEALTH_SNAPSHOT: Final = "Health snapshot:" +TEXT_OVERVIEW_HOTLISTS: Final = "Hotlists:" +TEXT_OVERVIEW_TOP_RISKS: Final = "Top risks:" +TEXT_OVERVIEW_TOP_RISKS_NONE: Final = "Top risks: (none)" + +TEXT_META_REPORT_SCHEMA_VERSION: Final = "Report schema version: " +TEXT_META_CODECLONE_VERSION: Final = "CodeClone version: " +TEXT_META_PROJECT_NAME: Final = "Project name: " +TEXT_META_SCAN_ROOT: Final = "Scan root: " +TEXT_META_PYTHON_VERSION: Final = "Python version: " +TEXT_META_PYTHON_TAG: Final = "Python tag: " +TEXT_META_ANALYSIS_MODE: Final = "Analysis mode: " +TEXT_META_REPORT_MODE: Final = "Report mode: " +TEXT_META_REPORT_GENERATED: Final = "Report generated (UTC): " +TEXT_META_COMPUTED_METRIC_FAMILIES: Final = "Computed metric families: " +TEXT_META_BASELINE_PATH: Final = "Baseline path: " +TEXT_META_BASELINE_FINGERPRINT_VERSION: Final = "Baseline fingerprint version: " +TEXT_META_BASELINE_SCHEMA_VERSION: Final = "Baseline schema version: " +TEXT_META_BASELINE_PYTHON_TAG: Final = "Baseline Python tag: " +TEXT_META_BASELINE_GENERATOR_NAME: Final = "Baseline generator name: " +TEXT_META_BASELINE_GENERATOR_VERSION: Final = "Baseline generator version: " +TEXT_META_BASELINE_PAYLOAD_SHA256: Final = "Baseline payload sha256: " +TEXT_META_BASELINE_PAYLOAD_VERIFIED: Final = "Baseline payload verified: " +TEXT_META_BASELINE_LOADED: Final = "Baseline loaded: " +TEXT_META_BASELINE_STATUS: Final = "Baseline status: " +TEXT_META_CACHE_PATH: Final = "Cache path: " +TEXT_META_CACHE_SCHEMA_VERSION: Final = "Cache schema version: " +TEXT_META_CACHE_STATUS: Final = "Cache status: " +TEXT_META_CACHE_USED: Final = "Cache used: " +TEXT_META_METRICS_BASELINE_PATH: Final = "Metrics baseline path: " +TEXT_META_METRICS_BASELINE_LOADED: Final = "Metrics baseline loaded: " +TEXT_META_METRICS_BASELINE_STATUS: Final = "Metrics baseline status: " 
+TEXT_META_METRICS_BASELINE_SCHEMA_VERSION: Final = "Metrics baseline schema version: " +TEXT_META_METRICS_BASELINE_PAYLOAD_SHA256: Final = "Metrics baseline payload sha256: " +TEXT_META_METRICS_BASELINE_PAYLOAD_VERIFIED: Final = ( + "Metrics baseline payload verified: " +) + +TEXT_INVENTORY_FILES: Final = "Files: " +TEXT_INVENTORY_CODE: Final = "Code: " +TEXT_INVENTORY_FILE_REGISTRY: Final = "File registry: " + +TEXT_FINDINGS_TOTAL_GROUPS: Final = "Total groups: " +TEXT_FINDINGS_FAMILIES: Final = "Families: " +TEXT_FINDINGS_SEVERITY: Final = "Severity: " +TEXT_FINDINGS_IMPACT_SCOPE: Final = "Impact scope: " +TEXT_FINDINGS_CLONES: Final = "Clones: " +TEXT_FINDINGS_SUPPRESSED: Final = "Suppressed: " + +TEXT_INTEGRITY_CANONICALIZATION: Final = "Canonicalization: " +TEXT_INTEGRITY_DIGEST: Final = "Digest: " + +TEXT_STRUCTURAL_FINDINGS_HEADER: Final = "STRUCTURAL FINDINGS (groups={count})" +TEXT_SUPPRESSED_DEAD_CODE_HEADER: Final = "SUPPRESSED DEAD CODE (items={count})" +TEXT_SUGGESTIONS_HEADER: Final = "SUGGESTIONS (count={count})" diff --git a/codeclone/report/messages/sarif.py b/codeclone/report/messages/sarif.py new file mode 100644 index 00000000..b3584636 --- /dev/null +++ b/codeclone/report/messages/sarif.py @@ -0,0 +1,124 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""SARIF rule descriptions and remediation copy.""" + +from __future__ import annotations + +from typing import Final + +SARIF_HELP_DOCS_SUFFIX: Final = "See [CodeClone docs]({docs_url})." + +REMEDIATION_CLONE: Final = ( + "Review the representative occurrence and related occurrences, " + "then extract shared behavior or keep accepted debt in the baseline." 
+) +REMEDIATION_DUPLICATED_BRANCHES: Final = ( + "Collapse repeated branch shapes into a shared helper, validator, " + "or control-flow abstraction where the behavior is intentionally shared." +) +REMEDIATION_GUARD_DIVERGENCE: Final = ( + "Review the clone cohort and reconcile guard or early-exit behavior " + "if those members are expected to stay aligned." +) +REMEDIATION_COHORT_DRIFT: Final = ( + "Review the clone cohort and reconcile terminal, guard, or try/finally " + "profiles if the drift is not intentional." +) +REMEDIATION_DEAD_CODE: Final = ( + "Remove the unused symbol or keep it explicitly documented/suppressed " + "when runtime dynamics call it intentionally." +) +REMEDIATION_LOW_COHESION: Final = ( + "Split the class or regroup behavior so responsibilities become cohesive." +) +REMEDIATION_COMPLEXITY: Final = ( + "Split the function or simplify control flow to reduce complexity." +) +REMEDIATION_COUPLING: Final = ( + "Reduce dependencies or split responsibilities to lower coupling." +) +REMEDIATION_DEPENDENCY_CYCLE: Final = ( + "Break the cycle or invert dependencies so modules no longer depend " + "on each other circularly." +) + +RULE_FUNCTION_CLONE_SHORT: Final = "Function clone group" +RULE_FUNCTION_CLONE_FULL: Final = ( + "Multiple functions share the same normalized function body." +) +RULE_BLOCK_CLONE_SHORT: Final = "Block clone group" +RULE_BLOCK_CLONE_FULL: Final = ( + "Repeated normalized statement blocks were detected across occurrences." +) +RULE_SEGMENT_CLONE_SHORT: Final = "Segment clone group" +RULE_SEGMENT_CLONE_FULL: Final = ( + "Repeated normalized statement segments were detected across occurrences." +) + +RULE_DUPLICATED_BRANCHES_SHORT: Final = "Duplicated branches" +RULE_DUPLICATED_BRANCHES_FULL: Final = ( + "Repeated branch families with matching structural signatures were detected." 
+) +RULE_GUARD_DIVERGENCE_SHORT: Final = "Clone guard/exit divergence" +RULE_GUARD_DIVERGENCE_FULL: Final = ( + "Members of the same function-clone cohort diverged in " + "entry guards or early-exit behavior." +) +RULE_COHORT_DRIFT_SHORT: Final = "Clone cohort drift" +RULE_COHORT_DRIFT_FULL: Final = ( + "Members of the same function-clone cohort drifted from " + "the majority terminal/guard/try profile." +) + +RULE_UNUSED_FUNCTION_SHORT: Final = "Unused function" +RULE_UNUSED_FUNCTION_FULL: Final = "Function appears to be unused with high confidence." +RULE_UNUSED_CLASS_SHORT: Final = "Unused class" +RULE_UNUSED_CLASS_FULL: Final = "Class appears to be unused with high confidence." +RULE_UNUSED_METHOD_SHORT: Final = "Unused method" +RULE_UNUSED_METHOD_FULL: Final = "Method appears to be unused with high confidence." +RULE_UNUSED_SYMBOL_SHORT: Final = "Unused symbol" +RULE_UNUSED_SYMBOL_FULL: Final = "Symbol appears to be unused with reported confidence." + +RULE_LOW_COHESION_SHORT: Final = "Low cohesion class" +RULE_LOW_COHESION_FULL: Final = ( + "Class cohesion is low according to LCOM4 hotspot thresholds." +) +RULE_COMPLEXITY_SHORT: Final = "Complexity hotspot" +RULE_COMPLEXITY_FULL: Final = ( + "Function exceeds the project complexity hotspot threshold." +) +RULE_COUPLING_SHORT: Final = "Coupling hotspot" +RULE_COUPLING_FULL: Final = "Class exceeds the project coupling hotspot threshold." +RULE_COVERAGE_SCOPE_GAP_SHORT: Final = "Coverage scope gap" +RULE_COVERAGE_SCOPE_GAP_FULL: Final = ( + "A medium/high-risk function is outside the supplied joined coverage scope." +) +RULE_COVERAGE_HOTSPOT_SHORT: Final = "Coverage hotspot" +RULE_COVERAGE_HOTSPOT_FULL: Final = ( + "A medium/high-risk function falls below the configured joined coverage threshold." +) +RULE_DEPENDENCY_CYCLE_SHORT: Final = "Dependency cycle" +RULE_DEPENDENCY_CYCLE_FULL: Final = ( + "A dependency cycle was detected between project modules." 
+) + +REMEDIATION_BY_RULE_ID: Final[dict[str, str]] = { + "CCLONE001": REMEDIATION_CLONE, + "CCLONE002": REMEDIATION_CLONE, + "CCLONE003": REMEDIATION_CLONE, + "CSTRUCT001": REMEDIATION_DUPLICATED_BRANCHES, + "CSTRUCT002": REMEDIATION_GUARD_DIVERGENCE, + "CSTRUCT003": REMEDIATION_COHORT_DRIFT, + "CDEAD001": REMEDIATION_DEAD_CODE, + "CDEAD002": REMEDIATION_DEAD_CODE, + "CDEAD003": REMEDIATION_DEAD_CODE, + "CDEAD004": REMEDIATION_DEAD_CODE, + "CDESIGN001": REMEDIATION_LOW_COHESION, + "CDESIGN002": REMEDIATION_COMPLEXITY, + "CDESIGN003": REMEDIATION_COUPLING, + "CDESIGN004": REMEDIATION_DEPENDENCY_CYCLE, +} diff --git a/codeclone/report/messages/security.py b/codeclone/report/messages/security.py new file mode 100644 index 00000000..bd2cc401 --- /dev/null +++ b/codeclone/report/messages/security.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Security surfaces inventory copy.""" + +from __future__ import annotations + +from typing import Final + +SECURITY_EMPTY_TITLE: Final = ( + "No security-relevant capability surfaces matched the exact registry." +) +SECURITY_EMPTY_DETAIL: Final = ( + "This inventory is report-only and focuses on exact boundary " + "capabilities rather than vulnerability claims." +) +SECURITY_STAT_SURFACES: Final = "Surfaces" +SECURITY_STAT_CATEGORIES: Final = "Categories" +SECURITY_STAT_PRODUCTION: Final = "Production" +SECURITY_STAT_EXACT_ITEMS: Final = "Exact items" +SECURITY_TABLE_TITLE: Final = "Security-relevant capability inventory" +SECURITY_TABLE_HEADERS: Final[tuple[str, ...]] = ( + "Category", + "Capability", + "Evidence", + "Source", + "Location", + "Review", +) +SECURITY_TABLE_EMPTY: Final = "No exact security surfaces are available." 
+SECURITY_TABLE_EMPTY_DESC: Final = ( + "CodeClone inventories trust-boundary capabilities but does not " + "claim vulnerabilities or exploitability." +) +UNKNOWN_LABEL: Final = "(unknown)" + +SECURITY_REVIEW_BANNER_QUESTION: Final = "How should I review this inventory?" +SECURITY_REVIEW_HOW_TO_READ: Final = "How to read" +SECURITY_REVIEW_ORDER: Final = "Review order" +SECURITY_REVIEW_SIGNAL: Final = "Signal" +SECURITY_REVIEW_SIGNAL_VALUE: Final = "boundary inventory" +SECURITY_REVIEW_EVIDENCE: Final = "Evidence" +SECURITY_REVIEW_EVIDENCE_VALUE: Final = "exact imports/calls/builtins" +SECURITY_REVIEW_MEANING: Final = "Meaning" +SECURITY_REVIEW_MEANING_VALUE: Final = "inventory, not vulnerability proof" +SECURITY_REVIEW_START_WITH: Final = "Start with" +SECURITY_REVIEW_COVERAGE_JOIN: Final = "Coverage join" +SECURITY_REVIEW_THEN_REVIEW: Final = "Then review" +SECURITY_REVIEW_PRODUCTION_MODULE_ROWS: Final = "production module rows only" +SECURITY_REVIEW_NO_INVENTORY_ROWS: Final = "no inventory-only rows" +SECURITY_REVIEW_NO_OVERLAP: Final = "no overlap in current review set" +SECURITY_REVIEW_COVERAGE_UNAVAILABLE: Final = "unavailable for this run" diff --git a/codeclone/report/messages/suggestions.py b/codeclone/report/messages/suggestions.py new file mode 100644 index 00000000..47a955a6 --- /dev/null +++ b/codeclone/report/messages/suggestions.py @@ -0,0 +1,148 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Suggestion titles, fact kinds, summaries, and action steps.""" + +from __future__ import annotations + +from typing import Final + +from ...domain.findings import ( + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, +) + +CLONE_FACT_KIND_FUNCTION: Final = "Function clone group" +CLONE_FACT_KIND_BLOCK: Final = "Block clone group" +CLONE_FACT_KIND_SEGMENT: Final = "Segment clone group" + +CLONE_FACT_KIND_BY_KIND: Final[dict[str, str]] = { + CLONE_KIND_FUNCTION: CLONE_FACT_KIND_FUNCTION, + CLONE_KIND_BLOCK: CLONE_FACT_KIND_BLOCK, + CLONE_KIND_SEGMENT: CLONE_FACT_KIND_SEGMENT, +} + +CLONE_SUMMARY_FUNCTION_TYPE1: Final = "same exact function body" +CLONE_SUMMARY_FUNCTION_TYPE2: Final = "same parameterized function body" +CLONE_SUMMARY_FUNCTION_TYPE3: Final = ( + "same structural function body with small identifier changes" +) +CLONE_SUMMARY_FUNCTION_TYPE4: Final = "same structural function body" +CLONE_SUMMARY_BLOCK_ASSERT_ONLY: Final = "same assertion template" +CLONE_SUMMARY_BLOCK_REPEATED_STMT: Final = "same repeated setup/assert pattern" +CLONE_SUMMARY_BLOCK_DEFAULT: Final = "same structural sequence with small value changes" +CLONE_SUMMARY_SEGMENT: Final = "same structural segment sequence" + +CLONE_STEP_TYPE1_1: Final = ( + "Keep one canonical implementation and remove the exact duplicates." +) +CLONE_STEP_TYPE1_2: Final = ( + "Route the remaining call sites to the shared implementation." +) +CLONE_STEP_TYPE2_1: Final = "Extract a shared implementation with explicit parameters." +CLONE_STEP_TYPE2_2: Final = "Replace identifier-only variations with arguments." +CLONE_STEP_BLOCK_ASSERT_1: Final = ( + "Collapse the repeated assertion template into a helper or loop." +) +CLONE_STEP_BLOCK_ASSERT_2: Final = ( + "Keep the asserted values as data instead of copy-pasted statements." +) +CLONE_STEP_BLOCK_1: Final = "Extract the repeated statement sequence into a helper." 
+CLONE_STEP_BLOCK_2: Final = ( + "Keep setup data close to the call site and move shared logic out." +) +CLONE_STEP_SEGMENT_1: Final = ( + "Review whether the repeated segment should become shared utility code." +) +CLONE_STEP_SEGMENT_2: Final = ( + "Keep this as a report hint only if the duplication is intentional." +) +CLONE_STEP_DEFAULT_1: Final = "Extract the repeated logic into a shared abstraction." +CLONE_STEP_DEFAULT_2: Final = ( + "Replace the duplicated bodies with calls to the shared code." +) + +SUGGESTION_TITLE_REDUCE_COMPLEXITY: Final = "Reduce function complexity" +SUGGESTION_TITLE_REDUCE_COUPLING: Final = "Reduce class coupling" +SUGGESTION_TITLE_SPLIT_COHESION: Final = "Split low-cohesion class" +SUGGESTION_TITLE_DEAD_CODE: Final = "Remove or explicitly keep unused code" +SUGGESTION_TITLE_BREAK_CYCLE: Final = "Break circular dependency" + +COMPLEXITY_STEP_1: Final = "Split the function into smaller deterministic stages." +COMPLEXITY_STEP_2: Final = "Extract helper functions for nested branches." +COUPLING_STEP_1: Final = "Reduce external dependencies of this class." +COUPLING_STEP_2: Final = "Move unrelated responsibilities to collaborator classes." +COHESION_STEP_1: Final = "Split class by responsibility boundaries." +COHESION_STEP_2: Final = "Group methods by shared state and extract subcomponents." +DEAD_CODE_STEP_1: Final = "Remove or deprecate the unused symbol." +DEAD_CODE_STEP_2: Final = ( + "If intentionally reserved, add explicit keep marker and test." +) +DEPENDENCY_STEP_1: Final = "Break the cycle by extracting a shared abstraction." +DEPENDENCY_STEP_2: Final = ( + "Invert one dependency edge through an interface or protocol." 
+) + +FACT_KIND_COMPLEXITY_HOTSPOT: Final = "Function complexity hotspot" +FACT_KIND_COUPLING_HOTSPOT: Final = "Class coupling hotspot" +FACT_KIND_LOW_COHESION: Final = "Low cohesion class" +FACT_KIND_DEAD_CODE: Final = "Dead code item" +FACT_KIND_DEPENDENCY_CYCLE: Final = "Dependency cycle" +FACT_KIND_STRUCTURAL: Final = "Structural finding" + +STRUCTURAL_TITLE_GUARD_EXIT_DIVERGENCE: Final = "Clone guard/exit divergence" +STRUCTURAL_SUMMARY_GUARD_EXIT_DIVERGENCE: Final = ( + "clone cohort members differ in entry guards or early-exit behavior" +) +STRUCTURAL_TITLE_COHORT_DRIFT: Final = "Clone cohort drift" +STRUCTURAL_SUMMARY_COHORT_DRIFT: Final = ( + "clone cohort members drift from majority terminal/guard/try profile" +) +STRUCTURAL_TITLE_REPEATED_BRANCH: Final = "Repeated branch family" +STRUCTURAL_SUMMARY_RAISE_BRANCH: Final = "same repeated guard/validation branch" +STRUCTURAL_SUMMARY_RETURN_BRANCH: Final = "same repeated return branch" +STRUCTURAL_SUMMARY_LOOP_BRANCH: Final = "same repeated loop branch" +STRUCTURAL_SUMMARY_BRANCH_DEFAULT: Final = "same repeated branch shape" + +STRUCTURAL_STEP_GUARD_EXIT_1: Final = ( + "Compare divergent clone members against the majority guard/exit profile." +) +STRUCTURAL_STEP_GUARD_EXIT_2: Final = ( + "If divergence is accidental, align guard exits across the cohort." +) +STRUCTURAL_STEP_COHORT_DRIFT_1: Final = ( + "Review whether cohort drift is intentional for this clone family." +) +STRUCTURAL_STEP_COHORT_DRIFT_2: Final = ( + "If not intentional, reconcile terminal/guard/try profiles across members." +) +STRUCTURAL_STEP_CONTINUE_1: Final = ( + "Review whether the repeated continue guard can be merged into one predicate." +) +STRUCTURAL_STEP_CONTINUE_2: Final = ( + "If separate continue checks keep the local control flow clearer, " + "keep this as a report-only hint." +) +STRUCTURAL_STEP_RAISE_1: Final = ( + "Factor the repeated validation/guard path into a shared helper." 
+) +STRUCTURAL_STEP_RAISE_2: Final = ( + "Keep the branch-specific inputs at the call site and share the exit policy." +) +STRUCTURAL_STEP_RETURN_1: Final = ( + "Consolidate the repeated return-path logic into a shared helper." +) +STRUCTURAL_STEP_RETURN_2: Final = ( + "Keep the branch predicate local and share the emitted behavior." +) +STRUCTURAL_STEP_DEFAULT_1: Final = ( + "Review whether the repeated local branch can be simplified in place." +) +STRUCTURAL_STEP_DEFAULT_2: Final = ( + "If the local duplication keeps control flow clearer, keep " + "this as a report-only hint." +) diff --git a/codeclone/report/renderers/markdown.py b/codeclone/report/renderers/markdown.py index cf64a699..9db70603 100644 --- a/codeclone/report/renderers/markdown.py +++ b/codeclone/report/renderers/markdown.py @@ -9,9 +9,10 @@ from collections.abc import Collection, Mapping, Sequence from typing import TYPE_CHECKING -from ...domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL from ...utils.coerce import as_float, as_int, as_mapping, as_sequence from .._formatting import format_spread_text +from ..messages import markdown as md_msgs +from ..messages.projections import PROJECTION_NONE if TYPE_CHECKING: from ...models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup @@ -26,43 +27,21 @@ _as_mapping = as_mapping _as_sequence = as_sequence -_ANCHORS: tuple[tuple[str, str, int], ...] 
= ( - ("overview", "Overview", 2), - ("inventory", "Inventory", 2), - ("findings-summary", "Findings Summary", 2), - ("top-risks", "Top Risks", 2), - ("suggestions", "Suggestions", 2), - ("findings", "Findings", 2), - ("clone-findings", "Clone Findings", 3), - ("structural-findings", "Structural Findings", 3), - ("dead-code-findings", "Dead Code Findings", 3), - ("design-findings", "Design Findings", 3), - ("metrics", "Metrics", 2), - ("health", "Health", 3), - ("complexity", "Complexity", 3), - ("coupling", "Coupling", 3), - ("cohesion", "Cohesion", 3), - ("coverage-join", "Coverage Join", 3), - ("overloaded-modules", "Overloaded Modules", 3), - ("dependencies", "Dependencies", 3), - ("dead-code-metrics", "Dead Code", 3), - ("dead-code-suppressed", "Suppressed Dead Code", 3), - ("integrity", "Integrity", 2), -) _ANCHOR_MAP: dict[str, tuple[str, str, int]] = { - anchor[0]: anchor for anchor in _ANCHORS + anchor_id: (anchor_id, title, level) + for anchor_id, title, level in md_msgs.MD_ANCHORS } def _text(value: object) -> str: if value is None: - return "(none)" + return PROJECTION_NONE if isinstance(value, float): return f"{value:.2f}".rstrip("0").rstrip(".") or "0" if isinstance(value, bool): return "true" if value else "false" text = str(value).strip() - return text or "(none)" + return text or PROJECTION_NONE def _source_scope_text(scope: Mapping[str, object]) -> str: @@ -113,17 +92,11 @@ def _append_kv_bullets( def _finding_heading(group: Mapping[str, object]) -> str: - family = str(group.get("family", "")).strip() - category = str(group.get("category", "")).strip() - clone_type = str(group.get("clone_type", "")).strip() - if family == FAMILY_CLONE: - suffix = f" ({clone_type})" if clone_type else "" - return f"{category.title()} clone group{suffix}" - if family == FAMILY_STRUCTURAL: - return f"Structural finding: {category}" - if family == FAMILY_DEAD_CODE: - return f"Dead code: {category}" - return f"Design finding: {category}" + return 
md_msgs.finding_heading( + family=str(group.get("family", "")).strip(), + category=str(group.get("category", "")).strip(), + clone_type=str(group.get("clone_type", "")).strip(), + ) def _append_facts_block( @@ -145,7 +118,7 @@ def _append_findings_section( ) -> None: finding_rows = [_as_mapping(group) for group in groups] if not finding_rows: - lines.append("_None._") + lines.append(md_msgs.MD_NONE) lines.append("") return for group in finding_rows: @@ -154,32 +127,40 @@ def _append_findings_section( _append_kv_bullets( lines, ( - ("Finding ID", f"`{_text(group.get('id'))}`"), - ("Family", group.get("family")), - ("Category", group.get("category")), - ("Kind", group.get("kind")), - ("Severity", group.get("severity")), - ("Confidence", group.get("confidence")), - ("Priority", _as_float(group.get("priority"))), - ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))), - ("Spread", _spread_text(_as_mapping(group.get("spread")))), - ("Occurrences", group.get("count")), + (md_msgs.MD_FINDING_ID, f"`{_text(group.get('id'))}`"), + (md_msgs.MD_FAMILY, group.get("family")), + (md_msgs.MD_CATEGORY, group.get("category")), + (md_msgs.MD_KIND, group.get("kind")), + (md_msgs.MD_SEVERITY, group.get("severity")), + (md_msgs.MD_CONFIDENCE, group.get("confidence")), + (md_msgs.MD_PRIORITY, _as_float(group.get("priority"))), + ( + md_msgs.MD_SCOPE, + _source_scope_text(_as_mapping(group.get("source_scope"))), + ), + (md_msgs.MD_SPREAD, _spread_text(_as_mapping(group.get("spread")))), + (md_msgs.MD_OCCURRENCES, group.get("count")), ), ) facts = _as_mapping(group.get("facts")) display_facts = _as_mapping(group.get("display_facts")) if facts or display_facts: - _append_facts_block(lines, title="Facts", facts=facts) - _append_facts_block(lines, title="Presentation facts", facts=display_facts) + _append_facts_block(lines, title=md_msgs.MD_FACTS, facts=facts) + _append_facts_block( + lines, + title=md_msgs.MD_PRESENTATION_FACTS, + facts=display_facts, + ) lines.append("") 
items = list(map(_as_mapping, _as_sequence(group.get("items")))) - lines.append("- Locations:") + lines.append(f"- {md_msgs.MD_LOCATIONS}:") visible_items = items[:_MAX_FINDING_LOCATIONS] lines.extend(f" - {_location_text(item)}" for item in visible_items) if len(items) > len(visible_items): - lines.append( - f" - ... and {len(items) - len(visible_items)} more occurrence(s)" + extra = md_msgs.MD_MORE_OCCURRENCES.format( + count=len(items) - len(visible_items) ) + lines.append(f" - {extra}") lines.append("") @@ -190,11 +171,11 @@ def _append_suppressed_clone_findings( ) -> None: finding_rows = [_as_mapping(group) for group in groups] if not finding_rows: - lines.append("_None._") + lines.append(md_msgs.MD_NONE) lines.append("") return for group in finding_rows: - lines.append("#### Suppressed clone group") + lines.append(f"#### {md_msgs.MD_SUPPRESSED_CLONE_GROUP}") lines.append("") _append_kv_bullets( lines, @@ -215,7 +196,7 @@ def _append_suppressed_clone_findings( for item in _as_sequence(group.get("matched_patterns")) if str(item).strip() ) - or "(none)", + or PROJECTION_NONE, ), ), ) @@ -284,16 +265,25 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) lines = [ - "# CodeClone Report", + md_msgs.MD_TITLE, "", - f"- Markdown schema: {MARKDOWN_SCHEMA_VERSION}", - f"- Source report schema: {_text(payload.get('report_schema_version'))}", - f"- Project: {_text(meta.get('project_name'))}", - f"- Analysis mode: {_text(meta.get('analysis_mode'))}", - f"- Report mode: {_text(meta.get('report_mode'))}", - f"- Generated by: codeclone {_text(meta.get('codeclone_version'))}", - f"- Python: {_text(meta.get('python_tag'))}", - f"- Report generated (UTC): {_text(runtime.get('report_generated_at_utc'))}", + f"- {md_msgs.MD_SCHEMA_LABEL}: {MARKDOWN_SCHEMA_VERSION}", + ( + f"- {md_msgs.MD_SOURCE_SCHEMA_LABEL}: " + f"{_text(payload.get('report_schema_version'))}" + ), + f"- 
{md_msgs.MD_PROJECT_LABEL}: {_text(meta.get('project_name'))}", + f"- {md_msgs.MD_ANALYSIS_MODE_LABEL}: {_text(meta.get('analysis_mode'))}", + f"- {md_msgs.MD_REPORT_MODE_LABEL}: {_text(meta.get('report_mode'))}", + ( + f"- {md_msgs.MD_GENERATED_BY_LABEL}: " + f"codeclone {_text(meta.get('codeclone_version'))}" + ), + f"- {md_msgs.MD_PYTHON_LABEL}: {_text(meta.get('python_tag'))}", + ( + f"- {md_msgs.MD_REPORT_GENERATED_LABEL}: " + f"{_text(runtime.get('report_generated_at_utc'))}" + ), "", ] @@ -301,24 +291,29 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: _append_kv_bullets( lines, ( - ("Project", meta.get("project_name")), ( - "Health", + md_msgs.MD_LABEL_HEALTH, ( f"{_text(health_snapshot.get('score'))} " f"({_text(health_snapshot.get('grade'))})" ), ), - ("Total findings", findings_summary.get("total")), + (md_msgs.MD_LABEL_TOTAL_FINDINGS, findings_summary.get("total")), ( - "Families", + md_msgs.MD_LABEL_FAMILIES, ", ".join( f"{name}={_text(family_summary.get(name))}" for name in ("clones", "structural", "dead_code", "design") ), ), - ("Strongest dimension", health_snapshot.get("strongest_dimension")), - ("Weakest dimension", health_snapshot.get("weakest_dimension")), + ( + md_msgs.MD_LABEL_STRONGEST_DIMENSION, + health_snapshot.get("strongest_dimension"), + ), + ( + md_msgs.MD_LABEL_WEAKEST_DIMENSION, + health_snapshot.get("weakest_dimension"), + ), ), ) @@ -327,7 +322,7 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: lines, ( ( - "Files", + md_msgs.MD_LABEL_FILES, ", ".join( f"{name}={_text(inventory_files.get(name))}" for name in ( @@ -340,7 +335,7 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: ), ), ( - "Code", + md_msgs.MD_LABEL_CODE, ", ".join( f"{name}={_text(inventory_code.get(name))}" for name in ( @@ -358,36 +353,36 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: _append_kv_bullets( lines, ( - ("Total", 
findings_summary.get("total")), + (md_msgs.MD_LABEL_TOTAL, findings_summary.get("total")), ( - "By family", + md_msgs.MD_LABEL_BY_FAMILY, ", ".join( f"{name}={_text(family_summary.get(name))}" for name in ("clones", "structural", "dead_code", "design") ), ), ( - "By severity", + md_msgs.MD_LABEL_BY_SEVERITY, ", ".join( f"{name}={_text(severity_summary.get(name))}" for name in ("critical", "warning", "info") ), ), ( - "By impact scope", + md_msgs.MD_LABEL_BY_IMPACT_SCOPE, ", ".join( f"{name}={_text(impact_summary.get(name))}" for name in ("runtime", "non_runtime", "mixed") ), ), ( - "Source scope breakdown", + md_msgs.MD_LABEL_SOURCE_SCOPE, ", ".join( f"{name}={_text(source_breakdown.get(name))}" for name in ("production", "tests", "fixtures", "other") if name in source_breakdown ) - or "(none)", + or PROJECTION_NONE, ), ), ) @@ -403,7 +398,7 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: f"count={_text(risk.get('count'))})" ) else: - lines.append("_None._") + lines.append(md_msgs.MD_NONE) lines.append("") if suggestions: diff --git a/codeclone/report/renderers/sarif.py b/codeclone/report/renderers/sarif.py index ba443c8a..db3b7f87 100644 --- a/codeclone/report/renderers/sarif.py +++ b/codeclone/report/renderers/sarif.py @@ -52,6 +52,7 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence +from ..messages import sarif as sarif_msgs if TYPE_CHECKING: from ...models import StructuralFindingGroup, Suggestion @@ -93,43 +94,9 @@ def _rule_name(spec: _RuleSpec) -> str: def _rule_remediation(spec: _RuleSpec) -> str: - rule_id = spec.rule_id - if rule_id.startswith("CCLONE"): - return ( - "Review the representative occurrence and related occurrences, " - "then extract shared behavior or keep accepted debt in the baseline." 
- ) - if rule_id == "CSTRUCT001": - return ( - "Collapse repeated branch shapes into a shared helper, validator, " - "or control-flow abstraction where the behavior is intentionally shared." - ) - if rule_id == "CSTRUCT002": - return ( - "Review the clone cohort and reconcile guard or early-exit behavior " - "if those members are expected to stay aligned." - ) - if rule_id == "CSTRUCT003": - return ( - "Review the clone cohort and reconcile terminal, guard, or try/finally " - "profiles if the drift is not intentional." - ) - if rule_id.startswith("CDEAD"): - return ( - "Remove the unused symbol or keep it explicitly documented/suppressed " - "when runtime dynamics call it intentionally." - ) - if rule_id == "CDESIGN001": - return ( - "Split the class or regroup behavior so responsibilities become cohesive." - ) - if rule_id == "CDESIGN002": - return "Split the function or simplify control flow to reduce complexity." - if rule_id == "CDESIGN003": - return "Reduce dependencies or split responsibilities to lower coupling." - return ( - "Break the cycle or invert dependencies so modules no longer depend " - "on each other circularly." + return sarif_msgs.REMEDIATION_BY_RULE_ID.get( + spec.rule_id, + sarif_msgs.REMEDIATION_DEPENDENCY_CYCLE, ) @@ -140,7 +107,7 @@ def _rule_help(spec: _RuleSpec) -> dict[str, str]: "markdown": ( f"{spec.full_description}\n\n" f"{remediation}\n\n" - f"See [CodeClone docs]({DOCS_URL})." 
+ f"{sarif_msgs.SARIF_HELP_DOCS_SUFFIX.format(docs_url=DOCS_URL)}" ), } @@ -209,8 +176,8 @@ def _clone_rule_spec(category: str) -> _RuleSpec: if category == CLONE_KIND_FUNCTION: return _RuleSpec( "CCLONE001", - "Function clone group", - "Multiple functions share the same normalized function body.", + sarif_msgs.RULE_FUNCTION_CLONE_SHORT, + sarif_msgs.RULE_FUNCTION_CLONE_FULL, SEVERITY_WARNING, FAMILY_CLONE, FINDING_KIND_CLONE_GROUP, @@ -219,8 +186,8 @@ def _clone_rule_spec(category: str) -> _RuleSpec: if category == CLONE_KIND_BLOCK: return _RuleSpec( "CCLONE002", - "Block clone group", - "Repeated normalized statement blocks were detected across occurrences.", + sarif_msgs.RULE_BLOCK_CLONE_SHORT, + sarif_msgs.RULE_BLOCK_CLONE_FULL, SEVERITY_WARNING, FAMILY_CLONE, FINDING_KIND_CLONE_GROUP, @@ -228,8 +195,8 @@ def _clone_rule_spec(category: str) -> _RuleSpec: ) return _RuleSpec( "CCLONE003", - "Segment clone group", - "Repeated normalized statement segments were detected across occurrences.", + sarif_msgs.RULE_SEGMENT_CLONE_SHORT, + sarif_msgs.RULE_SEGMENT_CLONE_FULL, "note", FAMILY_CLONE, FINDING_KIND_CLONE_GROUP, @@ -241,11 +208,8 @@ def _structural_rule_spec(kind: str) -> _RuleSpec: if kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: return _RuleSpec( "CSTRUCT002", - "Clone guard/exit divergence", - ( - "Members of the same function-clone cohort diverged in " - "entry guards or early-exit behavior." - ), + sarif_msgs.RULE_GUARD_DIVERGENCE_SHORT, + sarif_msgs.RULE_GUARD_DIVERGENCE_FULL, SEVERITY_WARNING, FAMILY_STRUCTURAL, STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, @@ -254,11 +218,8 @@ def _structural_rule_spec(kind: str) -> _RuleSpec: if kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: return _RuleSpec( "CSTRUCT003", - "Clone cohort drift", - ( - "Members of the same function-clone cohort drifted from " - "the majority terminal/guard/try profile." 
- ), + sarif_msgs.RULE_COHORT_DRIFT_SHORT, + sarif_msgs.RULE_COHORT_DRIFT_FULL, SEVERITY_WARNING, FAMILY_STRUCTURAL, STRUCTURAL_KIND_CLONE_COHORT_DRIFT, @@ -266,8 +227,8 @@ def _structural_rule_spec(kind: str) -> _RuleSpec: ) return _RuleSpec( "CSTRUCT001", - "Duplicated branches", - "Repeated branch families with matching structural signatures were detected.", + sarif_msgs.RULE_DUPLICATED_BRANCHES_SHORT, + sarif_msgs.RULE_DUPLICATED_BRANCHES_FULL, SEVERITY_WARNING, FAMILY_STRUCTURAL, kind or STRUCTURAL_KIND_DUPLICATED_BRANCHES, @@ -279,8 +240,8 @@ def _dead_code_rule_spec(category: str) -> _RuleSpec: if category == SYMBOL_KIND_FUNCTION: return _RuleSpec( "CDEAD001", - "Unused function", - "Function appears to be unused with high confidence.", + sarif_msgs.RULE_UNUSED_FUNCTION_SHORT, + sarif_msgs.RULE_UNUSED_FUNCTION_FULL, SEVERITY_WARNING, FAMILY_DEAD_CODE, FINDING_KIND_UNUSED_SYMBOL, @@ -289,8 +250,8 @@ def _dead_code_rule_spec(category: str) -> _RuleSpec: if category == SYMBOL_KIND_CLASS: return _RuleSpec( "CDEAD002", - "Unused class", - "Class appears to be unused with high confidence.", + sarif_msgs.RULE_UNUSED_CLASS_SHORT, + sarif_msgs.RULE_UNUSED_CLASS_FULL, SEVERITY_WARNING, FAMILY_DEAD_CODE, FINDING_KIND_UNUSED_SYMBOL, @@ -299,8 +260,8 @@ def _dead_code_rule_spec(category: str) -> _RuleSpec: if category == SYMBOL_KIND_METHOD: return _RuleSpec( "CDEAD003", - "Unused method", - "Method appears to be unused with high confidence.", + sarif_msgs.RULE_UNUSED_METHOD_SHORT, + sarif_msgs.RULE_UNUSED_METHOD_FULL, SEVERITY_WARNING, FAMILY_DEAD_CODE, FINDING_KIND_UNUSED_SYMBOL, @@ -308,8 +269,8 @@ def _dead_code_rule_spec(category: str) -> _RuleSpec: ) return _RuleSpec( "CDEAD004", - "Unused symbol", - "Symbol appears to be unused with reported confidence.", + sarif_msgs.RULE_UNUSED_SYMBOL_SHORT, + sarif_msgs.RULE_UNUSED_SYMBOL_FULL, SEVERITY_WARNING, FAMILY_DEAD_CODE, FINDING_KIND_UNUSED_SYMBOL, @@ -321,8 +282,8 @@ def _design_rule_spec(category: str, kind: str) -> 
_RuleSpec: if category == CATEGORY_COHESION: return _RuleSpec( "CDESIGN001", - "Low cohesion class", - "Class cohesion is low according to LCOM4 hotspot thresholds.", + sarif_msgs.RULE_LOW_COHESION_SHORT, + sarif_msgs.RULE_LOW_COHESION_FULL, SEVERITY_WARNING, FAMILY_DESIGN, kind or FINDING_KIND_CLASS_HOTSPOT, @@ -331,8 +292,8 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec: if category == CATEGORY_COMPLEXITY: return _RuleSpec( "CDESIGN002", - "Complexity hotspot", - "Function exceeds the project complexity hotspot threshold.", + sarif_msgs.RULE_COMPLEXITY_SHORT, + sarif_msgs.RULE_COMPLEXITY_FULL, SEVERITY_WARNING, FAMILY_DESIGN, kind or FINDING_KIND_FUNCTION_HOTSPOT, @@ -341,8 +302,8 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec: if category == CATEGORY_COUPLING: return _RuleSpec( "CDESIGN003", - "Coupling hotspot", - "Class exceeds the project coupling hotspot threshold.", + sarif_msgs.RULE_COUPLING_SHORT, + sarif_msgs.RULE_COUPLING_FULL, SEVERITY_WARNING, FAMILY_DESIGN, kind or FINDING_KIND_CLASS_HOTSPOT, @@ -352,9 +313,8 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec: if kind == FINDING_KIND_COVERAGE_SCOPE_GAP: return _RuleSpec( "CDESIGN006", - "Coverage scope gap", - "A medium/high-risk function is outside the supplied joined " - "coverage scope.", + sarif_msgs.RULE_COVERAGE_SCOPE_GAP_SHORT, + sarif_msgs.RULE_COVERAGE_SCOPE_GAP_FULL, SEVERITY_WARNING, FAMILY_DESIGN, kind, @@ -362,9 +322,8 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec: ) return _RuleSpec( "CDESIGN005", - "Coverage hotspot", - "A medium/high-risk function falls below the configured joined " - "coverage threshold.", + sarif_msgs.RULE_COVERAGE_HOTSPOT_SHORT, + sarif_msgs.RULE_COVERAGE_HOTSPOT_FULL, SEVERITY_WARNING, FAMILY_DESIGN, kind or FINDING_KIND_COVERAGE_HOTSPOT, @@ -372,8 +331,8 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec: ) return _RuleSpec( "CDESIGN004", - "Dependency cycle", - "A dependency cycle was 
detected between project modules.", + sarif_msgs.RULE_DEPENDENCY_CYCLE_SHORT, + sarif_msgs.RULE_DEPENDENCY_CYCLE_FULL, "error", FAMILY_DESIGN, kind or FINDING_KIND_CYCLE, diff --git a/codeclone/report/renderers/text.py b/codeclone/report/renderers/text.py index a3f2a9de..b152a5c5 100644 --- a/codeclone/report/renderers/text.py +++ b/codeclone/report/renderers/text.py @@ -11,6 +11,8 @@ from ...domain.source_scope import IMPACT_SCOPE_NON_RUNTIME, SOURCE_KIND_OTHER from ...utils.coerce import as_int, as_mapping, as_sequence from .._formatting import format_spread_text +from ..messages import explain as explain_msgs +from ..messages import projections as proj _as_int = as_int _as_mapping = as_mapping @@ -21,7 +23,7 @@ def format_meta_text_value(value: object) -> str: if isinstance(value, bool): return "true" if value else "false" if value is None: - return "(none)" + return proj.PROJECTION_NONE if isinstance(value, float): return f"{value:.2f}".rstrip("0").rstrip(".") or "0" if isinstance(value, Sequence) and not isinstance( @@ -29,9 +31,9 @@ def format_meta_text_value(value: object) -> str: (str, bytes, bytearray), ): formatted = [format_meta_text_value(item) for item in value] - return ", ".join(formatted) if formatted else "(none)" + return ", ".join(formatted) if formatted else proj.PROJECTION_NONE text = str(value).strip() - return text if text else "(none)" + return text if text else proj.PROJECTION_NONE def _format_key_values( @@ -45,9 +47,9 @@ def _format_key_values( if key not in mapping: continue formatted = format_meta_text_value(mapping.get(key)) - if not skip_empty or formatted != "(none)": + if not skip_empty or formatted != proj.PROJECTION_NONE: parts.append(f"{key}={formatted}") - return " ".join(parts) if parts else "(none)" + return " ".join(parts) if parts else proj.PROJECTION_NONE def _spread_text(spread: Mapping[str, object]) -> str: @@ -67,15 +69,9 @@ def _scope_text(source_scope: Mapping[str, object]) -> str: def _structural_kind_label(kind: 
object) -> str: kind_text = str(kind).strip() - match kind_text: - case "duplicated_branches": - return "Duplicated branches" - case "clone_guard_exit_divergence": - return "Clone guard/exit divergence" - case "clone_cohort_drift": - return "Clone cohort drift" - case _: - return kind_text or "(none)" + if kind_text in explain_msgs.STRUCTURAL_KIND_LABELS: + return explain_msgs.STRUCTURAL_KIND_LABELS[kind_text] + return kind_text or proj.PROJECTION_NONE def _location_line( @@ -112,7 +108,7 @@ def _append_clone_section( ] lines.append(f"{title} ({novelty.upper()}) (groups={len(section_groups)})") if not section_groups: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return for idx, group in enumerate(section_groups, start=1): lines.append(f"=== Clone group #{idx} ===") @@ -164,7 +160,7 @@ def _append_suppressed_clone_section( section_groups = [_as_mapping(group) for group in groups] lines.append(f"{title} (groups={len(section_groups)})") if not section_groups: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return for idx, group in enumerate(section_groups, start=1): lines.append(f"=== Suppressed clone group #{idx} ===") @@ -204,9 +200,11 @@ def _append_suppressed_clone_section( def _append_structural_findings(lines: list[str], groups: Sequence[object]) -> None: structural_groups = [_as_mapping(group) for group in groups] - lines.append(f"STRUCTURAL FINDINGS (groups={len(structural_groups)})") + lines.append( + proj.TEXT_STRUCTURAL_FINDINGS_HEADER.format(count=len(structural_groups)) + ) if not structural_groups: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return for idx, group in enumerate(structural_groups, start=1): lines.append(f"=== Structural finding #{idx} ===") @@ -285,7 +283,7 @@ def _append_single_item_findings( finding_groups = [_as_mapping(group) for group in groups] lines.append(f"{title} (groups={len(finding_groups)})") if not finding_groups: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) 
return for idx, group in enumerate(finding_groups, start=1): lines.append(f"=== Finding #{idx} ===") @@ -329,7 +327,7 @@ def _suppression_bindings_text(item: Mapping[str, object]) -> str: source = str(item.get("suppression_source", "")).strip() if rule or source: return f"{rule or 'unknown'}@{source or 'unknown'}" - return "(none)" + return proj.PROJECTION_NONE def _append_suppressed_dead_code_items( @@ -338,9 +336,11 @@ def _append_suppressed_dead_code_items( items: Sequence[object], ) -> None: suppressed_items = [_as_mapping(item) for item in items] - lines.append(f"SUPPRESSED DEAD CODE (items={len(suppressed_items)})") + lines.append( + proj.TEXT_SUPPRESSED_DEAD_CODE_HEADER.format(count=len(suppressed_items)) + ) if not suppressed_items: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return for idx, item in enumerate(suppressed_items, start=1): lines.append(f"=== Suppressed dead-code item #{idx} ===") @@ -389,9 +389,9 @@ def _append_suggestions( finding_index = { str(group.get("id")): group for group in _flatten_findings(findings) } - lines.append(f"SUGGESTIONS (count={len(suggestion_rows)})") + lines.append(proj.TEXT_SUGGESTIONS_HEADER.format(count=len(suggestion_rows))) if not suggestion_rows: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return for idx, suggestion in enumerate(suggestion_rows, start=1): finding = finding_index.get(str(suggestion.get("finding_id")), {}) @@ -429,10 +429,10 @@ def _append_overview( overview: Mapping[str, object], hotlists: Mapping[str, object], ) -> None: - lines.append("DERIVED OVERVIEW") + lines.append(proj.TEXT_SECTION_DERIVED_OVERVIEW) families = _as_mapping(overview.get("families")) lines.append( - "Families: " + proj.TEXT_OVERVIEW_FAMILIES + _format_key_values( families, ("clones", "structural", "dead_code", "design"), @@ -440,7 +440,7 @@ def _append_overview( ) source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) lines.append( - "Source scope breakdown: " + 
proj.TEXT_OVERVIEW_SOURCE_SCOPE + _format_key_values( source_breakdown, ("production", "tests", "fixtures", "other"), @@ -448,7 +448,7 @@ def _append_overview( ) health_snapshot = _as_mapping(overview.get("health_snapshot")) lines.append( - "Health snapshot: " + proj.TEXT_OVERVIEW_HEALTH_SNAPSHOT + _format_key_values( health_snapshot, ("score", "grade", "strongest_dimension", "weakest_dimension"), @@ -465,7 +465,7 @@ def _append_overview( ), } lines.append( - "Hotlists: " + proj.TEXT_OVERVIEW_HOTLISTS + _format_key_values( hotlist_counts, ( @@ -478,9 +478,9 @@ def _append_overview( ) top_risks = list(map(_as_mapping, _as_sequence(overview.get("top_risks")))) if not top_risks: - lines.append("Top risks: (none)") + lines.append(proj.TEXT_OVERVIEW_TOP_RISKS_NONE) return - lines.append("Top risks:") + lines.append(proj.TEXT_OVERVIEW_TOP_RISKS) lines.extend( ( "- " @@ -572,7 +572,7 @@ def _append_top_metric_family( lines.extend(["", title]) rows = list(map(_as_mapping, items[:10])) if not rows: - lines.append("(none)") + lines.append(proj.PROJECTION_NONE) return lines.extend("- " + _format_key_values(item, key_order) for item in rows) @@ -586,7 +586,7 @@ def _append_metric_family_sections( if coverage_join_family: _append_top_metric_family( lines, - title="COVERAGE JOIN (top 10)", + title=proj.TEXT_SECTION_COVERAGE_JOIN, items=_as_sequence(coverage_join_family.get("items")), key_order=( "relative_path", @@ -605,7 +605,7 @@ def _append_metric_family_sections( overloaded_modules_family = _as_mapping(metrics_families.get("god_modules")) _append_top_metric_family( lines, - title="OVERLOADED MODULES (top 10)", + title=proj.TEXT_SECTION_OVERLOADED_MODULES, items=_as_sequence(overloaded_modules_family.get("items")), key_order=( "module", @@ -623,7 +623,7 @@ def _append_metric_family_sections( security_surfaces_family = _as_mapping(metrics_families.get("security_surfaces")) _append_top_metric_family( lines, - title="SECURITY SURFACES (top 10)", + 
title=proj.TEXT_SECTION_SECURITY_SURFACES, items=_as_sequence(security_surfaces_family.get("items")), key_order=( "category", @@ -646,9 +646,9 @@ def _append_findings_sections( metrics_families: Mapping[str, object], ) -> None: for title, group_key, metric_name in ( - ("FUNCTION CLONES", "functions", "loc"), - ("BLOCK CLONES", "blocks", "size"), - ("SEGMENT CLONES", "segments", "size"), + (proj.TEXT_SECTION_FUNCTION_CLONES, "functions", "loc"), + (proj.TEXT_SECTION_BLOCK_CLONES, "blocks", "size"), + (proj.TEXT_SECTION_SEGMENT_CLONES, "segments", "size"), ): groups = _as_sequence(clone_groups.get(group_key)) lines.append("") @@ -670,9 +670,9 @@ def _append_findings_sections( if suppressed_clone_groups: for title, group_key, metric_name in ( - ("SUPPRESSED FUNCTION CLONES", "functions", "loc"), - ("SUPPRESSED BLOCK CLONES", "blocks", "size"), - ("SUPPRESSED SEGMENT CLONES", "segments", "size"), + (proj.TEXT_SECTION_SUPPRESSED_FUNCTION_CLONES, "functions", "loc"), + (proj.TEXT_SECTION_SUPPRESSED_BLOCK_CLONES, "blocks", "size"), + (proj.TEXT_SECTION_SUPPRESSED_SEGMENT_CLONES, "segments", "size"), ): lines.append("") _append_suppressed_clone_section( @@ -690,7 +690,7 @@ def _append_findings_sections( lines.append("") _append_single_item_findings( lines, - title="DEAD CODE FINDINGS", + title=proj.TEXT_SECTION_DEAD_CODE_FINDINGS, groups=_as_sequence( _as_mapping(findings_groups.get("dead_code")).get("groups") ), @@ -705,7 +705,7 @@ def _append_findings_sections( lines.append("") _append_single_item_findings( lines, - title="DESIGN FINDINGS", + title=proj.TEXT_SECTION_DESIGN_FINDINGS, groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), fact_keys=("lcom4", "method_count", "instance_var_count", "fan_out", "risk"), ) @@ -749,66 +749,77 @@ def render_text_report_document(payload: Mapping[str, object]) -> str: suppressed_summary_keys.append("clones") lines = [ - "REPORT METADATA", - "Report schema version: " - 
f"{format_meta_text_value(payload.get('report_schema_version'))}", - "CodeClone version: " - f"{format_meta_text_value(meta_payload.get('codeclone_version'))}", - f"Project name: {format_meta_text_value(meta_payload.get('project_name'))}", - f"Scan root: {format_meta_text_value(meta_payload.get('scan_root'))}", - f"Python version: {format_meta_text_value(meta_payload.get('python_version'))}", - f"Python tag: {format_meta_text_value(meta_payload.get('python_tag'))}", - f"Analysis mode: {format_meta_text_value(meta_payload.get('analysis_mode'))}", - f"Report mode: {format_meta_text_value(meta_payload.get('report_mode'))}", - "Report generated (UTC): " - f"{format_meta_text_value(runtime_meta.get('report_generated_at_utc'))}", - "Computed metric families: " - f"{format_meta_text_value(meta_payload.get('computed_metric_families'))}", - f"Baseline path: {format_meta_text_value(baseline.get('path'))}", - "Baseline fingerprint version: " - f"{format_meta_text_value(baseline.get('fingerprint_version'))}", - "Baseline schema version: " - f"{format_meta_text_value(baseline.get('schema_version'))}", - f"Baseline Python tag: {format_meta_text_value(baseline.get('python_tag'))}", - "Baseline generator name: " - f"{format_meta_text_value(baseline.get('generator_name'))}", - "Baseline generator version: " - f"{format_meta_text_value(baseline.get('generator_version'))}", - "Baseline payload sha256: " - f"{format_meta_text_value(baseline.get('payload_sha256'))}", - "Baseline payload verified: " - f"{format_meta_text_value(baseline.get('payload_sha256_verified'))}", - f"Baseline loaded: {format_meta_text_value(baseline.get('loaded'))}", - f"Baseline status: {format_meta_text_value(baseline.get('status'))}", - f"Cache path: {format_meta_text_value(cache.get('path'))}", - f"Cache schema version: {format_meta_text_value(cache.get('schema_version'))}", - f"Cache status: {format_meta_text_value(cache.get('status'))}", - f"Cache used: {format_meta_text_value(cache.get('used'))}", - 
"Metrics baseline path: " - f"{format_meta_text_value(metrics_baseline.get('path'))}", - "Metrics baseline loaded: " - f"{format_meta_text_value(metrics_baseline.get('loaded'))}", - "Metrics baseline status: " - f"{format_meta_text_value(metrics_baseline.get('status'))}", - "Metrics baseline schema version: " - f"{format_meta_text_value(metrics_baseline.get('schema_version'))}", - "Metrics baseline payload sha256: " - f"{format_meta_text_value(metrics_baseline.get('payload_sha256'))}", - "Metrics baseline payload verified: " - f"{format_meta_text_value(metrics_baseline.get('payload_sha256_verified'))}", + proj.TEXT_SECTION_REPORT_METADATA, + proj.TEXT_META_REPORT_SCHEMA_VERSION + + f"{format_meta_text_value(payload.get('report_schema_version'))}", + proj.TEXT_META_CODECLONE_VERSION + + f"{format_meta_text_value(meta_payload.get('codeclone_version'))}", + proj.TEXT_META_PROJECT_NAME + + f"{format_meta_text_value(meta_payload.get('project_name'))}", + proj.TEXT_META_SCAN_ROOT + + f"{format_meta_text_value(meta_payload.get('scan_root'))}", + proj.TEXT_META_PYTHON_VERSION + + f"{format_meta_text_value(meta_payload.get('python_version'))}", + proj.TEXT_META_PYTHON_TAG + + f"{format_meta_text_value(meta_payload.get('python_tag'))}", + proj.TEXT_META_ANALYSIS_MODE + + f"{format_meta_text_value(meta_payload.get('analysis_mode'))}", + proj.TEXT_META_REPORT_MODE + + f"{format_meta_text_value(meta_payload.get('report_mode'))}", + proj.TEXT_META_REPORT_GENERATED + + f"{format_meta_text_value(runtime_meta.get('report_generated_at_utc'))}", + proj.TEXT_META_COMPUTED_METRIC_FAMILIES + + f"{format_meta_text_value(meta_payload.get('computed_metric_families'))}", + proj.TEXT_META_BASELINE_PATH + + f"{format_meta_text_value(baseline.get('path'))}", + proj.TEXT_META_BASELINE_FINGERPRINT_VERSION + + f"{format_meta_text_value(baseline.get('fingerprint_version'))}", + proj.TEXT_META_BASELINE_SCHEMA_VERSION + + f"{format_meta_text_value(baseline.get('schema_version'))}", + 
proj.TEXT_META_BASELINE_PYTHON_TAG + + f"{format_meta_text_value(baseline.get('python_tag'))}", + proj.TEXT_META_BASELINE_GENERATOR_NAME + + f"{format_meta_text_value(baseline.get('generator_name'))}", + proj.TEXT_META_BASELINE_GENERATOR_VERSION + + f"{format_meta_text_value(baseline.get('generator_version'))}", + proj.TEXT_META_BASELINE_PAYLOAD_SHA256 + + f"{format_meta_text_value(baseline.get('payload_sha256'))}", + proj.TEXT_META_BASELINE_PAYLOAD_VERIFIED + + f"{format_meta_text_value(baseline.get('payload_sha256_verified'))}", + proj.TEXT_META_BASELINE_LOADED + + f"{format_meta_text_value(baseline.get('loaded'))}", + proj.TEXT_META_BASELINE_STATUS + + f"{format_meta_text_value(baseline.get('status'))}", + proj.TEXT_META_CACHE_PATH + f"{format_meta_text_value(cache.get('path'))}", + proj.TEXT_META_CACHE_SCHEMA_VERSION + + f"{format_meta_text_value(cache.get('schema_version'))}", + proj.TEXT_META_CACHE_STATUS + f"{format_meta_text_value(cache.get('status'))}", + proj.TEXT_META_CACHE_USED + f"{format_meta_text_value(cache.get('used'))}", + proj.TEXT_META_METRICS_BASELINE_PATH + + f"{format_meta_text_value(metrics_baseline.get('path'))}", + proj.TEXT_META_METRICS_BASELINE_LOADED + + f"{format_meta_text_value(metrics_baseline.get('loaded'))}", + proj.TEXT_META_METRICS_BASELINE_STATUS + + f"{format_meta_text_value(metrics_baseline.get('status'))}", + proj.TEXT_META_METRICS_BASELINE_SCHEMA_VERSION + + f"{format_meta_text_value(metrics_baseline.get('schema_version'))}", + proj.TEXT_META_METRICS_BASELINE_PAYLOAD_SHA256 + + f"{format_meta_text_value(metrics_baseline.get('payload_sha256'))}", + proj.TEXT_META_METRICS_BASELINE_PAYLOAD_VERIFIED + + f"{format_meta_text_value(metrics_baseline.get('payload_sha256_verified'))}", ] if ( baseline.get("loaded") is not True or str(baseline.get("status", "")).strip().lower() != "ok" ): - lines.append("Note: baseline is untrusted; all groups are treated as NEW.") + lines.append(proj.TEXT_BASELINE_UNTRUSTED_NOTE) lines.extend( [ "", - 
"INVENTORY", - "Files: " + proj.TEXT_SECTION_INVENTORY, + proj.TEXT_INVENTORY_FILES + _format_key_values( inventory_files, ( @@ -819,44 +830,45 @@ def render_text_report_document(payload: Mapping[str, object]) -> str: "source_io_skipped", ), ), - "Code: " + proj.TEXT_INVENTORY_CODE + _format_key_values( inventory_code, ("scope", "parsed_lines", "functions", "methods", "classes"), ), - "File registry: " - f"encoding={format_meta_text_value(file_registry.get('encoding'))} " + proj.TEXT_INVENTORY_FILE_REGISTRY + + f"encoding={format_meta_text_value(file_registry.get('encoding'))} " f"count={len(_as_sequence(file_registry.get('items')))}", "", - "FINDINGS SUMMARY", - f"Total groups: {format_meta_text_value(findings_summary.get('total'))}", - "Families: " + proj.TEXT_SECTION_FINDINGS_SUMMARY, + proj.TEXT_FINDINGS_TOTAL_GROUPS + + f"{format_meta_text_value(findings_summary.get('total'))}", + proj.TEXT_FINDINGS_FAMILIES + _format_key_values( findings_families, ("clones", "structural", "dead_code", "design"), ), - "Severity: " + proj.TEXT_FINDINGS_SEVERITY + _format_key_values( findings_severity, ("critical", "warning", "info"), ), - "Impact scope: " + proj.TEXT_FINDINGS_IMPACT_SCOPE + _format_key_values( findings_impact_scope, ("runtime", "non_runtime", "mixed"), ), - "Clones: " + proj.TEXT_FINDINGS_CLONES + _format_key_values( findings_clones, tuple(clone_summary_keys), ), - "Suppressed: " + proj.TEXT_FINDINGS_SUPPRESSED + _format_key_values( findings_suppressed, tuple(suppressed_summary_keys), ), "", - "METRICS SUMMARY", + proj.TEXT_SECTION_METRICS_SUMMARY, ] ) _append_metrics_summary_lines(lines, metrics_summary=metrics_summary) @@ -877,13 +889,13 @@ def render_text_report_document(payload: Mapping[str, object]) -> str: lines.extend( [ "", - "INTEGRITY", - "Canonicalization: " + proj.TEXT_SECTION_INTEGRITY, + proj.TEXT_INTEGRITY_CANONICALIZATION + _format_key_values( canonicalization, ("version", "scope", "sections"), ), - "Digest: " + proj.TEXT_INTEGRITY_DIGEST + 
_format_key_values( digest, ("algorithm", "verified", "value"), diff --git a/codeclone/report/suggestions.py b/codeclone/report/suggestions.py index 85ff186f..e7d37004 100644 --- a/codeclone/report/suggestions.py +++ b/codeclone/report/suggestions.py @@ -61,6 +61,7 @@ representative_locations, source_kind_breakdown, ) +from .messages import suggestions as sugg_msgs if TYPE_CHECKING: from collections.abc import Mapping, Sequence @@ -131,11 +132,7 @@ def _source_context( def _clone_fact_kind(kind: Literal["function", "block", "segment"]) -> str: - return { - CLONE_KIND_FUNCTION: "Function clone group", - CLONE_KIND_BLOCK: "Block clone group", - CLONE_KIND_SEGMENT: "Segment clone group", - }[kind] + return sugg_msgs.CLONE_FACT_KIND_BY_KIND[kind] def _clone_summary( @@ -147,22 +144,22 @@ def _clone_summary( if kind == CLONE_KIND_FUNCTION: match clone_type: case "Type-1": - return "same exact function body" + return sugg_msgs.CLONE_SUMMARY_FUNCTION_TYPE1 case "Type-2": - return "same parameterized function body" + return sugg_msgs.CLONE_SUMMARY_FUNCTION_TYPE2 case "Type-3": - return "same structural function body with small identifier changes" + return sugg_msgs.CLONE_SUMMARY_FUNCTION_TYPE3 case _: - return "same structural function body" + return sugg_msgs.CLONE_SUMMARY_FUNCTION_TYPE4 if kind == CLONE_KIND_BLOCK: hint = str(facts.get("hint", "")).strip() pattern = str(facts.get("pattern", "")).strip() if hint == BLOCK_HINT_ASSERT_ONLY: - return "same assertion template" + return sugg_msgs.CLONE_SUMMARY_BLOCK_ASSERT_ONLY if pattern == BLOCK_PATTERN_REPEATED_STMT_HASH: - return "same repeated setup/assert pattern" - return "same structural sequence with small value changes" - return "same structural segment sequence" + return sugg_msgs.CLONE_SUMMARY_BLOCK_REPEATED_STMT + return sugg_msgs.CLONE_SUMMARY_BLOCK_DEFAULT + return sugg_msgs.CLONE_SUMMARY_SEGMENT def _clone_steps( @@ -173,34 +170,19 @@ def _clone_steps( ) -> tuple[str, ...]: hint = str(facts.get("hint", 
"")).strip() if kind == CLONE_KIND_FUNCTION and clone_type == "Type-1": - return ( - "Keep one canonical implementation and remove the exact duplicates.", - "Route the remaining call sites to the shared implementation.", - ) + return (sugg_msgs.CLONE_STEP_TYPE1_1, sugg_msgs.CLONE_STEP_TYPE1_2) if kind == CLONE_KIND_FUNCTION and clone_type == "Type-2": - return ( - "Extract a shared implementation with explicit parameters.", - "Replace identifier-only variations with arguments.", - ) + return (sugg_msgs.CLONE_STEP_TYPE2_1, sugg_msgs.CLONE_STEP_TYPE2_2) if kind == CLONE_KIND_BLOCK and hint == BLOCK_HINT_ASSERT_ONLY: return ( - "Collapse the repeated assertion template into a helper or loop.", - "Keep the asserted values as data instead of copy-pasted statements.", + sugg_msgs.CLONE_STEP_BLOCK_ASSERT_1, + sugg_msgs.CLONE_STEP_BLOCK_ASSERT_2, ) if kind == CLONE_KIND_BLOCK: - return ( - "Extract the repeated statement sequence into a helper.", - "Keep setup data close to the call site and move shared logic out.", - ) + return (sugg_msgs.CLONE_STEP_BLOCK_1, sugg_msgs.CLONE_STEP_BLOCK_2) if kind == CLONE_KIND_SEGMENT: - return ( - "Review whether the repeated segment should become shared utility code.", - "Keep this as a report hint only if the duplication is intentional.", - ) - return ( - "Extract the repeated logic into a shared abstraction.", - "Replace the duplicated bodies with calls to the shared code.", - ) + return (sugg_msgs.CLONE_STEP_SEGMENT_1, sugg_msgs.CLONE_STEP_SEGMENT_2) + return (sugg_msgs.CLONE_STEP_DEFAULT_1, sugg_msgs.CLONE_STEP_DEFAULT_2) def _clone_suggestion( @@ -388,13 +370,13 @@ def _complexity_suggestions( _single_location_suggestion( severity=severity, category=CATEGORY_COMPLEXITY, - title="Reduce function complexity", + title=sugg_msgs.SUGGESTION_TITLE_REDUCE_COMPLEXITY, steps=( - "Split the function into smaller deterministic stages.", - "Extract helper functions for nested branches.", + sugg_msgs.COMPLEXITY_STEP_1, + 
sugg_msgs.COMPLEXITY_STEP_2, ), effort=EFFORT_MODERATE, - fact_kind="Function complexity hotspot", + fact_kind=sugg_msgs.FACT_KIND_COMPLEXITY_HOTSPOT, fact_summary=f"cyclomatic_complexity={cc}, nesting_depth={nesting}", filepath=_as_str(unit.get("filepath")), start_line=_as_int(unit.get("start_line")), @@ -424,13 +406,13 @@ def _coupling_and_cohesion_suggestions( _single_location_suggestion( severity=SEVERITY_WARNING, category=CATEGORY_COUPLING, - title="Reduce class coupling", + title=sugg_msgs.SUGGESTION_TITLE_REDUCE_COUPLING, steps=( - "Reduce external dependencies of this class.", - "Move unrelated responsibilities to collaborator classes.", + sugg_msgs.COUPLING_STEP_1, + sugg_msgs.COUPLING_STEP_2, ), effort=EFFORT_MODERATE, - fact_kind="Class coupling hotspot", + fact_kind=sugg_msgs.FACT_KIND_COUPLING_HOTSPOT, fact_summary=f"cbo={metric.cbo}", filepath=metric.filepath, start_line=metric.start_line, @@ -447,13 +429,13 @@ def _coupling_and_cohesion_suggestions( _single_location_suggestion( severity=SEVERITY_WARNING, category=CATEGORY_COHESION, - title="Split low-cohesion class", + title=sugg_msgs.SUGGESTION_TITLE_SPLIT_COHESION, steps=( - "Split class by responsibility boundaries.", - "Group methods by shared state and extract subcomponents.", + sugg_msgs.COHESION_STEP_1, + sugg_msgs.COHESION_STEP_2, ), effort=EFFORT_MODERATE, - fact_kind="Low cohesion class", + fact_kind=sugg_msgs.FACT_KIND_LOW_COHESION, fact_summary=f"lcom4={metric.lcom4}", filepath=metric.filepath, start_line=metric.start_line, @@ -481,13 +463,13 @@ def _dead_code_suggestions( _single_location_suggestion( severity=SEVERITY_WARNING, category=CATEGORY_DEAD_CODE, - title="Remove or explicitly keep unused code", + title=sugg_msgs.SUGGESTION_TITLE_DEAD_CODE, steps=( - "Remove or deprecate the unused symbol.", - "If intentionally reserved, add explicit keep marker and test.", + sugg_msgs.DEAD_CODE_STEP_1, + sugg_msgs.DEAD_CODE_STEP_2, ), effort=EFFORT_EASY, - fact_kind="Dead code item", + 
fact_kind=sugg_msgs.FACT_KIND_DEAD_CODE, fact_summary=f"{item.kind} with {item.confidence} confidence", filepath=item.filepath, start_line=item.start_line, @@ -518,18 +500,18 @@ def _dependency_suggestions(project_metrics: ProjectMetrics) -> list[Suggestion] Suggestion( severity=SEVERITY_CRITICAL, category=CATEGORY_DEPENDENCY, - title="Break circular dependency", + title=sugg_msgs.SUGGESTION_TITLE_BREAK_CYCLE, location=location, steps=( - "Break the cycle by extracting a shared abstraction.", - "Invert one dependency edge through an interface or protocol.", + sugg_msgs.DEPENDENCY_STEP_1, + sugg_msgs.DEPENDENCY_STEP_2, ), effort=EFFORT_HARD, priority=_priority(SEVERITY_CRITICAL, EFFORT_HARD), finding_family=FAMILY_METRICS, finding_kind="cycle", subject_key=location, - fact_kind="Dependency cycle", + fact_kind=sugg_msgs.FACT_KIND_DEPENDENCY_CYCLE, fact_summary=f"{len(cycle)} modules participate in this cycle", fact_count=len(cycle), spread_files=len(cycle), @@ -547,13 +529,13 @@ def _structural_summary(group: StructuralFindingGroup) -> tuple[str, str]: match group.finding_kind: case "clone_guard_exit_divergence": return ( - "Clone guard/exit divergence", - "clone cohort members differ in entry guards or early-exit behavior", + sugg_msgs.STRUCTURAL_TITLE_GUARD_EXIT_DIVERGENCE, + sugg_msgs.STRUCTURAL_SUMMARY_GUARD_EXIT_DIVERGENCE, ) case "clone_cohort_drift": return ( - "Clone cohort drift", - "clone cohort members drift from majority terminal/guard/try profile", + sugg_msgs.STRUCTURAL_TITLE_COHORT_DRIFT, + sugg_msgs.STRUCTURAL_SUMMARY_COHORT_DRIFT, ) case _: pass @@ -565,36 +547,42 @@ def _structural_summary(group: StructuralFindingGroup) -> tuple[str, str]: raise_like = terminal == "raise" or raises not in {"", "0"} match (raise_like, terminal, has_loop): case (True, _, _): - return "Repeated branch family", "same repeated guard/validation branch" + return ( + sugg_msgs.STRUCTURAL_TITLE_REPEATED_BRANCH, + sugg_msgs.STRUCTURAL_SUMMARY_RAISE_BRANCH, + ) case (False, 
"return", _): - return "Repeated branch family", "same repeated return branch" + return ( + sugg_msgs.STRUCTURAL_TITLE_REPEATED_BRANCH, + sugg_msgs.STRUCTURAL_SUMMARY_RETURN_BRANCH, + ) case (False, _, "1"): - return "Repeated branch family", "same repeated loop branch" + return ( + sugg_msgs.STRUCTURAL_TITLE_REPEATED_BRANCH, + sugg_msgs.STRUCTURAL_SUMMARY_LOOP_BRANCH, + ) case _: if stmt_seq: - return "Repeated branch family", ( + return sugg_msgs.STRUCTURAL_TITLE_REPEATED_BRANCH, ( f"same repeated branch shape ({stmt_seq})" ) - return "Repeated branch family", "same repeated branch shape" + return ( + sugg_msgs.STRUCTURAL_TITLE_REPEATED_BRANCH, + sugg_msgs.STRUCTURAL_SUMMARY_BRANCH_DEFAULT, + ) def structural_action_steps(group: StructuralFindingGroup) -> tuple[str, ...]: match group.finding_kind: case "clone_guard_exit_divergence": return ( - ( - "Compare divergent clone members against the majority " - "guard/exit profile." - ), - "If divergence is accidental, align guard exits across the cohort.", + sugg_msgs.STRUCTURAL_STEP_GUARD_EXIT_1, + sugg_msgs.STRUCTURAL_STEP_GUARD_EXIT_2, ) case "clone_cohort_drift": return ( - "Review whether cohort drift is intentional for this clone family.", - ( - "If not intentional, reconcile terminal/guard/try profiles " - "across members." - ), + sugg_msgs.STRUCTURAL_STEP_COHORT_DRIFT_1, + sugg_msgs.STRUCTURAL_STEP_COHORT_DRIFT_2, ) case _: pass @@ -604,36 +592,24 @@ def structural_action_steps(group: StructuralFindingGroup) -> tuple[str, ...]: stmt_names = tuple(part.strip() for part in stmt_seq.split(",") if part.strip()) if "Continue" in stmt_names: return ( - ( - "Review whether the repeated continue guard can be merged " - "into one predicate." - ), - ( - "If separate continue checks keep the local control flow clearer, " - "keep this as a report-only hint." 
- ), + sugg_msgs.STRUCTURAL_STEP_CONTINUE_1, + sugg_msgs.STRUCTURAL_STEP_CONTINUE_2, ) match terminal: case "raise": return ( - "Factor the repeated validation/guard path into a shared helper.", - ( - "Keep the branch-specific inputs at the call site and share " - "the exit policy." - ), + sugg_msgs.STRUCTURAL_STEP_RAISE_1, + sugg_msgs.STRUCTURAL_STEP_RAISE_2, ) case "return": return ( - "Consolidate the repeated return-path logic into a shared helper.", - "Keep the branch predicate local and share the emitted behavior.", + sugg_msgs.STRUCTURAL_STEP_RETURN_1, + sugg_msgs.STRUCTURAL_STEP_RETURN_2, ) case _: return ( - "Review whether the repeated local branch can be simplified in place.", - ( - "If the local duplication keeps control flow clearer, keep " - "this as a report-only hint." - ), + sugg_msgs.STRUCTURAL_STEP_DEFAULT_1, + sugg_msgs.STRUCTURAL_STEP_DEFAULT_2, ) @@ -717,7 +693,7 @@ def _structural_suggestions( finding_family=FAMILY_STRUCTURAL, finding_kind=group.finding_kind, subject_key=group.finding_key, - fact_kind="Structural finding", + fact_kind=sugg_msgs.FACT_KIND_STRUCTURAL, fact_summary=summary, fact_count=count, spread_files=spread_files, diff --git a/codeclone/scanner/__init__.py b/codeclone/scanner/__init__.py index 4d89478d..66c427e0 100644 --- a/codeclone/scanner/__init__.py +++ b/codeclone/scanner/__init__.py @@ -57,6 +57,18 @@ def _is_under_root(path: Path, root: Path) -> bool: return False +def resolved_path_under_root(filepath: str, root: str) -> Path | None: + """Return the resolved source path when it stays under ``root``.""" + try: + root_path = Path(root).resolve() + resolved = Path(filepath).resolve() + except OSError: + return None + if _is_under_root(resolved, root_path): + return resolved + return None + + def _ensure_not_sensitive_root(*, rootp: Path, root_arg: str) -> None: root_str = str(rootp) temp_root = _get_tempdir() diff --git a/codeclone/surfaces/cli/analytics.py b/codeclone/surfaces/cli/analytics.py new file mode 100644 
index 00000000..d20847f5 --- /dev/null +++ b/codeclone/surfaces/cli/analytics.py @@ -0,0 +1,800 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Corpus analytics CLI subcommands.""" + +from __future__ import annotations + +import argparse +import os +import sqlite3 +import sys +from collections.abc import Callable +from pathlib import Path +from typing import Literal, cast + +from ...analytics.capabilities import ( + AnalyticsCapability, + check_capability, + install_hint, +) +from ...analytics.clustering.models import NOISE_LABEL, ClusteringParameters +from ...analytics.contracts import ( + INTENT_REPRESENTATION_DESCRIPTION, + INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME, +) +from ...analytics.exceptions import ( + AnalyticsCapabilityError, + AnalyticsError, + AnalyticsWorkflowError, +) +from ...analytics.export.json_export import ( + export_clustering_json, + export_sweep_comparison_json, +) +from ...analytics.integrity import validate_persisted_run +from ...analytics.profiles.loader import ( + load_manifest_file, + manifest_value, + profile_manifest_digest, +) +from ...analytics.profiles.models import ProfileSearchSpace +from ...analytics.profiles.registry import ( + ProfileRegistry, + get_profile, + list_profiles, + resolve_profile_registry, +) +from ...analytics.report.html import render_analytics_html +from ...analytics.store.sqlite import SqliteCorpusAnalyticsStore +from ...analytics.workflow import ( + BuildResult, + run_build, + run_clustering, + run_embed, + run_snapshot, + select_cluster_run, +) +from ...config.analytics import AnalyticsConfig, resolve_analytics_config +from ...config.observability import resolve_observability_config +from ...contracts import ExitCode +from ...observability import bootstrap, operation, 
shutdown, span +from ...utils.json_io import ( + json_text, + write_json_document_atomically, + write_json_text_atomically, +) +from ...utils.repo_paths import RepoPathPolicy, resolve_under_repo_root + + +def _representation_kind(raw: str) -> str: + if raw == "description": + return INTENT_REPRESENTATION_DESCRIPTION + if raw == "description_with_frame": + return INTENT_REPRESENTATION_DESCRIPTION_WITH_FRAME + msg = f"unsupported representation: {raw}" + raise AnalyticsWorkflowError(msg) + + +def _require_capability(capability: AnalyticsCapability) -> None: + status = check_capability(capability) + if not status.available: + missing = ", ".join(status.missing_packages) + raise AnalyticsCapabilityError( + f"missing analytics dependencies: {missing}. " + f"Install with: {install_hint(status.missing_packages)}" + ) + + +def _add_root(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--root", + default=".", + help="Repository root (default: .)", + ) + + +def _add_clustering_controls(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--sweep", action="store_true") + parser.add_argument("--profile", default=None) + parser.add_argument("--pca-dimensions", type=int, default=None) + parser.add_argument("--min-cluster-size", type=int, default=None) + parser.add_argument("--min-samples", type=int, default=None) + parser.add_argument( + "--cluster-selection-method", + choices=("eom", "leaf"), + default=None, + ) + parser.add_argument("--sweep-pca", default=None) + parser.add_argument("--sweep-min-cluster-size", default=None) + parser.add_argument("--sweep-min-samples", default=None) + parser.add_argument("--sweep-selection-method", default=None) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="codeclone analytics") + sub = parser.add_subparsers(dest="command", required=True) + + snapshot = sub.add_parser("snapshot", help="Build immutable intent corpus snapshot") + _add_root(snapshot) + 
snapshot.add_argument( + "--representation", + choices=("description", "description_with_frame"), + default="description", + ) + snapshot.add_argument("--output-json", type=Path, default=None) + + embed = sub.add_parser("embed", help="Generate analytics embeddings for snapshot") + _add_root(embed) + embed.add_argument("--snapshot-id", required=True) + + cluster = sub.add_parser("cluster", help="Cluster embedded snapshot") + _add_root(cluster) + cluster.add_argument("--snapshot-id") + cluster.add_argument("--embedding-generation-id") + _add_clustering_controls(cluster) + cluster.add_argument("--select-run", dest="select_run", default=None) + cluster.add_argument("--selection-rationale", default=None) + cluster.add_argument( + "--selected-by", + default=None, + ) + cluster.add_argument( + "--selection-profile", + default="none", + help="Profile batch id, profile id, or none for global selection", + ) + + build = sub.add_parser("build", help="Snapshot, embed, and cluster end-to-end") + _add_root(build) + build.add_argument( + "--lane", + choices=("intent",), + default="intent", + ) + build.add_argument( + "--representation", + choices=("description", "description_with_frame"), + default="description", + ) + _add_clustering_controls(build) + build.add_argument("--use-recommended", action="store_true") + build.add_argument("--html-out", type=Path, default=None) + build.add_argument("--json-out", type=Path, default=None) + + clusters = sub.add_parser("clusters", help="List clustering runs for snapshot") + _add_root(clusters) + clusters.add_argument("--snapshot-id", required=True) + + cluster_show = sub.add_parser("cluster-show", help="Export one clustering run JSON") + _add_root(cluster_show) + cluster_show.add_argument("--snapshot-id", required=True) + cluster_show.add_argument("--run-id", required=True) + cluster_show.add_argument("--output", type=Path, default=None) + + outliers = sub.add_parser("outliers", help="Show noise cluster assignments") + _add_root(outliers) 
+ outliers.add_argument("--snapshot-id", required=True) + outliers.add_argument("--run-id", required=True) + + profiles = sub.add_parser("profiles", help="Inspect analytics profile registry") + profile_sub = profiles.add_subparsers(dest="profile_command", required=True) + profile_list = profile_sub.add_parser("list", help="List registered profiles") + _add_root(profile_list) + profile_show = profile_sub.add_parser("show", help="Show one profile manifest") + _add_root(profile_show) + profile_show.add_argument("--profile-id", required=True) + profile_validate = profile_sub.add_parser( + "validate", + help="Validate one manifest or the resolved registry", + ) + _add_root(profile_validate) + profile_validate.add_argument("--path", type=Path, default=None) + + return parser + + +def _comma_ints(raw: str, *, flag: str) -> tuple[int, ...]: + try: + values = tuple(int(item.strip()) for item in raw.split(",")) + except ValueError as exc: + raise AnalyticsWorkflowError( + f"{flag} requires comma-separated positive integers" + ) from exc + if not values or any(value <= 0 for value in values): + raise AnalyticsWorkflowError( + f"{flag} requires comma-separated positive integers" + ) + return tuple(sorted(set(values))) + + +def _comma_methods( + raw: str, +) -> tuple[Literal["eom", "leaf"], ...]: + values = tuple(sorted({item.strip() for item in raw.split(",") if item.strip()})) + if not values or any(value not in {"eom", "leaf"} for value in values): + raise AnalyticsWorkflowError( + "--sweep-selection-method requires eom and/or leaf" + ) + return cast("tuple[Literal['eom', 'leaf'], ...]", values) + + +def _single_parameter_flags_set(args: argparse.Namespace) -> bool: + return any( + getattr(args, field, None) is not None + for field in ( + "pca_dimensions", + "min_cluster_size", + "min_samples", + "cluster_selection_method", + ) + ) + + +def _sweep_override_flags_set(args: argparse.Namespace) -> bool: + return any( + getattr(args, field, None) is not None + for field in ( + 
"sweep_pca", + "sweep_min_cluster_size", + "sweep_min_samples", + "sweep_selection_method", + ) + ) + + +def _clustering_execution_args_set(args: argparse.Namespace) -> bool: + return any( + ( + getattr(args, "snapshot_id", None) is not None, + getattr(args, "embedding_generation_id", None) is not None, + bool(getattr(args, "sweep", False)), + getattr(args, "profile", None) is not None, + _single_parameter_flags_set(args), + _sweep_override_flags_set(args), + ) + ) + + +def _validate_clustering_mode_args(args: argparse.Namespace) -> None: + single = _single_parameter_flags_set(args) + sweep_overrides = _sweep_override_flags_set(args) + if getattr(args, "profile", None) is not None and single: + raise AnalyticsWorkflowError( + "profile sweep conflicts with explicit clustering parameters" + ) + if sweep_overrides: + args.sweep = True + if getattr(args, "profile", None) is not None: + args.sweep = True + if args.sweep and single: + raise AnalyticsWorkflowError( + "sweep mode conflicts with explicit clustering parameters" + ) + + +def _clustering_parameters_from_args( + args: argparse.Namespace, + *, + config: AnalyticsConfig, +) -> ClusteringParameters | None: + if not _single_parameter_flags_set(args): + return None + return ClusteringParameters( + pca_dimensions=( + args.pca_dimensions + if args.pca_dimensions is not None + else config.default_pca_dimensions + ), + min_cluster_size=( + args.min_cluster_size + if args.min_cluster_size is not None + else config.default_min_cluster_size + ), + min_samples=( + args.min_samples + if args.min_samples is not None + else config.default_min_samples + ), + cluster_selection_method=( + args.cluster_selection_method + if args.cluster_selection_method is not None + else config.default_cluster_selection_method + ), + ) + + +def _sweep_grid_from_args( + args: argparse.Namespace, + *, + config: AnalyticsConfig, +) -> ProfileSearchSpace | None: + if not _sweep_override_flags_set(args): + return None + return ProfileSearchSpace( + 
pca_dimensions=( + _comma_ints(args.sweep_pca, flag="--sweep-pca") + if args.sweep_pca is not None + else config.sweep_pca_dimensions + ), + min_cluster_size=( + _comma_ints( + args.sweep_min_cluster_size, + flag="--sweep-min-cluster-size", + ) + if args.sweep_min_cluster_size is not None + else config.sweep_min_cluster_sizes + ), + min_samples=( + _comma_ints(args.sweep_min_samples, flag="--sweep-min-samples") + if args.sweep_min_samples is not None + else config.sweep_min_samples + ), + cluster_selection_method=( + _comma_methods(args.sweep_selection_method) + if args.sweep_selection_method is not None + else config.sweep_selection_methods + ), + ) + + +def _run_snapshot_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + snapshot_result = run_snapshot( + root_path=root, + representation_kind=_representation_kind(args.representation), + ) + payload = { + "snapshot_id": snapshot_result.snapshot_id, + "source_digest": snapshot_result.source_digest, + "record_count": snapshot_result.record_count, + } + if args.output_json is not None: + write_json_document_atomically(args.output_json, payload) + else: + _print_json(payload) + return ExitCode.SUCCESS + + +def _run_embed_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("embed") + embed_result = run_embed(root_path=root, snapshot_id=args.snapshot_id) + _print_json( + { + "embedding_generation_id": embed_result.embedding_generation_id, + "item_count": embed_result.item_count, + } + ) + return ExitCode.SUCCESS + + +def _run_cluster_command(args: argparse.Namespace, root: Path) -> int: + if args.select_run: + if _clustering_execution_args_set(args): + raise AnalyticsWorkflowError( + "--select-run cannot be combined with clustering execution arguments" + ) + _require_capability("base") + profile_batch_id, selection_profile_id = _selection_scope( + getattr(args, "selection_profile", "none") + ) + requested_selected_by = getattr(args, "selected_by", None) + 
selected_by = ( + requested_selected_by + if requested_selected_by is not None + else os.environ.get("USER") or "local-maintainer" + ) + selection = select_cluster_run( + root_path=root, + clustering_run_id=args.select_run, + profile_batch_id=profile_batch_id, + selection_profile_id=selection_profile_id, + selected_by=selected_by, + rationale=getattr(args, "selection_rationale", None), + ) + _print_json( + { + "selected_run_id": args.select_run, + "selection_id": selection.selection_id, + } + ) + return ExitCode.SUCCESS + if ( + getattr(args, "selection_rationale", None) is not None + or getattr(args, "selection_profile", "none") != "none" + or getattr(args, "selected_by", None) is not None + ): + raise AnalyticsWorkflowError( + "--selection-rationale, --selection-profile, and --selected-by " + "require --select-run" + ) + if not args.snapshot_id or not args.embedding_generation_id: + raise AnalyticsWorkflowError( + "--snapshot-id and --embedding-generation-id are required " + "unless --select-run is used" + ) + _validate_clustering_mode_args(args) + _require_capability("cluster") + config = resolve_analytics_config(root) + run_ids = run_clustering( + root_path=root, + snapshot_id=args.snapshot_id, + embedding_generation_id=args.embedding_generation_id, + requested=_clustering_parameters_from_args(args, config=config), + sweep=args.sweep, + sweep_grid=_sweep_grid_from_args(args, config=config), + profile_id=getattr(args, "profile", None), + config=config, + ) + payload: dict[str, object] = {"clustering_run_ids": list(run_ids)} + if not config.db_path.exists(): + _print_json(payload) + return ExitCode.SUCCESS + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + runs = store.list_clustering_runs( + snapshot_id=args.snapshot_id, + embedding_generation_id=args.embedding_generation_id, + ) + recommended = next( + (run.clustering_run_id for run in runs if run.recommended_by_heuristic), + None, + ) + if recommended is not None: + 
payload["recommended_run_id"] = recommended + resolved_profile_id = _resolved_profile_id( + config, + getattr(args, "profile", None), + ) + if resolved_profile_id is not None: + batch = store.get_latest_profile_batch( + snapshot_id=args.snapshot_id, + embedding_generation_id=args.embedding_generation_id, + profile_id=resolved_profile_id, + ) + if batch is not None: + payload.update( + { + "profile_batch_id": batch.profile_batch_id, + "recommended_for_profile_run_id": ( + batch.recommended_clustering_run_id + ), + "profile_id": batch.profile_id, + "batch_status": batch.status, + } + ) + finally: + store.close() + _print_json(payload) + return ExitCode.SUCCESS + + +def _selection_scope(raw: str) -> tuple[str | None, str | None]: + normalized = raw.strip() + if normalized == "none": + return None, None + if normalized.startswith("pbatch-"): + return normalized, None + return None, normalized + + +def _resolved_profile_id( + config: AnalyticsConfig, + profile_id: str | None, +) -> str | None: + if profile_id is None: + return None + if profile_id == "auto": + if config.default_profile_id is None: + raise AnalyticsWorkflowError("default_profile_id not configured") + return config.default_profile_id + return profile_id + + +def _write_build_exports( + *, + args: argparse.Namespace, + root: Path, + build_result: BuildResult, +) -> None: + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + snapshot = store.get_snapshot(build_result.snapshot_id) + if snapshot is None: + raise AnalyticsWorkflowError("snapshot missing after build") + primary_run_id = ( + build_result.recommended_for_profile_run_id + if args.use_recommended and build_result.profile_id is not None + else build_result.recommended_run_id + if args.use_recommended + else None + ) or ( + build_result.clustering_run_ids[0] + if build_result.clustering_run_ids + else None + ) + comparison_export = args.sweep and ( + not args.use_recommended + or ( + 
build_result.profile_id is not None + and build_result.recommended_for_profile_run_id is None + ) + ) + if primary_run_id is None and comparison_export and args.html_out is not None: + runs = store.list_clustering_runs( + snapshot_id=build_result.snapshot_id, + embedding_generation_id=build_result.embedding_generation_id, + ) + primary_run_id = runs[0].clustering_run_id if runs else None + with span(name="analytics.report"): + if args.json_out is not None and ( + comparison_export or primary_run_id is not None + ): + if comparison_export: + text = export_sweep_comparison_json( + store=store, + snapshot_id=build_result.snapshot_id, + embedding_generation_id=build_result.embedding_generation_id, + profile_id=build_result.profile_id, + profile_batch_id=build_result.profile_batch_id, + ) + else: + if primary_run_id is None: + raise AnalyticsWorkflowError( + "clustering run missing after build" + ) + text = export_clustering_json( + store=store, + snapshot_id=build_result.snapshot_id, + clustering_run_id=primary_run_id, + profile_id=build_result.profile_id, + profile_batch_id=build_result.profile_batch_id, + ) + args.json_out.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.json_out, text) + if args.html_out is not None: + if primary_run_id is None: + raise AnalyticsWorkflowError("clustering run missing after build") + run = store.get_clustering_run(primary_run_id) + if run is None: + raise AnalyticsWorkflowError("clustering run missing after build") + rendered = render_analytics_html( + store=store, + snapshot=snapshot, + run=run, + comparison_only=comparison_export, + profile_id=build_result.profile_id, + profile_batch_id=build_result.profile_batch_id, + ) + args.html_out.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.html_out, rendered) + finally: + store.close() + + +def _run_build_command(args: argparse.Namespace, root: Path) -> int: + _validate_clustering_mode_args(args) + if args.use_recommended and not 
args.sweep: + raise AnalyticsWorkflowError("--use-recommended requires --sweep") + _require_capability("full") + config = resolve_analytics_config(root) + build_result = run_build( + root_path=root, + representation_kind=_representation_kind(args.representation), + sweep=args.sweep, + use_recommended=args.use_recommended, + requested=_clustering_parameters_from_args(args, config=config), + sweep_grid=_sweep_grid_from_args(args, config=config), + profile_id=getattr(args, "profile", None), + config=config, + ) + if args.json_out is not None or args.html_out is not None: + _write_build_exports(args=args, root=root, build_result=build_result) + _print_json( + { + "snapshot_id": build_result.snapshot_id, + "embedding_generation_id": build_result.embedding_generation_id, + "clustering_run_ids": list(build_result.clustering_run_ids), + "recommended_run_id": build_result.recommended_run_id, + "profile_id": build_result.profile_id, + "profile_batch_id": build_result.profile_batch_id, + "recommended_for_profile_run_id": ( + build_result.recommended_for_profile_run_id + ), + } + ) + return ExitCode.SUCCESS + + +def _run_clusters_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + if store.get_snapshot(args.snapshot_id) is None: + raise AnalyticsWorkflowError(f"unknown snapshot: {args.snapshot_id}") + runs = store.list_clustering_runs(snapshot_id=args.snapshot_id) + _print_json( + [ + { + "clustering_run_id": run.clustering_run_id, + "recommended_by_heuristic": run.recommended_by_heuristic, + "selected_by_maintainer": run.selected_by_maintainer, + "profile_batch_ids": ( + list( + store.list_profile_batch_ids_for_run( + clustering_run_id=run.clustering_run_id + ) + ) + if hasattr(store, "list_profile_batch_ids_for_run") + else [] + ), + "status": run.status, + } + for run in runs + ] + ) + finally: + store.close() + return 
ExitCode.SUCCESS + + +def _run_cluster_show_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + text = export_clustering_json( + store=store, + snapshot_id=args.snapshot_id, + clustering_run_id=args.run_id, + ) + if args.output is not None: + args.output.parent.mkdir(parents=True, exist_ok=True) + write_json_text_atomically(args.output, text) + else: + print(text, end="") + finally: + store.close() + return ExitCode.SUCCESS + + +def _run_outliers_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + config = resolve_analytics_config(root) + store = SqliteCorpusAnalyticsStore.open_readonly(config.db_path) + try: + validate_persisted_run( + store=store, + snapshot_id=args.snapshot_id, + clustering_run_id=args.run_id, + ) + assignments = store.list_assignments(args.run_id) + noise = [ + item.snapshot_item_id + for item in assignments + if item.cluster_label == NOISE_LABEL + ] + _print_json({"noise_items": noise}) + finally: + store.close() + return ExitCode.SUCCESS + + +def _profile_registry(root: Path) -> tuple[AnalyticsConfig, ProfileRegistry]: + config = resolve_analytics_config(root) + registry = resolve_profile_registry( + profile_paths=config.profile_paths, + default_profile_id=config.default_profile_id, + ) + return config, registry + + +def _run_profiles_command(args: argparse.Namespace, root: Path) -> int: + _require_capability("base") + _config, registry = _profile_registry(root) + if args.profile_command == "list": + _print_json( + { + "profiles": [ + { + "profile_id": profile_id, + "label": registry.profiles[profile_id].label, + "profile_version": ( + registry.profiles[profile_id].profile_version + ), + "source": registry.sources[profile_id], + "manifest_digest": profile_manifest_digest( + registry.profiles[profile_id] + ), + } + for profile_id in list_profiles(registry) + ] + 
} + ) + return ExitCode.SUCCESS + if args.profile_command == "show": + profile = get_profile(registry, args.profile_id) + payload = manifest_value(profile) + payload["manifest_digest"] = profile_manifest_digest(profile) + payload["source"] = registry.sources[profile.profile_id] + _print_json(payload) + return ExitCode.SUCCESS + if args.path is not None: + path = resolve_under_repo_root( + root, + args.path, + policy=RepoPathPolicy( + allow_absolute=True, + must_exist=True, + must_be_file=True, + ), + ) + profile = load_manifest_file(path) + _print_json( + { + "valid": True, + "profile_id": profile.profile_id, + "manifest_digest": profile_manifest_digest(profile), + } + ) + return ExitCode.SUCCESS + _print_json( + { + "valid": True, + "profiles": [ + { + "profile_id": profile_id, + "manifest_digest": profile_manifest_digest( + registry.profiles[profile_id] + ), + } + for profile_id in list_profiles(registry) + ], + } + ) + return ExitCode.SUCCESS + + +_CommandHandler = Callable[[argparse.Namespace, Path], int] + +_COMMAND_HANDLERS: dict[str, _CommandHandler] = { + "snapshot": _run_snapshot_command, + "embed": _run_embed_command, + "cluster": _run_cluster_command, + "build": _run_build_command, + "clusters": _run_clusters_command, + "cluster-show": _run_cluster_show_command, + "outliers": _run_outliers_command, + "profiles": _run_profiles_command, +} + + +def analytics_main(argv: list[str] | None = None) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + root = Path(args.root).resolve() + if not root.is_dir(): + print(f"repository root is not a directory: {root}", file=sys.stderr) + return ExitCode.CONTRACT_ERROR + handler = _COMMAND_HANDLERS[args.command] + try: + bootstrap(resolve_observability_config(), root=root) + with operation(name=f"cli.analytics.{args.command}", surface="cli"): + return handler(args, root) + except (AnalyticsError, OSError, ValueError, sqlite3.Error) as exc: + print(str(exc), file=sys.stderr) + return 
ExitCode.CONTRACT_ERROR + finally: + shutdown() + + +def _print_json(payload: object) -> None: + print(json_text(payload, sort_keys=True)) + + +__all__ = ["analytics_main"] diff --git a/codeclone/surfaces/cli/audit.py b/codeclone/surfaces/cli/audit.py new file mode 100644 index 00000000..43d78d05 --- /dev/null +++ b/codeclone/surfaces/cli/audit.py @@ -0,0 +1,446 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path + +from ... import ui_messages as ui +from ...audit.reader import ( + AuditSummary, + PayloadFootprint, + payload_footprint_to_dict, + read_audit_summary, +) +from ...audit.validation import AuditConfigError, AuditReadError, resolve_audit_path +from ...contracts import ExitCode +from .types import PrinterLike + + +def render_audit( + *, + console: PrinterLike, + root_path: Path, + audit_enabled: bool, + audit_path: str, + quiet: bool, + json_summary: bool = False, +) -> int: + if not audit_enabled: + console.print(ui.fmt_contract_error(ui.AUDIT_NOT_ENABLED)) + return int(ExitCode.CONTRACT_ERROR) + try: + db_path = resolve_audit_path(root_path=root_path, value=audit_path) + summary = read_audit_summary(db_path=db_path, limit=50) + except (AuditConfigError, AuditReadError) as exc: + console.print(ui.fmt_contract_error(str(exc))) + return int(ExitCode.CONTRACT_ERROR) + except Exception as exc: + console.print(ui.fmt_internal_error(exc)) + return int(ExitCode.INTERNAL_ERROR) + if json_summary: + return _render_json_summary(console=console, summary=summary) + if quiet: + return _render_quiet(console=console, summary=summary) + return _render_verbose(console=console, summary=summary) + + +def _render_quiet(*, console: PrinterLike, 
summary: AuditSummary) -> int: + console.print( + ui.AUDIT_QUIET_TEMPLATE.format( + prefix=ui.AUDIT_QUIET_PREFIX, + total_events=summary.total_events, + intent_events=summary.intent_events, + contract_events=summary.contract_events, + receipt_events=summary.receipt_events, + violation_events=summary.violation_events, + last_relative=_relative_time(summary.latest_event_utc), + ) + ) + return int(ExitCode.SUCCESS) + + +def _render_json_summary(*, console: PrinterLike, summary: AuditSummary) -> int: + import json + + fp = summary.payload_footprint + data = { + "mcp_payload_footprint": payload_footprint_to_dict(fp) if fp else None, + "total_events": summary.total_events, + "intents": summary.intent_events, + "contracts": summary.contract_events, + "receipts": summary.receipt_events, + "violations": summary.violation_events, + } + console.print(json.dumps(data, indent=2), markup=False) + return int(ExitCode.SUCCESS) + + +def _render_verbose(*, console: PrinterLike, summary: AuditSummary) -> int: + if _supports_rich(console): + return _render_verbose_rich(console=console, summary=summary) + + console.print(f"[bold]╍╍╍ {ui.AUDIT_TITLE} ╍╍╍[/bold]") + console.print() + console.print( + f" {ui.AUDIT_DATABASE:<13} {summary.db_path} ({summary.total_events} events)" + ) + if summary.retention_days is not None: + console.print(f" {ui.AUDIT_RETENTION:<13} {summary.retention_days} days") + console.print( + f" {ui.AUDIT_OLDEST:<13} {summary.oldest_event_utc or ui.AUDIT_NONE}" + ) + console.print( + f" {ui.AUDIT_LATEST:<13} {summary.latest_event_utc or ui.AUDIT_NONE}" + ) + console.print() + for event in summary.events: + console.print( + " " + f"{_short_time(event.created_at_utc):<16} " + f"{_short_type(event.event_type):<10} " + f"{event.intent_id or '-':<24} " + f"{event.status or '-':<10} " + f"{event.run_id or '-'}" + ) + console.print() + console.print( + f" {ui.AUDIT_SUMMARY} " + f"{summary.intent_events} intents, " + f"{summary.contract_events} contracts, " + 
f"{summary.receipt_events} receipts" + ) + console.print(f" {ui.AUDIT_VIOLATIONS} {summary.violation_events}") + return int(ExitCode.SUCCESS) + + +def _render_verbose_rich(*, console: PrinterLike, summary: AuditSummary) -> int: + from rich import box + from rich.panel import Panel + from rich.rule import Rule + from rich.table import Table + from rich.text import Text + + console.print(Rule(ui.AUDIT_TITLE, style="dim", characters="─")) + + meta = Table.grid(padding=(0, 2)) + meta.add_column(style="dim", no_wrap=True) + meta.add_column() + meta.add_row( + ui.AUDIT_DATABASE.rstrip(":"), + f"{summary.db_path} ({summary.total_events} events, " + f"{_format_bytes(summary.db_size_bytes)})", + ) + if summary.retention_days is not None: + meta.add_row(ui.AUDIT_RETENTION.rstrip(":"), f"{summary.retention_days} days") + meta.add_row( + ui.AUDIT_OLDEST.rstrip(":"), + summary.oldest_event_utc or ui.AUDIT_NONE, + ) + meta.add_row( + ui.AUDIT_LATEST.rstrip(":"), + summary.latest_event_utc or ui.AUDIT_NONE, + ) + meta.add_row( + ui.AUDIT_SUMMARY.rstrip(":"), + ( + f"{summary.intent_events} intents, " + f"{summary.contract_events} contracts, " + f"{summary.receipt_events} receipts" + ), + ) + meta.add_row( + ui.AUDIT_VIOLATIONS.rstrip(":"), + Text( + str(summary.violation_events), + style="red" if summary.violation_events else "green", + ), + ) + fp = summary.payload_footprint + if fp is not None: + meta.add_row( + ui.AUDIT_MCP_PAYLOAD_FOOTPRINT_ROW, + ( + f"~{fp.total_tokens:,} tokens in retention window " + f"({fp.encoding}, {fp.tool_calls} tool calls)" + ), + ) + console.print(Panel(meta, border_style="cyan")) + + table = Table(box=box.SIMPLE_HEAVY) + table.add_column(ui.AUDIT_COL_TOKENS, justify="right", no_wrap=True) + table.add_column(ui.AUDIT_COL_TIME, no_wrap=True) + table.add_column(ui.AUDIT_COL_TYPE, no_wrap=True) + table.add_column(ui.AUDIT_COL_SEVERITY, no_wrap=True) + table.add_column(ui.AUDIT_COL_INTENT, no_wrap=True) + table.add_column(ui.AUDIT_COL_STATUS, 
no_wrap=True) + table.add_column(ui.AUDIT_COL_RUN, no_wrap=True) + table.add_column(ui.AUDIT_COL_AGENT, no_wrap=True) + for event in summary.events: + table.add_row( + _format_tokens(event.estimated_tokens), + _short_time(event.created_at_utc), + _short_type(event.event_type), + Text(event.severity, style=_severity_style(event.severity)), + _short_intent(event.intent_id), + event.status or "-", + _short_run(event.run_id), + _short_agent(event.agent_label), + ) + console.print(table) + + if fp is not None: + _render_payload_analytics(console=console, fp=fp) + + return int(ExitCode.SUCCESS) + + +# Payload budget thresholds (tokens) +_SINGLE_PAYLOAD_OK = 500 +_SINGLE_PAYLOAD_WATCH = 1500 +_WORKFLOW_OK = 5000 +_WORKFLOW_WATCH = 15000 + + +def _render_payload_analytics( + *, + console: PrinterLike, + fp: PayloadFootprint, +) -> None: + from rich import box + from rich.panel import Panel + from rich.table import Table + from rich.text import Text + + # ── Aggregate stats ── + stats = Table.grid(padding=(0, 2)) + stats.add_column(style="dim", no_wrap=True) + stats.add_column(justify="right", no_wrap=True) + stats.add_row(ui.AUDIT_STAT_TOTAL_TOKENS, f"~{fp.total_tokens:,}") + stats.add_row(ui.AUDIT_STAT_TOOL_CALLS, str(fp.tool_calls)) + stats.add_row(ui.AUDIT_STAT_AVG_TOKENS, str(fp.avg_tokens)) + stats.add_row(ui.AUDIT_STAT_P95_TOKENS, str(fp.p95_tokens)) + stats.add_row(ui.AUDIT_STAT_MAX_TOKENS, str(fp.max_tokens)) + stats.add_row(ui.AUDIT_STAT_ENCODING, fp.encoding) + + # ── Breakdown by type ── + breakdown = Table(box=box.SIMPLE, show_edge=False) + breakdown.add_column(ui.AUDIT_COL_TYPE, no_wrap=True) + breakdown.add_column(ui.AUDIT_BREAKDOWN_COL_CALLS, justify="right", no_wrap=True) + breakdown.add_column(ui.AUDIT_BREAKDOWN_COL_TOTAL, justify="right", no_wrap=True) + breakdown.add_column(ui.AUDIT_BREAKDOWN_COL_MAX, justify="right", no_wrap=True) + for tp in fp.by_type: + breakdown.add_row( + _short_type(tp.event_type), + str(tp.call_count), + f"{tp.total_tokens:,}", + 
str(tp.max_tokens), + ) + + # ── Top workflows ── + workflows = Table(box=box.SIMPLE, show_edge=False) + workflows.add_column( + ui.AUDIT_TOP_COL_RANK, + justify="right", + no_wrap=True, + style="dim", + ) + workflows.add_column(ui.AUDIT_COL_WORKFLOW, no_wrap=True) + workflows.add_column(ui.AUDIT_BREAKDOWN_COL_CALLS, justify="right", no_wrap=True) + workflows.add_column(ui.AUDIT_BREAKDOWN_COL_TOTAL, justify="right", no_wrap=True) + workflows.add_column(ui.AUDIT_BREAKDOWN_COL_MAX, justify="right", no_wrap=True) + workflows.add_column(ui.AUDIT_COL_FIRST, no_wrap=True) + workflows.add_column(ui.AUDIT_COL_LAST, no_wrap=True) + workflows.add_column(ui.AUDIT_COL_AGENT, no_wrap=True) + for i, workflow in enumerate(fp.top_workflows, 1): + workflows.add_row( + str(i), + _short_workflow(workflow.workflow_kind, workflow.workflow_id), + str(workflow.call_count), + f"{workflow.total_tokens:,}", + str(workflow.max_tokens), + _short_time(workflow.first_event_utc), + _short_time(workflow.latest_event_utc), + _short_agent(workflow.agent_label), + ) + + # ── Top payloads ── + top = Table(box=box.SIMPLE, show_edge=False) + top.add_column(ui.AUDIT_TOP_COL_RANK, justify="right", no_wrap=True, style="dim") + top.add_column(ui.AUDIT_COL_TYPE, no_wrap=True) + top.add_column(ui.AUDIT_COL_TOKENS, justify="right", no_wrap=True) + top.add_column(ui.AUDIT_COL_TIME, no_wrap=True) + top.add_column(ui.AUDIT_COL_INTENT, no_wrap=True) + top.add_column(ui.AUDIT_COL_RUN, no_wrap=True) + top.add_column(ui.AUDIT_COL_AGENT, no_wrap=True) + for i, payload in enumerate(fp.top_payloads, 1): + style = ( + "bold red" + if payload.estimated_tokens > _SINGLE_PAYLOAD_WATCH + else "yellow" + if payload.estimated_tokens > _SINGLE_PAYLOAD_OK + else "" + ) + top.add_row( + str(i), + _short_type(payload.event_type), + Text(f"{payload.estimated_tokens:,}", style=style), + _short_time(payload.created_at_utc), + _short_intent(payload.intent_id), + _short_run(payload.run_id), + _short_agent(payload.agent_label), + ) + + 
# ── Budget warnings ── + warnings: list[str] = [] + for workflow in fp.top_workflows: + workflow_label = _short_workflow(workflow.workflow_kind, workflow.workflow_id) + if workflow.total_tokens > _WORKFLOW_WATCH: + warnings.append( + ui.AUDIT_BUDGET_WORKFLOW_HEAVY.format( + workflow=workflow_label, + total_tokens=workflow.total_tokens, + threshold=_WORKFLOW_WATCH, + ) + ) + elif workflow.total_tokens > _WORKFLOW_OK: + warnings.append( + ui.AUDIT_BUDGET_WORKFLOW_WATCH.format( + workflow=workflow_label, + total_tokens=workflow.total_tokens, + threshold=_WORKFLOW_OK, + ) + ) + warnings.extend( + ui.AUDIT_BUDGET_PAYLOAD_HEAVY.format( + event_type=_short_type(payload.event_type), + estimated_tokens=payload.estimated_tokens, + ) + for payload in fp.top_payloads + if payload.estimated_tokens > _SINGLE_PAYLOAD_WATCH + ) + + # ── Render ── + console.print() + console.print(Panel(stats, title=ui.AUDIT_MCP_FOOTPRINT_PANEL, border_style="cyan")) + console.print(Panel(breakdown, title=ui.AUDIT_TOKENS_BY_TYPE, border_style="dim")) + if fp.top_workflows: + console.print( + Panel(workflows, title=ui.AUDIT_TOP_WORKFLOWS, border_style="dim") + ) + if fp.top_payloads: + console.print(Panel(top, title=ui.AUDIT_TOP_PAYLOADS, border_style="dim")) + if warnings: + warning_text = Text() + for w in warnings: + warning_text.append(f" ⚠ {w}\n", style="yellow") + console.print( + Panel( + warning_text, + title=ui.AUDIT_PAYLOAD_BUDGET_WARNINGS, + border_style="yellow", + ) + ) + + +def _supports_rich(console: PrinterLike) -> bool: + return console.__class__.__module__.startswith("rich.") + + +def _short_type(event_type: str) -> str: + return ui.AUDIT_EVENT_TYPE_ALIASES.get( + event_type, + event_type.rsplit(".", maxsplit=1)[-1], + ) + + +def _short_intent(intent_id: str | None) -> str: + if not intent_id: + return ui.AUDIT_FIELD_EMPTY + return intent_id.removeprefix("intent-") + + +def _short_agent(agent_label: str | None) -> str: + if not agent_label: + return ui.AUDIT_FIELD_EMPTY + return 
agent_label.replace("claude-code/", "cc/") + + +def _short_run(run_id: str | None) -> str: + return run_id[:8] if run_id else "-" + + +def _short_workflow(kind: str, value: str) -> str: + if kind == "intent": + return _short_intent(value) + if kind == "run": + return f"run:{_short_run(value)}" + if kind == "event": + return value[:16] if value else "-" + return value or "-" + + +def _short_time(value: str) -> str: + parsed = _parse_utc(value) + if parsed is None: + return value or "-" + now = datetime.now(timezone.utc) + if parsed.date() == now.date(): + return parsed.strftime("%H:%M today") + return parsed.strftime("%Y-%m-%d %H:%M") + + +def _relative_time(value: str | None) -> str: + parsed = _parse_utc(value or "") + if parsed is None: + return ui.AUDIT_RELATIVE_NONE + seconds = max(0, int((datetime.now(timezone.utc) - parsed).total_seconds())) + if seconds < 60: + return f"{seconds}s ago" + minutes = seconds // 60 + if minutes < 60: + return f"{minutes}m ago" + hours = minutes // 60 + if hours < 24: + return f"{hours}h ago" + return f"{hours // 24}d ago" + + +def _parse_utc(value: str) -> datetime | None: + if not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone( + timezone.utc + ) + except ValueError: + return None + + +def _format_tokens(value: int | None) -> str: + if value is None: + return ui.AUDIT_TOKENS_EMPTY + return f"{value:,}" + + +def _format_bytes(value: int) -> str: + if value < 1024: + return f"{value} B" + kib = value / 1024 + if kib < 1024: + return f"{kib:.1f} KiB" + return f"{kib / 1024:.1f} MiB" + + +def _severity_style(value: str) -> str: + return {"info": "green", "warn": "yellow", "error": "bold red"}.get( + value, + "white", + ) + + +__all__ = ["render_audit"] diff --git a/codeclone/surfaces/cli/baseline_state.py b/codeclone/surfaces/cli/baseline_state.py index 429dcd49..7e87ec47 100644 --- a/codeclone/surfaces/cli/baseline_state.py +++ b/codeclone/surfaces/cli/baseline_state.py @@ 
-270,12 +270,7 @@ def _metrics_mode_short_circuit( or args.fail_on_docstring_regression or args.fail_on_api_break ): - console.print( - ui.fmt_contract_error( - "Metrics baseline operations require metrics analysis. " - "Remove --skip-metrics." - ) - ) + console.print(ui.fmt_contract_error(ui.ERR_METRICS_BASELINE_REQUIRES_ANALYSIS)) sys.exit(ExitCode.CONTRACT_ERROR) return True @@ -299,10 +294,7 @@ def _load_metrics_baseline_for_diff( if _metrics_baseline_gate_requested(args) and not args.update_metrics_baseline: state.failure_code = ExitCode.CONTRACT_ERROR console.print( - ui.fmt_contract_error( - "Metrics baseline file is required for metrics baseline-aware " - "gates. Run codeclone . --update-metrics-baseline first." - ) + ui.fmt_contract_error(ui.ERR_METRICS_BASELINE_REQUIRED_FOR_GATES) ) return @@ -371,9 +363,7 @@ def _update_metrics_baseline_if_requested( return if project_metrics is None: console.print( - ui.fmt_contract_error( - "Cannot update metrics baseline: metrics were not computed." - ) + ui.fmt_contract_error(ui.ERR_METRICS_BASELINE_UPDATE_WITHOUT_METRICS) ) sys.exit(ExitCode.CONTRACT_ERROR) @@ -431,26 +421,14 @@ def _enforce_metrics_gate_schema_requirements( state.trusted_for_diff = False state.status = MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION state.failure_code = ExitCode.CONTRACT_ERROR - console.print( - ui.fmt_contract_error( - "Typing/docstring regression gates require a metrics baseline " - "that includes coverage adoption data. Run codeclone . " - "--update-metrics-baseline first." - ) - ) + console.print(ui.fmt_contract_error(ui.ERR_METRICS_BASELINE_TYPING_GATES)) return if args.fail_on_api_break and baseline.api_surface_snapshot is None: state.loaded = False state.trusted_for_diff = False state.status = MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION state.failure_code = ExitCode.CONTRACT_ERROR - console.print( - ui.fmt_contract_error( - "API break gating requires a metrics baseline with public API " - "surface data. Run codeclone . 
--api-surface " - "--update-metrics-baseline first." - ) - ) + console.print(ui.fmt_contract_error(ui.ERR_METRICS_BASELINE_API_GATES)) def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: diff --git a/codeclone/surfaces/cli/blast_radius.py b/codeclone/surfaces/cli/blast_radius.py new file mode 100644 index 00000000..9b432349 --- /dev/null +++ b/codeclone/surfaces/cli/blast_radius.py @@ -0,0 +1,240 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +from collections.abc import Mapping, Sequence +from pathlib import Path, PurePosixPath + +from ... import ui_messages as ui +from ...analysis.blast_radius import BlastRadiusResult, compute_blast_radius +from ...contracts import ExitCode +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from .types import PrinterLike + +_RISK_STYLES = { + "low": "green", + "medium": "yellow", + "high": "bold red", + "critical": "bold white on red", +} +_MAX_RENDERED_ITEMS = 20 + + +def _report_run_id(report_document: Mapping[str, object]) -> str: + integrity = _as_mapping(report_document.get("integrity")) + digest = _as_mapping(integrity.get("digest")) + value = str(digest.get("value", "")).strip() + return value or "cli-blast-radius" + + +def _inventory_paths(report_document: Mapping[str, object]) -> frozenset[str]: + inventory = _as_mapping(report_document.get("inventory")) + file_registry = _as_mapping(inventory.get("file_registry")) + return frozenset( + str(item).replace("\\", "/").strip("/") + for item in _as_sequence(file_registry.get("items")) + if str(item).strip() + ) + + +def _normalize_cli_path(raw_path: object) -> str: + text = str(raw_path).replace("\\", 
"/").strip() + if not text: + raise ValueError("empty path") + if Path(text).is_absolute(): + raise ValueError("absolute paths are not accepted") + normalized = str(PurePosixPath(text)) + parts = PurePosixPath(normalized).parts + if normalized in {"", "."} or any(part == ".." for part in parts): + raise ValueError("paths must stay inside the scan root") + return normalized.removeprefix("./").strip("/") + + +def _validated_origin_paths( + *, + report_document: Mapping[str, object], + files: Sequence[object], + console: PrinterLike, + quiet: bool, +) -> tuple[str, ...]: + known_paths = _inventory_paths(report_document) + valid: set[str] = set() + skipped: list[str] = [] + invalid: list[str] = [] + for raw_path in files: + try: + relative_path = _normalize_cli_path(raw_path) + except ValueError as exc: + invalid.append(f"{raw_path}: {exc}") + continue + if relative_path not in known_paths: + skipped.append(relative_path) + continue + valid.add(relative_path) + + if invalid: + rendered = "\n".join(f" - {item}" for item in invalid[:10]) + if len(invalid) > 10: + rendered += f"\n {ui.BLAST_RADIUS_MORE.format(count=len(invalid) - 10)}" + console.print( + ui.fmt_contract_error( + ui.BLAST_RADIUS_INVALID_SELECTION.format(rendered=rendered) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + if skipped and not quiet: + rendered = ", ".join(skipped[:5]) + if len(skipped) > 5: + rendered += f", {ui.BLAST_RADIUS_MORE.format(count=len(skipped) - 5)}" + console.print( + ui.fmt_cli_runtime_warning( + ui.BLAST_RADIUS_SKIPPED_INVENTORY.format(rendered=rendered) + ) + ) + + if not valid: + console.print(ui.fmt_contract_error(ui.BLAST_RADIUS_REQUIRES_INVENTORY_FILE)) + sys.exit(ExitCode.CONTRACT_ERROR) + return tuple(sorted(valid)) + + +def _style(value: str, *, styles: Mapping[str, str]) -> str: + style = styles.get(value, "") + return f"[{style}]{value}[/{style}]" if style else value + + +def _print_items( + *, + console: PrinterLike, + title: str, + items: Sequence[str], +) -> None: + 
console.print(f" [bold]{title} ({len(items)}):[/bold]") + if not items: + console.print(f" [dim]{ui.BLAST_RADIUS_NONE}[/dim]") + return + for item in items[:_MAX_RENDERED_ITEMS]: + console.print(f" {item}") + if len(items) > _MAX_RENDERED_ITEMS: + more = ui.BLAST_RADIUS_MORE.format(count=len(items) - _MAX_RENDERED_ITEMS) + console.print(f" [dim]{more}[/dim]") + + +def _print_entries( + *, + console: PrinterLike, + title: str, + entries: Sequence[Mapping[str, str]], +) -> None: + console.print(f" [bold]{title} ({len(entries)}):[/bold]") + if not entries: + console.print(f" [dim]{ui.BLAST_RADIUS_NONE}[/dim]") + return + for entry in entries[:_MAX_RENDERED_ITEMS]: + path = str(entry.get("path", "")).strip() + reason = str(entry.get("reason", "")).strip() + severity = str(entry.get("severity", "")).strip() + suffix = f" [{severity}]" if severity else "" + console.print(f" {path} [dim]{reason}{suffix}[/dim]") + if len(entries) > _MAX_RENDERED_ITEMS: + more = ui.BLAST_RADIUS_MORE.format(count=len(entries) - _MAX_RENDERED_ITEMS) + console.print(f" [dim]{more}[/dim]") + + +def _contract_error_result(*, console: PrinterLike, message: str) -> int: + console.print(ui.fmt_contract_error(message)) + return int(ExitCode.CONTRACT_ERROR) + + +def _render_quiet_result(*, console: PrinterLike, result: BlastRadiusResult) -> int: + console.print( + ui.fmt_blast_radius_compact( + level=result.radius_level, + dependents=len(result.direct_dependents), + cohorts=len(result.clone_cohort_members), + cycles=len(result.in_dependency_cycle), + do_not_touch=len(result.do_not_touch), + ) + ) + return int(ExitCode.SUCCESS) + + +def render_blast_radius( + *, + console: PrinterLike, + report_document: Mapping[str, object] | None, + files: Sequence[object], + root_path: Path, + quiet: bool, +) -> int: + _ = root_path + if report_document is None: + return _contract_error_result( + console=console, + message=ui.BLAST_RADIUS_REQUIRES_REPORT, + ) + + origin_paths = _validated_origin_paths( + 
report_document=report_document, + files=files, + console=console, + quiet=quiet, + ) + result = compute_blast_radius( + run_id=_report_run_id(report_document), + report_document=report_document, + files=origin_paths, + ) + + if quiet: + return _render_quiet_result(console=console, result=result) + + console.print() + console.print(f"[bold]{ui.BLAST_RADIUS_TITLE}[/bold]") + console.print() + console.print(f" [bold]{ui.BLAST_RADIUS_FILES}[/bold] {', '.join(result.origin)}") + console.print( + f" [bold]{ui.BLAST_RADIUS_RISK_LEVEL}[/bold] " + f"{_style(result.radius_level, styles=_RISK_STYLES)}" + ) + console.print() + _print_items( + console=console, + title=ui.BLAST_RADIUS_DIRECT_DEPENDENTS, + items=result.direct_dependents, + ) + _print_items( + console=console, + title=ui.BLAST_RADIUS_CLONE_COHORT, + items=result.clone_cohort_members, + ) + _print_items( + console=console, + title=ui.BLAST_RADIUS_DEPENDENCY_CYCLES, + items=result.in_dependency_cycle, + ) + _print_entries( + console=console, + title=ui.BLAST_RADIUS_DO_NOT_TOUCH, + entries=result.do_not_touch, + ) + _print_entries( + console=console, + title=ui.BLAST_RADIUS_REVIEW_CONTEXT, + entries=result.review_context, + ) + if result.guardrails: + console.print(f" [bold]{ui.BLAST_RADIUS_GUARDRAILS}[/bold]") + for guardrail in result.guardrails: + console.print(f" - {guardrail}") + return int(ExitCode.SUCCESS) + + +__all__ = ["render_blast_radius"] diff --git a/codeclone/surfaces/cli/changed_scope.py b/codeclone/surfaces/cli/changed_scope.py index 4a47d231..6cba7ea3 100644 --- a/codeclone/surfaces/cli/changed_scope.py +++ b/codeclone/surfaces/cli/changed_scope.py @@ -29,6 +29,8 @@ def _validate_changed_scope_args(*, args: object) -> str | None: console = require_status_console(cli_state.get_console()) diff_against = optional_text_attr(args, "diff_against") paths_from_git_diff = optional_text_attr(args, "paths_from_git_diff") + if bool_attr(args, "blast_radius"): + return None if diff_against and 
paths_from_git_diff: console.print( ui.fmt_contract_error( @@ -39,7 +41,11 @@ def _validate_changed_scope_args(*, args: object) -> str | None: if paths_from_git_diff: set_bool_attr(args, "changed_only", True) return paths_from_git_diff - if diff_against and not bool_attr(args, "changed_only"): + if ( + diff_against + and not bool_attr(args, "changed_only") + and not bool_attr(args, "patch_verify") + ): console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) sys.exit(ExitCode.CONTRACT_ERROR) if bool_attr(args, "changed_only") and not diff_against: diff --git a/codeclone/surfaces/cli/console.py b/codeclone/surfaces/cli/console.py index 9a7ffb5a..d9a827af 100644 --- a/codeclone/surfaces/cli/console.py +++ b/codeclone/surfaces/cli/console.py @@ -9,11 +9,12 @@ import os import re import sys +import types from collections.abc import Mapping, Sequence from contextlib import AbstractContextManager, nullcontext from functools import lru_cache from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from ... import __version__ from ... 
import ui_messages as ui @@ -23,12 +24,15 @@ if TYPE_CHECKING: from rich.console import Console as RichConsole + from rich.panel import Panel as RichPanel from rich.progress import BarColumn as RichBarColumn from rich.progress import Progress as RichProgress from rich.progress import SpinnerColumn as RichSpinnerColumn from rich.progress import TextColumn as RichTextColumn from rich.progress import TimeElapsedColumn as RichTimeElapsedColumn from rich.rule import Rule as RichRule + from rich.table import Table as RichTable + from rich.text import Text as RichText from rich.theme import Theme as RichTheme _RICH_THEME_STYLES: dict[str, str] = { @@ -44,14 +48,17 @@ class PlainConsole: """Lightweight console for quiet/no-progress mode.""" - @staticmethod def print( + self, *objects: object, - sep: str = " ", - end: str = "\n", - markup: bool = True, - **_: object, + **kwargs: object, ) -> None: + sep_obj = kwargs.get("sep", " ") + end_obj = kwargs.get("end", "\n") + markup_obj = kwargs.get("markup", True) + sep = sep_obj if isinstance(sep_obj, str) else " " + end = end_obj if isinstance(end_obj, str) else "\n" + markup = markup_obj if isinstance(markup_obj, bool) else True text = sep.join(str(obj) for obj in objects) if markup: text = _RICH_MARKUP_TAG_RE.sub("", text) @@ -103,6 +110,39 @@ def make_console(*, no_color: bool, width: int) -> RichConsole: ) +def supports_rich_console(console: PrinterLike) -> bool: + return console.__class__.__module__.startswith("rich.") + + +@lru_cache(maxsize=1) +def rich_panel_symbols() -> tuple[ + types.ModuleType, + type[RichPanel], + type[RichRule], + type[RichTable], + type[RichText], +]: + from rich import box + from rich.panel import Panel + from rich.rule import Rule + from rich.table import Table + from rich.text import Text + + return box, Panel, Rule, Table, Text + + +def make_query_console(*, no_color: bool | None = None) -> PrinterLike: + resolved_no_color = ( + bool(os.environ.get("NO_COLOR")) or not sys.stdout.isatty() + if 
no_color is None + else no_color + ) + return cast( + PrinterLike, + make_console(no_color=resolved_no_color, width=ui.CLI_AUDIT_MAX_WIDTH), + ) + + def make_plain_console() -> PlainConsole: return PlainConsole() diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py index f4445ba1..6a7318ef 100644 --- a/codeclone/surfaces/cli/execution.py +++ b/codeclone/surfaces/cli/execution.py @@ -11,6 +11,7 @@ from collections.abc import Callable from dataclasses import replace from pathlib import Path +from typing import TypeVar from rich.console import Console as RichConsole from rich.progress import ( @@ -29,12 +30,65 @@ from ...core._types import ProcessingResult as PipelineProcessingResult from ...core.reporting import GatingResult from ...models import MetricsDiff +from ...observability import SpanHandle, is_observability_enabled, span from . import state as cli_state from .attrs import bool_attr from .console import PlainConsole from .types import require_status_console +def _save_cache_after_analysis( + *, + cache: Cache, + analysis_result: AnalysisResult, + cache_update_segment_projection_fn: Callable[[Cache, AnalysisResult], None], + printer: object, +) -> None: + cache_update_segment_projection_fn(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + require_status_console(printer).print( + ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc)) + ) + + +_StageT = TypeVar("_StageT") + + +def _traced_stage( + name: str, + stage_fn: Callable[..., _StageT], + counters: Callable[[SpanHandle, _StageT], None] | None, +) -> Callable[..., _StageT]: + """Wrap a pipeline stage callable so each invocation records a span. + + Stage-level only (never per-file). Inert with no overhead when + observability is disabled — ``span`` yields an inert handle. 
+ """ + + def traced(**kwargs: object) -> _StageT: + with span(name=name) as stage_span: + result = stage_fn(**kwargs) + # Only read result fields for counters when actually recording — + # keeps the disabled path zero-work and tolerant of stub results. + if counters is not None and is_observability_enabled(): + counters(stage_span, result) + return result + + return traced + + +def _discover_counters(stage_span: SpanHandle, result: DiscoveryResult) -> None: + stage_span.set_counter("files_to_process", len(result.files_to_process)) + stage_span.set_counter("cache_hits", result.cache_hits) + + +def _process_counters(stage_span: SpanHandle, result: PipelineProcessingResult) -> None: + stage_span.set_counter("files_analyzed", result.files_analyzed) + stage_span.set_counter("failed_files", len(result.failed_files)) + + def run_analysis_stages( *, args: object, @@ -56,6 +110,10 @@ def run_analysis_stages( ], ], ) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: + discover_fn = _traced_stage("pipeline.discover", discover_fn, _discover_counters) + process_fn = _traced_stage("pipeline.process", process_fn, _process_counters) + analyze_fn = _traced_stage("pipeline.analyze", analyze_fn, None) + def _require_rich_console(value: object) -> RichConsole: if isinstance(value, PlainConsole): raise RuntimeError("Rich console is required when progress UI is enabled.") @@ -65,6 +123,9 @@ def _require_rich_console(value: object) -> RichConsole: printer = require_status_console(cli_state.get_console()) use_status = not bool_attr(args, "quiet") and not bool_attr(args, "no_progress") + write_cache = not ( + bool_attr(args, "blast_radius") or bool_attr(args, "patch_verify") + ) try: if use_status: @@ -154,22 +215,26 @@ def _require_rich_console(value: object) -> RichConsole: discovery=discovery_result, processing=processing_result, ) - cache_update_segment_projection_fn(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - 
printer.print(ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc))) + if write_cache: + _save_cache_after_analysis( + cache=cache, + analysis_result=analysis_result, + cache_update_segment_projection_fn=cache_update_segment_projection_fn, + printer=printer, + ) else: analysis_result = analyze_fn( boot=boot, discovery=discovery_result, processing=processing_result, ) - cache_update_segment_projection_fn(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - printer.print(ui.fmt_cli_runtime_warning(ui.fmt_cache_save_failed(exc))) + if write_cache: + _save_cache_after_analysis( + cache=cache, + analysis_result=analysis_result, + cache_update_segment_projection_fn=cache_update_segment_projection_fn, + printer=printer, + ) coverage_join = getattr(analysis_result, "coverage_join", None) if ( diff --git a/codeclone/surfaces/cli/memory.py b/codeclone/surfaces/cli/memory.py new file mode 100644 index 00000000..ffd3a535 --- /dev/null +++ b/codeclone/surfaces/cli/memory.py @@ -0,0 +1,1511 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import argparse +import json +from collections.abc import Callable +from pathlib import Path +from typing import cast + +from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from ...config.memory import MemoryConfig, resolve_memory_config +from ...config.memory_defaults import DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS +from ...contracts import ExitCode +from ...memory.embedding import EmbeddingProvider, resolve_embedding_provider +from ...memory.exceptions import MemoryContractError, MemorySemanticUnavailableError +from ...memory.governance import approve_record, archive_record, reject_record +from ...memory.ingest import InitOptions +from ...memory.ingest.runner import run_memory_init +from ...memory.jobs import ( + execute_enqueue_projection_rebuild, + execute_projection_rebuild_status, + execute_run_projection_jobs_once, +) +from ...memory.models import MemoryProject, MemoryQuery +from ...memory.paths import normalize_memory_scope_path +from ...memory.project import resolve_memory_db_path, resolve_project_identity +from ...memory.retrieval import query_engineering_memory, query_records_for_repo_path +from ...memory.retrieval.semantic import semantic_search +from ...memory.semantic import ( + execute_semantic_index_rebuild, + resolve_semantic_index, +) +from ...memory.semantic.models import SemanticSearchResult +from ...memory.semantic.rebuild_workflow import ( + RebuildSemanticIndexOkPayload, + RebuildSemanticIndexSkippedPayload, + RebuildSemanticIndexUnavailablePayload, + execute_semantic_projection_probe, +) +from ...memory.sqlite_store import SqliteEngineeringMemoryStore +from ...memory.status_report import build_memory_status_report +from ...memory.trajectory.cli_render import ( + render_projection_run, + render_trajectory_agents, + render_trajectory_anomalies, + render_trajectory_detail, + render_trajectory_list, + 
render_trajectory_search_results, + render_trajectory_status, +) +from ...memory.trajectory.export import ( + export_trajectories_jsonl, + resolve_export_output_path, +) +from ...memory.vacuum import run_memory_vacuum +from ...observability import ( + bootstrap, + is_observability_enabled, + operation, + shutdown, + span, +) +from .memory_analysis import load_report_for_memory_init +from .memory_render import ( + memory_console, + render_coverage_report, + render_draft_candidates, + render_governance_result, + render_init_note, + render_init_result, + render_path_results, + render_search_results, + render_stale_records, + render_status_report, + render_vacuum_report, +) +from .types import PrinterLike + +_CLI_GOVERNANCE_BREAK_GLASS_FLAG = "--i-know-what-im-doing" +_CLI_GOVERNANCE_BREAK_GLASS_MESSAGE = ( + "Direct CLI memory governance is disabled by default. Use the IDE " + "governance channel, or pass --i-know-what-im-doing for an explicit " + "human break-glass action." +) + + +def _print_memory_contract_error(console: PrinterLike, exc: MemoryContractError) -> int: + console.print(str(exc)) + return int(ExitCode.CONTRACT_ERROR) + + +def _normalize_memory_cli_path(console: PrinterLike, raw_path: str) -> str | None: + try: + return normalize_memory_scope_path(raw_path) + except MemoryContractError as exc: + _print_memory_contract_error(console, exc) + return None + + +def memory_main(argv: list[str]) -> int: + console = memory_console() + parser = _build_parser() + args = parser.parse_args(argv) + root_path = Path(args.root).expanduser().resolve() + if not root_path.is_dir(): + console.print(f"Repository root does not exist: {root_path}") + return int(ExitCode.CONTRACT_ERROR) + return _run_memory_with_observability( + root_path=root_path, + args=args, + handler=lambda: _dispatch_memory_command( + console=console, root_path=root_path, args=args + ), + ) + + +def _memory_operation_name(args: argparse.Namespace) -> str: + command = str(args.command) + if command == 
"semantic": + return f"cli.memory.semantic.{args.semantic_action}" + if command == "trajectory": + return f"cli.memory.trajectory.{args.trajectory_action}" + if command == "jobs": + return f"cli.memory.jobs.{args.jobs_action}" + return f"cli.memory.{command}" + + +def _run_memory_with_observability( + *, + root_path: Path, + args: argparse.Namespace, + handler: Callable[[], int], +) -> int: + from ...config.observability import resolve_observability_config + + config = resolve_observability_config() + if not config.enabled: + return handler() + owns_observability = not is_observability_enabled() + if owns_observability: + bootstrap(config, root=root_path) + try: + with operation(name=_memory_operation_name(args), surface="cli"): + return handler() + finally: + if owns_observability: + shutdown() + + +def _dispatch_memory_command( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + if args.command == "status": + return _render_status(console=console, root_path=root_path) + if args.command == "init": + return _run_init(console=console, root_path=root_path, args=args) + if args.command == "for-path": + return _run_for_path(console=console, root_path=root_path, args=args) + if args.command == "search": + return _run_search(console=console, root_path=root_path, args=args) + if args.command == "stale": + return _run_stale(console=console, root_path=root_path, args=args) + if args.command == "vacuum": + return _run_vacuum(console=console, root_path=root_path) + if args.command == "coverage": + return _run_coverage(console=console, root_path=root_path, args=args) + if args.command == "review-candidates": + return _run_review_candidates(console=console, root_path=root_path, args=args) + if args.command == "approve": + return _run_approve(console=console, root_path=root_path, args=args) + if args.command == "reject": + return _run_reject(console=console, root_path=root_path, args=args) + if args.command == "archive": + return 
_run_archive(console=console, root_path=root_path, args=args) + if args.command == "semantic": + return _run_semantic(console=console, root_path=root_path, args=args) + if args.command == "trajectory": + return _run_trajectory(console=console, root_path=root_path, args=args) + if args.command == "jobs": + return _run_jobs(console=console, root_path=root_path, args=args) + return int(ExitCode.CONTRACT_ERROR) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="codeclone memory") + subparsers = parser.add_subparsers(dest="command", required=True) + + def _add_root(sub: argparse.ArgumentParser) -> None: + sub.add_argument("--root", default=".", help="Repository root path.") + + init_parser = subparsers.add_parser("init", help="Initialize engineering memory.") + _add_root(init_parser) + init_parser.add_argument("--dry-run", action="store_true") + init_parser.add_argument("--refresh", action="store_true") + init_parser.add_argument("--from-report", metavar="PATH") + init_parser.add_argument("--no-docs", action="store_true") + init_parser.add_argument("--no-tests", action="store_true") + + status_parser = subparsers.add_parser( + "status", + help="Show engineering memory status.", + ) + _add_root(status_parser) + + for_path = subparsers.add_parser( + "for-path", help="List memory records linked to a source path." 
+ ) + _add_root(for_path) + for_path.add_argument("path", help="Repo-relative source file path.") + for_path.add_argument("--limit", type=int, default=20) + + search_parser = subparsers.add_parser( + "search", + help="Search engineering memory records by keyword.", + ) + _add_root(search_parser) + search_parser.add_argument("query", help="Keyword query.") + search_parser.add_argument("--limit", type=int, default=20) + search_parser.add_argument( + "--match", + choices=("any", "all"), + default="any", + help="Match any token (default) or require all tokens.", + ) + search_parser.add_argument( + "--active-only", + action="store_true", + help="Exclude stale records from search results.", + ) + search_parser.add_argument( + "--semantic", + action="store_true", + help="Blend semantic proximity into ranking (requires the index).", + ) + + stale_parser = subparsers.add_parser( + "stale", + help="List stale engineering memory records.", + ) + _add_root(stale_parser) + stale_parser.add_argument("--limit", type=int, default=50) + + vacuum_parser = subparsers.add_parser( + "vacuum", + help="Purge expired stale/draft/rejected/archived records.", + ) + _add_root(vacuum_parser) + + coverage_parser = subparsers.add_parser( + "coverage", + help="Show memory coverage for repo-relative paths.", + ) + _add_root(coverage_parser) + coverage_parser.add_argument( + "paths", + nargs="+", + help="Repo-relative paths to inspect.", + ) + + review_parser = subparsers.add_parser( + "review-candidates", + help="List draft memory candidates awaiting review.", + ) + _add_root(review_parser) + review_parser.add_argument("--limit", type=int, default=50) + + approve_parser = subparsers.add_parser( + "approve", + help="Approve a draft memory record.", + ) + _add_root(approve_parser) + approve_parser.add_argument("record_id") + approve_parser.add_argument("--by", default="human") + approve_parser.add_argument(_CLI_GOVERNANCE_BREAK_GLASS_FLAG, action="store_true") + + reject_parser = 
subparsers.add_parser( + "reject", + help="Reject a draft memory record.", + ) + _add_root(reject_parser) + reject_parser.add_argument("record_id") + reject_parser.add_argument("--by", default="human") + reject_parser.add_argument("--reason") + reject_parser.add_argument(_CLI_GOVERNANCE_BREAK_GLASS_FLAG, action="store_true") + + archive_parser = subparsers.add_parser( + "archive", + help="Archive an active memory record.", + ) + _add_root(archive_parser) + archive_parser.add_argument("record_id") + archive_parser.add_argument("--by", default="human") + archive_parser.add_argument(_CLI_GOVERNANCE_BREAK_GLASS_FLAG, action="store_true") + + semantic_parser = subparsers.add_parser( + "semantic", + help="Semantic retrieval index (status / rebuild / search).", + ) + semantic_sub = semantic_parser.add_subparsers(dest="semantic_action", required=True) + sem_status = semantic_sub.add_parser("status", help="Show semantic index status.") + _add_root(sem_status) + sem_rebuild = semantic_sub.add_parser("rebuild", help="Rebuild the semantic index.") + _add_root(sem_rebuild) + sem_search = semantic_sub.add_parser( + "search", help="Semantic free-text search over memory." + ) + _add_root(sem_search) + sem_search.add_argument("query", help="Free-text query.") + sem_search.add_argument("--limit", type=int, default=10) + sem_search.add_argument("--json", action="store_true", help="Emit results as JSON.") + sem_probe = semantic_sub.add_parser( + "probe", + help="Measure semantic projection length distribution per lane.", + ) + _add_root(sem_probe) + sem_probe.add_argument( + "--json", action="store_true", help="Emit probe payload as JSON." + ) + sem_probe.add_argument( + "--exact-tokens", + action="store_true", + help=( + "Measure raw/effective token counts via the embedding model tokenizer " + "(loads FastEmbed when configured)." 
+ ), + ) + + trajectory_parser = subparsers.add_parser( + "trajectory", + help=( + "Trajectory projections and analytics " + "(status / rebuild / list / search / show / agents / " + "anomalies / dashboard / export)." + ), + ) + trajectory_sub = trajectory_parser.add_subparsers( + dest="trajectory_action", + required=True, + ) + traj_status = trajectory_sub.add_parser( + "status", + help="Show trajectory projection status.", + ) + _add_root(traj_status) + traj_rebuild = trajectory_sub.add_parser( + "rebuild", + help="Rebuild trajectory projections from audit event core.", + ) + _add_root(traj_rebuild) + traj_list = trajectory_sub.add_parser("list", help="List stored trajectories.") + _add_root(traj_list) + traj_list.add_argument("--limit", type=int, default=20) + traj_search = trajectory_sub.add_parser( + "search", + help="Search stored trajectories by keyword.", + ) + _add_root(traj_search) + traj_search.add_argument("query", help="Keyword query.") + traj_search.add_argument("--limit", type=int, default=10) + traj_search.add_argument( + "--match", + choices=("any", "all"), + default="any", + help="Match any token (default) or require all tokens.", + ) + traj_show = trajectory_sub.add_parser("show", help="Show one stored trajectory.") + _add_root(traj_show) + traj_show.add_argument("trajectory_id") + traj_agents = trajectory_sub.add_parser( + "agents", + help="Aggregate trajectories by agent label.", + ) + _add_root(traj_agents) + traj_agents.add_argument( + "--include-routine", + action="store_true", + help="Include routine analysis-only trajectories.", + ) + traj_agents.add_argument("--json", action="store_true") + traj_anomalies = trajectory_sub.add_parser( + "anomalies", + help="List trajectories with detected anomalies.", + ) + _add_root(traj_anomalies) + traj_anomalies.add_argument("--limit", type=int, default=25) + traj_anomalies.add_argument( + "--include-routine", + action="store_true", + help="Include routine analysis-only trajectories.", + ) + 
traj_anomalies.add_argument("--json", action="store_true") + traj_dashboard = trajectory_sub.add_parser( + "dashboard", + help="Combined trajectory status, agents, and anomalies summary.", + ) + _add_root(traj_dashboard) + traj_dashboard.add_argument("--limit", type=int, default=25) + traj_dashboard.add_argument( + "--include-routine", + action="store_true", + help="Include routine analysis-only trajectories.", + ) + traj_dashboard.add_argument("--json", action="store_true") + traj_export = trajectory_sub.add_parser( + "export", + help="Export trajectories to local JSONL (disabled by default).", + ) + _add_root(traj_export) + traj_export.add_argument( + "--profile", + required=True, + help="Export profile name (for example agent-change-control-v1).", + ) + traj_export.add_argument( + "--out", + required=True, + help="Output JSONL path (repo-relative or absolute with --allow-external-out).", + ) + traj_export.add_argument( + "--allow-external-out", + action="store_true", + help="Allow writing outside the repository root.", + ) + traj_export.add_argument( + "--force", + action="store_true", + help="Run export even when trajectory_export_enabled=false.", + ) + traj_export.add_argument("--json", action="store_true", help="Emit manifest JSON.") + + jobs_parser = subparsers.add_parser( + "jobs", + help="Projection rebuild jobs (status / enqueue / run-once / list).", + ) + jobs_sub = jobs_parser.add_subparsers(dest="jobs_action", required=True) + jobs_status = jobs_sub.add_parser( + "status", + help="Show projection rebuild job status.", + ) + _add_root(jobs_status) + jobs_enqueue = jobs_sub.add_parser( + "enqueue", + help="Enqueue a projection rebuild bundle job.", + ) + _add_root(jobs_enqueue) + jobs_enqueue.add_argument( + "--force", + action="store_true", + help="Enqueue even when policy is off or stimulus unchanged.", + ) + jobs_enqueue.add_argument( + "--no-spawn", + action="store_true", + help="Do not spawn a background worker process.", + ) + jobs_run = 
jobs_sub.add_parser( + "run-once", + help="Claim and run one pending projection rebuild job.", + ) + _add_root(jobs_run) + jobs_run.add_argument( + "--not-before", + dest="not_before", + default=None, + help=( + "ISO-8601 UTC deadline to defer the run until before loading the " + "embedding model (coalesced trailing-edge flush)." + ), + ) + jobs_list = jobs_sub.add_parser("list", help="List recent projection jobs.") + _add_root(jobs_list) + jobs_list.add_argument("--limit", type=int, default=20) + jobs_list.add_argument("--json", action="store_true") + + return parser + + +def _render_status(*, console: PrinterLike, root_path: Path) -> int: + config = resolve_memory_config(root_path) + db_path = resolve_memory_db_path(root_path, config) + report = build_memory_status_report( + root_path=root_path, + db_path=db_path, + backend=config.backend, + ) + render_status_report(console=console, report=report) + return int(ExitCode.SUCCESS) + + +def _run_init( + *, + console: PrinterLike, + root_path: Path, + args: argparse.Namespace, +) -> int: + try: + loaded = load_report_for_memory_init( + root_path=root_path, + from_report=Path(args.from_report) if args.from_report else None, + ) + except Exception as exc: + console.print(f"Unable to load analysis report for memory init: {exc}") + return int(ExitCode.CONTRACT_ERROR) + + if loaded.rejected_cache_reason: + render_init_note( + console=console, + message=( + "cached report rejected; running fresh analysis " + f"({loaded.rejected_cache_reason})" + ), + ) + elif loaded.source == "fresh_analysis": + render_init_note( + console=console, + message="no trusted cached report; running fresh analysis", + ) + elif loaded.source == "trusted_cache": + render_init_note(console=console, message="reusing trusted cached report") + + options = InitOptions( + dry_run=bool(args.dry_run), + refresh=bool(args.refresh), + include_docs=not args.no_docs, + include_tests=not args.no_tests, + ) + try: + result = run_memory_init( + 
root_path=root_path, + report_document=loaded.document, + options=options, + ) + except Exception as exc: + console.print(f"Memory init failed: {exc}") + return int(ExitCode.INTERNAL_ERROR) + + render_init_result( + console=console, + dry_run=bool(result.dry_run), + project_id=result.project_id, + db_path=str(result.db_path) if result.db_path else None, + analysis_fingerprint=result.analysis_fingerprint, + stats=result.stats, + planned_counts=result.planned_counts, + ) + return int(ExitCode.SUCCESS) + + +def _run_for_path( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + rel_path = _normalize_memory_cli_path(console, args.path) + if rel_path is None: + return int(ExitCode.CONTRACT_ERROR) + try: + store, _config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + records = query_records_for_repo_path( + store, + project_id=project.id, + rel_path=rel_path, + limit=max(1, int(args.limit)), + ) + finally: + store.close() + + render_path_results(console=console, rel_path=rel_path, records=records) + return int(ExitCode.SUCCESS) + + +def _run_search( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + semantic = bool(args.semantic) + index = resolve_semantic_index(config.semantic) if semantic else None + provider: EmbeddingProvider | None = None + semantic_reason: str | None = None + if semantic: + try: + provider = resolve_embedding_provider(config.semantic) + except MemorySemanticUnavailableError as exc: + semantic_reason = str(exc) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + 
db_path=resolve_memory_db_path(root_path, config), + mode="search", + query=str(args.query), + filters={"match_mode": str(args.match)}, + max_results=max(1, int(args.limit)), + include_stale=not bool(args.active_only), + semantic=semantic, + semantic_index=index, + embedding_provider=provider, + provider_label=config.semantic.embedding_provider, + semantic_reason=semantic_reason, + ) + finally: + store.close() + + payload = result.get("payload") + if not isinstance(payload, dict): + console.print("Memory search returned an unexpected payload.") + return int(ExitCode.INTERNAL_ERROR) + records = payload.get("records") + if not isinstance(records, list): + records = [] + typed_records = [item for item in records if isinstance(item, dict)] + render_search_results(console=console, query=str(args.query), records=typed_records) + _print_semantic_advisory(console, result.get("semantic")) + return int(ExitCode.SUCCESS) + + +def _print_semantic_advisory(console: PrinterLike, semantic: object) -> None: + if not isinstance(semantic, dict): + return + if semantic.get("used"): + provider = str(semantic.get("provider") or "") + quality = ( + "diagnostic, NOT semantic-quality" if provider == "diagnostic" else provider + ) + console.print(f"semantic: on ({quality})", markup=False) + else: + console.print(f"semantic: off ({semantic.get('reason')})", markup=False) + + +def _open_store( + root_path: Path, +) -> tuple[SqliteEngineeringMemoryStore, MemoryConfig, MemoryProject]: + config = resolve_memory_config(root_path) + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + raise FileNotFoundError(str(db_path)) + project = resolve_project_identity(root_path) + return SqliteEngineeringMemoryStore(db_path), config, project + + +def _run_stale( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database 
not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=resolve_memory_db_path(root_path, config), + mode="stale", + max_results=max(1, int(args.limit)), + ) + finally: + store.close() + payload = result.get("payload") + records = payload.get("records") if isinstance(payload, dict) else None + typed_records = [item for item in (records or []) if isinstance(item, dict)] + render_stale_records(console=console, records=typed_records) + return int(ExitCode.SUCCESS) + + +def _run_vacuum(*, console: PrinterLike, root_path: Path) -> int: + try: + store, config, _project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + report = run_memory_vacuum(store, config) + finally: + store.close() + render_vacuum_report(console=console, report=report) + return int(ExitCode.SUCCESS) + + +def _run_coverage( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + from ...memory.coverage import compute_scope_coverage + + try: + store, _config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + report = compute_scope_coverage( + store, + project_id=project.id, + scope_paths=args.paths, + ) + except MemoryContractError as exc: + return _print_memory_contract_error(console, exc) + finally: + store.close() + render_coverage_report(console=console, report=report) + return int(ExitCode.SUCCESS) + + +def _run_review_candidates( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, _config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return 
int(ExitCode.CONTRACT_ERROR) + try: + records = store.query_records( + MemoryQuery( + project_id=project.id, + statuses=("draft",), + limit=max(1, int(args.limit)), + ) + ) + finally: + store.close() + render_draft_candidates(console=console, records=records) + return int(ExitCode.SUCCESS) + + +def _run_approve( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + if not _confirm_cli_governance_break_glass(console, args): + return int(ExitCode.CONTRACT_ERROR) + try: + store, _config, _project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + record = approve_record( + store, + record_id=str(args.record_id), + approved_by=str(args.by), + ) + except Exception as exc: + console.print(f"Approve failed: {exc}") + return int(ExitCode.CONTRACT_ERROR) + finally: + store.close() + render_governance_result( + console=console, + action="approved", + record_id=record.id, + detail=f"Approved {record.id} -> active", + ) + return int(ExitCode.SUCCESS) + + +def _run_reject( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + if not _confirm_cli_governance_break_glass(console, args): + return int(ExitCode.CONTRACT_ERROR) + try: + store, _config, _project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + record = reject_record( + store, + record_id=str(args.record_id), + rejected_by=str(args.by), + reason=args.reason, + ) + except Exception as exc: + console.print(f"Reject failed: {exc}") + return int(ExitCode.CONTRACT_ERROR) + finally: + store.close() + render_governance_result( + console=console, + action="rejected", + record_id=record.id, + detail=f"Rejected {record.id}", + ) + return int(ExitCode.SUCCESS) + + +def _run_archive( + *, console: PrinterLike, root_path: Path, args: 
argparse.Namespace +) -> int: + if not _confirm_cli_governance_break_glass(console, args): + return int(ExitCode.CONTRACT_ERROR) + try: + store, _config, _project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + record = archive_record( + store, + record_id=str(args.record_id), + archived_by=str(args.by), + ) + except Exception as exc: + console.print(f"Archive failed: {exc}") + return int(ExitCode.CONTRACT_ERROR) + finally: + store.close() + render_governance_result( + console=console, + action="archived", + record_id=record.id, + detail=f"Archived {record.id}", + ) + return int(ExitCode.SUCCESS) + + +def _run_trajectory( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + action = str(args.trajectory_action) + if action == "status": + return _run_trajectory_status(console=console, root_path=root_path) + if action == "rebuild": + return _run_trajectory_rebuild(console=console, root_path=root_path) + if action == "list": + return _run_trajectory_list(console=console, root_path=root_path, args=args) + if action == "search": + return _run_trajectory_search(console=console, root_path=root_path, args=args) + if action == "agents": + return _run_trajectory_agents(console=console, root_path=root_path, args=args) + if action == "anomalies": + return _run_trajectory_anomalies( + console=console, root_path=root_path, args=args + ) + if action == "dashboard": + return _run_trajectory_dashboard( + console=console, root_path=root_path, args=args + ) + if action == "export": + return _run_trajectory_export(console=console, root_path=root_path, args=args) + return _run_trajectory_show(console=console, root_path=root_path, args=args) + + +def _run_trajectory_status(*, console: PrinterLike, root_path: Path) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + 
console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + count = store.count_trajectories(project_id=project.id) + latest = store.latest_trajectory_projection_run(project_id=project.id) + finally: + store.close() + render_trajectory_status( + console=console, + enabled=config.trajectories_enabled, + count=count, + latest_run=latest, + ) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_rebuild(*, console: PrinterLike, root_path: Path) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + if not config.trajectories_enabled: + store.close() + console.print("Trajectory projection is disabled.") + return int(ExitCode.CONTRACT_ERROR) + audit_db_path = resolve_audit_path(root_path=root_path, value=DEFAULT_AUDIT_PATH) + try: + result = store.rebuild_trajectories_from_audit( + project=project, + root_path=root_path, + audit_db_path=audit_db_path, + ) + except Exception as exc: + console.print(f"Trajectory rebuild failed: {exc}") + return int(ExitCode.CONTRACT_ERROR) + finally: + store.close() + render_projection_run(console=console, run=result.run) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_list( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, _config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + items = store.list_trajectories( + project_id=project.id, + limit=max(1, int(args.limit)), + ) + finally: + store.close() + render_trajectory_list(console=console, items=items) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_search( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + 
except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=resolve_memory_db_path(root_path, config), + mode="trajectory_search", + query=str(args.query), + filters={"match_mode": str(args.match)}, + max_results=max(1, int(args.limit)), + ) + finally: + store.close() + payload = result.get("payload") + trajectories = payload.get("trajectories") if isinstance(payload, dict) else None + typed = [item for item in (trajectories or []) if isinstance(item, dict)] + render_trajectory_search_results( + console=console, + query=str(args.query), + trajectories=typed, + ) + return int(ExitCode.SUCCESS) + + +def _trajectory_query_filters(args: argparse.Namespace) -> dict[str, object] | None: + if bool(getattr(args, "include_routine", False)): + return {"include_routine": True} + return None + + +def _run_trajectory_agents( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=resolve_memory_db_path(root_path, config), + mode="trajectory_agents", + filters=_trajectory_query_filters(args), + ) + finally: + store.close() + payload = result.get("payload") + if not isinstance(payload, dict): + console.print("Unexpected trajectory agents payload.") + return int(ExitCode.INTERNAL_ERROR) + if bool(getattr(args, "json", False)): + console.print(json.dumps(payload, indent=2, sort_keys=True)) + return int(ExitCode.SUCCESS) + render_trajectory_agents(console=console, payload=payload) + return int(ExitCode.SUCCESS) + + +def 
_run_trajectory_anomalies( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=resolve_memory_db_path(root_path, config), + mode="trajectory_anomalies", + filters=_trajectory_query_filters(args), + max_results=max(1, int(args.limit)), + ) + finally: + store.close() + payload = result.get("payload") + if not isinstance(payload, dict): + console.print("Unexpected trajectory anomalies payload.") + return int(ExitCode.INTERNAL_ERROR) + if bool(getattr(args, "json", False)): + console.print(json.dumps(payload, indent=2, sort_keys=True)) + return int(ExitCode.SUCCESS) + render_trajectory_anomalies(console=console, payload=payload) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_dashboard( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + result = query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=resolve_memory_db_path(root_path, config), + mode="trajectory_dashboard", + filters=_trajectory_query_filters(args), + max_results=max(1, int(args.limit)), + ) + finally: + store.close() + payload = result.get("payload") + if not isinstance(payload, dict): + console.print("Unexpected trajectory dashboard payload.") + return int(ExitCode.INTERNAL_ERROR) + if bool(getattr(args, "json", False)): + console.print(json.dumps(payload, indent=2, sort_keys=True)) + return int(ExitCode.SUCCESS) + status = payload.get("status") + if 
isinstance(status, dict): + latest = status.get("latest_projection") + render_trajectory_status( + console=console, + enabled=config.trajectories_enabled, + count=int(status.get("trajectory_count", 0)), + latest_run=None, + ) + if isinstance(latest, dict) and latest.get("finished_at_utc"): + console.print( + f" latest projection finished: {latest.get('finished_at_utc')}", + markup=False, + ) + agents = payload.get("agents") + if isinstance(agents, dict): + console.print("") + render_trajectory_agents(console=console, payload=agents) + anomalies = payload.get("anomalies") + if isinstance(anomalies, dict): + console.print("") + render_trajectory_anomalies(console=console, payload=anomalies) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_show( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, _config, _project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + trajectory = store.find_trajectory(str(args.trajectory_id)) + finally: + store.close() + if trajectory is None: + console.print(f"Trajectory not found: {args.trajectory_id}") + return int(ExitCode.CONTRACT_ERROR) + render_trajectory_detail(console=console, trajectory=trajectory) + return int(ExitCode.SUCCESS) + + +def _run_trajectory_export( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + store, config, project = _open_store(root_path) + except FileNotFoundError as exc: + console.print(f"Engineering memory database not found: {exc}") + return int(ExitCode.CONTRACT_ERROR) + try: + output_path = resolve_export_output_path( + root_path=root_path, + raw_path=str(args.out), + allow_external_out=bool(args.allow_external_out), + ) + result = export_trajectories_jsonl( + store=store, + project=project, + root_path=root_path, + config=config, + profile_name=str(args.profile), + output_path=output_path, + 
force_enabled=bool(args.force), + ) + except MemoryContractError as exc: + console.print(str(exc)) + return int(ExitCode.CONTRACT_ERROR) + finally: + store.close() + if bool(args.json): + console.print(json.dumps(result.manifest, sort_keys=True, indent=2)) + else: + console.print( + "Trajectory export complete: " + f"{result.records_written} record(s) -> {result.output_path}" + ) + return int(ExitCode.SUCCESS) + + +def _run_jobs( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + action = str(args.jobs_action) + if action == "status": + return _run_jobs_status(console=console, root_path=root_path) + if action == "enqueue": + return _run_jobs_enqueue(console=console, root_path=root_path, args=args) + if action == "run-once": + return _run_jobs_run_once(console=console, root_path=root_path, args=args) + return _run_jobs_list(console=console, root_path=root_path, args=args) + + +def _run_jobs_json( + *, + console: PrinterLike, + root_path: Path, + action: Callable[[], dict[str, object]], + fail_on: frozenset[str] = frozenset({"failed"}), +) -> int: + try: + payload = action() + except MemoryContractError as exc: + return _print_memory_contract_error(console, exc) + console.print(json.dumps(payload, sort_keys=True, indent=2)) + if str(payload.get("status", "")) in fail_on: + return int(ExitCode.CONTRACT_ERROR) + return int(ExitCode.SUCCESS) + + +def _run_jobs_status(*, console: PrinterLike, root_path: Path) -> int: + return _run_jobs_json( + console=console, + root_path=root_path, + action=lambda: execute_projection_rebuild_status(root_path=root_path), + ) + + +def _run_jobs_enqueue( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + return _run_jobs_json( + console=console, + root_path=root_path, + action=lambda: execute_enqueue_projection_rebuild( + root_path=root_path, + trigger="cli", + force=bool(args.force), + spawn_worker=not bool(args.no_spawn), + ), + ) + + +def _run_jobs_run_once( + *, console: 
PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + not_before = getattr(args, "not_before", None) + return _run_jobs_json( + console=console, + root_path=root_path, + action=lambda: execute_run_projection_jobs_once( + root_path=root_path, not_before_utc=not_before + ), + fail_on=frozenset({"failed"}), + ) + + +def _run_jobs_list( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + try: + payload = execute_projection_rebuild_status( + root_path=root_path, + limit=max(1, int(args.limit)), + ) + except MemoryContractError as exc: + return _print_memory_contract_error(console, exc) + exit_code = int(ExitCode.SUCCESS) + if bool(args.json): + console.print(json.dumps(payload, sort_keys=True, indent=2)) + else: + jobs = payload.get("jobs") + if not isinstance(jobs, list) or not jobs: + console.print("No projection rebuild jobs recorded.") + else: + for job in jobs: + if isinstance(job, dict): + console.print( + f"{job.get('id')} {job.get('status')} " + f"trigger={job.get('trigger')} " + f"requested={job.get('requested_at_utc')}" + ) + return exit_code + + +def _confirm_cli_governance_break_glass( + console: PrinterLike, + args: argparse.Namespace, +) -> bool: + if bool(getattr(args, "i_know_what_im_doing", False)): + return True + console.print(_CLI_GOVERNANCE_BREAK_GLASS_MESSAGE) + return False + + +def _semantic_unavailable(console: PrinterLike, message: str) -> int: + console.print(message) + console.print( + "Enable memory.semantic and install: pip install 'codeclone[semantic-lancedb]'", + markup=False, + ) + return int(ExitCode.CONTRACT_ERROR) + + +def _resolve_semantic_provider_or_fail( + console: PrinterLike, config: MemoryConfig +) -> EmbeddingProvider | int: + try: + return resolve_embedding_provider(config.semantic) + except MemorySemanticUnavailableError as exc: + return _semantic_unavailable( + console, f"Semantic embedding provider unavailable: {exc}" + ) + + +def _run_semantic( + *, console: PrinterLike, 
root_path: Path, args: argparse.Namespace +) -> int: + action = str(args.semantic_action) + if action == "status": + return _run_semantic_status(console=console, root_path=root_path) + if action == "rebuild": + return _run_semantic_rebuild(console=console, root_path=root_path) + if action == "probe": + return _run_semantic_probe(console=console, root_path=root_path, args=args) + return _run_semantic_search(console=console, root_path=root_path, args=args) + + +def _run_semantic_status(*, console: PrinterLike, root_path: Path) -> int: + config = resolve_memory_config(root_path) + status = resolve_semantic_index(config.semantic).status() + provider_status = "available" + provider_reason: str | None = None + if config.semantic.enabled: + try: + provider = resolve_embedding_provider(config.semantic) + except MemorySemanticUnavailableError as exc: + provider_status = "unavailable" + provider_reason = str(exc) + else: + provider_status = provider.model_id + state = ( + "available" if status.available and provider_reason is None else "unavailable" + ) + console.print(f"semantic index: {state}") + for reason in (status.reason, provider_reason): + if reason: + console.print(f" reason: {reason}") + console.print(f" enabled: {config.semantic.enabled}") + console.print( + f" embedding: {config.semantic.embedding_provider} " + f"(dim {config.semantic.dimension})" + ) + if config.semantic.enabled: + console.print(f" provider: {provider_status}", markup=False) + return int(ExitCode.SUCCESS) + + +def _run_semantic_rebuild(*, console: PrinterLike, root_path: Path) -> int: + config = resolve_memory_config(root_path) + try: + payload = execute_semantic_index_rebuild(root_path=root_path, config=config) + except MemoryContractError as exc: + console.print(str(exc)) + console.print("Run: codeclone memory init") + return int(ExitCode.CONTRACT_ERROR) + status = payload["status"] + if status == "ok": + ok = cast(RebuildSemanticIndexOkPayload, payload) + console.print( + f"Rebuilt semantic 
index: {ok['indexed']} indexed, {ok['deleted']} pruned." + ) + console.print( + " embedded: " + f"{ok['embedded']}, skipped unchanged: {ok['skipped_unchanged']}" + ) + for name, count in sorted(ok["by_source"].items()): + console.print(f" {name}: {count}") + return int(ExitCode.SUCCESS) + if status == "skipped": + skipped = cast(RebuildSemanticIndexSkippedPayload, payload) + return _semantic_unavailable( + console, f"Semantic indexing is disabled ({skipped['reason']})." + ) + unavailable = cast(RebuildSemanticIndexUnavailablePayload, payload) + return _semantic_unavailable( + console, f"Semantic index rebuild unavailable: {unavailable['reason']}." + ) + + +def _run_semantic_probe( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + config = resolve_memory_config(root_path) + try: + payload = execute_semantic_projection_probe( + root_path=root_path, + config=config, + exact_tokens=bool(getattr(args, "exact_tokens", False)), + ) + except MemoryContractError as exc: + console.print(str(exc)) + console.print("Run: codeclone memory init") + return int(ExitCode.CONTRACT_ERROR) + if args.json: + console.print(json.dumps(payload, indent=2, sort_keys=True)) + return int(ExitCode.SUCCESS) + if payload.get("status") in {"skipped", "unavailable"}: + reason = str(payload.get("reason", "unknown")) + return _semantic_unavailable( + console, + f"Semantic projection probe unavailable: {reason}.", + ) + lanes_obj = payload.get("lanes") + if not isinstance(lanes_obj, dict): + return _semantic_unavailable( + console, "Semantic projection probe returned invalid payload." 
+ ) + lanes = lanes_obj + console.print("Semantic projection probe:") + console.print(f" estimator: {payload.get('estimator')}") + console.print(f" model_max_tokens: {payload.get('model_max_tokens')}") + for lane in ("memory", "audit", "trajectory"): + stats = lanes.get(lane, {}) + if not stats: + continue + chars = stats.get("chars", {}) + tokens = stats.get("tokens", {}) + overflow = stats.get("token_overflow", {}) + truncation = stats.get("truncation", {}) + raw_tokens = tokens.get("raw", {}) if isinstance(tokens, dict) else {} + effective_tokens = ( + tokens.get("effective", {}) if isinstance(tokens, dict) else {} + ) + console.print(f" {lane}: {stats.get('documents', 0)} documents") + console.print( + " chars p50/p95/max: " + f"{chars.get('p50')}/{chars.get('p95')}/{chars.get('max')}" + ) + console.print( + " raw tokens p50/p95/max: " + f"{raw_tokens.get('p50')}/{raw_tokens.get('p95')}/{raw_tokens.get('max')}" + ) + console.print( + " effective tokens p50/p95/max: " + f"{effective_tokens.get('p50')}/{effective_tokens.get('p95')}/" + f"{effective_tokens.get('max')}" + ) + console.print( + " truncated: " + f"{truncation.get('documents', 0)} " + f"(max_dropped={truncation.get('max_dropped_tokens', 0)})" + ) + console.print( + " over_model_limit: " + f"{overflow.get('over_model_limit')} " + f"(max_overflow={overflow.get('max_overflow_tokens')})" + ) + return int(ExitCode.SUCCESS) + + +def _run_semantic_search( + *, console: PrinterLike, root_path: Path, args: argparse.Namespace +) -> int: + config = resolve_memory_config(root_path) + provider = _resolve_semantic_provider_or_fail(console, config) + if isinstance(provider, int): + return provider + index = resolve_semantic_index(config.semantic) + status = index.status() + if not status.available: + return _semantic_unavailable( + console, f"Semantic search unavailable: {status.reason}." 
+ ) + db_path = resolve_memory_db_path(root_path, config) + store = SqliteEngineeringMemoryStore(db_path) if db_path.exists() else None + from ...memory.semantic import close_semantic_index + + try: + with span(name="memory.semantic.search"): + results = semantic_search( + index=index, + provider=provider, + store=store, + audit_db_path=resolve_audit_path( + root_path=root_path, value=DEFAULT_AUDIT_PATH + ), + query=str(args.query), + limit=max(1, int(args.limit)), + preview_chars=DEFAULT_MEMORY_STATEMENT_PREVIEW_CHARS, + ) + except MemorySemanticUnavailableError as exc: + # The embedding model loads lazily, so an unavailable model surfaces at + # the first embed rather than at provider resolution. + return _semantic_unavailable(console, f"Semantic search unavailable: {exc}.") + finally: + close_semantic_index(index) + if store is not None: + store.close() + if bool(args.json): + return _render_semantic_json( + console=console, query=str(args.query), config=config, results=results + ) + return _render_semantic_text( + console=console, + query=str(args.query), + config=config, + provider=provider, + results=results, + ) + + +def _provider_note(config: MemoryConfig, provider: EmbeddingProvider) -> str: + kind = config.semantic.embedding_provider + quality = "diagnostic, NOT semantic-quality" if kind == "diagnostic" else kind + return f"provider: {provider.model_id} ({quality})" + + +def _render_semantic_text( + *, + console: PrinterLike, + query: str, + config: MemoryConfig, + provider: EmbeddingProvider, + results: list[SemanticSearchResult], +) -> int: + console.print(f"Semantic matches for: {query}", markup=False) + console.print(_provider_note(config, provider), markup=False) + if not results: + console.print(" (no matches)") + return int(ExitCode.SUCCESS) + for rank, result in enumerate(results, start=1): + console.print( + f"{rank}. 
{result.source}/{result.source_id} score={result.score:.3f}", + markup=False, + ) + meta = " · ".join( + part for part in (result.kind, result.status, result.confidence) if part + ) + console.print(f" {meta}", markup=False) + if result.subject_path: + console.print(f" subject: {result.subject_path}", markup=False) + console.print(f' "{result.preview}"', markup=False) + return int(ExitCode.SUCCESS) + + +def _render_semantic_json( + *, + console: PrinterLike, + query: str, + config: MemoryConfig, + results: list[SemanticSearchResult], +) -> int: + import json + + kind = config.semantic.embedding_provider + payload = { + "query": query, + "semantic": {"provider": kind, "diagnostic": kind == "diagnostic"}, + "results": [result.model_dump() for result in results], + } + console.print(json.dumps(payload, indent=2), markup=False) + return int(ExitCode.SUCCESS) + + +__all__ = ["memory_main"] diff --git a/codeclone/surfaces/cli/memory_analysis.py b/codeclone/surfaces/cli/memory_analysis.py new file mode 100644 index 00000000..bdb96392 --- /dev/null +++ b/codeclone/surfaces/cli/memory_analysis.py @@ -0,0 +1,231 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from ... 
import __version__ +from ...cache.store import Cache +from ...config.argparse_builder import build_parser +from ...config.pyproject_loader import load_pyproject_config +from ...config.resolver import apply_pyproject_config_overrides +from ...contracts import DEFAULT_JSON_REPORT_PATH +from ...core.bootstrap import bootstrap +from ...core.discovery import discover +from ...core.parallelism import process +from ...core.pipeline import analyze +from ...core.reporting import report +from ...memory.report_trust import assess_cached_report_trust +from ...report.html import build_html_report +from ...utils.json_io import read_json_object +from . import baseline_state as cli_baseline_state +from . import execution as cli_execution +from . import post_run as cli_post_run +from . import report_meta as cli_meta_mod +from . import reports_output as cli_reports_output +from . import runtime as cli_runtime +from . import startup as cli_startup +from . import state as cli_state +from .console import PlainConsole +from .types import require_status_console + +ReportSource = Literal["explicit_report", "trusted_cache", "fresh_analysis"] + + +@dataclass(frozen=True, slots=True) +class LoadedMemoryReport: + document: dict[str, object] + source: ReportSource + rejected_cache_reason: str | None = None + + +def _rich_progress_symbols() -> tuple[type, type, type, type, type]: + from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, + ) + + return Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn + + +def load_report_for_memory_init( + *, + root_path: Path, + from_report: Path | None, +) -> LoadedMemoryReport: + if from_report is not None: + return LoadedMemoryReport( + document=read_json_object(from_report.resolve()), + source="explicit_report", + ) + + default_path = root_path / DEFAULT_JSON_REPORT_PATH + if default_path.is_file(): + report_document = read_json_object(default_path) + trust = assess_cached_report_trust( + 
root_path=root_path, + report_path=default_path, + report_document=report_document, + ) + if trust.trusted: + return LoadedMemoryReport( + document=report_document, + source="trusted_cache", + ) + return LoadedMemoryReport( + document=run_memory_analysis_report(root_path=root_path), + source="fresh_analysis", + rejected_cache_reason=trust.reason, + ) + + return LoadedMemoryReport( + document=run_memory_analysis_report(root_path=root_path), + source="fresh_analysis", + ) + + +def run_memory_analysis_report(*, root_path: Path) -> dict[str, object]: + ap = build_parser(__version__) + args = ap.parse_args([str(root_path), "--quiet", "--no-progress"]) + pyproject_config = load_pyproject_config(root_path) + apply_pyproject_config_overrides( + args=args, + config_values=pyproject_config, + explicit_cli_dests=set(), + ) + cli_state.set_console(PlainConsole()) + printer = require_status_console(cli_state.get_console()) + started = cli_meta_mod._current_report_timestamp_utc() + baseline_inputs = cli_startup.resolve_baseline_inputs( + ap=ap, + args=args, + root_path=root_path, + baseline_path_from_args=False, + metrics_path_from_args=False, + probe_metrics_baseline_section_fn=( + cli_baseline_state._probe_metrics_baseline_section + ), + printer=printer, + ) + cache_path = cli_runtime._resolve_cache_path( + args=args, + root_path=root_path, + from_args=False, + ) + output_paths = cli_reports_output._resolve_output_paths( + args, + report_path_origins=cli_reports_output._report_path_origins([]), + report_generated_at_utc=started, + ) + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=args.max_cache_size_mb * 1024 * 1024, + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + segment_min_stmt=args.segment_min_stmt, + collect_api_surface=True, + ) + cache.load() + boot = bootstrap( + args=args, + root=root_path, + output_paths=output_paths, + 
cache_path=cache_path, + ) + discovery_result, processing_result, analysis_result = ( + cli_execution.run_analysis_stages( + args=args, + boot=boot, + cache=cache, + discover_fn=discover, + process_fn=process, + analyze_fn=analyze, + print_failed_files_fn=lambda _paths: None, + cache_update_segment_projection_fn=lambda _cache, _analysis: None, + rich_progress_symbols_fn=_rich_progress_symbols, + ) + ) + shared_baseline_payload = ( + baseline_inputs.shared_baseline_payload + if baseline_inputs.metrics_baseline_path == baseline_inputs.baseline_path + else None + ) + baseline_state = cli_baseline_state._resolve_clone_baseline_state( + args=args, + baseline_path=baseline_inputs.baseline_path, + baseline_exists=baseline_inputs.baseline_exists, + analysis=analysis_result, + shared_baseline_payload=shared_baseline_payload, + ) + metrics_baseline_state = cli_baseline_state._resolve_metrics_baseline_state( + args=args, + metrics_baseline_path=baseline_inputs.metrics_baseline_path, + metrics_baseline_exists=baseline_inputs.metrics_baseline_exists, + clone_baseline_state=baseline_state, + baseline_updated_path=baseline_state.updated_path, + analysis=analysis_result, + shared_baseline_payload=shared_baseline_payload, + ) + cache_status, cache_schema_version = cli_runtime._resolve_cache_status(cache) + report_meta = cli_meta_mod.build_cli_report_meta( + codeclone_version=__version__, + scan_root=root_path, + baseline_path=baseline_inputs.baseline_path, + baseline_state=baseline_state, + cache_path=cli_runtime.resolve_report_cache_path(cache_path), + cache_status=cache_status, + cache_schema_version=cache_schema_version, + processing_result=processing_result, + metrics_baseline_path=baseline_inputs.metrics_baseline_path, + metrics_baseline_state=metrics_baseline_state, + analysis_result=analysis_result, + args=args, + metrics_computed=cli_runtime._metrics_computed(args), + analysis_started_at_utc=started, + report_generated_at_utc=started, + ) + diff_context = 
cli_post_run.build_diff_context( + analysis=analysis_result, + baseline_path=baseline_inputs.baseline_path, + baseline_state=baseline_state, + metrics_baseline_state=metrics_baseline_state, + ) + artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=diff_context.new_func, + new_block=diff_context.new_block, + html_builder=build_html_report, + metrics_diff=diff_context.metrics_diff, + coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, + api_surface_diff_available=diff_context.api_surface_diff_available, + include_report_document=True, + ) + if artifacts.report_document is None: + msg = "Memory init analysis did not produce a canonical report document." + raise RuntimeError(msg) + return artifacts.report_document + + +__all__ = [ + "LoadedMemoryReport", + "ReportSource", + "load_report_for_memory_init", + "run_memory_analysis_report", +] diff --git a/codeclone/surfaces/cli/memory_render.py b/codeclone/surfaces/cli/memory_render.py new file mode 100644 index 00000000..90b54431 --- /dev/null +++ b/codeclone/surfaces/cli/memory_render.py @@ -0,0 +1,568 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable, Mapping, Sequence +from typing import TYPE_CHECKING + +from ...memory.coverage import ScopeCoverageReport +from ...memory.display import format_memory_record_line +from ...memory.models import MemoryRecord +from ...memory.status_report import MemoryStatusReport +from ...memory.vacuum import VacuumReport +from .console import make_query_console, rich_panel_symbols, supports_rich_console +from .types import PrinterLike + +if TYPE_CHECKING: + from rich.text import Text as RichText + +_ColumnKwargs = dict[str, str | bool | int | None] + +if TYPE_CHECKING: + _MemoryRowBuilder = Callable[[int, object, type[RichText]], Sequence[object]] +else: + _MemoryRowBuilder = Callable[[int, object, object], Sequence[object]] + + +def _table_add_column( + table: object, + title: str, + kwargs: _ColumnKwargs, +) -> None: + table.add_column(title, **kwargs) # type: ignore[attr-defined] + + +def _table_add_row(table: object, cells: Sequence[object]) -> None: + table.add_row(*cells) # type: ignore[attr-defined] + + +def memory_console() -> PrinterLike: + return make_query_console() + + +def render_search_results( + *, + console: PrinterLike, + query: str, + records: Sequence[Mapping[str, object]], +) -> None: + if supports_rich_console(console): + _render_record_table_rich( + console=console, + command="search", + subtitle=( + f"[cyan]{query}[/cyan] " + f"[dim]{_count_label(len(records), 'result')}[/dim]" + ), + records=records, + columns=( + ("#", {"style": "dim", "justify": "right", "no_wrap": True}), + ("Type", {"style": "cyan", "no_wrap": True}), + ("Status", {"no_wrap": True}), + ("Record", {}), + ), + row_builder=_search_row, + ) + return + console.print(f"Engineering Memory search: {query!r}") + _print_record_lines(console, records) + + +def render_path_results( + *, + console: PrinterLike, + rel_path: str, + records: 
Sequence[MemoryRecord], +) -> None: + if supports_rich_console(console): + mapped = [_record_mapping(record) for record in records] + _render_record_table_rich( + console=console, + command="for-path", + subtitle=( + f"[cyan]{rel_path}[/cyan] " + f"[dim]{_count_label(len(records), 'record')}[/dim]" + ), + records=mapped, + columns=( + ("#", {"style": "dim", "justify": "right", "no_wrap": True}), + ("Type", {"style": "cyan", "no_wrap": True}), + ("Status", {"no_wrap": True}), + ("Statement", {}), + ), + row_builder=_search_row, + ) + return + console.print(f"Engineering Memory for path: {rel_path}") + _print_record_lines(console, [_record_mapping(record) for record in records]) + + +def render_status_report(*, console: PrinterLike, report: MemoryStatusReport) -> None: + if supports_rich_console(console): + _render_status_report_rich(console=console, report=report) + return + _render_status_report_plain(console=console, report=report) + + +def render_init_result( + *, + console: PrinterLike, + dry_run: bool, + project_id: str, + db_path: str | None, + analysis_fingerprint: str | None, + stats: Mapping[str, int] | None, + planned_counts: Mapping[str, int] | None, +) -> None: + if supports_rich_console(console): + _render_init_result_rich( + console=console, + dry_run=dry_run, + project_id=project_id, + db_path=db_path, + analysis_fingerprint=analysis_fingerprint, + stats=stats, + planned_counts=planned_counts, + ) + return + title = ( + "Engineering Memory init dry-run" + if dry_run + else "Engineering Memory initialized" + ) + console.print(title) + console.print(f" project_id: {project_id}") + if dry_run: + console.print(f" analysis_fingerprint:{analysis_fingerprint}") + _print_count_map(console, " planned records:", planned_counts) + return + if db_path is not None: + console.print(f" db: {db_path}") + _print_count_map(console, " upsert stats:", stats) + _print_count_map(console, " record types:", planned_counts) + + +def render_init_note(*, console: PrinterLike, 
message: str) -> None: + if supports_rich_console(console): + _, _, _, _, text_cls = rich_panel_symbols() + console.print(text_cls(f" note: {message}", style="dim italic")) + return + console.print(f" note: {message}") + + +def render_stale_records( + *, + console: PrinterLike, + records: Sequence[Mapping[str, object]], +) -> None: + if supports_rich_console(console): + _render_record_table_rich( + console=console, + command="stale", + subtitle=f"[dim]{_count_label(len(records), 'record')}[/dim]", + border_style="yellow", + records=records, + columns=( + ("#", {"style": "dim", "justify": "right", "no_wrap": True}), + ("Type", {"style": "cyan", "no_wrap": True}), + ("Reason", {"style": "yellow", "no_wrap": True}), + ("Record", {}), + ), + row_builder=_stale_row, + empty_message="(none)", + ) + return + console.print("Stale engineering memory records") + if not records: + console.print(" (none)") + return + for item in records: + reason = item.get("stale_reason", "") + line = format_memory_record_line(item) + console.print(f" - [{item.get('type')}] {line} ({reason})") + + +def render_vacuum_report(*, console: PrinterLike, report: VacuumReport) -> None: + if supports_rich_console(console): + _render_vacuum_report_rich(console=console, report=report) + return + console.print("Engineering Memory vacuum complete") + console.print(f" deleted: {report.total_deleted}") + _print_count_map(console, " ", report.deleted_by_status, indent=" ") + + +def render_coverage_report( + *, console: PrinterLike, report: ScopeCoverageReport +) -> None: + if supports_rich_console(console): + _render_coverage_report_rich(console=console, report=report) + return + console.print("Engineering Memory coverage") + covered = report.scope_paths_with_memory + total = report.scope_paths_total + console.print(f" covered: {covered}/{total} ({report.scope_coverage_percent}%)") + if report.uncovered_paths: + console.print(" uncovered:") + for path in report.uncovered_paths: + console.print(f" - {path}") 
+ + +def render_draft_candidates( + *, + console: PrinterLike, + records: Sequence[MemoryRecord], +) -> None: + if supports_rich_console(console): + _render_record_table_rich( + console=console, + command="review candidates", + subtitle=f"[dim]{_count_label(len(records), 'draft')}[/dim]", + border_style="magenta", + records=records, + columns=( + ("#", {"style": "dim", "justify": "right", "no_wrap": True}), + ("ID", {"style": "dim", "no_wrap": True}), + ("Type", {"style": "cyan", "no_wrap": True}), + ("Statement", {}), + ), + row_builder=_draft_row, + empty_message="(none)", + ) + return + console.print("Draft memory candidates") + if not records: + console.print(" (none)") + return + for record in records: + console.print(f" - {record.id} [{record.type}] {record.statement}") + + +def render_governance_result( + *, + console: PrinterLike, + action: str, + record_id: str, + detail: str | None = None, +) -> None: + message = detail or f"{action} {record_id}" + if supports_rich_console(console): + _, panel_cls, _, _, text_cls = rich_panel_symbols() + style = "green" if action == "approved" else "yellow" + console.print( + panel_cls( + text_cls(message, style=style), + border_style=style, + padding=(0, 1), + ) + ) + return + console.print(message) + + +def _render_record_table_rich( + *, + console: PrinterLike, + command: str, + subtitle: str, + records: Sequence[object], + columns: Sequence[tuple[str, _ColumnKwargs]], + row_builder: _MemoryRowBuilder, + border_style: str = "blue", + empty_message: str = "(no records)", +) -> None: + box, panel_cls, rule_cls, table_cls, text_cls = rich_panel_symbols() + console.print(rule_cls("Engineering Memory", style="dim", characters="─")) + console.print( + panel_cls( + text_cls.from_markup(f"[bold]{command}[/bold] {subtitle}"), + border_style=border_style, + padding=(0, 1), + ) + ) + if not records: + console.print(f" [dim]{empty_message}[/dim]") + return + table = table_cls( + box=box.SIMPLE, + show_header=True, + 
header_style="bold", + padding=(0, 1), + ) + for title, kwargs in columns: + _table_add_column(table, title, kwargs) + for index, item in enumerate(records, start=1): + _table_add_row(table, row_builder(index, item, text_cls)) + console.print(table) + + +def _search_row( + index: int, + item: object, + text_cls: type[RichText], +) -> Sequence[object]: + mapping = item if isinstance(item, Mapping) else {} + record_type = str(mapping.get("type", "?")) + status = str(mapping.get("status", "?")) + return ( + str(index), + record_type, + text_cls(status, style=_status_style(status)), + format_memory_record_line(mapping), + ) + + +def _stale_row( + index: int, + item: object, + _text_cls: type[RichText], +) -> Sequence[object]: + mapping = item if isinstance(item, Mapping) else {} + return ( + str(index), + str(mapping.get("type", "?")), + str(mapping.get("stale_reason", "")), + format_memory_record_line(mapping), + ) + + +def _draft_row( + index: int, + item: object, + _text_cls: type[RichText], +) -> Sequence[object]: + if not isinstance(item, MemoryRecord): + return (str(index), "?", "?", "") + return (str(index), item.id, item.type, item.statement) + + +def _render_status_report_rich( + *, console: PrinterLike, report: MemoryStatusReport +) -> None: + box, panel_cls, rule_cls, table_cls, text_cls = rich_panel_symbols() + console.print(rule_cls("Engineering Memory", style="dim", characters="─")) + console.print( + panel_cls( + text_cls.from_markup("[bold]status[/bold]"), + border_style="blue", + padding=(0, 1), + ) + ) + meta = table_cls.grid(padding=(0, 2)) + meta.add_column(style="dim", no_wrap=True) + meta.add_column() + for label, value in _status_rows(report): + meta.add_row(label, value) + console.print(meta) + if report.records_by_type: + type_table = table_cls(box=box.SIMPLE, show_header=True, header_style="bold") + type_table.add_column("Type", style="cyan") + type_table.add_column("Count", justify="right") + for key, count in 
sorted(report.records_by_type.items()): + type_table.add_row(key, str(count)) + console.print(type_table) + + +def _render_init_result_rich( + *, + console: PrinterLike, + dry_run: bool, + project_id: str, + db_path: str | None, + analysis_fingerprint: str | None, + stats: Mapping[str, int] | None, + planned_counts: Mapping[str, int] | None, +) -> None: + _box, panel_cls, rule_cls, table_cls, text_cls = rich_panel_symbols() + title = "init dry-run" if dry_run else "initialized" + console.print(rule_cls("Engineering Memory", style="dim", characters="─")) + console.print( + panel_cls( + text_cls.from_markup(f"[bold]{title}[/bold] [cyan]{project_id}[/cyan]"), + border_style="green" if not dry_run else "yellow", + padding=(0, 1), + ) + ) + meta = table_cls.grid(padding=(0, 2)) + meta.add_column(style="dim", no_wrap=True) + meta.add_column() + if dry_run: + meta.add_row("analysis_fp", analysis_fingerprint or "n/a") + elif db_path is not None: + meta.add_row("db", db_path) + console.print(meta) + _render_count_table(console, title="Upsert stats", counts=stats) + _render_count_table( + console, + title="Record types" if not dry_run else "Planned records", + counts=planned_counts, + ) + + +def _render_vacuum_report_rich(*, console: PrinterLike, report: VacuumReport) -> None: + _, panel_cls, rule_cls, _, text_cls = rich_panel_symbols() + console.print(rule_cls("Engineering Memory", style="dim", characters="─")) + console.print( + panel_cls( + text_cls.from_markup( + f"[bold]vacuum complete[/bold] " + f"[dim](deleted {report.total_deleted})[/dim]" + ), + border_style="green" if report.total_deleted else "blue", + padding=(0, 1), + ) + ) + if not report.deleted_by_status: + console.print(" [dim](nothing to purge)[/dim]") + return + _render_count_table( + console, + title="Deleted by status", + counts=report.deleted_by_status, + ) + + +def _render_coverage_report_rich( + *, console: PrinterLike, report: ScopeCoverageReport +) -> None: + box, panel_cls, rule_cls, table_cls, 
text_cls = rich_panel_symbols() + covered = report.scope_paths_with_memory + total = report.scope_paths_total + percent = report.scope_coverage_percent + console.print(rule_cls("Engineering Memory", style="dim", characters="─")) + console.print( + panel_cls( + text_cls.from_markup( + f"[bold]coverage[/bold] [cyan]{covered}/{total}[/cyan] " + f"[dim]({percent}%)[/dim]" + ), + border_style="blue", + padding=(0, 1), + ) + ) + if not report.uncovered_paths: + console.print(" [dim](all scoped paths covered)[/dim]") + return + table = table_cls(box=box.SIMPLE, show_header=True, header_style="bold") + table.add_column("Uncovered path", style="yellow") + for path in report.uncovered_paths: + table.add_row(path) + console.print(table) + + +def _render_count_table( + console: PrinterLike, + *, + title: str, + counts: Mapping[str, int] | None, +) -> None: + if not counts: + return + box, _, _, table_cls, _ = rich_panel_symbols() + table = table_cls( + title=title, + box=box.SIMPLE, + show_header=True, + header_style="bold", + ) + table.add_column("Key", style="cyan") + table.add_column("Count", justify="right") + for key, count in sorted(counts.items()): + table.add_row(key, str(count)) + console.print(table) + + +def _render_status_report_plain( + *, console: PrinterLike, report: MemoryStatusReport +) -> None: + console.print("Engineering Memory status") + for label, value in _status_rows(report): + console.print(f" {label + ':':18} {value}") + if report.records_by_type: + console.print(" records_by_type:") + for key, count in sorted(report.records_by_type.items()): + console.print(f" {key}: {count}") + + +def _status_rows(report: MemoryStatusReport) -> tuple[tuple[str, str], ...]: + return ( + ("root", str(report.project_root)), + ("backend", report.backend), + ("db", str(report.db_path)), + ("db_exists", str(report.db_exists)), + ("schema", report.schema_version or "n/a"), + ("project_id", report.project_id or "n/a"), + ("analysis_fp", report.last_analysis_fingerprint or 
"n/a"), + ("last_init_run", report.last_init_run_id or "n/a"), + ("records", str(report.record_count)), + ) + + +def _record_mapping(record: MemoryRecord) -> dict[str, object]: + return { + "type": record.type, + "status": record.status, + "statement": record.statement, + "payload": record.payload, + } + + +def _print_record_lines( + console: PrinterLike, + records: Sequence[Mapping[str, object]], +) -> None: + if not records: + console.print(" (no records)") + return + for item in records: + record_type = item.get("type", "?") + status = item.get("status", "?") + line = format_memory_record_line(item) + console.print(f" - [{record_type}/{status}] {line}") + + +def _print_count_map( + console: PrinterLike, + heading: str, + counts: Mapping[str, int] | None, + *, + indent: str = " ", +) -> None: + if not counts: + return + console.print(heading) + for key, count in sorted(counts.items()): + console.print(f"{indent}{key}: {count}") + + +def _count_label(count: int, noun: str) -> str: + suffix = "" if count == 1 else "s" + return f"({count} {noun}{suffix})" + + +def _status_style(status: str) -> str: + if status == "active": + return "green" + if status == "stale": + return "yellow" + if status == "draft": + return "magenta" + return "dim" + + +__all__ = [ + "memory_console", + "render_coverage_report", + "render_draft_candidates", + "render_governance_result", + "render_init_note", + "render_init_result", + "render_path_results", + "render_search_results", + "render_stale_records", + "render_status_report", + "render_vacuum_report", +] diff --git a/codeclone/surfaces/cli/observability.py b/codeclone/surfaces/cli/observability.py new file mode 100644 index 00000000..114cd5b3 --- /dev/null +++ b/codeclone/surfaces/cli/observability.py @@ -0,0 +1,85 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI surface for the platform observability store (``codeclone observability``). + +Read-only: opens the per-root store read-only, builds the ``TraceView`` read +model, and renders it as JSON or branded HTML. Never writes the store. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from ...contracts import ExitCode +from ...observability.render_html import render_trace_html +from ...observability.render_json import render_trace_json +from ...observability.store.reader import ( + build_trace_view, + open_observability_store_readonly, +) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="codeclone observability") + sub = parser.add_subparsers(dest="command") + trace = sub.add_parser("trace", help="Render the recorded operation trace.") + trace.add_argument("--root", default=".", help="Repository root path.") + trace.add_argument( + "--last", type=int, default=None, help="Show the last N root operations." + ) + trace.add_argument( + "--operation", default=None, help="Focus one operation id and its chain." + ) + trace.add_argument("--correlation", default=None, help="Filter by correlation id.") + trace.add_argument("--json", default=None, help="Write JSON to this path.") + trace.add_argument("--html", default=None, help="Write HTML to this path.") + return parser + + +def _report_missing_store(root: Path) -> int: + print( + f"No observability store at {root}. Run with " + "CODECLONE_OBSERVABILITY_ENABLED=1 to start collecting." 
+ ) + return int(ExitCode.SUCCESS) + + +def observability_main(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + if args.command != "trace": + parser.print_help() + return int(ExitCode.CONTRACT_ERROR) + + root = Path(args.root).resolve() + conn = open_observability_store_readonly(root) + if conn is None: + return _report_missing_store(root) + try: + trace = build_trace_view( + conn, + operation_id=args.operation, + correlation_id=args.correlation, + last=args.last, + ) + finally: + conn.close() + + outputs = [(args.json, render_trace_json), (args.html, render_trace_html)] + wrote = False + for path, render in outputs: + if path is not None: + Path(path).write_text(render(trace), encoding="utf-8") + print(f"Wrote {path}") + wrote = True + if not wrote: + print(render_trace_json(trace)) + return int(ExitCode.SUCCESS) + + +__all__ = ["observability_main"] diff --git a/codeclone/surfaces/cli/patch_verify.py b/codeclone/surfaces/cli/patch_verify.py new file mode 100644 index 00000000..0e6dade0 --- /dev/null +++ b/codeclone/surfaces/cli/patch_verify.py @@ -0,0 +1,306 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from ... 
import ui_messages as ui +from ...contracts import ExitCode +from ...core._types import AnalysisResult +from ...report.gates.evaluator import ( + GateResult, + GateState, + MetricGateConfig, + evaluate_gate_state, + gate_state_from_project_metrics, +) +from ...utils.coerce import as_int as _as_int +from ..mcp._patch_contract import ( + VALID_STRICTNESS_PROFILES, + StrictnessProfile, + budgets_for_strictness, +) +from .baseline_state import CloneBaselineState +from .post_run import DiffContext +from .types import CLIArgsLike, PrinterLike + +_STATUS_STYLES = { + "accepted": "bold green", + "violated": "bold red", + "unverified": "yellow", +} + + +def validate_strictness(value: str) -> StrictnessProfile: + if value not in VALID_STRICTNESS_PROFILES: + expected = ", ".join(sorted(VALID_STRICTNESS_PROFILES)) + raise ValueError(f"Invalid --strictness value: {value!r}. Expected {expected}.") + if value == "strict": + return "strict" + if value == "relaxed": + return "relaxed" + return "ci" + + +def _threshold_or_none(value: object) -> int | None: + threshold = _as_int(value, -1) + return threshold if threshold >= 0 else None + + +def _health_after(analysis: AnalysisResult) -> int: + metrics = analysis.project_metrics + if metrics is None: + return 0 + return max(int(metrics.health.total), 0) + + +def _health_delta(metrics_diff: object | None) -> int: + if metrics_diff is None: + return 0 + return _as_int(getattr(metrics_diff, "health_delta", 0), 0) + + +def _metric_gate_config( + *, + args: CLIArgsLike, + strictness: StrictnessProfile, +) -> MetricGateConfig: + if strictness == "ci": + return MetricGateConfig( + fail_complexity=int(args.fail_complexity), + fail_coupling=int(args.fail_coupling), + fail_cohesion=int(args.fail_cohesion), + fail_cycles=bool(args.fail_cycles), + fail_dead_code=bool(args.fail_dead_code), + fail_health=int(args.fail_health), + fail_on_new_metrics=bool(args.fail_on_new_metrics), + fail_on_typing_regression=bool(args.fail_on_typing_regression), + 
fail_on_docstring_regression=bool(args.fail_on_docstring_regression), + fail_on_api_break=bool(args.fail_on_api_break), + fail_on_untested_hotspots=bool(args.fail_on_untested_hotspots), + min_typing_coverage=int(args.min_typing_coverage), + min_docstring_coverage=int(args.min_docstring_coverage), + coverage_min=int(args.coverage_min), + fail_on_new=True, + fail_threshold=-1, + ) + + budgets = budgets_for_strictness( + strictness=strictness, + coverage_min=int(args.coverage_min), + complexity_threshold=_threshold_or_none(args.fail_complexity), + coupling_threshold=_threshold_or_none(args.fail_coupling), + cohesion_threshold=_threshold_or_none(args.fail_cohesion), + ) + return MetricGateConfig( + fail_complexity=budgets.complexity_delta, + fail_coupling=budgets.coupling_delta, + fail_cohesion=budgets.cohesion_delta, + fail_cycles=budgets.dependency_cycle, + fail_dead_code=budgets.dead_code_regression, + fail_health=budgets.health_floor, + fail_on_new_metrics=( + budgets.typing_regression + or budgets.docstring_regression + or budgets.api_break + ), + fail_on_typing_regression=budgets.typing_regression, + fail_on_docstring_regression=budgets.docstring_regression, + fail_on_api_break=budgets.api_break, + fail_on_untested_hotspots=budgets.coverage_hotspot, + min_typing_coverage=int(args.min_typing_coverage), + min_docstring_coverage=int(args.min_docstring_coverage), + coverage_min=budgets.coverage_min, + fail_on_new=budgets.clone_regression == 0, + fail_threshold=-1, + ) + + +def _gate_state( + *, + analysis: AnalysisResult, + diff_context: DiffContext, +) -> GateState: + clone_total = analysis.func_clones_count + analysis.block_clones_count + if analysis.project_metrics is None: + return GateState( + clone_new_count=diff_context.new_clones_count, + clone_total=clone_total, + ) + return gate_state_from_project_metrics( + project_metrics=analysis.project_metrics, + coverage_join=analysis.coverage_join, + metrics_diff=diff_context.metrics_diff, + 
clone_new_count=diff_context.new_clones_count, + clone_total=clone_total, + ) + + +def _evaluate_patch_gates( + *, + args: CLIArgsLike, + strictness: StrictnessProfile, + analysis: AnalysisResult, + diff_context: DiffContext, +) -> GateResult: + return evaluate_gate_state( + state=_gate_state(analysis=analysis, diff_context=diff_context), + config=_metric_gate_config(args=args, strictness=strictness), + ) + + +def _status_text(status: str) -> str: + style = _STATUS_STYLES.get(status) + return f"[{style}]{status}[/{style}]" if style else status + + +def _gate_status(gate_result: GateResult) -> str: + return "FAIL" if gate_result.exit_code != 0 else "pass" + + +def _contract_violations( + *, + diff_context: DiffContext, + gate_result: GateResult, +) -> tuple[str, ...]: + violations: list[str] = [] + if diff_context.new_clones_count > 0: + violations.append("structural_regressions") + if gate_result.exit_code != 0: + violations.append("gate_failures") + return tuple(violations) + + +def _render_reasons( + *, + console: PrinterLike, + title: str, + values: Sequence[str], +) -> None: + console.print(f" [bold]{title}:[/bold]") + if not values: + console.print(" [dim]none[/dim]") + return + for value in values: + console.print(f" - {value}") + + +def render_patch_verify( + *, + console: PrinterLike, + args: CLIArgsLike, + strictness: str, + analysis: AnalysisResult, + diff_context: DiffContext, + baseline_state: CloneBaselineState, + quiet: bool, +) -> int: + try: + validated_strictness = validate_strictness(strictness) + except ValueError as exc: + console.print(ui.fmt_contract_error(str(exc))) + return int(ExitCode.CONTRACT_ERROR) + + if not baseline_state.trusted_for_diff: + console.print( + ui.fmt_contract_error( + "Patch verify requires a trusted baseline. " + "Run codeclone . --update-baseline first." 
+ ) + ) + return int(ExitCode.CONTRACT_ERROR) + + gate_result = _evaluate_patch_gates( + args=args, + strictness=validated_strictness, + analysis=analysis, + diff_context=diff_context, + ) + violations = _contract_violations( + diff_context=diff_context, + gate_result=gate_result, + ) + status = "violated" if violations else "accepted" + exit_code = ( + int(ExitCode.GATING_FAILURE) + if violations and validated_strictness != "relaxed" + else int(ExitCode.SUCCESS) + ) + health_after = _health_after(analysis) + health_before = health_after - _health_delta(diff_context.metrics_diff) + gate_status = _gate_status(gate_result) + + if quiet: + console.print( + ui.fmt_patch_verify_compact( + status=status, + health_before=health_before, + health_after=health_after, + regressions=diff_context.new_clones_count, + gate_status=gate_status, + ) + ) + return exit_code + + from rich.rule import Rule + + console.print() + console.print(Rule(ui.PATCH_VERIFY_TITLE)) + console.print() + console.print( + f" [bold]{ui.PATCH_VERIFY_LABEL_STRICTNESS}[/bold] {validated_strictness}" + ) + console.print( + f" [bold]{ui.PATCH_VERIFY_LABEL_STATUS}[/bold] {_status_text(status)}" + ) + console.print() + console.print( + f" [bold]{ui.PATCH_VERIFY_LABEL_HEALTH}[/bold] " + f"{health_before} -> {health_after} " + f"(delta: {health_after - health_before})" + ) + console.print() + console.print(f" [bold]{ui.PATCH_VERIFY_LABEL_STRUCTURAL_DELTA}[/bold]") + console.print( + f" {ui.PATCH_VERIFY_LABEL_REGRESSIONS} {diff_context.new_clones_count}" + ) + console.print(f" {ui.PATCH_VERIFY_LABEL_IMPROVEMENTS} 0") + verdict = ( + ui.PATCH_VERIFY_VERDICT_REGRESSED + if diff_context.new_clones_count > 0 + else ui.PATCH_VERIFY_VERDICT_STABLE + ) + console.print(f" {ui.PATCH_VERIFY_LABEL_VERDICT} {verdict}") + console.print() + console.print( + f" [bold]{ui.PATCH_VERIFY_LABEL_GATE_PREVIEW}[/bold] {gate_status} " + f"{ui.PATCH_VERIFY_GATE_EXIT.format(exit_code=gate_result.exit_code)}" + ) + if gate_result.reasons: 
+ for reason in gate_result.reasons: + console.print(f" - {reason}") + console.print() + _render_reasons( + console=console, + title=ui.PATCH_VERIFY_CONTRACT_VIOLATIONS, + values=violations, + ) + console.print() + if status == "accepted": + console.print(f" [bold green]{ui.PATCH_VERIFY_ACCEPTED}[/bold green]") + elif validated_strictness == "relaxed": + console.print(f" [yellow]{ui.PATCH_VERIFY_RELAXED_ADVISORY}[/yellow]") + else: + console.print(f" [bold red]{ui.PATCH_VERIFY_VIOLATED}[/bold red]") + return exit_code + + +__all__ = [ + "VALID_STRICTNESS_PROFILES", + "render_patch_verify", + "validate_strictness", +] diff --git a/codeclone/surfaces/cli/runtime.py b/codeclone/surfaces/cli/runtime.py index 8f91fb4d..d65fe81c 100644 --- a/codeclone/surfaces/cli/runtime.py +++ b/codeclone/surfaces/cli/runtime.py @@ -108,19 +108,15 @@ def resolve_cache_path( if from_args and cache_path_arg: return Path(cache_path_arg).expanduser() - cache_path = root_path / ".cache" / "codeclone" / "cache.json" - if legacy_cache_path.exists(): - try: - legacy_resolved = legacy_cache_path.resolve() - except OSError: - legacy_resolved = legacy_cache_path - if legacy_resolved != cache_path: - console.print( - ui.fmt_legacy_cache_warning( - legacy_path=legacy_resolved, - new_path=cache_path, - ) - ) + from ...paths.workspace import default_cache_path, emit_legacy_workspace_warnings + + cache_path = default_cache_path(root_path) + emit_legacy_workspace_warnings( + root_path=root_path, + cache_path=cache_path, + legacy_home_cache_path=legacy_cache_path, + console=console, + ) return cache_path diff --git a/codeclone/surfaces/cli/session_stats.py b/codeclone/surfaces/cli/session_stats.py new file mode 100644 index 00000000..ae32d381 --- /dev/null +++ b/codeclone/surfaces/cli/session_stats.py @@ -0,0 +1,408 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import time +from pathlib import Path + +from ... import ui_messages as ui +from ...contracts import ExitCode +from ...controller_insights.session_stats import ( + SessionSnapshot as _SessionSnapshot, +) +from ...controller_insights.session_stats import ( + WorkflowFootprintSnapshot as _WorkflowFootprintSnapshot, +) +from ...controller_insights.session_stats import ( + _active_intent_count, + _format_age, + _format_duration, + _live_agent_count, + _visible_intent_count, + collect_session_snapshot, + latest_run_source_label, +) +from . import console as cli_console +from .types import PrinterLike + +_MAX_ALLOWED_FILES_SHOWN = 2 +_MAX_TOP_WORKFLOWS_SHOWN = 3 +_PLAIN_LABEL_WIDTH = 25 + + +def render_session_stats( + *, + console: PrinterLike, + root_path: Path, + quiet: bool, +) -> int: + """Render workspace session status. 
Returns ExitCode int.""" + try: + snapshot = collect_session_snapshot(root_path) + except Exception as exc: + console.print( + ui.fmt_contract_error(ui.SESSION_STATS_READ_FAILED.format(error=exc)) + ) + return int(ExitCode.CONTRACT_ERROR) + if quiet: + return _render_quiet(console, snapshot) + return _render_verbose(console, snapshot) + + +def _render_quiet(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + line = ui.SESSION_STATS_QUIET_TEMPLATE.format( + prefix=ui.SESSION_STATS_QUIET_PREFIX, + workspace_health=snapshot.workspace_health, + live_agents=_live_agent_count(snapshot), + active_intents=_active_intent_count(snapshot), + visible_intents=_visible_intent_count(snapshot), + stale=snapshot.stale_count, + latest_run=snapshot.latest_run_id or ui.SESSION_STATS_LATEST_RUN_NONE, + ) + if snapshot.latest_run_health is not None: + line += " " + ui.SESSION_STATS_QUIET_HEALTH.format( + health=snapshot.latest_run_health + ) + console.print(line) + return int(ExitCode.SUCCESS) + + +def _render_verbose(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + if cli_console.supports_rich_console(console): + return _render_verbose_rich(console, snapshot) + console.print(f"[bold]╍╍╍ {ui.SESSION_STATS_TITLE} ╍╍╍[/bold]") + console.print() + console.print( + f" {ui.SESSION_STATS_WORKSPACE:<{_PLAIN_LABEL_WIDTH}}{snapshot.root}" + ) + console.print( + f" {ui.SESSION_STATS_INTENT_REGISTRY:<{_PLAIN_LABEL_WIDTH}}" + f"{snapshot.intent_registry_backend} ({snapshot.intent_registry_storage})" + ) + if snapshot.audit_enabled and snapshot.audit_storage: + console.print( + f" {ui.SESSION_STATS_AUDIT:<{_PLAIN_LABEL_WIDTH}}" + f"{ui.SESSION_STATS_AUDIT_ENABLED} ({snapshot.audit_storage})" + ) + + if snapshot.latest_run_id: + age_str = _format_age(snapshot.latest_run_age_seconds) + health_part = ( + f", health={snapshot.latest_run_health}" + if snapshot.latest_run_health is not None + else "" + ) + findings_part = ( + f", findings={snapshot.latest_run_findings}" + if 
snapshot.latest_run_findings is not None + else "" + ) + source_part = _latest_run_source_suffix(snapshot) + console.print( + f" {ui.SESSION_STATS_LATEST_RUN:<{_PLAIN_LABEL_WIDTH}}" + f"{snapshot.latest_run_id}" + f" ({age_str}{health_part}{findings_part}{source_part})" + ) + if snapshot.cache_present and snapshot.latest_run_files is not None: + console.print( + f" {ui.SESSION_STATS_CACHE:<{_PLAIN_LABEL_WIDTH}}" + f"{ui.SESSION_STATS_REPORT_PRESENT.format(files=snapshot.latest_run_files)}" + ) + else: + console.print( + f" {ui.SESSION_STATS_LATEST_RUN:<{_PLAIN_LABEL_WIDTH}}" + f"{ui.SESSION_STATS_LATEST_RUN_NONE}" + ) + + console.print() + live_agents = [a for a in snapshot.agents if a.alive] + console.print( + f" {ui.SESSION_STATS_LIVE_AGENTS:<{_PLAIN_LABEL_WIDTH}}{len(live_agents)}" + ) + console.print( + f" {ui.SESSION_STATS_ACTIVE_INTENTS:<{_PLAIN_LABEL_WIDTH}}" + f"{_active_intent_count(snapshot)}" + ) + console.print( + f" {ui.SESSION_STATS_VISIBLE_INTENTS:<{_PLAIN_LABEL_WIDTH}}" + f"{_visible_intent_count(snapshot)}" + ) + + for agent in live_agents: + label = agent.label or "unknown" + started_ago = _format_age(int(time.time()) - agent.start_epoch) + console.print(f" PID {agent.pid} ({label}) — started {started_ago}") + for intent in agent.intents: + file_count_label = f"{intent.scope_file_count} file" + ( + "s" if intent.scope_file_count != 1 else "" + ) + console.print( + f" {intent.intent_id} {intent.status} scope: {file_count_label}" + ) + shown_files = intent.allowed_files[:_MAX_ALLOWED_FILES_SHOWN] + if shown_files: + files_str = ", ".join(shown_files) + if len(intent.allowed_files) > _MAX_ALLOWED_FILES_SHOWN: + remaining = len(intent.allowed_files) - _MAX_ALLOWED_FILES_SHOWN + files_str += f" ... 
and {remaining} more" + console.print(f" allowed: {files_str}") + lease_str = _format_duration(intent.lease_remaining_seconds) + console.print(f" lease: {lease_str} remaining") + + console.print() + console.print( + f" {ui.SESSION_STATS_STALE:<{_PLAIN_LABEL_WIDTH}}{snapshot.stale_count}" + ) + console.print( + f" {ui.SESSION_STATS_EXPIRED:<{_PLAIN_LABEL_WIDTH}}{snapshot.expired_count}" + ) + console.print( + f" {ui.SESSION_STATS_RECOVERABLE:<{_PLAIN_LABEL_WIDTH}}" + f"{snapshot.recoverable_count}" + ) + if snapshot.mcp_token_footprint is not None and snapshot.mcp_token_event_count > 0: + enc = snapshot.mcp_token_encoding or "unknown" + console.print( + " " + + ui.SESSION_STATS_RETENTION_FOOTPRINT_VERBOSE.format( + tokens=snapshot.mcp_token_footprint, + encoding=enc, + calls=snapshot.mcp_token_event_count, + ) + ) + _render_plain_top_workflows(console, snapshot.top_workflows) + console.print() + console.print(f" {ui.SESSION_STATS_WORKSPACE_HEALTH} {snapshot.workspace_health}") + return int(ExitCode.SUCCESS) + + +def _render_verbose_rich(console: PrinterLike, snapshot: _SessionSnapshot) -> int: + box, panel_cls, rule_cls, table_cls, text_cls = cli_console.rich_panel_symbols() + + console.print(rule_cls(ui.SESSION_STATS_TITLE, style="dim", characters="─")) + + summary = table_cls.grid(padding=(0, 2)) + summary.add_column(style="dim", no_wrap=True) + summary.add_column() + summary.add_row(ui.SESSION_STATS_WORKSPACE.rstrip(":"), str(snapshot.root)) + summary.add_row( + ui.SESSION_STATS_INTENT_REGISTRY.rstrip(":"), + f"{snapshot.intent_registry_backend} ({snapshot.intent_registry_storage})", + ) + if snapshot.audit_enabled and snapshot.audit_storage: + summary.add_row( + ui.SESSION_STATS_AUDIT.rstrip(":"), + f"{ui.SESSION_STATS_AUDIT_ENABLED} ({snapshot.audit_storage})", + ) + if snapshot.latest_run_id: + run_text = _latest_run_text(snapshot) + summary.add_row(ui.SESSION_STATS_LATEST_RUN.rstrip(":"), run_text) + if snapshot.cache_present and snapshot.latest_run_files is 
not None: + summary.add_row( + ui.SESSION_STATS_CACHE.rstrip(":"), + ui.SESSION_STATS_REPORT_PRESENT.format(files=snapshot.latest_run_files), + ) + else: + summary.add_row( + ui.SESSION_STATS_LATEST_RUN.rstrip(":"), + ui.SESSION_STATS_LATEST_RUN_NONE, + ) + summary.add_row( + ui.SESSION_STATS_LIVE_AGENTS.rstrip(":"), + str(_live_agent_count(snapshot)), + ) + summary.add_row( + ui.SESSION_STATS_ACTIVE_INTENTS.rstrip(":"), + str(_active_intent_count(snapshot)), + ) + summary.add_row( + ui.SESSION_STATS_VISIBLE_INTENTS.rstrip(":"), + str(_visible_intent_count(snapshot)), + ) + summary.add_row( + ui.SESSION_STATS_STALE.rstrip(":"), + str(snapshot.stale_count), + ) + summary.add_row( + ui.SESSION_STATS_EXPIRED.rstrip(":"), + str(snapshot.expired_count), + ) + summary.add_row( + ui.SESSION_STATS_RECOVERABLE.rstrip(":"), + str(snapshot.recoverable_count), + ) + if snapshot.mcp_token_footprint is not None and snapshot.mcp_token_event_count > 0: + enc = snapshot.mcp_token_encoding or "unknown" + summary.add_row( + ui.SESSION_STATS_RETENTION_FOOTPRINT, + f"~{snapshot.mcp_token_footprint:,} tokens in retention window " + f"({enc}, {snapshot.mcp_token_event_count} tool calls)", + ) + health_text = text_cls( + snapshot.workspace_health, + style=_health_style(snapshot.workspace_health), + ) + summary.add_row( + ui.SESSION_STATS_WORKSPACE_HEALTH.rstrip(":"), + health_text, + ) + console.print( + panel_cls(summary, border_style=_health_style(snapshot.workspace_health)) + ) + + live_agents = [agent for agent in snapshot.agents if agent.alive] + if not live_agents: + console.print(f"[dim]{ui.SESSION_STATS_NO_AGENTS}[/dim]") + _render_rich_top_workflows(console, snapshot.top_workflows) + return int(ExitCode.SUCCESS) + + table = table_cls( + title=ui.SESSION_STATS_WORKSPACE_INTENT_RECORDS_TITLE, + box=box.SIMPLE_HEAVY, + show_lines=False, + expand=True, + ) + table.add_column(ui.SESSION_STATS_COL_PID, no_wrap=True, style="dim") + table.add_column(ui.SESSION_STATS_COL_AGENT, 
overflow="fold") + table.add_column(ui.SESSION_STATS_COL_OWNERSHIP, no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_STATUS, no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_SCOPE, justify="right", no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_LEASE, no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_FILES, overflow="ellipsis", max_width=42) + + for agent in live_agents: + label = agent.label or ui.SESSION_STATS_AGENT_UNKNOWN + for intent in agent.intents: + table.add_row( + str(agent.pid), + label, + text_cls(intent.ownership, style=_ownership_style(intent.ownership)), + text_cls(intent.status, style=_intent_status_style(intent.status)), + str(intent.scope_file_count), + _format_duration(intent.lease_remaining_seconds), + _allowed_files_label(intent.allowed_files), + ) + console.print(table) + _render_rich_top_workflows(console, snapshot.top_workflows) + return int(ExitCode.SUCCESS) + + +def _render_plain_top_workflows( + console: PrinterLike, + workflows: tuple[_WorkflowFootprintSnapshot, ...], +) -> None: + if not workflows: + return + console.print(f" {ui.SESSION_STATS_TOP_WORKFLOWS}:") + for workflow in workflows[:_MAX_TOP_WORKFLOWS_SHOWN]: + console.print(f" {_workflow_label(workflow)}") + + +def _render_rich_top_workflows( + console: PrinterLike, + workflows: tuple[_WorkflowFootprintSnapshot, ...], +) -> None: + if not workflows or not cli_console.supports_rich_console(console): + return + box, _panel_cls, _rule_cls, table_cls, text_cls = cli_console.rich_panel_symbols() + table = table_cls( + title=ui.SESSION_STATS_TOP_WORKFLOWS, + box=box.SIMPLE_HEAVY, + show_lines=False, + expand=True, + ) + table.add_column(ui.SESSION_STATS_COL_WORKFLOW, overflow="ellipsis") + table.add_column(ui.SESSION_STATS_COL_TOKENS, justify="right", no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_CALLS, justify="right", no_wrap=True) + table.add_column(ui.SESSION_STATS_COL_AGENT, overflow="ellipsis") + for workflow in workflows[:_MAX_TOP_WORKFLOWS_SHOWN]: 
+ table.add_row( + _workflow_name(workflow), + f"~{workflow.total_tokens:,}", + str(workflow.call_count), + text_cls(workflow.agent_label or "-", style="dim"), + ) + console.print(table) + + +def _latest_run_text(snapshot: _SessionSnapshot) -> str: + age_str = _format_age(snapshot.latest_run_age_seconds) + parts = [f"{snapshot.latest_run_id} ({age_str}"] + if snapshot.latest_run_health is not None: + parts.append(f", health={snapshot.latest_run_health}") + if snapshot.latest_run_findings is not None: + parts.append(f", findings={snapshot.latest_run_findings}") + source_part = _latest_run_source_suffix(snapshot) + if source_part: + parts.append(source_part) + parts.append(")") + return "".join(parts) + + +def _latest_run_source_suffix(snapshot: _SessionSnapshot) -> str: + label = latest_run_source_label(snapshot.latest_run_source) + if label is None: + return "" + return f", source={label}" + + +def _allowed_files_label(files: tuple[str, ...]) -> str: + if not files: + return "-" + shown = files[:_MAX_ALLOWED_FILES_SHOWN] + label = ", ".join(shown) + if len(files) > _MAX_ALLOWED_FILES_SHOWN: + extra = ui.BLAST_RADIUS_MORE.format(count=len(files) - _MAX_ALLOWED_FILES_SHOWN) + label += f" {extra}" + return label + + +def _workflow_name(workflow: _WorkflowFootprintSnapshot) -> str: + prefix = workflow.workflow_kind or "workflow" + workflow_id = workflow.workflow_id or "-" + return f"{prefix}:{workflow_id}" + + +def _workflow_label(workflow: _WorkflowFootprintSnapshot) -> str: + agent = workflow.agent_label or "-" + return ( + f"{_workflow_name(workflow)} " + f"~{workflow.total_tokens:,} tokens / " + f"{workflow.call_count} calls " + f"agent={agent}" + ) + + +def _health_style(value: str) -> str: + return { + "idle": "dim", + "clean": "green", + "active": "cyan", + "contested": "yellow", + }.get(value, "cyan") + + +def _ownership_style(value: str) -> str: + if value.startswith("own"): + return "green" + if value == "foreign_stale": + return "yellow" + if value == 
"foreign_active": + return "cyan" + if value == "recoverable": + return "magenta" + return "dim" + + +def _intent_status_style(value: str) -> str: + return { + "active": "cyan", + "clean": "green", + "expanded": "yellow", + "violated": "red", + "expired": "dim", + }.get(value, "white") diff --git a/codeclone/surfaces/cli/state.py b/codeclone/surfaces/cli/state.py index 1c9aa9ea..d6205832 100644 --- a/codeclone/surfaces/cli/state.py +++ b/codeclone/surfaces/cli/state.py @@ -5,10 +5,11 @@ from __future__ import annotations -from pathlib import Path +from ...paths.workspace import legacy_home_cache_path console: object | None = None -LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() + +LEGACY_CACHE_PATH = legacy_home_cache_path() def get_console() -> object: diff --git a/codeclone/surfaces/cli/tips.py b/codeclone/surfaces/cli/tips.py index 2ca3434e..626c2daa 100644 --- a/codeclone/surfaces/cli/tips.py +++ b/codeclone/surfaces/cli/tips.py @@ -8,13 +8,14 @@ import os import sys -from collections.abc import Mapping +from collections.abc import Callable, Mapping from pathlib import Path from typing import NamedTuple, TextIO from packaging.version import InvalidVersion, Version from ... 
import ui_messages as ui +from ...paths.gitignore import repo_gitignore_covers_codeclone_cache from ...utils.json_io import read_json_object, write_json_document_atomically from .attrs import bool_attr from .types import PrinterLike @@ -26,6 +27,7 @@ _DEAD_CODE_REACHABILITY_2_0_2_MIGRATION_TIP_KEY = ( "dead_code_reachability_2_0_2_migration_shown" ) +_COHESION_LCOM4_2_1_MIGRATION_TIP_KEY = "cohesion_lcom4_2_1_migration_shown" _TIPS_SCHEMA_VERSION = 1 _VSCODE_EXTENSION_URL = ( "https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone" @@ -73,6 +75,25 @@ class _DeadCodeReachabilityMigration(NamedTuple): ) +class _CohesionLcom4Migration(NamedTuple): + tip_key: str + baseline_min: Version + baseline_max: Version + current_min: Version + target_version: str + + +_COHESION_LCOM4_MIGRATIONS: tuple[_CohesionLcom4Migration, ...] = ( + _CohesionLcom4Migration( + tip_key=_COHESION_LCOM4_2_1_MIGRATION_TIP_KEY, + baseline_min=Version("2.0.2"), + baseline_max=Version("2.0.2"), + current_min=Version("2.1.0a1"), + target_version="2.1.0", + ), +) + + def _tips_state_path(cache_path: Path) -> Path: return cache_path.parent / "tips.json" @@ -194,11 +215,12 @@ def _tip_context_allowed( return _stream_is_tty(stream) -def _dead_code_reachability_migration( +def _select_baseline_migration( + migrations: tuple[_DeadCodeReachabilityMigration | _CohesionLcom4Migration, ...], *, baseline_generator_version: str | None, codeclone_version: str, -) -> _DeadCodeReachabilityMigration | None: +) -> _DeadCodeReachabilityMigration | _CohesionLcom4Migration | None: if not baseline_generator_version: return None try: @@ -206,7 +228,7 @@ def _dead_code_reachability_migration( current_version = Version(codeclone_version) except InvalidVersion: return None - for migration in _DEAD_CODE_REACHABILITY_MIGRATIONS: + for migration in migrations: if ( migration.baseline_min <= baseline_version <= migration.baseline_max and current_version >= migration.current_min @@ -215,6 +237,36 @@ def 
_dead_code_reachability_migration( return None +def _dead_code_reachability_migration( + *, + baseline_generator_version: str | None, + codeclone_version: str, +) -> _DeadCodeReachabilityMigration | None: + selected = _select_baseline_migration( + _DEAD_CODE_REACHABILITY_MIGRATIONS, + baseline_generator_version=baseline_generator_version, + codeclone_version=codeclone_version, + ) + if isinstance(selected, _DeadCodeReachabilityMigration): + return selected + return None + + +def _cohesion_lcom4_migration( + *, + baseline_generator_version: str | None, + codeclone_version: str, +) -> _CohesionLcom4Migration | None: + selected = _select_baseline_migration( + _COHESION_LCOM4_MIGRATIONS, + baseline_generator_version=baseline_generator_version, + codeclone_version=codeclone_version, + ) + if isinstance(selected, _CohesionLcom4Migration): + return selected + return None + + def maybe_print_vscode_extension_tip( *, args: object, @@ -256,23 +308,16 @@ def maybe_print_vscode_extension_tip( return True -def maybe_print_dead_code_reachability_migration_note( +def _maybe_print_baseline_migration_note( *, args: object, console: PrinterLike, - codeclone_version: str, cache_path: Path, - baseline_generator_version: str | None, - baseline_trusted_for_diff: bool, + migration: _DeadCodeReachabilityMigration | _CohesionLcom4Migration | None, + format_note: Callable[..., str], environ: Mapping[str, str] | None = None, stream: TextIO | None = None, ) -> bool: - if not baseline_trusted_for_diff: - return False - migration = _dead_code_reachability_migration( - baseline_generator_version=baseline_generator_version, - codeclone_version=codeclone_version, - ) if migration is None: return False @@ -293,11 +338,7 @@ def maybe_print_dead_code_reachability_migration_note( ): return False - console.print( - ui.fmt_dead_code_reachability_migration_note( - target_version=migration.target_version, - ) - ) + console.print(format_note(target_version=migration.target_version)) try: 
_remember_tip_shown( path=state_path, @@ -309,7 +350,85 @@ def maybe_print_dead_code_reachability_migration_note( return True +def maybe_print_dead_code_reachability_migration_note( + *, + args: object, + console: PrinterLike, + codeclone_version: str, + cache_path: Path, + baseline_generator_version: str | None, + baseline_trusted_for_diff: bool, + environ: Mapping[str, str] | None = None, + stream: TextIO | None = None, +) -> bool: + if not baseline_trusted_for_diff: + return False + return _maybe_print_baseline_migration_note( + args=args, + console=console, + cache_path=cache_path, + migration=_dead_code_reachability_migration( + baseline_generator_version=baseline_generator_version, + codeclone_version=codeclone_version, + ), + format_note=ui.fmt_dead_code_reachability_migration_note, + environ=environ, + stream=stream, + ) + + +def maybe_print_cohesion_lcom4_migration_note( + *, + args: object, + console: PrinterLike, + codeclone_version: str, + cache_path: Path, + baseline_generator_version: str | None, + baseline_trusted_for_diff: bool, + environ: Mapping[str, str] | None = None, + stream: TextIO | None = None, +) -> bool: + if not baseline_trusted_for_diff: + return False + return _maybe_print_baseline_migration_note( + args=args, + console=console, + cache_path=cache_path, + migration=_cohesion_lcom4_migration( + baseline_generator_version=baseline_generator_version, + codeclone_version=codeclone_version, + ), + format_note=ui.fmt_cohesion_lcom4_migration_note, + environ=environ, + stream=stream, + ) + + +def maybe_print_gitignore_codeclone_cache_tip( + *, + args: object, + console: PrinterLike, + root_path: Path, + environ: Mapping[str, str] | None = None, + stream: TextIO | None = None, +) -> bool: + effective_environ = os.environ if environ is None else environ + effective_stream = sys.stdout if stream is None else stream + if not _tip_context_allowed( + args=args, + environ=effective_environ, + stream=effective_stream, + ): + return False + if 
repo_gitignore_covers_codeclone_cache(root_path): + return False + console.print(ui.fmt_gitignore_codeclone_cache_tip()) + return True + + __all__ = [ + "maybe_print_cohesion_lcom4_migration_note", "maybe_print_dead_code_reachability_migration_note", + "maybe_print_gitignore_codeclone_cache_tip", "maybe_print_vscode_extension_tip", ] diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py index 3e464b2d..a2707d4f 100644 --- a/codeclone/surfaces/cli/types.py +++ b/codeclone/surfaces/cli/types.py @@ -78,6 +78,16 @@ class CLIArgsLike(Protocol): changed_only: bool diff_against: str | None paths_from_git_diff: str | None + blast_radius: tuple[str, ...] | list[str] | None + patch_verify: bool + strictness: str + session_stats: bool + audit: bool + audit_enabled: bool + audit_path: str + audit_payloads: str + audit_retention_days: int + audit_token_estimator: str skip_metrics: bool skip_dead_code: bool skip_dependencies: bool diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index bed4dd4d..822bd168 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -9,6 +9,7 @@ import sys import time from pathlib import Path +from typing import Protocol from ... import __version__ from ... import ui_messages as ui @@ -17,6 +18,7 @@ from ...cache.store import Cache from ...config import resolver as config_resolver from ...config.argparse_builder import build_parser +from ...config.observability import resolve_observability_config from ...config.pyproject_loader import load_pyproject_config from ...contracts import ( ISSUES_URL, @@ -30,6 +32,8 @@ from ...core.pipeline import analyze from ...core.reporting import gate, report from ...models import MetricsDiff +from ...observability import bootstrap as start_observability +from ...observability import operation from ...report.html import build_html_report from . import baseline_state as cli_baseline_state from . 
import changed_scope as cli_changed_scope @@ -43,8 +47,17 @@ from . import state as cli_state from . import summary as cli_summary from . import tips as cli_tips +from .attrs import bool_attr +from .patch_verify import VALID_STRICTNESS_PROFILES from .types import CLIArgsLike, StatusConsole, require_status_console +_CLI_SESSION_START_EPOCH = int(time.time()) + + +class _AuditEnabledArgs(Protocol): + audit_enabled: bool + + __all__ = [ "LEGACY_CACHE_PATH", "Baseline", @@ -68,6 +81,7 @@ "_resolve_metrics_baseline_state", "_rich_progress_symbols", "_run_analysis_stages", + "_validate_controller_query_flags", "_validate_report_ui_flags", "_write_report_outputs", "analyze", @@ -79,6 +93,7 @@ "discover", "gate", "main", + "maybe_print_gitignore_codeclone_cache_tip", "maybe_print_vscode_extension_tip", "print_banner", "process", @@ -117,6 +132,12 @@ maybe_print_dead_code_reachability_migration_note = ( cli_tips.maybe_print_dead_code_reachability_migration_note ) +maybe_print_cohesion_lcom4_migration_note = ( + cli_tips.maybe_print_cohesion_lcom4_migration_note +) +maybe_print_gitignore_codeclone_cache_tip = ( + cli_tips.maybe_print_gitignore_codeclone_cache_tip +) _report_path_origins = cli_reports_output._report_path_origins _resolve_output_paths = cli_reports_output._resolve_output_paths @@ -165,6 +186,142 @@ def _make_console(*, no_color: bool) -> object: LEGACY_CACHE_PATH = cli_state.LEGACY_CACHE_PATH +def _controller_query_mode(args: object) -> bool: + return ( + bool_attr(args, "blast_radius") + or bool_attr(args, "patch_verify") + or bool_attr(args, "session_stats") + or bool_attr(args, "audit") + or bool_attr(args, "audit_json") + ) + + +def _validate_controller_query_flags( + *, + args: object, + report_outputs_requested: bool = False, + strictness_explicit: bool = False, +) -> None: + printer = _console() + blast_radius = bool_attr(args, "blast_radius") + patch_verify = bool_attr(args, "patch_verify") + strictness = str(getattr(args, "strictness", "ci") or "ci") 
+ if strictness not in VALID_STRICTNESS_PROFILES: + expected = ", ".join(sorted(VALID_STRICTNESS_PROFILES)) + printer.print( + ui.fmt_contract_error( + f"Invalid --strictness value: {strictness!r}. Expected {expected}." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if strictness_explicit and not patch_verify: + printer.print(ui.fmt_contract_error(ui.ERR_STRICTNESS_PATCH_VERIFY_ONLY)) + sys.exit(ExitCode.CONTRACT_ERROR) + session_stats = bool_attr(args, "session_stats") + audit = bool_attr(args, "audit") + if session_stats and (blast_radius or patch_verify or audit): + printer.print(ui.fmt_contract_error(ui.ERR_SESSION_STATS_COMBINED)) + sys.exit(ExitCode.CONTRACT_ERROR) + if audit and (blast_radius or patch_verify): + printer.print(ui.fmt_contract_error(ui.ERR_AUDIT_COMBINED)) + sys.exit(ExitCode.CONTRACT_ERROR) + if blast_radius and patch_verify: + printer.print(ui.fmt_contract_error(ui.ERR_BLAST_PATCH_BOTH)) + sys.exit(ExitCode.CONTRACT_ERROR) + if not (blast_radius or patch_verify or session_stats or audit): + return + if bool_attr(args, "update_baseline") or bool_attr(args, "update_metrics_baseline"): + printer.print(ui.fmt_contract_error(ui.ERR_CONTROLLER_NO_BASELINE_UPDATE)) + sys.exit(ExitCode.CONTRACT_ERROR) + if ( + bool_attr(args, "changed_only") + or getattr(args, "diff_against", None) + or getattr(args, "paths_from_git_diff", None) + ): + printer.print(ui.fmt_contract_error(ui.ERR_CONTROLLER_NO_CHANGED_SCOPE)) + sys.exit(ExitCode.CONTRACT_ERROR) + if report_outputs_requested: + printer.print(ui.fmt_contract_error(ui.ERR_CONTROLLER_TERMINAL_ONLY)) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _run_controller_query( + *, + args: CLIArgsLike, + report_document: dict[str, object] | None, + root_path: Path, + analysis_result: AnalysisResult, + diff_context: cli_post_run.DiffContext, + baseline_state: cli_baseline_state.CloneBaselineState, +) -> int | None: + if bool_attr(args, "blast_radius"): + from .blast_radius import render_blast_radius + + return 
render_blast_radius( + console=_console(), + report_document=report_document, + files=tuple(getattr(args, "blast_radius", ()) or ()), + root_path=root_path, + quiet=args.quiet, + ) + if not bool_attr(args, "patch_verify"): + return None + from .patch_verify import render_patch_verify + + return render_patch_verify( + console=_console(), + args=args, + strictness=str(getattr(args, "strictness", "ci") or "ci"), + analysis=analysis_result, + diff_context=diff_context, + baseline_state=baseline_state, + quiet=args.quiet, + ) + + +def _controller_query_console(args: CLIArgsLike) -> StatusConsole: + """Shared console for pre-analysis controller query screens.""" + return require_status_console( + cli_console.make_query_console(no_color=args.no_color) + ) + + +def _dispatch_session_stats(args: CLIArgsLike, root_path: Path) -> int: + from .session_stats import render_session_stats + + return render_session_stats( + console=_controller_query_console(args), + root_path=root_path, + quiet=args.quiet, + ) + + +def _dispatch_audit(args: CLIArgsLike, root_path: Path) -> int: + from .audit import render_audit + + audit_json = bool_attr(args, "audit_json") + return render_audit( + console=_controller_query_console(args), + root_path=root_path, + audit_enabled=bool(getattr(args, "audit_enabled", False)), + audit_path=str(getattr(args, "audit_path", "")), + quiet=args.quiet, + json_summary=audit_json, + ) + + +def _run_pre_analysis_controller_query( + *, + args: CLIArgsLike, + root_path: Path, +) -> int | None: + if bool_attr(args, "session_stats"): + return _dispatch_session_stats(args, root_path) + if bool_attr(args, "audit") or bool_attr(args, "audit_json"): + return _dispatch_audit(args, root_path) + return None + + def print_banner(*, root: Path | None = None) -> None: _set_console(console) _print_banner_impl(root=root) @@ -273,6 +430,9 @@ def _main_impl() -> None: explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) report_path_origins = 
_report_path_origins(raw_argv) report_generated_at_utc = cli_meta_mod._current_report_timestamp_utc() + strictness_explicit = any( + arg == "--strictness" or arg.startswith("--strictness=") for arg in raw_argv + ) cache_path_from_args = any( arg in {"--cache-dir", "--cache-path"} or arg.startswith(("--cache-dir=", "--cache-path=")) @@ -298,14 +458,24 @@ def _main_impl() -> None: config_values=pyproject_config, explicit_cli_dests=explicit_cli_dests, ) + _validate_controller_query_flags( + args=args, + strictness_explicit=strictness_explicit, + ) + _configure_runtime_flags(args) + _configure_runtime_console(args) + pre_analysis_query_exit = _run_pre_analysis_controller_query( + args=args, + root_path=root_path, + ) + if pre_analysis_query_exit is not None: + sys.exit(pre_analysis_query_exit) git_diff_ref = _validate_changed_scope_args(args=args) changed_paths = ( _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) if git_diff_ref is not None else () ) - _configure_runtime_flags(args) - _configure_runtime_console(args) _validate_numeric_args_or_exit( args=args, validate_numeric_args_fn=_validate_numeric_args, @@ -337,6 +507,19 @@ def _main_impl() -> None: report_generated_at_utc=report_generated_at_utc, ) _validate_report_ui_flags(args=args, output_paths=output_paths) + _validate_controller_query_flags( + args=args, + report_outputs_requested=bool( + output_paths.html + or output_paths.json + or output_paths.md + or output_paths.sarif + or output_paths.text + or bool_attr(args, "open_html_report") + or bool_attr(args, "timestamped_report_paths") + ), + strictness_explicit=strictness_explicit, + ) cache_path = _resolve_cache_path( root_path=root_path, args=args, @@ -365,11 +548,15 @@ def _main_impl() -> None: output_paths=output_paths, cache_path=cache_path, ) - discovery_result, processing_result, analysis_result = _run_analysis_stages( - args=args, - boot=boot, - cache=cache, - ) + # Freeze the env-resolved observability decision for this CLI 
process + # (default OFF) so the core pipeline stage spans attach to a cli.analyze op. + start_observability(resolve_observability_config(), root=root_path) + with operation(name="cli.analyze", surface="cli"): + discovery_result, processing_result, analysis_result = _run_analysis_stages( + args=args, + boot=boot, + cache=cache, + ) source_read_contract_failure = ( bool(processing_result.source_read_failures) @@ -427,35 +614,36 @@ def _main_impl() -> None: discovery_result=discovery_result, processing_result=processing_result, ) - _print_summary( - console=_console(), - quiet=args.quiet, - files_found=discovery_result.files_found, - files_analyzed=processing_result.files_analyzed, - cache_hits=discovery_result.cache_hits, - files_skipped=processing_result.files_skipped, - analyzed_lines=summary_counts["analyzed_lines"], - analyzed_functions=summary_counts["analyzed_functions"], - analyzed_methods=summary_counts["analyzed_methods"], - analyzed_classes=summary_counts["analyzed_classes"], - func_clones_count=analysis_result.func_clones_count, - block_clones_count=analysis_result.block_clones_count, - segment_clones_count=analysis_result.segment_clones_count, - suppressed_golden_fixture_groups=len( - getattr(analysis_result, "suppressed_clone_groups", ()) - ), - suppressed_segment_groups=analysis_result.suppressed_segment_groups, - new_clones_count=diff_context.new_clones_count, - ) - print_metrics_if_available( - args=args, - analysis=analysis_result, - metrics_diff=diff_context.metrics_diff, - api_surface_diff_available=diff_context.api_surface_diff_available, - console=_console(), - build_metrics_snapshot_fn=build_metrics_snapshot, - print_metrics_fn=_print_metrics, - ) + if not _controller_query_mode(args): + _print_summary( + console=_console(), + quiet=args.quiet, + files_found=discovery_result.files_found, + files_analyzed=processing_result.files_analyzed, + cache_hits=discovery_result.cache_hits, + files_skipped=processing_result.files_skipped, + 
analyzed_lines=summary_counts["analyzed_lines"], + analyzed_functions=summary_counts["analyzed_functions"], + analyzed_methods=summary_counts["analyzed_methods"], + analyzed_classes=summary_counts["analyzed_classes"], + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + segment_clones_count=analysis_result.segment_clones_count, + suppressed_golden_fixture_groups=len( + getattr(analysis_result, "suppressed_clone_groups", ()) + ), + suppressed_segment_groups=analysis_result.suppressed_segment_groups, + new_clones_count=diff_context.new_clones_count, + ) + print_metrics_if_available( + args=args, + analysis=analysis_result, + metrics_diff=diff_context.metrics_diff, + api_surface_diff_available=diff_context.api_surface_diff_available, + console=_console(), + build_metrics_snapshot_fn=build_metrics_snapshot, + print_metrics_fn=_print_metrics, + ) report_artifacts = report( boot=boot, @@ -469,8 +657,25 @@ def _main_impl() -> None: metrics_diff=diff_context.metrics_diff, coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, api_surface_diff_available=diff_context.api_surface_diff_available, - include_report_document=bool(changed_paths), + include_report_document=bool(changed_paths) or _controller_query_mode(args), + ) + _emit_cli_analysis_completed_if_enabled( + args=args, + root_path=root_path, + report_document=report_artifacts.report_document, + new_func_count=len(diff_context.new_func), + new_block_count=len(diff_context.new_block), + ) + controller_exit_code = _run_controller_query( + args=args, + report_document=report_artifacts.report_document, + root_path=root_path, + analysis_result=analysis_result, + diff_context=diff_context, + baseline_state=baseline_state, ) + if controller_exit_code is not None: + sys.exit(controller_exit_code) changed_clone_gate = resolve_changed_clone_gate( args=args, report_document=report_artifacts.report_document, @@ -533,16 +738,86 @@ def 
_main_impl() -> None: baseline_generator_version=baseline_state.baseline.generator_version, baseline_trusted_for_diff=baseline_state.trusted_for_diff, ) + maybe_print_cohesion_lcom4_migration_note( + args=args, + console=_console(), + codeclone_version=__version__, + cache_path=cache_path, + baseline_generator_version=baseline_state.baseline.generator_version, + baseline_trusted_for_diff=baseline_state.trusted_for_diff, + ) maybe_print_vscode_extension_tip( args=args, console=_console(), codeclone_version=__version__, cache_path=cache_path, ) + maybe_print_gitignore_codeclone_cache_tip( + args=args, + console=_console(), + root_path=root_path, + ) print_pipeline_done_if_needed(args=args, run_started_at=run_started_at) +def _emit_cli_analysis_completed_if_enabled( + *, + args: _AuditEnabledArgs, + root_path: Path, + report_document: object, + new_func_count: int, + new_block_count: int, +) -> None: + if not bool(getattr(args, "audit_enabled", False)): + return + if not isinstance(report_document, dict): + return + digest = _report_digest_from_document(report_document) + if not digest: + return + try: + from ...audit.analysis_completed import ( + ANALYSIS_SOURCE_CLI, + emit_analysis_completed_from_report, + ) + + emit_analysis_completed_from_report( + root_path=root_path, + report_document=report_document, + report_digest=digest, + run_id=digest, + source=ANALYSIS_SOURCE_CLI, + new_func_count=new_func_count, + new_block_count=new_block_count, + agent_start_epoch=_CLI_SESSION_START_EPOCH, + ) + except Exception: + return None + + +def _report_digest_from_document(report_document: dict[str, object]) -> str: + integrity = report_document.get("integrity") + if not isinstance(integrity, dict): + return "" + digest = integrity.get("digest") + if not isinstance(digest, dict): + return "" + return str(digest.get("value", "")).strip() + + def main() -> None: + if len(sys.argv) > 1 and sys.argv[1] == "analytics": + from .analytics import analytics_main + + raise 
SystemExit(analytics_main(sys.argv[2:])) + if len(sys.argv) > 1 and sys.argv[1] == "memory": + from .memory import memory_main + + raise SystemExit(memory_main(sys.argv[2:])) + if len(sys.argv) > 1 and sys.argv[1] == "observability": + from .observability import observability_main + + raise SystemExit(observability_main(sys.argv[2:])) try: _main_impl() except SystemExit: diff --git a/codeclone/surfaces/mcp/_blast_radius.py b/codeclone/surfaces/mcp/_blast_radius.py new file mode 100644 index 00000000..55901413 --- /dev/null +++ b/codeclone/surfaces/mcp/_blast_radius.py @@ -0,0 +1,166 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP blast-radius presentation over the neutral analysis core.""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import Final, Literal + +from codeclone.analysis import blast_radius as _core + +BlastRadiusDepth = _core.BlastRadiusDepth +BlastRadiusResult = _core.BlastRadiusResult +DEFAULT_DO_NOT_TOUCH_PATTERNS = _core.DEFAULT_DO_NOT_TOUCH_PATTERNS +MAX_CONTEXT_ITEMS = _core.MAX_CONTEXT_ITEMS +compute_blast_radius = _core.compute_blast_radius + +# Re-export core helpers for MCP contract tests and backward compatibility. 
+_append_boundary_entry = _core._append_boundary_entry +_append_review_entry = _core._append_review_entry +_as_int = _core._as_int +_compute_transitive_dependents = _core._compute_transitive_dependents +_guardrails = _core._guardrails +_item_path = _core._item_path +_normalize_relative_path = _core._normalize_relative_path +_path_to_module = _core._path_to_module + +BlastRadiusInclude = Literal[ + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "review_context", + "cycles", +] + +VALID_BLAST_RADIUS_DEPTHS: Final[frozenset[str]] = frozenset({"direct", "transitive"}) +VALID_BLAST_RADIUS_INCLUDE: Final[frozenset[str]] = frozenset( + { + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "review_context", + "cycles", + } +) +DEFAULT_BLAST_RADIUS_INCLUDE: Final[tuple[BlastRadiusInclude, ...]] = ( + "imports", + "clone_cohorts", + "coverage", + "risk_signals", + "do_not_touch", + "review_context", + "cycles", +) + + +def _bounded_entries( + entries: Sequence[Mapping[str, str]], + *, + limit: int = MAX_CONTEXT_ITEMS, +) -> list[dict[str, str]]: + return [dict(item) for item in entries[:limit]] + + +def _count_by_field( + entries: Sequence[Mapping[str, str]], + *, + field: str, +) -> dict[str, int]: + counts: dict[str, int] = {} + for entry in entries: + key = str(entry.get(field, "")).strip() or "unknown" + counts[key] = counts.get(key, 0) + 1 + return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0]))) + + +def _entry_summary( + *, + entries: Sequence[Mapping[str, str]], + shown: int, +) -> dict[str, object]: + return { + "total": len(entries), + "shown": shown, + "truncated": shown < len(entries), + "top_categories": _count_by_field(entries, field="category"), + "top_reasons": _count_by_field(entries, field="reason"), + } + + +def blast_radius_to_payload( + result: BlastRadiusResult, + *, + include: Sequence[str] = DEFAULT_BLAST_RADIUS_INCLUDE, +) -> dict[str, object]: + include_set = 
{str(item) for item in include} + imports_enabled = "imports" in include_set + risk_enabled = "risk_signals" in include_set or "coverage" in include_set + structural_risk = dict(result.structural_risk) if risk_enabled else {} + if "coverage" not in include_set: + structural_risk.pop("low_coverage_in_blast_zone", None) + if "risk_signals" not in include_set: + for key in ( + "high_complexity_in_blast_zone", + "high_coupling_in_blast_zone", + "overloaded_modules_in_blast_zone", + ): + structural_risk.pop(key, None) + do_not_touch = result.do_not_touch if "do_not_touch" in include_set else () + review_context = result.review_context if "review_context" in include_set else () + do_not_touch_payload = _bounded_entries(do_not_touch) + review_context_payload = _bounded_entries(review_context) + return { + "run_id": result.run_id, + "origin": list(result.origin), + "depth": result.depth, + "radius_level": result.radius_level, + "direct_dependents": ( + list(result.direct_dependents) if imports_enabled else [] + ), + "transitive_dependents": ( + list(result.transitive_dependents) + if imports_enabled and result.depth == "transitive" + else [] + ), + "clone_cohort_members": ( + list(result.clone_cohort_members) if "clone_cohorts" in include_set else [] + ), + "in_dependency_cycle": ( + list(result.in_dependency_cycle) if "cycles" in include_set else [] + ), + "structural_risk": structural_risk, + "do_not_touch": do_not_touch_payload, + "do_not_touch_summary": _entry_summary( + entries=do_not_touch, + shown=len(do_not_touch_payload), + ), + "review_context": review_context_payload, + "review_context_summary": _entry_summary( + entries=review_context, + shown=len(review_context_payload), + ), + "guardrails": list(result.guardrails), + } + + +__all__ = [ + "DEFAULT_BLAST_RADIUS_INCLUDE", + "DEFAULT_DO_NOT_TOUCH_PATTERNS", + "MAX_CONTEXT_ITEMS", + "VALID_BLAST_RADIUS_DEPTHS", + "VALID_BLAST_RADIUS_INCLUDE", + "BlastRadiusDepth", + "BlastRadiusInclude", + "BlastRadiusResult", + 
"blast_radius_to_payload", + "compute_blast_radius", +] diff --git a/codeclone/surfaces/mcp/_claim_guard.py b/codeclone/surfaces/mcp/_claim_guard.py new file mode 100644 index 00000000..153e26f4 --- /dev/null +++ b/codeclone/surfaces/mcp/_claim_guard.py @@ -0,0 +1,603 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Final, Literal + +from .messages import claims as claim_msgs + +MAX_REVIEW_CLAIM_TEXT_CHARS: Final = 50_000 +TEXT_WINDOW_RADIUS: Final = 80 +SECURITY_SURFACES_FAMILY: Final = "security_surfaces" + +CitationKind = Literal["finding", "metric_family"] + +SECURITY_OVERCLAIM_KEYWORDS: Final = ( + "vulnerab", + "exploit", + "attack", + "cve", + "threat", + "security flaw", + "security bug", + "security issue", +) +GATE_OVERCLAIM_KEYWORDS: Final = ( + "fail", + "block", + "gate", + "ci ", + "ci-", + "pipeline", + "break build", + "must fix", + "blocking", +) +REGRESSION_OVERCLAIM_KEYWORDS: Final = ( + "new ", + "regress", + "introduc", + "just appeared", + "added", + "caused by", + "broke", +) +DEAD_CODE_CERTAINTY_KEYWORDS: Final = ( + "dead", + "unused", + "unreachable", + "remove", + "delete", + "safe to remove", + "definitely dead", +) +FIX_OVERCLAIM_KEYWORDS: Final = ( + "fixed", + "resolved", + "eliminated", + "removed the", + "cleaned up", + "refactored away", + "no longer", +) +STRUCTURAL_SCOPE_KEYWORDS: Final = ( + "no structural regression", + "no regressions", + "regression-free", + "structural verification", + "structurally verified", + "all checks passed", + "code quality verified", +) + +_STRUCTURAL_PROFILES: Final[frozenset[str]] = frozenset({"python_structural"}) + 
+_UNKNOWN_SHORT_FINDING_RE: Final = re.compile(r"\bF-\d+\b", re.IGNORECASE) +_LITERAL_BOUNDARY_CHARS: Final = r"A-Za-z0-9_:" +_SENTENCE_BOUNDARIES: Final = ".!?\n" + + +@dataclass(frozen=True, slots=True) +class Citation: + cited_id: str + kind: CitationKind + text_window: str + start_offset: int + end_offset: int + + +@dataclass(frozen=True, slots=True) +class Violation: + pattern: str + claim: str + cited_id: str + reason: str + source_flag: str + + +@dataclass(frozen=True, slots=True) +class ReportContext: + findings: Mapping[str, Mapping[str, object]] + short_to_canonical: Mapping[str, str] + reachable_qualnames: frozenset[str] + report_only_families: frozenset[str] + has_comparison_run: bool + metric_families: frozenset[str] + verification_profile: str | None = None + patch_health_delta: int | None = None + + +def validate_claims( + *, + text: str, + report_context: ReportContext, + require_citations: bool = True, +) -> dict[str, object]: + citations = extract_citations(text, report_context=report_context) + violations = _violations_for_citations( + citations=citations, + report_context=report_context, + ) + violations = (*violations, *_text_violations(text, report_context=report_context)) + warnings = _warnings_for_text( + text=text, + citations=citations, + report_context=report_context, + require_citations=require_citations, + ) + violation_keys = { + (violation.pattern, violation.cited_id, violation.claim) + for violation in violations + } + return { + "valid": len(violations) == 0, + "citations_found": len(citations), + "violations": [_violation_payload(violation) for violation in violations], + "warnings": warnings, + "validated_citations": [ + { + "cited_id": citation.cited_id, + "kind": citation.kind, + "valid": not any( + key[1] == citation.cited_id and key[2] == citation.text_window + for key in violation_keys + ), + } + for citation in citations + ], + } + + +def validate_text_input(text: object) -> str: + if not isinstance(text, str): + raise 
ValueError(claim_msgs.ERR_TEXT_NOT_STRING) + cleaned = text.strip() + if not cleaned: + raise ValueError(claim_msgs.ERR_TEXT_EMPTY) + if len(text) > MAX_REVIEW_CLAIM_TEXT_CHARS: + raise ValueError( + claim_msgs.ERR_TEXT_TOO_LONG.format( + max_chars=MAX_REVIEW_CLAIM_TEXT_CHARS, + ) + ) + return text + + +def extract_citations( + text: str, + *, + report_context: ReportContext, +) -> tuple[Citation, ...]: + citations: list[Citation] = [] + known_finding_ids = { + *report_context.findings.keys(), + *report_context.short_to_canonical.keys(), + } + for finding_id in sorted(known_finding_ids): + canonical_id = report_context.short_to_canonical.get(finding_id, finding_id) + if canonical_id not in report_context.findings: + continue + citations.extend( + Citation( + cited_id=canonical_id, + kind="finding", + text_window=text_window(text, match.start(), match.end()), + start_offset=match.start(), + end_offset=match.end(), + ) + for match in _find_literal_matches(text, finding_id) + ) + for family_name in sorted(report_context.metric_families): + for variant in _metric_family_patterns(family_name): + citations.extend( + Citation( + cited_id=family_name, + kind="metric_family", + text_window=text_window(text, match.start(), match.end()), + start_offset=match.start(), + end_offset=match.end(), + ) + for match in variant.finditer(text) + ) + return tuple( + sorted( + _dedupe_citations(citations), + key=lambda item: ( + item.start_offset, + item.end_offset, + item.kind, + item.cited_id, + ), + ) + ) + + +def text_window( + text: str, + start_offset: int, + end_offset: int, + *, + radius: int = TEXT_WINDOW_RADIUS, +) -> str: + bound_start = max(0, start_offset - radius) + bound_end = min(len(text), end_offset + radius) + sentence_start = max( + ( + text.rfind(boundary, bound_start, start_offset) + for boundary in _SENTENCE_BOUNDARIES + ), + default=-1, + ) + start = max(bound_start, sentence_start + 1) + sentence_ends = [ + candidate + for boundary in _SENTENCE_BOUNDARIES + if 
(candidate := text.find(boundary, end_offset, bound_end)) != -1 + ] + end = min(sentence_ends) + 1 if sentence_ends else bound_end + return text[start:end].strip() + + +def _violations_for_citations( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + checks = ( + _check_security_vulnerability_overclaim, + _check_report_only_gate_overclaim, + _check_known_debt_overclaim, + _check_dead_code_reachability_overclaim, + _check_fix_without_verification, + ) + violations: list[Violation] = [] + for check in checks: + violations.extend(check(citations=citations, report_context=report_context)) + return tuple( + sorted( + _dedupe_violations(violations), + key=lambda item: (item.pattern, item.cited_id, item.claim), + ) + ) + + +def _check_security_vulnerability_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if ( + citation.kind != "metric_family" + or citation.cited_id != SECURITY_SURFACES_FAMILY + ): + continue + if not _contains_keyword(citation.text_window, SECURITY_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-1", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=claim_msgs.VIOLATION_REASON_SECURITY_NOT_VULNERABILITY, + source_flag="security_surfaces.gate_keys=()", + ) + ) + return tuple(violations) + + +def _check_report_only_gate_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "metric_family": + continue + if citation.cited_id not in report_context.report_only_families: + continue + if not _contains_keyword(citation.text_window, GATE_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-2", + claim=citation.text_window, + cited_id=citation.cited_id, + 
reason=claim_msgs.VIOLATION_REASON_REPORT_ONLY_GATE.format( + family=citation.cited_id, + ), + source_flag=f"{citation.cited_id}.gate_keys=()", + ) + ) + return tuple(violations) + + +def _check_known_debt_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding": + continue + finding = report_context.findings.get(citation.cited_id) + if finding is None or str(finding.get("novelty", "")) != "known": + continue + if not _contains_keyword(citation.text_window, REGRESSION_OVERCLAIM_KEYWORDS): + continue + violations.append( + Violation( + pattern="P-3", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=claim_msgs.VIOLATION_REASON_KNOWN_DEBT_OVERCLAIM, + source_flag="finding.novelty='known'", + ) + ) + return tuple(violations) + + +def _check_dead_code_reachability_overclaim( + *, + citations: Sequence[Citation], + report_context: ReportContext, +) -> tuple[Violation, ...]: + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding": + continue + finding = report_context.findings.get(citation.cited_id) + if finding is None or not _is_dead_code_finding(citation.cited_id, finding): + continue + if not _contains_keyword(citation.text_window, DEAD_CODE_CERTAINTY_KEYWORDS): + continue + reachable = sorted( + qualname + for qualname in _extract_qualnames_from_finding(citation.cited_id, finding) + if qualname in report_context.reachable_qualnames + ) + if not reachable: + continue + violations.append( + Violation( + pattern="P-4", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=claim_msgs.VIOLATION_REASON_DEAD_CODE_REACHABILITY.format( + qualname=reachable[0], + ), + source_flag="runtime_reachability.evidence_present", + ) + ) + return tuple(violations) + + +def _check_fix_without_verification( + *, + citations: Sequence[Citation], + report_context: 
ReportContext, +) -> tuple[Violation, ...]: + if report_context.has_comparison_run: + return () + violations: list[Violation] = [] + for citation in citations: + if citation.kind != "finding" or not _contains_keyword( + citation.text_window, + FIX_OVERCLAIM_KEYWORDS, + ): + continue + violations.append( + Violation( + pattern="P-5", + claim=citation.text_window, + cited_id=citation.cited_id, + reason=claim_msgs.VIOLATION_REASON_FIX_WITHOUT_VERIFICATION, + source_flag="session.comparison_run_available=false", + ) + ) + return tuple(violations) + + +def _warnings_for_text( + *, + text: str, + citations: Sequence[Citation], + report_context: ReportContext, + require_citations: bool, +) -> list[dict[str, str]]: + warnings: list[dict[str, str]] = [] + if require_citations and not citations: + warnings.append( + { + "type": "no_citations", + "message": claim_msgs.WARN_NO_CITATIONS, + } + ) + for match in _UNKNOWN_SHORT_FINDING_RE.finditer(text): + cited_id = match.group(0).upper() + if cited_id not in report_context.short_to_canonical: + warnings.append( + { + "type": "unknown_finding", + "message": claim_msgs.WARN_UNKNOWN_FINDING.format( + cited_id=cited_id, + ), + } + ) + profile = report_context.verification_profile + if ( + profile is not None + and profile not in _STRUCTURAL_PROFILES + and _contains_keyword(text, STRUCTURAL_SCOPE_KEYWORDS) + ): + warnings.append( + { + "type": "structural_checks_not_applicable", + "message": claim_msgs.WARN_STRUCTURAL_CHECKS_NOT_APPLICABLE.format( + profile=profile, + ), + } + ) + health_delta = report_context.patch_health_delta + if ( + health_delta is not None + and health_delta < 0 + and _contains_keyword(text, STRUCTURAL_SCOPE_KEYWORDS) + ): + warnings.append( + { + "type": "health_regression_overclaim", + "message": claim_msgs.WARN_HEALTH_REGRESSION_OVERCLAIM.format( + health_delta=health_delta, + ), + } + ) + return warnings + + +def _text_violations( + text: str, + *, + report_context: ReportContext, +) -> tuple[Violation, 
...]: + health_delta = report_context.patch_health_delta + if health_delta is None or health_delta >= 0: + return () + if not _contains_keyword(text, STRUCTURAL_SCOPE_KEYWORDS): + return () + return ( + Violation( + pattern="health_regression_overclaim", + claim=text.strip()[:TEXT_WINDOW_RADIUS], + cited_id="", + reason=claim_msgs.VIOLATION_REASON_HEALTH_REGRESSION_OVERCLAIM.format( + health_delta=health_delta, + ), + source_flag=f"patch.health_delta={health_delta}", + ), + ) + + +def _metric_family_patterns(family_name: str) -> tuple[re.Pattern[str], ...]: + canonical = re.compile(rf"\b{re.escape(family_name)}\b", flags=re.IGNORECASE) + if "_" not in family_name: + return (canonical,) + spaced_escaped = re.escape(family_name).replace("_", r"\s+") + spaced = re.compile(rf"\b{spaced_escaped}\b", flags=re.IGNORECASE) + return (canonical, spaced) + + +def _find_literal_matches(text: str, literal: str) -> tuple[re.Match[str], ...]: + pattern = re.compile( + rf"(? bool: + lowered = text.casefold() + return any(keyword.casefold() in lowered for keyword in keywords) + + +def _dedupe_citations(citations: Sequence[Citation]) -> tuple[Citation, ...]: + seen: set[tuple[str, str, int, int]] = set() + deduped: list[Citation] = [] + for citation in citations: + key = ( + citation.kind, + citation.cited_id.casefold(), + citation.start_offset, + citation.end_offset, + ) + if key in seen: + continue + seen.add(key) + deduped.append(citation) + return tuple(deduped) + + +def _dedupe_violations(violations: Sequence[Violation]) -> tuple[Violation, ...]: + seen: set[tuple[str, str, str, str]] = set() + deduped: list[Violation] = [] + for violation in violations: + key = ( + violation.pattern, + violation.cited_id, + violation.claim, + violation.source_flag, + ) + if key in seen: + continue + seen.add(key) + deduped.append(violation) + return tuple(deduped) + + +def _violation_payload(violation: Violation) -> dict[str, str]: + return { + "pattern": violation.pattern, + "claim": 
violation.claim, + "cited_id": violation.cited_id, + "reason": violation.reason, + "source_flag": violation.source_flag, + } + + +def _is_dead_code_finding( + finding_id: str, + finding: Mapping[str, object], +) -> bool: + return ( + finding_id.startswith("dead_code:") + or str(finding.get("family", "")) == "dead_code" + or str(finding.get("category", "")) == "dead_code" + ) + + +def _extract_qualnames_from_finding( + finding_id: str, + finding: Mapping[str, object], +) -> frozenset[str]: + qualnames: set[str] = set() + _collect_qualname_fields(finding, qualnames) + for item in _as_sequence(finding.get("items")): + if isinstance(item, Mapping): + _collect_qualname_fields(item, qualnames) + if finding_id.startswith("dead_code:"): + _, _, remainder = finding_id.partition(":") + if remainder: + qualnames.add(remainder) + return frozenset(sorted(qualnames)) + + +def _collect_qualname_fields( + payload: Mapping[str, object], + qualnames: set[str], +) -> None: + for field_name in ( + "qualname", + "target_qualname", + "symbol", + "name", + "subject_key", + ): + value = str(payload.get(field_name, "")).strip() + if value: + qualnames.add(value) + + +def _as_sequence(value: object) -> Sequence[object]: + return value if isinstance(value, Sequence) and not isinstance(value, str) else () diff --git a/codeclone/surfaces/mcp/_graph_search.py b/codeclone/surfaces/mcp/_graph_search.py new file mode 100644 index 00000000..07cace60 --- /dev/null +++ b/codeclone/surfaces/mcp/_graph_search.py @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Bounded, deterministic name search across one stored MCP run. 
def _match_tier(name: str, query_casefold: str) -> str | None:
    """Classify how ``name`` matches an already-casefolded query.

    Checks run from narrowest to loosest and the first hit wins: ``exact``
    (whole name equals the query), ``token`` (query equals one token from
    ``_name_tokens``), ``prefix`` (name starts with the query), ``substring``
    (query occurs anywhere in the name). Returns ``None`` when nothing matches.
    """
    folded = name.casefold()
    tier: str | None = None
    if folded == query_casefold:
        tier = "exact"
    elif query_casefold in _name_tokens(folded):
        tier = "token"
    elif folded.startswith(query_casefold):
        tier = "prefix"
    elif query_casefold in folded:
        tier = "substring"
    return tier
_module_path_index(record: MCPRunRecord) -> dict[str, str]: + """Map each analyzed module to a repo-relative file path via its units.""" + index: dict[str, str] = {} + for unit in record.unit_inventory: + module = unit.qualname.split(":", 1)[0] + index.setdefault(module, unit.path) + return index + + +def _build_search_entries( + record: MCPRunRecord, + root: Path, +) -> list[_SearchEntry]: + entries: list[_SearchEntry] = [ + _SearchEntry( + lane="definition", + name=unit.qualname, + location=unit.path, + line=unit.start_line, + detail=None, + ) + for unit in record.unit_inventory + ] + for facts in record.relationship_facts: + for relation in facts.relationships: + if relation.target_qualname is None: + continue + lane = "call" if relation.relation_kind == "call" else "reference" + entries.append( + _SearchEntry( + lane=lane, + name=relation.target_qualname, + location=_repo_relative(relation.path, root), + line=relation.line, + detail=relation.source_qualname, + ) + ) + module_paths = _module_path_index(record) + entries.extend( + _SearchEntry( + lane="import", + name=dep.target, + location=module_paths.get(dep.source, dep.source), + line=dep.line, + detail=dep.import_type, + ) + for dep in record.module_imports + ) + return entries + + +def _entry_row(entry: _SearchEntry, *, tier: str) -> dict[str, object]: + row: dict[str, object] = { + "lane": entry.lane, + "name": entry.name, + "location": entry.location, + "line": entry.line, + "match_tier": tier, + } + if entry.detail is not None: + row["detail"] = entry.detail + return row + + +def _bounded_summary(*, total: int, shown: int) -> dict[str, object]: + return { + "total": total, + "shown": shown, + "truncated": shown < total, + "omitted": total - shown, + } + + +def _search_response( + *, + query: str, + tier: str, + matches: list[_SearchEntry], + budget: int, +) -> dict[str, object]: + ordered = sorted( + matches, + key=lambda entry: ( + _LANE_ORDER.get(entry.lane, 99), + entry.name, + entry.location, + 
def search_graph(
    *,
    record: MCPRunRecord,
    root: Path,
    query: str,
    budget: int = 50,
) -> dict[str, object]:
    """Look up a name across definitions, calls/references, and imports.

    The query is stripped and casefolded, every search entry built from
    ``record`` is classified with ``_match_tier``, and only the entries of the
    narrowest tier that has any hit are returned (via ``_search_response``,
    bounded by ``budget``).

    A blank query, or a query with no hit in any tier, yields the compact
    ``no_matches`` response. A blank query echoes the caller's raw ``query``;
    every other response echoes the stripped form.
    """
    needle = query.strip()
    if not needle:
        return _no_matches_response(query=query)

    folded = needle.casefold()
    # Pair each matching entry with its tier, keeping the original entry order.
    classified = [
        (hit, entry)
        for entry in _build_search_entries(record, root)
        if (hit := _match_tier(entry.name, folded)) is not None
    ]
    tiers_hit = {hit for hit, _ in classified}

    for tier in _TIER_ORDER:
        if tier in tiers_hit:
            return _search_response(
                query=needle,
                tier=tier,
                matches=[entry for hit, entry in classified if hit == tier],
                budget=budget,
            )
    return _no_matches_response(query=needle)
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic implementation-context projection over one MCP run.""" + +from __future__ import annotations + +import hashlib +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import Final + +import orjson + +from ...models import RelationshipRecord +from ...paths import classify_source_kind +from ._blast_radius import _path_to_module +from ._session_shared import MCPRunRecord, MCPUnitLocation +from ._workspace_drift import WorkspaceDrift, compute_drift +from .messages.params import Facet + +CONTEXT_CONTRACT_VERSION: Final = "1" +CALL_RESOLUTION_VERSION: Final = "1" +MAX_CONTEXT_TOTAL_ITEMS: Final = 200 +DEFAULT_IMPLEMENTATION_FACETS: Final[tuple[Facet, ...]] = ( + "module_role", + "imports", + "importers", + "callees", + "public_surface", + "blast_radius", + "tests", + "docs", + "memory", +) +DEFAULT_IMPACT_FACETS: Final[tuple[Facet, ...]] = ( + "blast_radius", + "importers", + "callers", + "public_surface", + "baseline_sensitive_findings", + "tests", + "review_context", + "memory", +) +DEFAULT_CONTRACT_FACETS: Final[tuple[Facet, ...]] = ( + "definition_sites", + "version_constants", + "contract_tests", + "public_surface", + "callers", + "persistence_path_callers", + "serialization_path_callers", + "deserialization_path_callers", + "store_api_consumers", + "memory_conflicts", + "docs", + "memory", +) +MEMORY_BACKED_FACETS: Final[frozenset[Facet]] = frozenset( + { + "docs", + "memory", + "trajectories", + "experiences", + "tests", + "contract_tests", + "memory_conflicts", + } +) +IMPLEMENTED_CONTEXT_FACETS: Final[frozenset[Facet]] = frozenset( + { + *DEFAULT_IMPLEMENTATION_FACETS, + *DEFAULT_IMPACT_FACETS, + *DEFAULT_CONTRACT_FACETS, + "references", + "test_callers", + "scope", + "trajectories", + "experiences", + } +) +_CONTRACT_PATH_FACET_ROLES: Final[Mapping[Facet, str]] = { + "persistence_path_callers": 
"persistence", + "serialization_path_callers": "serialization", + "deserialization_path_callers": "deserialization", + "store_api_consumers": "store", +} + + +@dataclass(slots=True) +class _EntryBudget: + limit: int + remaining: int + emitted: int = 0 + + def take( + self, + items: Sequence[Mapping[str, object]], + ) -> tuple[list[dict[str, object]], dict[str, object]]: + total = len(items) + shown = min(total, self.remaining) + projected = [dict(item) for item in items[:shown]] + self.remaining -= shown + self.emitted += shown + return projected, { + "total": total, + "shown": shown, + "truncated": shown < total, + "omitted": total - shown, + } + + def take_values( + self, + items: Sequence[str], + ) -> tuple[list[str], dict[str, object]]: + total = len(items) + shown = min(total, self.remaining) + projected = list(items[:shown]) + self.remaining -= shown + self.emitted += shown + return projected, { + "total": total, + "shown": shown, + "truncated": shown < total, + "omitted": total - shown, + } + + @property + def used(self) -> int: + return self.emitted + + def reserve(self, count: int) -> None: + self.remaining = max(0, self.remaining - max(0, count)) + + +# (facet, output_key, reverse_index, keyed_on_source, relation_kind, status, lane) +_CALL_CONTEXT_LANES: Final[ + tuple[tuple[Facet, str, bool, bool, str | None, str, str | None], ...] 
+] = ( + ("callers", "callers", True, True, "call", "resolved", "production"), + ("test_callers", "test_callers", True, True, None, "resolved", "test"), + ("callees", "callees", False, False, "call", "resolved", None), + ("callees", "unresolved", False, False, "call", "unresolved", None), + ("references", "references", False, False, "reference", "resolved", None), +) + + +def _relationship_indexes( + record: MCPRunRecord, +) -> tuple[dict[str, list[RelationshipRecord]], dict[str, list[RelationshipRecord]]]: + """Forward (by source) and reverse (by resolved target) relationship indexes.""" + by_source: dict[str, list[RelationshipRecord]] = {} + by_target: dict[str, list[RelationshipRecord]] = {} + for facts in record.relationship_facts: + for relation in facts.relationships: + by_source.setdefault(relation.source_qualname, []).append(relation) + if relation.target_qualname is not None: + by_target.setdefault(relation.target_qualname, []).append(relation) + return by_source, by_target + + +def _repo_relative(path: str, root: Path) -> str: + """Render a relationship-fact path repo-relative so call_context rows and the + artifact digest stay machine-independent (relationship facts carry the + absolute analysis filepath; the rest of the tool is repo-relative).""" + candidate = Path(path) + if not candidate.is_absolute(): + return path + try: + return candidate.relative_to(root).as_posix() + except ValueError: + return path + + +def _relationship_row( + relation: RelationshipRecord, + *, + keyed_on_source: bool, + root: Path, +) -> dict[str, object]: + row: dict[str, object] = { + "relation_kind": relation.relation_kind, + "resolution_status": relation.resolution_status, + "origin_lane": relation.origin_lane, + "evidence": f"{relation.resolution_status}_{relation.relation_kind}", + "path": _repo_relative(relation.path, root), + "line": relation.line, + } + if keyed_on_source: + row["source_qualname"] = relation.source_qualname + else: + row["target_qualname"] = 
def _collect_relationship_rows(
    index: Mapping[str, Sequence[RelationshipRecord]],
    subject_qualnames: frozenset[str],
    *,
    keyed_on_source: bool,
    relation_kind: str | None,
    resolution_status: str,
    origin_lane: str | None,
    root: Path,
) -> list[dict[str, object]]:
    """Collect de-duplicated relationship rows for a set of subject qualnames.

    Args:
        index: Relationship index keyed by qualname (forward or reverse).
        subject_qualnames: Qualnames whose index entries are inspected.
        keyed_on_source: When true the counterpart (and emitted row field) is
            the relation's source; otherwise it is the target, falling back to
            the raw expression and then to ``""``.
        relation_kind: Required relation kind, or ``None`` to accept any.
        resolution_status: Required resolution status.
        origin_lane: Required origin lane, or ``None`` to accept any.
        root: Repository root used to render row paths repo-relative.

    Returns:
        Rows sorted by ``(counterpart, path, line)``; the first relation seen
        for each such key wins.
    """
    rows: dict[tuple[str, str, int], dict[str, object]] = {}
    # Visit subjects in sorted order. The dedupe key omits relation_kind and
    # the non-counterpart endpoint, so two subjects can collide on one key with
    # different rows; iterating the frozenset directly would let hash
    # randomization decide which row survives.
    for qualname in sorted(subject_qualnames):
        for relation in index.get(qualname, ()):
            mismatched = (
                (relation_kind is not None and relation.relation_kind != relation_kind)
                or relation.resolution_status != resolution_status
                or (origin_lane is not None and relation.origin_lane != origin_lane)
            )
            if mismatched:
                continue
            counterpart = (
                relation.source_qualname
                if keyed_on_source
                else relation.target_qualname or relation.expression or ""
            )
            key = (counterpart, relation.path, relation.line)
            if key not in rows:
                # Only build the row when it will actually be kept.
                rows[key] = _relationship_row(
                    relation,
                    keyed_on_source=keyed_on_source,
                    root=root,
                )
    return [rows[key] for key in sorted(rows)]
+ """ + by_source, by_target = _relationship_indexes(record) + call_context: dict[str, object] = {} + for ( + facet, + key, + reverse_index, + keyed_on_source, + relation_kind, + status, + lane, + ) in _CALL_CONTEXT_LANES: + if facet not in include_set: + continue + _attach_bounded( + call_context, + key=key, + items=_collect_relationship_rows( + by_target if reverse_index else by_source, + subject_qualnames, + keyed_on_source=keyed_on_source, + relation_kind=relation_kind, + resolution_status=status, + origin_lane=lane, + root=record.root, + ), + budget=budget, + ) + return call_context + + +def _subject_qualnames( + record: MCPRunRecord, + *, + paths: Sequence[str], + resolved_symbols: Sequence[Mapping[str, object]], + resolved_from: str, +) -> frozenset[str]: + qualnames: set[str] = { + str(item["qualname"]) for item in resolved_symbols if item.get("qualname") + } + # An explicit-symbol subject is the named symbols themselves; the symbol's + # file is recorded for file-level structural facts but must NOT pull every + # file-mate's call edges into a function-level call_context. 
+ if resolved_from == "explicit_symbols": + return frozenset(qualnames) + path_set = frozenset(paths) + for row in _unit_location_index(record): + if str(row["path"]) in path_set: + qualnames.add(str(row["qualname"])) + return frozenset(qualnames) + + +def _call_graph_status(record: MCPRunRecord) -> tuple[str, list[str]]: + failed = sorted({failure.split(": ", 1)[0] for failure in record.failures}) + return ("partial" if failed else "complete"), failed + + +def _relationship_digest_records(record: MCPRunRecord) -> list[dict[str, object]]: + """Canonical relationship rows for the artifact digest (expression excluded).""" + rows: list[dict[str, object]] = [ + { + "relation_kind": relation.relation_kind, + "resolution_status": relation.resolution_status, + "origin_lane": relation.origin_lane, + "source_qualname": relation.source_qualname, + "target_qualname": relation.target_qualname, + "path": _repo_relative(relation.path, record.root), + "line": relation.line, + "resolution_rule": relation.resolution_rule, + } + for facts in record.relationship_facts + for relation in facts.relationships + ] + rows.sort( + key=lambda row: ( + str(row["source_qualname"]), + str(row["relation_kind"]), + str(row["origin_lane"]), + str(row["target_qualname"] or ""), + str(row["path"]), + _as_int(row["line"]), + ) + ) + return rows + + +def _context_uncertainties(call_graph_status: str) -> list[str]: + notes = [ + "resolved call/reference edges are best-effort (cross-module imports, " + "same-module functions/methods, self/cls); dynamic dispatch and deep " + "aliasing stay unresolved observations — verify dispatch against source" + ] + if call_graph_status != "complete": + notes.append( + "call_graph_status is not complete: some files failed analysis and " + "their relationship edges are missing" + ) + return notes + + +def _contract_path_role(records: Sequence[Mapping[str, object]]) -> str | None: + """D18 anchor: a deterministic contract role for the subject, or None. 
def _project_contracts(
    *,
    record: MCPRunRecord,
    subject_paths: Sequence[str],
    subject_qualnames: frozenset[str],
    memory_result: Mapping[str, object] | None,
    include_set: frozenset[Facet],
    budget: _EntryBudget,
) -> dict[str, object]:
    """Project the contract truth-map: where the shape is defined, its pinned
    contract tests and version constants, memory conflicts, and D18-gated
    persistence/serialization path callers. Path-specific caller facets are
    emitted only with a typed or memory-backed anchor; otherwise they are marked
    not_available rather than name/dir-guessed (D13/D18).

    Args:
        record: Stored run whose public surface and relationships are read.
        subject_paths: Paths whose public-surface rows feed the definition and
            version-constant facets.
        subject_qualnames: Qualnames whose resolved production callers feed the
            path-specific caller facets.
        memory_result: Optional memory lookup result; its ``records`` entry
            feeds the test-anchor, conflict, and role-anchor checks.
        include_set: Facets requested by the caller; others are skipped.
        budget: Shared entry budget passed to every bounded attachment.

    Returns:
        Mapping of facet name to its projected payload (possibly empty).
    """
    contracts: dict[str, object] = {}
    # Public surface is always read at "normal" detail here, independent of the
    # caller's detail level.
    surface = _public_surface_rows(
        record, paths=frozenset(subject_paths), detail_level="normal"
    )
    records = _mapping_rows((memory_result or {}).get("records"))
    facet_items: dict[Facet, list[dict[str, object]]] = {
        "definition_sites": [
            row for row in surface if row["symbol_kind"] in {"class", "constant"}
        ],
        "version_constants": [
            row for row in surface if row["symbol_kind"] == "constant"
        ],
        "contract_tests": [row for row in records if row.get("type") == "test_anchor"],
        "memory_conflicts": [row for row in records if row.get("contradiction_note")],
    }
    # Facets are attached in dict order; the budget object is shared, so this
    # order presumably decides which facet is truncated first — keep it stable.
    for facet, items in facet_items.items():
        if facet in include_set:
            _attach_bounded(contracts, key=facet, items=items, budget=budget)
    role = _contract_path_role(records)
    # Only the reverse (by-target) index is needed: callers of the subject.
    _, by_target = _relationship_indexes(record)
    for facet, facet_role in _CONTRACT_PATH_FACET_ROLES.items():
        if facet not in include_set:
            continue
        if role == facet_role:
            # The anchored role matches this facet: emit resolved production
            # call edges that target the subject qualnames.
            _attach_bounded(
                contracts,
                key=facet,
                items=_collect_relationship_rows(
                    by_target,
                    subject_qualnames,
                    keyed_on_source=True,
                    relation_kind="call",
                    resolution_status="resolved",
                    origin_lane="production",
                    root=record.root,
                ),
                budget=budget,
            )
        else:
            # No matching anchor: report the facet as unavailable instead of
            # guessing callers.
            contracts[facet] = {
                "status": "not_available",
                "reason": "no_typed_or_memory_anchor",
                "tier": "resolvable",
            }
    return contracts
resolved_from=subject_resolved_from, + source_summary=subject_source_summary, + budget=entry_budget, + ) + if projected_change_control is not None: + _project_change_control_scope( + projected_change_control, + change_control=change_control or {}, + budget=entry_budget, + ) + drift = compute_drift(record) + freshness = _project_freshness( + drift=drift, + budget=entry_budget, + ) + dependency_rows = _dependency_rows(record) + context_artifact_digest = _context_artifact_digest( + record=record, + dependency_rows=dependency_rows, + ) + request_projection = _context_request_projection( + subject_resolved_from=subject_resolved_from, + paths=normalized_paths, + symbols=normalized_symbols, + mode=mode, + include=include, + depth=depth, + detail_level=detail_level, + budget=budget, + change_control=change_control, + freshness_status=drift.status, + ) + if ( + not safety_overflow + and normalized_symbols + and not normalized_resolved_symbols + and not normalized_paths + ): + return _subject_not_found_payload( + record=record, + mode=mode, + subject=subject, + freshness=freshness, + context_artifact_digest=context_artifact_digest, + projected_change_control=projected_change_control, + request=request_projection, + ) + module_paths = _module_path_index(record) + selected_modules = frozenset(_path_to_module(path) for path in normalized_paths) + structural_context: dict[str, object] = {} + + if "module_role" in include_set: + module_roles = tuple( + { + "path": path, + "module": _path_to_module(path), + "source_kind": classify_source_kind(path), + "evidence": "structural", + } + for path in normalized_paths + ) + _attach_bounded( + structural_context, + key="module_role", + items=module_roles, + budget=entry_budget, + ) + + imports = _imports_for_modules( + dependency_rows=dependency_rows, + selected_modules=selected_modules, + module_paths=module_paths, + ) + importers = _importers_for_modules( + dependency_rows=dependency_rows, + selected_modules=selected_modules, + 
module_paths=module_paths, + ) + if {"imports", "importers", "tests"}.intersection(include_set): + _attach_bounded( + structural_context, + key="related_modules", + items=_collapsed_related_modules( + imports=imports if "imports" in include_set else (), + importers=( + importers + if {"importers", "tests"}.intersection(include_set) + else () + ), + include_production_importers="importers" in include_set, + include_test_importers="tests" in include_set, + ), + budget=entry_budget, + ) + + if "public_surface" in include_set: + _attach_bounded( + structural_context, + key="public_surface", + items=_public_surface_rows( + record, + paths=frozenset(normalized_paths), + detail_level=detail_level, + ), + budget=entry_budget, + ) + + if "blast_radius" in include_set: + structural_context["blast_radius"] = _bounded_blast_radius( + blast_radius, + budget=entry_budget, + depth=2 if mode == "impact" else depth, + ) + + if "baseline_sensitive_findings" in include_set: + _attach_bounded( + structural_context, + key="baseline_sensitive_findings", + items=_baseline_sensitive_findings( + record, + relevant_paths=_blast_zone_paths( + paths=normalized_paths, + blast_radius=blast_radius, + ), + ), + budget=entry_budget, + ) + + if "review_context" in include_set and change_control is None: + _attach_bounded( + structural_context, + key="review_context", + items=_mapping_rows(blast_radius.get("review_context")), + budget=entry_budget, + ) + + call_graph_status, failed_files = _call_graph_status(record) + subject_qualnames = _subject_qualnames( + record, + paths=normalized_paths, + resolved_symbols=normalized_resolved_symbols, + resolved_from=subject_resolved_from, + ) + call_context = _project_call_context( + record=record, + subject_qualnames=subject_qualnames, + include_set=include_set, + budget=entry_budget, + ) + contracts = _project_contracts( + record=record, + subject_paths=normalized_paths, + subject_qualnames=subject_qualnames, + memory_result=memory_result, + 
include_set=include_set, + budget=entry_budget, + ) + + analysis: dict[str, object] = { + "run_id": record.run_id, + "report_digest": record.run_id, + "context_artifact_digest": context_artifact_digest, + "context_contract_version": CONTEXT_CONTRACT_VERSION, + "call_resolution_version": CALL_RESOLUTION_VERSION, + "freshness": freshness, + "cache_mode": _cache_mode(record), + "call_graph_status": call_graph_status, + "failed_files": failed_files, + } + unavailable_facets = sorted(include_set - IMPLEMENTED_CONTEXT_FACETS) + payload: dict[str, object] = { + "status": ( + "safety_context_overflow" + if safety_overflow + else "subject_not_found" + if normalized_symbols + and not normalized_resolved_symbols + and not normalized_paths + else "ok" + ), + "mode": mode, + "subject": subject, + "analysis": analysis, + "structural_context": structural_context, + "budget_summary": { + "requested": budget, + "effective": entry_budget.limit, + "emitted": entry_budget.used, + "remaining": entry_budget.remaining, + "hard_cap": MAX_CONTEXT_TOTAL_ITEMS, + "safety": safety_summary, + }, + "dataflow": { + "writers": {"status": "not_available", "tier": "dataflow"}, + "readers": {"status": "not_available", "tier": "dataflow"}, + }, + "uncertainties": _context_uncertainties(call_graph_status), + "next_queries": [ + "Re-run after analyze_repository when analysis.freshness.status is drifted." 
+ ], + } + if memory_result is not None: + payload["implementation_evidence"] = _implementation_evidence( + memory_result, + include=include_set, + budget=entry_budget, + ) + if call_context: + payload["call_context"] = call_context + if contracts: + payload["contracts"] = contracts + if projected_change_control is not None: + payload["change_control"] = projected_change_control + if unavailable_facets: + payload["unavailable_facets"] = unavailable_facets + budget_summary = _as_mapping(payload["budget_summary"]) + if isinstance(budget_summary, dict): + budget_summary["emitted"] = entry_budget.used + budget_summary["remaining"] = entry_budget.remaining + _attach_projection_digest( + payload, + analysis, + context_artifact_digest=context_artifact_digest, + request=request_projection, + ) + return payload + + +def _context_request_projection( + *, + subject_resolved_from: str, + paths: Sequence[str], + symbols: Sequence[str], + mode: str, + include: Sequence[Facet], + depth: int, + detail_level: str, + budget: int, + change_control: Mapping[str, object] | None, + freshness_status: str, +) -> dict[str, object]: + """Deterministic request fingerprint bound into the projection digest.""" + return { + "subject": { + "resolved_from": subject_resolved_from, + "paths": list(paths), + "symbols": list(symbols), + }, + "mode": mode, + "include": sorted(include), + "depth": depth, + "detail_level": detail_level, + "budget": budget, + "intent_id": ( + str(change_control.get("intent_id")) if change_control is not None else None + ), + "freshness_status": freshness_status, + } + + +def _attach_projection_digest( + payload: Mapping[str, object], + analysis: dict[str, object], + *, + context_artifact_digest: str, + request: Mapping[str, object], +) -> None: + """Bind the request and bounded response into analysis.context_projection_digest.""" + analysis["context_projection_digest"] = _digest( + _projection_digest_payload( + payload, + context_artifact_digest=context_artifact_digest, 
+ request=request, + ) + ) + + +def _subject_not_found_payload( + *, + record: MCPRunRecord, + mode: str, + subject: Mapping[str, object], + freshness: Mapping[str, object], + context_artifact_digest: str, + projected_change_control: Mapping[str, object] | None, + request: Mapping[str, object], +) -> dict[str, object]: + """Compact response when an explicit symbol query resolves nothing. + + Emitting the full empty facet scaffolding (structural_context, budget, + dataflow, uncertainties, call_context) on a miss burns LLM context for zero + signal. Return only the status, the unresolved subject, a slim provenance + block, the projection digest for determinism, and actionable next steps. + """ + analysis: dict[str, object] = { + "run_id": record.run_id, + "report_digest": record.run_id, + "context_artifact_digest": context_artifact_digest, + "context_contract_version": CONTEXT_CONTRACT_VERSION, + "call_resolution_version": CALL_RESOLUTION_VERSION, + "freshness": freshness, + } + payload: dict[str, object] = { + "status": "subject_not_found", + "mode": mode, + "subject": subject, + "analysis": analysis, + "next_steps": [ + "Pass an exact qualname as module:symbol with a colon separator " + "(for example pkg.mod:func); dot notation does not resolve.", + "Only analyzed definitions resolve — functions, methods, classes, " + "and public API rows. 
def build_unit_location_inventory(
    *,
    root: Path,
    units: Sequence[Mapping[str, object]],
) -> tuple[MCPUnitLocation, ...]:
    """Turn analyzed unit mappings into a sorted, de-duplicated location tuple.

    A unit is kept only when it has a non-blank ``qualname``, its ``filepath``
    resolves through ``_repo_relative_location``, and its ``start_line`` is
    positive. ``end_line`` is raised to ``start_line`` when smaller. The result
    is ordered by ``(qualname, path, start_line, end_line)``.
    """
    collected: set[MCPUnitLocation] = set()
    for unit in units:
        name = str(unit.get("qualname", "")).strip()
        relative_path = _repo_relative_location(root, unit.get("filepath"))
        first_line = _as_int(unit.get("start_line"))
        last_line = _as_int(unit.get("end_line"))
        usable = bool(name) and relative_path is not None and first_line > 0
        if usable:
            collected.add(
                MCPUnitLocation(
                    qualname=name,
                    path=relative_path,
                    start_line=first_line,
                    end_line=max(first_line, last_line),
                )
            )

    def _order(location: MCPUnitLocation) -> tuple[str, str, int, int]:
        return (
            location.qualname,
            location.path,
            location.start_line,
            location.end_line,
        )

    ordered = list(collected)
    ordered.sort(key=_order)
    return tuple(ordered)
requested + for row in by_qualname.get(symbol, ()) + ) + unresolved = tuple(symbol for symbol in requested if symbol not in by_qualname) + return resolved, unresolved + + +def _initialize_context_budget( + *, + requested_budget: int, + change_control: Mapping[str, object] | None, +) -> tuple[_EntryBudget, dict[str, object] | None, dict[str, object], bool]: + if change_control is None: + budget = _EntryBudget(limit=requested_budget, remaining=requested_budget) + return ( + budget, + None, + _summary_from_counts(total=0, shown=0), + False, + ) + do_not_touch = _sorted_safety_rows(change_control.get("do_not_touch")) + review_context = _sorted_safety_rows(change_control.get("review_context")) + do_not_total = _summary_total( + change_control.get("do_not_touch_summary"), + fallback=len(do_not_touch), + ) + review_total = _summary_total( + change_control.get("review_context_summary"), + fallback=len(review_context), + ) + safety_total = do_not_total + review_total + effective_limit = min( + MAX_CONTEXT_TOTAL_ITEMS, + max(requested_budget, safety_total), + ) + budget = _EntryBudget(limit=effective_limit, remaining=effective_limit) + projected = { + key: value + for key, value in change_control.items() + if key + not in { + "allowed_files", + "allowed_related", + "do_not_touch", + "do_not_touch_summary", + "guards", + "review_context", + "review_context_summary", + } + } + shown_do_not, _ = budget.take(do_not_touch) + shown_review, _ = budget.take(review_context) + shown_total = len(shown_do_not) + len(shown_review) + budget.reserve(max(0, safety_total - shown_total)) + projected["do_not_touch"] = shown_do_not + projected["do_not_touch_summary"] = _summary_from_counts( + total=do_not_total, + shown=len(shown_do_not), + ) + projected["review_context"] = shown_review + projected["review_context_summary"] = _summary_from_counts( + total=review_total, + shown=len(shown_review), + ) + safety_summary = _summary_from_counts( + total=safety_total, + shown=shown_total, + ) + 
safety_overflow = ( + safety_total > MAX_CONTEXT_TOTAL_ITEMS or safety_total > shown_total + ) + return budget, projected, safety_summary, safety_overflow + + +def _project_subject( + *, + paths: Sequence[str], + symbols: Sequence[str], + resolved_symbols: Sequence[Mapping[str, object]], + unresolved_symbols: Sequence[str], + resolved_from: str, + source_summary: Mapping[str, object], + budget: _EntryBudget, +) -> dict[str, object]: + shown_paths, paths_summary = budget.take_values(paths) + shown_symbols, symbols_summary = budget.take_values(symbols) + shown_resolved, resolved_summary = budget.take(resolved_symbols) + shown_unresolved, unresolved_summary = budget.take_values(unresolved_symbols) + return { + "resolved_from": resolved_from, + "paths": shown_paths, + "paths_summary": paths_summary, + "symbols": shown_symbols, + "symbols_summary": symbols_summary, + "resolved_symbols": shown_resolved, + "resolved_symbols_summary": resolved_summary, + "unresolved_symbols": shown_unresolved, + "unresolved_symbols_summary": unresolved_summary, + "source_summary": dict(source_summary), + } + + +def _project_change_control_scope( + projected: dict[str, object], + *, + change_control: Mapping[str, object], + budget: _EntryBudget, +) -> None: + for key in ("allowed_files", "allowed_related", "guards"): + values = tuple( + sorted( + { + str(item) + for item in _as_sequence(change_control.get(key)) + if str(item).strip() + } + ) + ) + shown, summary = budget.take_values(values) + projected[key] = shown + projected[f"{key}_summary"] = summary + + +def _project_freshness( + *, + drift: WorkspaceDrift, + budget: _EntryBudget, +) -> dict[str, object]: + payload: dict[str, object] = { + "status": drift.status, + "topology_drift": drift.topology_drift, + "strength": drift.strength, + } + for key, values in ( + ("drifted_files", drift.drifted_files), + ("added_files", drift.added_files), + ("deleted_files", drift.deleted_files), + ): + shown, summary = budget.take_values(values) + 
def _collapsed_related_modules(
    *,
    imports: Sequence[Mapping[str, object]],
    importers: Sequence[Mapping[str, object]],
    include_production_importers: bool,
    include_test_importers: bool,
) -> tuple[dict[str, object], ...]:
    """Collapse import and importer edges into one row per (path, module).

    Outgoing ``imports`` are always recorded. Incoming ``importers`` are
    recorded only when the matching include flag is set; importers whose
    ``source_kind`` is ``tests`` or ``fixtures`` become ``tested_by``
    relations, all others ``imported_by``. Rows are returned sorted by
    relevance rank, then path, then module.
    """
    collapsed: dict[tuple[str, str], dict[str, object]] = {}

    for edge in imports:
        target_path = str(edge.get("target_path") or "")
        _append_related_relation(
            collapsed,
            path=target_path,
            module=str(edge.get("target_module") or ""),
            source_kind=classify_source_kind(target_path),
            relation={
                "kind": "imports",
                "evidence": "structural",
                "import_type": str(edge.get("import_type") or ""),
                "line": _as_int(edge.get("line")),
            },
        )

    for edge in importers:
        kind = str(edge.get("source_kind") or "")
        from_tests = kind in {"tests", "fixtures"}
        # Pick the include flag that governs this importer's category.
        wanted = include_test_importers if from_tests else include_production_importers
        if not wanted:
            continue
        _append_related_relation(
            collapsed,
            path=str(edge.get("source_path") or ""),
            module=str(edge.get("source_module") or ""),
            source_kind=kind,
            relation={
                "kind": "tested_by" if from_tests else "imported_by",
                "evidence": "structural",
                "import_type": str(edge.get("import_type") or ""),
                "line": _as_int(edge.get("line")),
            },
        )

    def _order(row: dict[str, object]) -> tuple[int, str, str]:
        return (
            _as_int(row.get("relevance_rank")),
            str(row.get("path", "")),
            str(row.get("module", "")),
        )

    return tuple(sorted(collapsed.values(), key=_order))
normalized_relation not in relations: + relations.append(normalized_relation) + relations.sort( + key=lambda item: ( + str(item.get("kind", "")), + str(item.get("import_type", "")), + _as_int(item.get("line")), + ) + ) + relation_rank = { + "tested_by": 0, + "imported_by": 1, + "imports": 2, + }.get(str(relation.get("kind", "")), 3) + row["relevance_rank"] = min( + _as_int(row.get("relevance_rank")), + relation_rank, + ) + + +def _sorted_safety_rows(value: object) -> tuple[dict[str, object], ...]: + return tuple( + sorted( + _mapping_rows(value), + key=lambda row: ( + str(row.get("path", "")), + str(row.get("category", "")), + str(row.get("reason", "")), + str(row.get("severity", "")), + ), + ) + ) + + +def _summary_total(value: object, *, fallback: int) -> int: + return max(fallback, _as_int(_as_mapping(value).get("total"))) + + +def _summary_from_counts(*, total: int, shown: int) -> dict[str, object]: + return { + "total": total, + "shown": shown, + "truncated": shown < total, + "omitted": max(0, total - shown), + } + + +def _implementation_evidence( + memory_result: Mapping[str, object], + *, + include: frozenset[Facet], + budget: _EntryBudget, +) -> dict[str, object]: + records = _mapping_rows(memory_result.get("records")) + test_records = tuple(row for row in records if row.get("type") == "test_anchor") + doc_records = tuple(row for row in records if row.get("type") == "document_link") + general_records = tuple( + row + for row in records + if row.get("type") not in {"document_link", "test_anchor"} + ) + payload: dict[str, object] = { + "scope_resolved_from": memory_result.get("scope_resolved_from"), + "retrieval_policy": dict(_as_mapping(memory_result.get("retrieval_policy"))), + } + if "memory" in include: + _attach_bounded(payload, key="memory", items=general_records, budget=budget) + if {"memory", "trajectories"}.intersection(include): + _attach_bounded( + payload, + key="trajectories", + items=_mapping_rows(memory_result.get("trajectories")), + 
def _finding_paths(group: Mapping[str, object]) -> tuple[str, ...]:
    """Return the sorted, de-duplicated paths named by a finding group's items.

    For each item the first truthy value among ``relative_path``, ``path`` and
    ``file`` is used; values that are blank after stripping are ignored.
    """
    unique: set[str] = set()
    for item in _mapping_rows(group.get("items")):
        raw = item.get("relative_path") or item.get("path") or item.get("file") or ""
        cleaned = str(raw).strip()
        if cleaned:
            unique.add(cleaned)
    return tuple(sorted(unique))
def _imports_for_modules(
    *,
    dependency_rows: Sequence[Mapping[str, object]],
    selected_modules: frozenset[str],
    module_paths: Mapping[str, str],
) -> tuple[dict[str, object], ...]:
    """List the dependency edges whose source is one of ``selected_modules``.

    Each edge carries the source and target module names, the target's path
    from ``module_paths`` (``None`` when the module is not in that mapping),
    the import type and the line. Edges are sorted by source module, target
    module, import type and line.
    """
    edges: list[dict[str, object]] = []
    for dep in dependency_rows:
        source = str(dep["source"])
        if source not in selected_modules:
            continue
        target = str(dep["target"])
        edges.append(
            {
                "source_module": source,
                "target_module": target,
                "target_path": module_paths.get(target),
                "import_type": str(dep["import_type"]),
                "line": _as_int(dep["line"]),
                "evidence": "structural",
            }
        )
    edges.sort(
        key=lambda edge: (
            str(edge["source_module"]),
            str(edge["target_module"]),
            str(edge["import_type"]),
            _as_int(edge["line"]),
        )
    )
    return tuple(edges)
def _public_surface_rows(
    record: MCPRunRecord,
    *,
    paths: frozenset[str],
    detail_level: str,
) -> tuple[dict[str, object], ...]:
    """Project ``api_surface`` report items that live in ``paths``.

    Every row has qualname, path, line span, symbol kind and evidence. Any
    ``detail_level`` other than ``"compact"`` adds params, the
    returns-annotated flag and ``exported_via``; ``"full"`` additionally adds
    ``record_kind`` and ``module``. Rows are sorted by path, start line and
    qualname.
    """
    api_surface = _as_mapping(_report_families(record).get("api_surface"))
    with_signature = detail_level != "compact"
    with_identity = detail_level == "full"

    surface: list[dict[str, object]] = []
    for raw_item in _as_sequence(api_surface.get("items")):
        item = _as_mapping(raw_item)
        item_path = _report_item_path(record, item)
        if item_path not in paths:
            continue
        entry: dict[str, object] = {
            "qualname": str(item.get("qualname", "")).strip(),
            "path": item_path,
            "start_line": _as_int(item.get("start_line")),
            "end_line": _as_int(item.get("end_line")),
            "symbol_kind": str(item.get("symbol_kind", "")).strip(),
            "evidence": "structural",
        }
        if with_signature:
            entry.update(
                params=[
                    dict(_as_mapping(param))
                    for param in _as_sequence(item.get("params"))
                ],
                returns_annotated=bool(item.get("returns_annotated")),
                exported_via=item.get("exported_via"),
            )
        if with_identity:
            entry.update(
                record_kind=str(item.get("record_kind", "symbol")),
                module=str(item.get("module", "")).strip(),
            )
        surface.append(entry)

    surface.sort(
        key=lambda entry: (
            str(entry["path"]),
            _as_int(entry["start_line"]),
            str(entry["qualname"]),
        )
    )
    return tuple(surface)
Mapping[str, object], + *, + budget: _EntryBudget, + depth: int, +) -> dict[str, object]: + result: dict[str, object] = { + "radius_level": str(payload.get("radius_level", "low")), + "depth": "transitive" if depth > 1 else "direct", + } + for source_key, output_key in ( + ("direct_dependents", "direct"), + ("transitive_dependents", "transitive"), + ("clone_cohort_members", "clone_cohorts"), + ("in_dependency_cycle", "dependency_cycles"), + ): + items = tuple( + {"path": str(item), "evidence": "structural"} + for item in _as_sequence(payload.get(source_key)) + if str(item).strip() + ) + _attach_bounded(result, key=output_key, items=items, budget=budget) + return result + + +def _attach_bounded( + payload: dict[str, object], + *, + key: str, + items: Sequence[Mapping[str, object]], + budget: _EntryBudget, +) -> None: + projected, summary = budget.take(items) + payload[key] = projected + payload[f"{key}_summary"] = summary + + +def _module_path_index(record: MCPRunRecord) -> dict[str, str]: + if record.manifest is None: + return {} + return { + module: path + for path in sorted(record.manifest) + if (module := _path_to_module(path)) + } + + +def _context_artifact_digest( + *, + record: MCPRunRecord, + dependency_rows: Sequence[Mapping[str, object]], +) -> str: + del dependency_rows + manifest = record.manifest or {} + call_graph_status, failed_files = _call_graph_status(record) + return _digest( + { + "canonicalization": { + "version": "1", + "algorithm": "sha256", + "scope": "context_artifact", + "wire": "bare_hex", + }, + "report_digest": record.run_id, + "context_contract_version": CONTEXT_CONTRACT_VERSION, + "call_resolution_version": CALL_RESOLUTION_VERSION, + "call_graph_status": call_graph_status, + "failed_files": failed_files, + "manifest": [ + { + "path": path, + "mtime_ns": int(manifest[path]["mtime_ns"]), + "size": int(manifest[path]["size"]), + } + for path in sorted(manifest) + ], + "unit_location_index": [ + { + "qualname": str(row["qualname"]), + 
"path": str(row["path"]), + "start_line": _as_int(row["start_line"]), + } + for row in _unit_location_index(record) + ], + "relationship_records": _relationship_digest_records(record), + } + ) + + +def _projection_digest_payload( + payload: Mapping[str, object], + *, + context_artifact_digest: str, + request: Mapping[str, object], +) -> dict[str, object]: + return { + "canonicalization": { + "version": "1", + "algorithm": "sha256", + "scope": "context_projection", + "wire": "bare_hex", + }, + "context_artifact_digest": context_artifact_digest, + "request": dict(request), + "projection": { + key: value + for key, value in payload.items() + if key not in {"message", "next_queries", "uncertainties"} + }, + } + + +def _cache_mode(record: MCPRunRecord) -> str: + cache = _as_mapping(record.summary.get("cache")) + freshness = str(cache.get("freshness", "")).strip() + return { + "fresh": "fresh_compute", + "mixed": "partial_reuse", + "reused": "full_reuse", + }.get(freshness, "fresh_compute") + + +def _report_families(record: MCPRunRecord) -> Mapping[str, object]: + metrics = _as_mapping(record.report_document.get("metrics")) + return _as_mapping(metrics.get("families")) + + +def _unit_location_index( + record: MCPRunRecord, +) -> tuple[dict[str, object], ...]: + rows: dict[tuple[str, str, int], dict[str, object]] = {} + for location in record.unit_inventory: + key = (location.qualname, location.path, location.start_line) + rows[key] = { + "qualname": location.qualname, + "path": location.path, + "start_line": location.start_line, + "end_line": location.end_line, + "source": "unit_inventory", + } + api_surface = _as_mapping(_report_families(record).get("api_surface")) + for raw in _as_sequence(api_surface.get("items")): + item = _as_mapping(raw) + qualname = str(item.get("qualname", "")).strip() + path = _report_item_path(record, item) + start_line = _as_int(item.get("start_line")) + if not qualname or not path or start_line <= 0: + continue + key = (qualname, path, 
start_line) + rows[key] = { + "qualname": qualname, + "path": path, + "start_line": start_line, + "end_line": max(start_line, _as_int(item.get("end_line"))), + "source": "api_surface", + } + return tuple( + rows[key] + for key in sorted( + rows, + key=lambda item: (item[0], item[1], item[2]), + ) + ) + + +def _report_item_path( + record: MCPRunRecord, + item: Mapping[str, object], +) -> str: + raw = item.get("relative_path") or item.get("filepath") + path = _repo_relative_location(record.root, raw) + return path or "" + + +def _repo_relative_location(root: Path, raw: object) -> str | None: + text = str(raw or "").strip() + if not text: + return None + root_path = root.resolve() + candidate = Path(text) + if not candidate.is_absolute(): + candidate = root_path / candidate + try: + return candidate.resolve().relative_to(root_path).as_posix() + except (OSError, ValueError): + return None + + +def _digest(payload: Mapping[str, object]) -> str: + return hashlib.sha256( + orjson.dumps(payload, option=orjson.OPT_SORT_KEYS), + ).hexdigest() + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _as_int(value: object) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + try: + return int(str(value)) + except (TypeError, ValueError): + return 0 + + +__all__ = [ + "CALL_RESOLUTION_VERSION", + "CONTEXT_CONTRACT_VERSION", + "DEFAULT_IMPACT_FACETS", + "DEFAULT_IMPLEMENTATION_FACETS", + "IMPLEMENTED_CONTEXT_FACETS", + "build_implementation_context", + "build_unit_location_inventory", + "resolve_context_symbols", +] diff --git a/codeclone/surfaces/mcp/_intent.py b/codeclone/surfaces/mcp/_intent.py new file mode 100644 index 00000000..a305e044 --- /dev/null +++ b/codeclone/surfaces/mcp/_intent.py 
@dataclass(frozen=True, slots=True)
class IntentScope:
    """Immutable declaration of which paths an intent may and may not touch."""

    # Paths the intent declares it will edit.
    allowed_files: tuple[str, ...]
    # Additional paths the intent may touch; empty by default.
    allowed_related: tuple[str, ...] = ()
    # Patterns that must not be touched; defaults to DEFAULT_FORBIDDEN.
    forbidden: tuple[str, ...] = DEFAULT_FORBIDDEN

    @property
    def allowed_paths(self) -> tuple[str, ...]:
        """Sorted, de-duplicated union of allowed files and related paths."""
        merged = set(self.allowed_files).union(self.allowed_related)
        return tuple(sorted(merged))

    def to_payload(self) -> dict[str, object]:
        """Serialize the scope with each tuple field converted to a list."""
        field_names = ("allowed_files", "allowed_related", "forbidden")
        return {name: list(getattr(self, name)) for name in field_names}
+ forbidden_touched: tuple[str, ...] + untouched_in_declared: tuple[str, ...] + required_action: str | None + message: str + + def to_payload(self) -> dict[str, object]: + return { + "status": self.status.value, + "declared_scope": list(self.declared_scope), + "actual_changed_files": list(self.actual_changed_files), + "unexpected_files": list(self.unexpected_files), + "forbidden_touched": list(self.forbidden_touched), + "untouched_in_declared": list(self.untouched_in_declared), + "required_action": self.required_action, + "message": self.message, + } + + +@dataclass(frozen=True, slots=True) +class IntentRecord: + intent_id: str + run_id: str + report_digest: str + status: IntentStatus + declared_at_utc: str + scope: IntentScope + intent_description: str + expected_effects: tuple[str, ...] + guards: tuple[str, ...] + blast_radius_summary: dict[str, object] | None = None + check_result: IntentCheckResult | None = None + + def to_payload(self, *, short_run_id: str | None = None) -> dict[str, object]: + payload: dict[str, object] = { + "intent_id": self.intent_id, + "run_id": short_run_id or self.run_id, + "status": self.status.value, + "scope": self.scope.to_payload(), + "intent": self.intent_description, + "expected_effects": list(self.expected_effects), + "guards": list(self.guards), + "declared_at_utc": self.declared_at_utc, + "report_digest": self.report_digest, + "blast_radius_summary": self.blast_radius_summary or {}, + } + if self.check_result is not None: + payload["check_result"] = self.check_result.to_payload() + return payload + + +def _normalize_path(value: object) -> str: + text = str(value).replace("\\", "/").strip() + if text == ".": + return "" + if text.startswith("./"): + text = text[2:] + text = text.rstrip("/") + if Path(text).is_absolute(): + raise ValueError(f"intent paths must be relative: {value!r}") + if ".." 
in Path(text).parts: + raise ValueError(f"path traversal not allowed: {value!r}") + return text + + +def _normalize_required_paths(value: object, *, field_name: str) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + raise ValueError(f"scope.{field_name} must be a list of relative paths.") + paths = tuple( + sorted({_normalize_path(item) for item in value if str(item).strip()}) + ) + if not paths: + raise ValueError(f"scope.{field_name} must contain at least one path.") + return paths + + +def _normalize_optional_paths(value: object, *, field_name: str) -> tuple[str, ...]: + if value is None: + return () + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + raise ValueError(f"scope.{field_name} must be a list of relative paths.") + return tuple(sorted({_normalize_path(item) for item in value if str(item).strip()})) + + +def normalize_intent_scope(scope: object) -> IntentScope: + if not isinstance(scope, Mapping): + raise ValueError( + 'scope must be an object, e.g. {"allowed_files": ["path/to/file.py"]}.' 
def forbidden_touched(
    *,
    changed_files: Sequence[str],
    forbidden_patterns: Sequence[str],
) -> tuple[str, ...]:
    """Return the changed files that match any forbidden glob pattern.

    Matching is case-sensitive (``fnmatchcase``). The result is sorted and
    contains each matching path once.
    """
    hits: set[str] = set()
    for candidate in changed_files:
        for glob in forbidden_patterns:
            if fnmatchcase(candidate, glob):
                hits.add(candidate)
                break  # one matching pattern is enough
    return tuple(sorted(hits))
@dataclass(frozen=True, slots=True)
class PatchBudgets:
    """Immutable set of per-patch budgets and regression switches.

    For the numeric fields a negative value is treated as "no limit":
    ``to_payload`` reports such a field as ``None`` and lists its name under
    ``disabled``. The boolean fields are emitted under ``forbid_*`` keys.
    """

    clone_regression: int = 0
    dead_code_regression: bool = False
    dependency_cycle: bool = False
    coverage_hotspot: bool = False
    complexity_delta: int = -1
    coupling_delta: int = -1
    cohesion_delta: int = -1
    health_floor: int = -1
    typing_regression: bool = False
    docstring_regression: bool = False
    api_break: bool = False
    coverage_min: int = DEFAULT_COVERAGE_MIN

    def to_payload(self) -> dict[str, object]:
        """Serialize the budgets, mapping unlimited numeric values to None."""
        # Numeric budgets in the order they are reported under "disabled".
        numeric_fields = (
            "clone_regression",
            "complexity_delta",
            "coupling_delta",
            "cohesion_delta",
            "health_floor",
            "coverage_min",
        )
        switched_off = [name for name in numeric_fields if getattr(self, name) < 0]
        return {
            "clone_regression": _none_if_unlimited(self.clone_regression),
            "forbid_dead_code_regression": self.dead_code_regression,
            "forbid_dependency_cycle": self.dependency_cycle,
            "forbid_coverage_hotspot": self.coverage_hotspot,
            "complexity_delta": _none_if_unlimited(self.complexity_delta),
            "coupling_delta": _none_if_unlimited(self.coupling_delta),
            "cohesion_delta": _none_if_unlimited(self.cohesion_delta),
            "health_floor": _none_if_unlimited(self.health_floor),
            "forbid_typing_regression": self.typing_regression,
            "forbid_docstring_regression": self.docstring_regression,
            "forbid_api_break": self.api_break,
            "coverage_min": _none_if_unlimited(self.coverage_min),
            "disabled": switched_off,
        }
def detect_baseline_abuse(
    *,
    before_gate_would_fail: bool,
    after_gate_would_fail: bool,
    after_baseline_status: str,
    regressions: int,
    changed_files: int,
    intent_available: bool,
) -> dict[str, object]:
    """Flag baseline updates that look like they hide real regressions.

    Nothing is reported unless ``after_baseline_status`` is ``"updated"``.
    When it is, each trigger whose condition holds is listed, in a fixed
    order.

    Returns:
        ``{"detected": bool, "triggers": [names...]}``.
    """
    if after_baseline_status != "updated":
        return {"detected": False, "triggers": []}

    has_regressions = regressions > 0
    checks = (
        (
            "baseline_changed_with_functional_code",
            has_regressions or changed_files > 0,
        ),
        ("baseline_updated_while_regressions_present", has_regressions),
        ("baseline_updated_without_intent", not intent_available),
        (
            "ci_greened_by_accepting_debt",
            before_gate_would_fail and not after_gate_would_fail,
        ),
    )
    fired = [name for name, holds in checks if holds]
    return {"detected": bool(fired), "triggers": fired}
def build_patch_trail_inputs(
    *,
    root_path: Path,
    intent: IntentRecord,
    check_payload: Mapping[str, object],
    verify_payload: Mapping[str, object],
    hygiene: WorkspaceHygieneResult,
    report_digest: str | None,
    scope_check_audit_sequence: int | None,
    patch_verify_audit_sequence: int | None,
    receipt_audit_sequence: int | None = None,
) -> PatchTrailInputs:
    """Assemble ``PatchTrailInputs`` from scope-check, verify and hygiene results.

    Path lists are read from ``check_payload`` and normalized with
    ``_path_tuple`` (deduplicated, sorted, forward slashes). Verification
    fields are read from ``verify_payload`` with ``"unknown"`` /
    ``"not_reached"`` fallbacks. ``receipt_audit_sequence`` is optional and is
    forwarded unchanged.
    """
    declared_files = _path_tuple(check_payload.get("declared_scope"))
    declared_related = tuple(sorted(set(intent.scope.allowed_related)))
    changed_files = _path_tuple(check_payload.get("actual_changed_files"))
    unexpected_files = _path_tuple(check_payload.get("unexpected_files"))
    forbidden_touched = _path_tuple(check_payload.get("forbidden_touched"))
    # NOTE(review): the previous revision also derived an "untouched in
    # declared" tuple from ``untouched_in_declared`` (falling back to
    # declared - changed) but never passed it anywhere. The dead computation
    # is removed; confirm whether PatchTrailInputs was meant to carry it.

    # Build the membership set once instead of once per changed file.
    related_lookup = frozenset(declared_related)
    expanded_related = tuple(
        sorted(path for path in changed_files if path in related_lookup)
    )

    blast_summary = intent.blast_radius_summary or {}
    blast = BlastRadiusSnapshot(
        do_not_touch_declared=_path_tuple(blast_summary.get("do_not_touch_declared")),
        review_context_declared=_path_tuple(
            blast_summary.get("review_context_declared")
        ),
    )
    verify = VerifySnapshot(
        verification_profile=str(verify_payload.get("verification_profile", "unknown")),
        verification_status=str(verify_payload.get("status", "not_reached")),
        verification_skipped=_string_tuple(verify_payload.get("checks_not_applicable")),
        verification_failed=_string_tuple(verify_payload.get("contract_violations")),
    )
    hygiene_snapshot = HygieneSnapshot(
        blocks_finish=hygiene.blocks_finish,
        finish_block_reason=hygiene.finish_block_reason,
        unacknowledged_dirty_in_scope=hygiene.unacknowledged_dirty_in_scope,
        dirty_paths_outside_scope=hygiene.dirty_paths_outside_scope,
        # NOTE(review): reaches into a private helper of the hygiene result.
        attribution_counts=hygiene._counts(),
    )
    evidence = PatchTrailEvidenceInput(
        repo_root_digest=repo_root_digest(root_path.resolve()),
        report_digest=report_digest,
        scope_check_audit_sequence=scope_check_audit_sequence,
        patch_verify_audit_sequence=patch_verify_audit_sequence,
        receipt_audit_sequence=receipt_audit_sequence,
    )
    return PatchTrailInputs(
        intent_id=intent.intent_id,
        intent_description=intent.intent_description,
        declared_files=declared_files,
        declared_related=declared_related,
        changed_files=changed_files,
        unexpected_files=unexpected_files,
        forbidden_touched=forbidden_touched,
        expanded_related_files=expanded_related,
        scope_check_status=str(check_payload.get("status", "")),
        blast_radius=blast,
        verify=verify,
        hygiene=hygiene_snapshot,
        evidence=evidence,
    )
verification_profile=str(verify_payload.get("verification_profile", "unknown")), + verification_status=str(verify_payload.get("status", "not_reached")), + verification_skipped=_string_tuple(verify_payload.get("checks_not_applicable")), + verification_failed=_string_tuple(verify_payload.get("contract_violations")), + ) + hygiene_snapshot = HygieneSnapshot( + blocks_finish=hygiene.blocks_finish, + finish_block_reason=hygiene.finish_block_reason, + unacknowledged_dirty_in_scope=hygiene.unacknowledged_dirty_in_scope, + dirty_paths_outside_scope=hygiene.dirty_paths_outside_scope, + attribution_counts=hygiene._counts(), + ) + evidence = PatchTrailEvidenceInput( + repo_root_digest=repo_root_digest(root_path.resolve()), + report_digest=report_digest, + scope_check_audit_sequence=scope_check_audit_sequence, + patch_verify_audit_sequence=patch_verify_audit_sequence, + receipt_audit_sequence=receipt_audit_sequence, + ) + return PatchTrailInputs( + intent_id=intent.intent_id, + intent_description=intent.intent_description, + declared_files=declared_files, + declared_related=declared_related, + changed_files=changed_files, + unexpected_files=unexpected_files, + forbidden_touched=forbidden_touched, + expanded_related_files=expanded_related, + scope_check_status=str(check_payload.get("status", "")), + blast_radius=blast, + verify=verify, + hygiene=hygiene_snapshot, + evidence=evidence, + ) + + +def _path_tuple(value: object) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return () + return tuple( + sorted( + { + str(item).replace("\\", "/").strip() + for item in value + if str(item).strip() + } + ) + ) + + +def _string_tuple(value: object) -> tuple[str, ...]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return () + return tuple(sorted({str(item).strip() for item in value if str(item).strip()})) + + +__all__ = ["build_patch_trail_inputs"] diff --git 
class ReceiptVerdict(str, Enum):
    """Overall receipt verdict values."""

    CLEAN = "clean"
    INCOMPLETE = "incomplete"
    NEEDS_ATTENTION = "needs_attention"


class ReceiptPatchStatus(str, Enum):
    """Patch-contract status values recorded in a receipt."""

    ACCEPTED = "accepted"
    VIOLATED = "violated"
    NOT_CHECKED = "not_checked"


def derive_baseline_status(report_document: Mapping[str, object]) -> str:
    """Classify the report baseline as not_loaded, trusted or untrusted.

    Reads ``meta.baseline``: a missing or falsy ``loaded`` gives
    ``"not_loaded"``; a truthy ``trusted_for_diff`` or a status of ``"ok"``
    (case-insensitive) gives ``"trusted"``; anything else is ``"untrusted"``.
    """
    meta_section = report_document.get("meta")
    baseline_section = (
        meta_section.get("baseline") if isinstance(meta_section, Mapping) else None
    )
    if not isinstance(baseline_section, Mapping):
        return "not_loaded"
    if not bool(baseline_section.get("loaded", False)):
        return "not_loaded"
    normalized_status = str(baseline_section.get("status", "")).strip().lower()
    is_trusted = (
        bool(baseline_section.get("trusted_for_diff", False))
        or normalized_status == "ok"
    )
    return "trusted" if is_trusted else "untrusted"


def derive_patch_status(
    *,
    gate_result: Mapping[str, object] | None,
    intent_check_status: str | None,
    regressions: int,
    has_structural_delta: bool,
) -> str:
    """Derive the patch-contract status string.

    Violated when the intent check is ``"violated"``, the gate would fail, or
    any regression exists. Not checked when there is no gate result, no intent
    status and no structural delta. Accepted otherwise.
    """
    violated = (
        intent_check_status == "violated"
        or (gate_result is not None and bool(gate_result.get("would_fail")))
        or regressions > 0
    )
    if violated:
        return ReceiptPatchStatus.VIOLATED.value
    nothing_was_checked = (
        gate_result is None
        and intent_check_status is None
        and not has_structural_delta
    )
    outcome = (
        ReceiptPatchStatus.NOT_CHECKED
        if nothing_was_checked
        else ReceiptPatchStatus.ACCEPTED
    )
    return outcome.value


def derive_human_decision_points(
    *,
    changed_findings: Sequence[Mapping[str, object]],
    intent_status: str | None,
) -> list[dict[str, object]]:
    """Collect the decisions a human reviewer must make, numbered D-1, D-2, ...

    Clone-family findings and findings with ``novelty == "known"`` each add a
    point (one finding may add both); an ``"expanded"`` intent adds a scope
    point last. The list is cut to ``MAX_HUMAN_DECISION_POINTS`` before
    numbering.
    """
    collected: list[dict[str, object]] = []
    for finding in changed_findings:
        is_clone = str(finding.get("family", "")).strip() == "clone"
        if is_clone:
            collected.append(
                _decision_point(
                    category="clone_divergence",
                    finding_id=str(finding.get("id", "")),
                    reason=receipt_msgs.DECISION_REASON_CLONE_DIVERGENCE,
                )
            )
        is_known_debt = str(finding.get("novelty", "")).strip() == "known"
        if is_known_debt:
            collected.append(
                _decision_point(
                    category="baseline_debt_touched",
                    finding_id=str(finding.get("id", "")),
                    reason=receipt_msgs.DECISION_REASON_BASELINE_DEBT_TOUCHED,
                )
            )
    if intent_status == "expanded":
        collected.append(
            _decision_point(
                category="scope_expansion",
                finding_id="",
                reason=receipt_msgs.DECISION_REASON_SCOPE_EXPANSION,
            )
        )
    capped = collected[:MAX_HUMAN_DECISION_POINTS]
    return _numbered_decisions(capped)


def derive_claims_not_made(
    report_document: Mapping[str, object],
) -> list[dict[str, object]]:
    """Return copies of the standing disclaimers, plus one for suppressed clones.

    The suppressed-clone disclaimer is added only when the report lists at
    least one suppressed clone group.
    """
    disclaimers: list[dict[str, object]] = [dict(entry) for entry in CLAIMS_NOT_MADE]
    if _suppressed_clone_count(report_document) <= 0:
        return disclaimers
    disclaimers.append(
        {
            "claim_type": "suppressed_clone_regression",
            "reason": receipt_msgs.CLAIM_REASON_SUPPRESSED_CLONE_NOT_REGRESSION,
        }
    )
    return disclaimers


def receipt_verdict(
    *,
    reviewed_count: int,
    gate_relevant_count: int,
    patch_status: str,
    human_decision_count: int,
) -> str:
    """Combine patch status and review coverage into a receipt verdict.

    A violated patch or any open human decision needs attention. Otherwise an
    unchecked patch, or fewer reviewed than gate-relevant findings, is
    incomplete. Everything else is clean.
    """
    needs_attention = (
        patch_status == ReceiptPatchStatus.VIOLATED.value or human_decision_count > 0
    )
    if needs_attention:
        return ReceiptVerdict.NEEDS_ATTENTION.value
    review_incomplete = patch_status == ReceiptPatchStatus.NOT_CHECKED.value or (
        gate_relevant_count > 0 and reviewed_count < gate_relevant_count
    )
    if review_incomplete:
        return ReceiptVerdict.INCOMPLETE.value
    return ReceiptVerdict.CLEAN.value


def derive_verification_profile_section(
    changed_files: Sequence[str],
) -> dict[str, object]:
    """Build the ``verification_profile`` section of a receipt.

    Classifies the changed files with :func:`classify_patch`, looks up the
    check matrix for the resulting profile, and attaches that profile's
    limitations.
    """
    classification = classify_patch(list(changed_files))
    selected_profile = classification.profile
    checks = check_matrix(selected_profile)
    section: dict[str, object] = {
        "profile": selected_profile.value,
        "reason": classification.reason,
        "python_source_touched": classification.python_source_touched,
        "state_artifact_touched": classification.state_artifact_touched,
        "governance_config_touched": classification.governance_config_touched,
        "after_run_required": checks.after_run_required,
        "structural_checks_applicable": checks.structural_checks_applicable,
        "checks_performed": list(checks.checks_performed),
        "checks_not_applicable": list(checks.checks_not_applicable),
        "limitations": list(profile_limitations(selected_profile)),
    }
    return section
def render_receipt_markdown(receipt: Mapping[str, object]) -> str:
    """Render a receipt mapping as a Markdown document.

    Sections are emitted in a fixed order: header/provenance, verification
    profile, scope, blast radius, reviewed evidence, patch contract,
    structural delta, human decisions, claims not made, then a health/verdict
    footer. Optional sections (``verification_profile``, ``scope``,
    ``blast_radius``, ``patch_contract``) render a placeholder line when the
    receipt value is not a mapping. Returns the lines joined with ``"\\n"``.
    """
    # ``_as_mapping`` sections fall back to {} so lookups below use defaults;
    # ``_optional_mapping`` sections stay None so "missing" can be rendered.
    provenance = _as_mapping(receipt.get("provenance"))
    vp_section = _optional_mapping(receipt.get("verification_profile"))
    scope = _optional_mapping(receipt.get("scope"))
    blast_radius = _optional_mapping(receipt.get("blast_radius"))
    reviewed = _as_mapping(receipt.get("reviewed_evidence"))
    patch = _optional_mapping(receipt.get("patch_contract"))
    structural_delta = _as_mapping(receipt.get("structural_delta"))
    health = _as_mapping(receipt.get("health"))
    decisions = _mapping_rows(receipt.get("human_decision_points"))
    claims = _mapping_rows(receipt.get("claims_not_made"))

    # Header block: title plus provenance fields.
    lines = [
        receipt_msgs.RECEIPT_MD_TITLE,
        "",
        (
            f"**Report:** "
            f"`{provenance.get('report_digest', receipt_msgs.RECEIPT_MD_UNKNOWN)}`"
        ),
        (
            f"**Schema:** "
            f"`{provenance.get('report_schema_version', REPORT_SCHEMA_VERSION)}`"
        ),
        (
            f"**Baseline:** "
            f"{provenance.get('baseline_status', receipt_msgs.RECEIPT_MD_UNKNOWN)}"
        ),
        receipt_msgs.RECEIPT_MD_REVIEW_CONTRACT,
        "",
        "---",
    ]
    lines.extend(_render_verification_profile(vp_section))
    lines.extend(
        [
            "",
            receipt_msgs.RECEIPT_MD_SECTION_SCOPE,
        ]
    )
    if scope is None:
        lines.append(receipt_msgs.RECEIPT_MD_NO_INTENT)
    else:
        lines.extend(
            [
                f"**Intent:** {scope.get('intent_description') or 'none'}",
                f"**Status:** {scope.get('intent_status') or 'unknown'}",
                f"**Declared files:** {_inline_paths(scope.get('declared_files'))}",
                f"**Changed files:** {_inline_paths(scope.get('changed_files'))}",
                f"**Untouched in declared:** "
                f"{_inline_paths(scope.get('untouched_files'))}",
                f"**Unexpected files:** {_inline_paths(scope.get('unexpected_files'))}",
                f"**Forbidden touched:** "
                f"{_inline_paths(scope.get('forbidden_touched'))}",
            ]
        )
        # The "held" line is only shown when the value is truthy.
        held = scope.get("do_not_touch_held")
        if held:
            lines.append(f"**Do-not-touch held:** {_inline_paths(held)}")
    lines.extend(["", receipt_msgs.RECEIPT_MD_SECTION_BLAST_RADIUS])
    if blast_radius is None:
        lines.append(receipt_msgs.RECEIPT_MD_NOT_AVAILABLE)
    else:
        lines.extend(
            [
                f"**Level:** {blast_radius.get('radius_level', 'unknown')}",
                (
                    f"**Direct dependents:** "
                    f"{blast_radius.get('direct_dependents_count', 0)}"
                ),
                (
                    f"**Clone cohort members:** "
                    f"{blast_radius.get('clone_cohort_members_count', 0)}"
                ),
                (
                    f"**Do-not-touch entries:** "
                    f"{blast_radius.get('do_not_touch_count', 0)}"
                ),
            ]
        )
    lines.extend(["", receipt_msgs.RECEIPT_MD_SECTION_REVIEWED_EVIDENCE])
    lines.append(
        f"**Reviewed:** {reviewed.get('reviewed_count', 0)} / "
        f"{reviewed.get('total_gate_relevant', 0)} gate-relevant findings"
    )
    # One bullet per reviewed item; the note suffix appears only when set.
    for item in _mapping_rows(reviewed.get("items")):
        note = item.get("note")
        suffix = f" - note: {note}" if note else ""
        lines.append(
            f"- `{item.get('finding_id', '')}`: {item.get('kind', 'finding')}"
            f" ({item.get('severity', 'info')}){suffix}"
        )
    if not _mapping_rows(reviewed.get("items")):
        lines.append(receipt_msgs.RECEIPT_MD_LIST_NONE)
    lines.extend(["", receipt_msgs.RECEIPT_MD_SECTION_PATCH_CONTRACT])
    if patch is None:
        lines.append(receipt_msgs.RECEIPT_MD_NOT_AVAILABLE)
    else:
        lines.extend(
            [
                f"**Status:** {patch.get('status', 'not_checked')}",
                f"**Regressions:** {patch.get('regressions', 0)}",
                f"**Improvements:** {patch.get('improvements', 0)}",
                f"**Health delta:** {_signed_delta(patch.get('health_delta'))}",
            ]
        )
    lines.extend(
        [
            "",
            receipt_msgs.RECEIPT_MD_SECTION_STRUCTURAL_DELTA,
            f"**Verdict:** {structural_delta.get('verdict', 'stable')}",
            f"**Health delta:** {_signed_delta(structural_delta.get('health_delta'))}",
            "",
            receipt_msgs.RECEIPT_MD_SECTION_HUMAN_DECISIONS,
        ]
    )
    if decisions:
        lines.extend(
            f"- **{decision.get('id', '')}:** {decision.get('reason', '')}"
            for decision in decisions
        )
    else:
        lines.append(receipt_msgs.RECEIPT_MD_LIST_NONE)
    # Claims are listed unconditionally; an empty list adds no bullets.
    lines.extend(["", receipt_msgs.RECEIPT_MD_SECTION_CLAIMS_NOT_MADE])
    lines.extend(f"- {claim.get('reason', '')}" for claim in claims)
    # Footer: health summary, verdict and generation stamp.
    lines.extend(
        [
            "",
            f"**Health:** {health.get('score', 'n/a')}/100 "
            f"({health.get('grade', 'n/a')})",
            f"**Receipt verdict:** {receipt.get('verdict', 'incomplete')}",
            "",
            f"*Generated by CodeClone | run: `{provenance.get('run_id', 'unknown')}` | "
            f"{receipt.get('generated_at_utc', '')}*",
        ]
    )
    return "\n".join(lines)
+ ) + return "\n".join(lines) + + +def _render_verification_profile( + vp_section: Mapping[str, object] | None, +) -> list[str]: + lines = ["", "### Verification Profile"] + if vp_section is None: + lines.append("Not available.") + return lines + profile = str(vp_section.get("profile", "unknown")) + reason = str(vp_section.get("reason", "")) + structural = bool(vp_section.get("structural_checks_applicable", False)) + structural_label = "applicable" if structural else "not applicable" + lines.extend( + [ + f"**Profile:** {profile}", + f"**Reason:** {reason}", + f"**Structural checks:** {structural_label}", + f"**After-run required:** {vp_section.get('after_run_required', False)}", + ] + ) + not_applicable = [ + str(c) for c in _as_sequence(vp_section.get("checks_not_applicable")) + ] + if not_applicable: + lines.append(f"**Not applicable:** {', '.join(not_applicable)}") + limitations = [str(lim) for lim in _as_sequence(vp_section.get("limitations"))] + if limitations: + lines.extend(f"- {lim}" for lim in limitations) + return lines + + +def _decision_point( + *, + category: str, + finding_id: str, + reason: str, +) -> dict[str, object]: + return { + "id": "", + "finding_id": finding_id, + "reason": reason, + "category": category, + } + + +def _numbered_decisions( + points: Sequence[Mapping[str, object]], +) -> list[dict[str, object]]: + return [ + { + "id": f"D-{index}", + "finding_id": str(point.get("finding_id", "")), + "reason": str(point.get("reason", "")), + "category": str(point.get("category", "")), + } + for index, point in enumerate(points, start=1) + ] + + +def _suppressed_clone_count(report_document: Mapping[str, object]) -> int: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get("clones")) + suppressed = _as_mapping(clones.get("suppressed")) + return sum( + len(_as_sequence(suppressed.get(kind))) + for kind in ("function", "block", "segment") + ) + + +def 
_inline_paths(value: object) -> str: + paths = [str(item) for item in _as_sequence(value) if str(item)] + if not paths: + return "none" + return ", ".join(f"`{path}`" for path in paths) + + +def _signed_delta(value: object) -> str: + if isinstance(value, int): + return f"{value:+d}" + return "n/a" + + +def _optional_mapping(value: object) -> Mapping[str, object] | None: + return value if isinstance(value, Mapping) else None + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _mapping_rows(value: object) -> list[Mapping[str, object]]: + return [_as_mapping(item) for item in _as_sequence(value)] + + +__all__ = [ + "CLAIMS_NOT_MADE", + "MAX_HUMAN_DECISION_POINTS", + "RECEIPT_VERSION", + "VALID_RECEIPT_FORMATS", + "ReceiptFormat", + "ReceiptPatchStatus", + "ReceiptVerdict", + "derive_baseline_status", + "derive_claims_not_made", + "derive_human_decision_points", + "derive_patch_status", + "derive_verification_profile_section", + "receipt_verdict", + "render_receipt_markdown", +] diff --git a/codeclone/surfaces/mcp/_session_blast_radius_mixin.py b/codeclone/surfaces/mcp/_session_blast_radius_mixin.py new file mode 100644 index 00000000..39c01a61 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_blast_radius_mixin.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence + +from . 
class _MCPSessionBlastRadiusMixin:
    """Session mixin that answers blast-radius queries for a stored run.

    The methods use ``self._state_lock`` and ``self._normalize_changed_paths``,
    which are not defined in this class and must come from the composed
    session.
    """

    _runs: CodeCloneMCPRunStore
    _blast_radius_cache: dict[tuple[str, tuple[str, ...], str], BlastRadiusResult]

    def get_blast_radius(
        self,
        *,
        files: Sequence[str],
        run_id: str | None = None,
        depth: str = "direct",
        include: Sequence[str] | None = None,
    ) -> dict[str, object]:
        """Return the blast-radius payload for ``files`` within a run.

        Raises ``MCPServiceContractError`` for an invalid ``depth``, an empty
        normalized file list, or an invalid ``include`` entry, checked in that
        order.
        """
        run_record = self._runs.get(run_id)
        checked_depth = self._validated_blast_radius_depth(depth)
        target_files = self._normalize_changed_paths(
            root_path=run_record.root,
            paths=files,
        )
        if not target_files:
            raise MCPServiceContractError(
                "get_blast_radius requires at least one file."
            )
        checked_include = self._validated_blast_radius_include(include)
        return blast_radius_to_payload(
            self._blast_radius_result(
                record=run_record,
                files=target_files,
                depth=checked_depth,
            ),
            include=checked_include,
        )

    def _blast_radius_result(
        self,
        *,
        record: MCPRunRecord,
        files: Sequence[str],
        depth: BlastRadiusDepth,
        forbidden_patterns: Sequence[str] = DEFAULT_DO_NOT_TOUCH_PATTERNS,
        allowed_scope: Sequence[str] = (),
    ) -> BlastRadiusResult:
        """Compute a blast radius, caching results for default requests.

        Only calls with no ``allowed_scope`` and the default forbidden
        patterns are cached, because the cache key covers just the run id,
        the file set and the depth.
        """
        unique_files = tuple(sorted(set(files)))
        uses_defaults = (
            not allowed_scope
            and tuple(forbidden_patterns) == DEFAULT_DO_NOT_TOUCH_PATTERNS
        )
        key = (record.run_id, unique_files, depth)
        if uses_defaults:
            with self._state_lock:
                hit = self._blast_radius_cache.get(key)
            if hit is not None:
                return hit
        computed = compute_blast_radius(
            run_id=_helpers._short_run_id(record.run_id),
            report_document=record.report_document,
            files=unique_files,
            depth=depth,
            forbidden_patterns=forbidden_patterns,
            allowed_scope=allowed_scope,
        )
        if uses_defaults:
            with self._state_lock:
                self._blast_radius_cache[key] = computed
        return computed

    def _validated_blast_radius_depth(self, depth: str) -> BlastRadiusDepth:
        """Validate ``depth`` and narrow it to ``"direct"`` or ``"transitive"``."""
        if depth in VALID_BLAST_RADIUS_DEPTHS:
            if depth == "transitive":
                return "transitive"
            return "direct"
        expected = ", ".join(sorted(VALID_BLAST_RADIUS_DEPTHS))
        raise MCPServiceContractError(
            f"Invalid value for depth: {depth!r}. Expected one of: {expected}."
        )

    def _validated_blast_radius_include(
        self,
        include: Sequence[str] | None,
    ) -> tuple[str, ...]:
        """Validate ``include`` and return it sorted and deduplicated.

        ``None`` selects ``DEFAULT_BLAST_RADIUS_INCLUDE``.
        """
        if include is None:
            return DEFAULT_BLAST_RADIUS_INCLUDE
        rejected = sorted(
            {entry for entry in include if entry not in VALID_BLAST_RADIUS_INCLUDE}
        )
        if not rejected:
            return tuple(sorted(set(include)))
        expected = ", ".join(sorted(VALID_BLAST_RADIUS_INCLUDE))
        raise MCPServiceContractError(
            "Invalid value for include: "
            f"{', '.join(rejected)}. Expected values: {expected}."
        )
class _MCPSessionClaimGuardMixin:
    """Session mixin that validates review text against a stored run.

    Uses ``self._runs``, ``self._audit_emit``, ``self._report_digest_value``,
    ``self._finding_id_maps``, ``self._base_findings`` and
    ``self._previous_run_for_root``, none of which are defined in this class.
    """

    def validate_review_claims(
        self,
        *,
        text: str,
        run_id: str | None = None,
        require_citations: bool = True,
        patch_health_delta: int | None = None,
    ) -> dict[str, object]:
        """Validate review claims and emit an audit event with the outcome.

        A ``ValueError`` from ``validate_text_input`` is re-raised as
        ``MCPServiceContractError``. The returned payload is the validation
        result prefixed with the short run id.
        """
        try:
            checked_text = validate_text_input(text)
        except ValueError as exc:
            raise MCPServiceContractError(str(exc)) from exc
        record = self._runs.get(run_id)
        validation = validate_claims(
            text=checked_text,
            report_context=self._claim_guard_context(
                record,
                patch_health_delta=patch_health_delta,
            ),
            require_citations=bool(require_citations),
        )
        result = {"run_id": _helpers._short_run_id(record.run_id), **validation}
        if bool(result.get("valid")):
            event_type, severity, status = EVENT_CLAIM_COMPLETED, "info", "valid"
        else:
            event_type, severity, status = EVENT_CLAIM_VIOLATED, "warn", "violated"
        self._audit_emit(
            root=record.root,
            event_type=event_type,
            severity=severity,
            run_id=_helpers._short_run_id(record.run_id),
            report_digest=self._report_digest_value(record),
            status=status,
            payload=result,
        )
        return result

    def _claim_guard_context(
        self,
        record: MCPRunRecord,
        *,
        patch_health_delta: int | None = None,
    ) -> ReportContext:
        """Build the ``ReportContext`` that claim validation runs against."""
        _canonical_to_short, short_to_canonical = self._finding_id_maps(record)
        # Index findings by their stripped id; findings without an id are skipped.
        findings_by_id = {}
        for finding in self._base_findings(record):
            canonical_id = str(finding.get("id", "")).strip()
            if canonical_id:
                findings_by_id[canonical_id] = dict(finding)
        changed_paths = list(record.changed_paths)
        if changed_paths:
            profile_value = classify_patch(changed_paths).profile.value
        else:
            profile_value = None
        # Families without gate keys are the report-only ones.
        report_only = frozenset(
            sorted(
                family.name
                for family in METRIC_FAMILIES.values()
                if not family.gate_keys
            )
        )
        return ReportContext(
            findings=findings_by_id,
            short_to_canonical=short_to_canonical,
            reachable_qualnames=self._reachable_qualnames(record),
            report_only_families=report_only,
            has_comparison_run=self._previous_run_for_root(record) is not None,
            metric_families=frozenset(sorted(METRIC_FAMILIES)),
            verification_profile=profile_value,
            patch_health_delta=patch_health_delta,
        )

    def _reachable_qualnames(self, record: MCPRunRecord) -> frozenset[str]:
        """Collect non-empty ``target_qualname`` values from reachability facts.

        Returns an empty set when the record has no project metrics.
        """
        metrics = record.project_metrics
        if metrics is None:
            return frozenset()
        facts = getattr(metrics, "runtime_reachability", ())
        return frozenset(
            sorted(
                str(getattr(fact, "target_qualname", "")).strip()
                for fact in facts
                if str(getattr(fact, "target_qualname", "")).strip()
            )
        )
@dataclass(frozen=True, slots=True)
class _ContextSubject:
    """Resolved subject of an implementation-context request."""

    # Paths the context is built for.
    paths: tuple[str, ...]
    # Symbols requested by the caller (empty for intent- or git-derived subjects).
    symbols: tuple[str, ...]
    # Symbol lookups that succeeded / failed, as returned by the resolver.
    resolved_symbols: tuple[dict[str, object], ...]
    unresolved_symbols: tuple[str, ...]
    # Label describing where the subject came from, e.g. "intent_scope".
    resolved_from: str
    # Summary describing the path source (counts, truncation, ...).
    source_summary: dict[str, object]


class _ContextSessionDependencies(Protocol):
    """Methods the context mixin expects from the composed session object.

    Used only as a ``cast`` target so calls into sibling mixins type-check.
    """

    # NOTE(review): ``depth`` is typed ``str`` here and ``forbidden_patterns``
    # defaults to ``()`` — confirm this matches the concrete implementation.
    def _blast_radius_result(
        self,
        *,
        record: MCPRunRecord,
        files: Sequence[str],
        depth: str,
        forbidden_patterns: Sequence[str] = (),
        allowed_scope: Sequence[str] = (),
    ) -> BlastRadiusResult: ...

    def _latest_run_for_root(self, root_path: Path) -> MCPRunRecord | None: ...

    def get_relevant_memory(self, **params: object) -> dict[str, object]: ...
    def get_implementation_context(
        self,
        *,
        root: str,
        paths: Sequence[str] | None = None,
        symbols: Sequence[str] | None = None,
        intent_id: str | None = None,
        changed_scope: bool = False,
        mode: str = "implementation",
        include: Sequence[str] | None = None,
        depth: int = 1,
        detail_level: str = "compact",
        budget: int = 50,
        run_id: str | None = None,
        query: str | None = None,
    ) -> dict[str, object]:
        """Build the implementation-context payload for a subject in ``root``.

        Returns a ``"needs_analysis"`` status payload when no run is available
        for the root, and a ``"no_current_work"`` payload when no subject can
        be resolved. When ``query`` is given the request is delegated to
        ``search_graph`` instead; ``query`` cannot be combined with ``paths``,
        ``symbols`` or ``changed_scope``.

        Raises ``MCPServiceContractError`` for contract violations detected
        here or in the helper methods this calls.
        """
        root_path = _helpers._resolve_root(root)
        intent = self._context_intent(intent_id)
        record = self._context_record(
            root_path=root_path,
            run_id=run_id,
            intent=intent,
        )
        if record is None:
            return {
                "status": "needs_analysis",
                "root": str(root_path),
                "message": (
                    "No MCP analysis run exists for this repository root. "
                    "Run analyze_repository first."
                ),
                "next_tool": "analyze_repository",
            }
        if query is not None:
            if paths or symbols or changed_scope:
                raise MCPServiceContractError(
                    "query is mutually exclusive with paths, symbols, and "
                    "changed_scope."
                )
            # NOTE(review): this branch returns before _validate_context_request
            # runs, so ``budget`` reaches search_graph unvalidated — confirm
            # search_graph enforces its own limits.
            return search_graph(
                record=record,
                root=root_path,
                query=query,
                budget=budget,
            )
        self._validate_context_request(
            paths=paths,
            symbols=symbols,
            intent_id=intent_id,
            changed_scope=changed_scope,
            mode=mode,
            include=include,
            depth=depth,
            detail_level=detail_level,
            budget=budget,
        )
        subject = self._resolve_context_subject(
            root_path=root_path,
            record=record,
            paths=paths,
            symbols=symbols,
            intent=intent,
            changed_scope=changed_scope,
        )
        if subject is None:
            return {
                "status": "no_current_work",
                "root": str(root_path),
                "analysis": {
                    "run_id": record.run_id,
                    # NOTE(review): report_digest is filled from record.run_id —
                    # confirm the run id is meant to double as the digest here.
                    "report_digest": record.run_id,
                },
                "message": (
                    "No explicit subject, active intent scope, or live git-dirty "
                    "path is available. Whole-repository context is never inferred."
                ),
            }
        normalized_include = self._validated_context_include(
            include,
            mode=mode,
            has_intent=intent is not None,
        )
        session = cast("_ContextSessionDependencies", self)
        # Impact mode always uses the transitive blast radius.
        transitive = depth > 1 or mode == "impact"
        if subject.paths:
            blast_result = session._blast_radius_result(
                record=record,
                files=subject.paths,
                depth="transitive" if transitive else "direct",
                forbidden_patterns=(
                    intent.scope.forbidden if intent is not None else ()
                ),
                allowed_scope=(
                    intent.scope.allowed_paths if intent is not None else ()
                ),
            )
            blast_payload = blast_radius_to_payload(blast_result)
        else:
            blast_payload = {}
        # Memory is queried only when a requested facet is memory-backed.
        memory_result = None
        if MEMORY_BACKED_FACETS.intersection(normalized_include):
            memory_result = session.get_relevant_memory(
                root=str(root_path),
                scope=subject.paths or None,
                symbols=subject.symbols or None,
                max_records=min(budget, 20),
                include_drafts=True,
                detail_level=detail_level,
            )
        return build_implementation_context(
            record=record,
            paths=subject.paths,
            symbols=subject.symbols,
            subject_resolved_from=subject.resolved_from,
            subject_source_summary=subject.source_summary,
            resolved_symbols=subject.resolved_symbols,
            unresolved_symbols=subject.unresolved_symbols,
            mode=mode,
            include=normalized_include,
            depth=depth,
            detail_level=detail_level,
            budget=budget,
            blast_radius=blast_payload,
            memory_result=memory_result,
            change_control=(
                self._context_change_control(intent, blast_payload)
                if intent is not None
                else None
            ),
        )
unresolved_symbols=subject.unresolved_symbols, + mode=mode, + include=normalized_include, + depth=depth, + detail_level=detail_level, + budget=budget, + blast_radius=blast_payload, + memory_result=memory_result, + change_control=( + self._context_change_control(intent, blast_payload) + if intent is not None + else None + ), + ) + + def _context_record( + self, + *, + root_path: Path, + run_id: str | None, + intent: IntentRecord | None, + ) -> MCPRunRecord | None: + if intent is not None: + if run_id is not None and run_id not in { + intent.run_id, + _helpers._short_run_id(intent.run_id), + }: + raise MCPServiceContractError( + "Selected run_id does not match the active intent run." + ) + try: + record = self._runs.get(intent.run_id) + except MCPRunNotFoundError as exc: + raise MCPServiceContractError( + "The active intent's analysis run is no longer available. " + "Analyze again and declare a new intent." + ) from exc + if record.root.resolve() != root_path.resolve(): + raise MCPServiceContractError( + "Active intent does not belong to the supplied root." + ) + return record + if run_id is None: + session = cast("_ContextSessionDependencies", self) + return session._latest_run_for_root(root_path) + record = self._runs.get(run_id) + if record.root.resolve() != root_path.resolve(): + raise MCPServiceContractError( + "Selected MCP run does not belong to the supplied root. " + f"Run root: {record.root}; requested root: {root_path}." + ) + return record + + def _context_intent(self, intent_id: str | None) -> IntentRecord | None: + if intent_id is None: + return None + intent = self._active_intents.get(intent_id) + if intent is None: + raise MCPServiceContractError( + f"Unknown implementation-context intent_id: {intent_id!r}." + ) + if intent.status is not IntentStatus.ACTIVE: + raise MCPServiceContractError( + "Implementation context requires an active intent; " + f"{intent_id!r} is {intent.status.value}." 
+ ) + return intent + + def _validate_context_request( + self, + *, + paths: Sequence[str] | None, + symbols: Sequence[str] | None, + intent_id: str | None, + changed_scope: bool, + mode: str, + include: Sequence[str] | None, + depth: int, + detail_level: str, + budget: int, + ) -> None: + if changed_scope and (paths or symbols): + raise MCPServiceContractError( + "changed_scope=true is mutually exclusive with explicit " + "paths or symbols." + ) + for field_name, value, choices in ( + ("mode", mode, _VALID_CONTEXT_MODES), + ("detail_level", detail_level, _VALID_CONTEXT_DETAIL_LEVELS), + ): + if value in choices: + continue + expected = ", ".join(sorted(choices)) + raise MCPServiceContractError( + f"Invalid context {field_name} {value!r}. Expected one of: {expected}." + ) + if isinstance(depth, bool) or not 0 <= depth <= _MAX_CONTEXT_DEPTH: + raise MCPServiceContractError( + f"Context depth must be between 0 and {_MAX_CONTEXT_DEPTH}." + ) + if isinstance(budget, bool) or not 1 <= budget <= _MAX_CONTEXT_BUDGET: + raise MCPServiceContractError( + f"Context budget must be between 1 and {_MAX_CONTEXT_BUDGET}." + ) + self._validated_context_include( + include, + mode=mode, + has_intent=intent_id is not None, + ) + + def _validated_context_include( + self, + include: Sequence[str] | None, + *, + mode: str, + has_intent: bool, + ) -> tuple[Facet, ...]: + if include is None: + defaults = _DEFAULT_FACETS_BY_MODE.get(mode, DEFAULT_IMPLEMENTATION_FACETS) + if has_intent: + return (*defaults, "scope") + return defaults + requested = frozenset(include) + invalid = sorted(requested.difference(VALID_FACETS)) + if invalid: + expected = ", ".join(sorted(VALID_FACETS)) + raise MCPServiceContractError( + "Invalid implementation-context facet(s): " + f"{', '.join(invalid)}. Expected values: {expected}." 
+ ) + return cast("tuple[Facet, ...]", tuple(sorted(requested))) + + def _resolve_context_subject( + self, + *, + root_path: Path, + record: MCPRunRecord, + paths: Sequence[str] | None, + symbols: Sequence[str] | None, + intent: IntentRecord | None, + changed_scope: bool, + ) -> _ContextSubject | None: + explicit_paths = self._normalize_context_paths( + root_path=root_path, + paths=paths or (), + ) + explicit_symbols = tuple( + sorted({symbol.strip() for symbol in symbols or () if symbol.strip()}) + ) + if explicit_paths or explicit_symbols: + return self._context_subject_from_explicit( + record=record, + paths=explicit_paths, + symbols=explicit_symbols, + ) + if intent is not None and not changed_scope and intent.scope.allowed_files: + intent_paths = self._normalize_context_paths( + root_path=root_path, + paths=intent.scope.allowed_files, + ) + return _ContextSubject( + paths=intent_paths, + symbols=(), + resolved_symbols=(), + unresolved_symbols=(), + resolved_from="intent_scope", + source_summary=_subject_source_summary(intent_paths), + ) + snapshot = collect_dirty_snapshot(root_path) + if not snapshot.paths: + return None + ranked_paths = self._rank_dirty_context_paths( + snapshot.paths, + intent=intent, + ) + shown_paths = ranked_paths[:MAX_CONTEXT_TOTAL_ITEMS] + normalized_paths = self._normalize_context_paths( + root_path=root_path, + paths=shown_paths, + ) + return _ContextSubject( + paths=normalized_paths, + symbols=(), + resolved_symbols=(), + unresolved_symbols=(), + resolved_from="changed_scope", + source_summary={ + "total": len(ranked_paths), + "shown": len(normalized_paths), + "truncated": len(normalized_paths) < len(ranked_paths), + "omitted": max(0, len(ranked_paths) - len(normalized_paths)), + "git_available": snapshot.git_available, + }, + ) + + @staticmethod + def _context_subject_from_explicit( + *, + record: MCPRunRecord, + paths: tuple[str, ...], + symbols: tuple[str, ...], + ) -> _ContextSubject: + resolved_symbols, unresolved_symbols = 
resolve_context_symbols( + record, + symbols, + ) + symbol_paths = { + str(item["path"]) + for item in resolved_symbols + if str(item.get("path", "")).strip() + } + effective_paths = tuple(sorted({*paths, *symbol_paths})) + resolved_from = ( + "explicit_mixed" + if paths and symbols + else "explicit_symbols" + if symbols + else "explicit_paths" + ) + return _ContextSubject( + paths=effective_paths, + symbols=symbols, + resolved_symbols=resolved_symbols, + unresolved_symbols=unresolved_symbols, + resolved_from=resolved_from, + source_summary=_subject_source_summary(effective_paths), + ) + + @staticmethod + def _rank_dirty_context_paths( + paths: Sequence[str], + *, + intent: IntentRecord | None, + ) -> tuple[str, ...]: + intent_paths = ( + frozenset(intent.scope.allowed_paths) if intent is not None else frozenset() + ) + return tuple( + sorted( + set(paths), + key=lambda path: (0 if path in intent_paths else 1, path), + ) + ) + + def _context_change_control( + self, + intent: IntentRecord, + blast_payload: dict[str, object], + ) -> dict[str, object]: + review_context = blast_payload.get("review_context") + do_not_touch = blast_payload.get("do_not_touch") + review_context_summary = blast_payload.get("review_context_summary") + do_not_touch_summary = blast_payload.get("do_not_touch_summary") + return { + "intent_id": intent.intent_id, + "intent_status": intent.status.value, + "edit_allowed": intent.status is IntentStatus.ACTIVE, + "authorization_source": "start_controlled_change", + "allowed_files": list(intent.scope.allowed_files), + "allowed_related": list(intent.scope.allowed_related), + "review_context": ( + list(review_context) if isinstance(review_context, list) else [] + ), + "review_context_summary": ( + dict(review_context_summary) + if isinstance(review_context_summary, dict) + else {} + ), + "do_not_touch": ( + list(do_not_touch) if isinstance(do_not_touch, list) else [] + ), + "do_not_touch_summary": ( + dict(do_not_touch_summary) + if 
isinstance(do_not_touch_summary, dict) + else {} + ), + "guards": list(intent.guards), + } + + def _normalize_context_paths( + self, + *, + root_path: Path, + paths: Sequence[str], + ) -> tuple[str, ...]: + normalized: set[str] = set() + resolved_root = root_path.resolve() + for raw_path in paths: + if not isinstance(raw_path, str) or not raw_path.strip(): + raise MCPServiceContractError( + "Implementation-context paths must be non-empty strings." + ) + try: + absolute_path = resolve_repo_relative_path(resolved_root, raw_path) + relative_path = absolute_path.relative_to(resolved_root).as_posix() + except (RepoPathError, ValueError) as exc: + raise MCPServiceContractError( + "Implementation-context paths must be repo-relative and " + f"contained under {resolved_root}: {raw_path!r}." + ) from exc + if not relative_path or relative_path == ".": + raise MCPServiceContractError( + "Repository root is not a valid implementation-context path." + ) + normalized.add(relative_path) + return tuple(sorted(normalized)) + + +__all__ = ["_MCPSessionContextMixin"] + + +def _subject_source_summary(paths: Sequence[str]) -> dict[str, object]: + return { + "total": len(paths), + "shown": len(paths), + "truncated": False, + "omitted": 0, + } diff --git a/codeclone/surfaces/mcp/_session_finding_mixin.py b/codeclone/surfaces/mcp/_session_finding_mixin.py index ff8bccb6..0688351e 100644 --- a/codeclone/surfaces/mcp/_session_finding_mixin.py +++ b/codeclone/surfaces/mcp/_session_finding_mixin.py @@ -9,6 +9,7 @@ from types import TracebackType from typing import Protocol +from ...utils.repo_paths import RepoPathError, RepoPathPolicy, resolve_under_repo_root from . import _session_helpers as _helpers from ._session_shared import ( _CHECK_TO_DIMENSION, @@ -71,6 +72,34 @@ def __exit__( ) -> bool | None: ... 
+def _safe_location_path( + *, + root: Path, + raw_path: str, +) -> tuple[str, Path] | None: + try: + relative_path = _helpers._normalize_relative_path(raw_path) + if not relative_path: + return None + absolute_path = resolve_under_repo_root( + root, + relative_path, + policy=RepoPathPolicy(), + ) + except (MCPServiceContractError, RepoPathError): + return None + return relative_path, absolute_path + + +def _validate_run_root(record: MCPRunRecord, root_path: Path | None) -> None: + if root_path is None or record.root.resolve() == root_path.resolve(): + return + raise MCPServiceContractError( + "Selected MCP run does not belong to the supplied root. " + f"Run root: {record.root}; requested root: {root_path}." + ) + + class _MCPSessionFindingMixin: _runs: CodeCloneMCPRunStore _state_lock: _StateLock @@ -89,11 +118,6 @@ def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: request.cache_policy, _VALID_CACHE_POLICIES, ) - if request.cache_policy == "refresh": - raise MCPServiceContractError( - "cache_policy='refresh' is not supported by the read-only " - "CodeClone MCP server. Use 'reuse' or 'off'." 
- ) if request.analysis_mode == "clones_only" and request.coverage_xml is not None: raise MCPServiceContractError( "coverage_xml requires analysis_mode='full' because coverage join " @@ -210,6 +234,7 @@ def _resolve_granular_record( ) -> MCPRunRecord: if run_id is not None: record = self._runs.get(run_id) + _validate_run_root(record, self._resolve_optional_root(root)) if _helpers._record_supports_analysis_mode( record, analysis_mode=analysis_mode, @@ -737,9 +762,13 @@ def _locations_for_finding( locations: list[dict[str, object]] = [] for item in _helpers._as_sequence(finding.get("items")): item_map = _helpers._as_mapping(item) - relative_path = str(item_map.get("relative_path", "")).strip() - if not relative_path: + resolved_location = _safe_location_path( + root=record.root, + raw_path=str(item_map.get("relative_path", "")), + ) + if resolved_location is None: continue + relative_path, absolute_path = resolved_location line = _as_int(item_map.get("start_line", 0) or 0, 0) end_line = _as_int(item_map.get("end_line", 0) or 0, 0) symbol = str(item_map.get("qualname", item_map.get("module", ""))).strip() @@ -750,7 +779,6 @@ def _locations_for_finding( "symbol": symbol, } if include_uri: - absolute_path = (record.root / relative_path).resolve() uri = absolute_path.as_uri() if line > 0: uri = f"{uri}#L{line}" diff --git a/codeclone/surfaces/mcp/_session_helpers.py b/codeclone/surfaces/mcp/_session_helpers.py index a9c902dd..1e75a731 100644 --- a/codeclone/surfaces/mcp/_session_helpers.py +++ b/codeclone/surfaces/mcp/_session_helpers.py @@ -6,16 +6,11 @@ from __future__ import annotations +import os + from ...cache.store import Cache from ...contracts import REPORT_SCHEMA_VERSION from ...domain.findings import ( - CATEGORY_CLONE, - CATEGORY_COHESION, - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_DEAD_CODE, - CATEGORY_DEPENDENCY, - CATEGORY_STRUCTURAL, FAMILY_CLONE, FAMILY_DEAD_CODE, ) @@ -32,6 +27,12 @@ SOURCE_KIND_OTHER, ) from ...models import MetricsDiff 
+from ...utils.repo_paths import ( + PathOutsideRepoError, + RepoPathError, + RepoPathPolicy, + resolve_under_repo_root, +) from ._session_runtime import resolve_cache_path from ._session_shared import ( _COMPACT_ITEM_EMPTY_VALUES, @@ -71,8 +72,20 @@ _suggestion_finding_id_payload, _summarize_metrics_diff, ) +from .messages import remediation as remediation_msgs +from .messages.facts import SECURITY_SURFACES_SUMMARY_NOTE from .payloads import short_id +_MCP_MAX_PROCESS_COUNT = 64 + + +def _cap_mcp_process_count(processes: int | None) -> int | None: + """Clamp MCP worker pool size without changing analysis semantics.""" + if processes is None: + return None + host_limit = os.cpu_count() or 4 + return min(processes, host_limit, _MCP_MAX_PROCESS_COUNT) + def _summary_health_payload(summary: Mapping[str, object]) -> dict[str, object]: if str(summary.get("analysis_mode", "")) == "clones_only": @@ -111,10 +124,9 @@ def _validate_choice( allowed: Sequence[str] | frozenset[str], ) -> ChoiceT: if value not in allowed: - allowed_list = ", ".join(sorted(allowed)) - raise MCPServiceContractError( - f"Invalid value for {name}: {value!r}. Expected one of: {allowed_list}." - ) + from .messages import errors as err_msgs + + raise MCPServiceContractError(err_msgs.invalid_choice(name, value, allowed)) return value @@ -192,12 +204,32 @@ def _normalize_relative_path(path: str) -> str: return "" if cleaned.startswith("./"): cleaned = cleaned[2:] + if Path(cleaned).is_absolute(): + _raise_path_traversal(path) cleaned = cleaned.rstrip("/") if ".." in Path(cleaned).parts: - raise MCPServiceContractError(f"path traversal not allowed: {path}") + _raise_path_traversal(path) + return cleaned + + +def _validate_resource_suffix(suffix: str) -> str: + cleaned = suffix.strip() + if ( + not cleaned + or cleaned.startswith("/") + or Path(cleaned).is_absolute() + or ".." 
in Path(cleaned).parts + ): + _raise_path_traversal(suffix) return cleaned +def _raise_path_traversal(path: str) -> None: + from .messages import errors as err_msgs + + raise MCPServiceContractError(err_msgs.PATH_TRAVERSAL.format(path=path)) + + def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: return any( relative_path == candidate or relative_path.startswith(candidate + "/") @@ -217,38 +249,51 @@ def _record_supports_analysis_mode( def _resolve_root(root: str | None) -> Path: + from .messages import errors as err_msgs + if not isinstance(root, str) or not root.strip(): - raise MCPServiceContractError( - "CodeClone MCP analyze_repository requires an absolute repository root." - ) + raise MCPServiceContractError(err_msgs.ROOT_REQUIRED_ABSOLUTE) root_path = Path(root).expanduser() if not root_path.is_absolute(): - raise MCPServiceContractError( - "CodeClone MCP analyze_repository requires an absolute repository root." - ) + raise MCPServiceContractError(err_msgs.ROOT_REQUIRED_ABSOLUTE) try: resolved = root_path.resolve() except OSError as exc: raise MCPServiceContractError( - f"Unable to resolve repository root '{root}': {exc}" + err_msgs.ROOT_RESOLVE_FAILED.format(root=root, error=exc) ) from exc if not resolved.exists(): - raise MCPServiceContractError(f"Repository root '{resolved}' does not exist.") + raise MCPServiceContractError(err_msgs.ROOT_NOT_EXISTS.format(root=resolved)) if not resolved.is_dir(): - raise MCPServiceContractError( - f"Repository root '{resolved}' is not a directory." 
- ) + raise MCPServiceContractError(err_msgs.ROOT_NOT_DIRECTORY.format(root=resolved)) return resolved -def _resolve_optional_path(value: str, root_path: Path) -> Path: - candidate = Path(value).expanduser() - resolved = candidate if candidate.is_absolute() else root_path / candidate +def _resolve_optional_path( + value: str, + root_path: Path, + *, + allow_external_artifacts: bool = False, + allow_repo_absolute: bool = False, +) -> Path: + from .messages import errors as err_msgs + try: - return resolved.resolve() - except OSError as exc: + return resolve_under_repo_root( + root_path, + value, + policy=RepoPathPolicy( + allow_absolute=allow_external_artifacts or allow_repo_absolute, + allow_external=allow_external_artifacts, + ), + ) + except (PathOutsideRepoError, RepoPathError) as exc: raise MCPServiceContractError( - f"Invalid path '{value}' relative to '{root_path}': {exc}" + err_msgs.INVALID_RELATIVE_PATH.format( + value=value, + root=root_path, + error=exc, + ) ) from exc @@ -342,28 +387,11 @@ def _project_remediation( def _safe_refactor_shape(suggestion: object) -> str: - category = str(getattr(suggestion, "category", "")).strip() - clone_type = str(getattr(suggestion, "clone_type", "")).strip() - title = str(getattr(suggestion, "title", "")).strip() - if category == CATEGORY_CLONE and clone_type == "Type-1": - return "Keep one canonical implementation and route callers through it." - if category == CATEGORY_CLONE and clone_type == "Type-2": - return "Extract shared implementation with explicit parameters." - if category == CATEGORY_CLONE and "Block" in title: - return "Extract the repeated statement sequence into a helper." - if category == CATEGORY_STRUCTURAL: - return "Extract the repeated branch family into a named helper." - if category == CATEGORY_COMPLEXITY: - return "Split the function into smaller named steps." - if category == CATEGORY_COUPLING: - return "Isolate responsibilities and invert unnecessary dependencies." 
- if category == CATEGORY_COHESION: - return "Split the class by responsibility boundary." - if category == CATEGORY_DEAD_CODE: - return "Delete the unused symbol or document intentional reachability." - if category == CATEGORY_DEPENDENCY: - return "Break the cycle by moving shared abstractions to a lower layer." - return "Extract the repeated logic into a shared, named abstraction." + return remediation_msgs.safe_refactor_shape( + category=str(getattr(suggestion, "category", "")).strip(), + clone_type=str(getattr(suggestion, "clone_type", "")).strip(), + title=str(getattr(suggestion, "title", "")).strip(), + ) def _risk_level_for_effort(effort: str) -> str: @@ -564,7 +592,19 @@ def _comparison_summary_text( def _resolve_cache_path(*, root_path: Path, args: Namespace) -> Path: - return resolve_cache_path(root_path=root_path, args=args) + from .messages import errors as err_msgs + + raw_value = getattr(args, "cache_path", None) + try: + return resolve_cache_path(root_path=root_path, args=args) + except (PathOutsideRepoError, RepoPathError) as exc: + raise MCPServiceContractError( + err_msgs.INVALID_RELATIVE_PATH.format( + value=raw_value, + root=root_path, + error=str(exc), + ) + ) from exc def _build_cache( @@ -786,6 +826,7 @@ def _summary_security_surfaces_payload(record: MCPRunRecord) -> dict[str, object "production": _as_int(summary.get("production", 0), 0), "tests": _as_int(summary.get("tests", 0), 0), "report_only": bool(summary.get("report_only", True)), + "note": SECURITY_SURFACES_SUMMARY_NOTE, } @@ -917,3 +958,37 @@ def _render_pr_summary_markdown(payload: Mapping[str, object]) -> str: else: lines.extend([f"- `{reason}`" for reason in blocking_gates]) return "\n".join(lines) + + +def workspace_hygiene_tips(root: Path) -> list[dict[str, object]]: + from ...paths.gitignore import repo_gitignore_covers_codeclone_cache + from .messages.tips import gitignore_codeclone_cache_tip + + if repo_gitignore_covers_codeclone_cache(root): + return [] + return 
[gitignore_codeclone_cache_tip()] + + +def attach_workspace_hygiene_tips( + payload: dict[str, object], + *, + root: Path, +) -> dict[str, object]: + tips = workspace_hygiene_tips(root) + if tips: + payload["tips"] = tips + return payload + + +def workspace_dirty_summary_payload(*, root: Path) -> dict[str, object]: + from ._workspace_hygiene import workspace_dirty_summary + + return workspace_dirty_summary(root=root) + + +def coerce_repo_path_tuple(paths: Iterable[object]) -> tuple[str, ...]: + return tuple(str(path) for path in paths) + + +def coerce_object_dict(payload: Mapping[object, object]) -> dict[str, object]: + return {str(key): value for key, value in payload.items()} diff --git a/codeclone/surfaces/mcp/_session_insights_mixin.py b/codeclone/surfaces/mcp/_session_insights_mixin.py new file mode 100644 index 00000000..47e492d2 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_insights_mixin.py @@ -0,0 +1,69 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +from ...controller_insights import ( + controller_audit_trail_payload, + workspace_session_stats_payload, +) +from ...memory.ide_governance import IdeGovernanceSessionState +from . import _session_helpers as _helpers +from ._session_shared import MCPServiceContractError + +IDE_INSIGHTS_UNAVAILABLE_MESSAGE = ( + "This tool is only available through the CodeClone VS Code extension " + "(MCP server launched with --ide-governance-channel). Agents must use " + "manage_change_intent(action='list_workspace') for coordination state." +) +IDE_INSIGHTS_UNAVAILABLE_NEXT_STEP = ( + "Connect with the CodeClone VS Code extension or run " + "codeclone . --session-stats / --audit in the terminal." 
+) + + +class _MCPSessionInsightsMixin: + _ide_governance: IdeGovernanceSessionState + + def _require_ide_insights_channel(self, *, tool_name: str) -> None: + if not self._ide_governance.channel_enabled: + raise MCPServiceContractError( + f"{tool_name} is IDE-only. {IDE_INSIGHTS_UNAVAILABLE_MESSAGE} " + f"{IDE_INSIGHTS_UNAVAILABLE_NEXT_STEP}" + ) + + def get_workspace_session_stats(self, *, root: str) -> dict[str, object]: + self._require_ide_insights_channel(tool_name="get_workspace_session_stats") + root_path = _helpers._resolve_root(root) + try: + payload = workspace_session_stats_payload(root_path) + except Exception as exc: + raise MCPServiceContractError( + f"Failed to read workspace session stats: {exc}" + ) from exc + return {"tool": "get_workspace_session_stats", **payload} + + def get_controller_audit_trail( + self, + *, + root: str, + limit: int = 50, + audit_path: str | None = None, + ) -> dict[str, object]: + self._require_ide_insights_channel(tool_name="get_controller_audit_trail") + root_path = _helpers._resolve_root(root) + if limit < 1 or limit > 200: + raise MCPServiceContractError( + "limit must be between 1 and 200 for get_controller_audit_trail." + ) + payload = controller_audit_trail_payload( + Path(root_path), + limit=limit, + audit_path_value=audit_path, + ) + return {"tool": "get_controller_audit_trail", **payload} diff --git a/codeclone/surfaces/mcp/_session_intent_mixin.py b/codeclone/surfaces/mcp/_session_intent_mixin.py new file mode 100644 index 00000000..9429e6a9 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_intent_mixin.py @@ -0,0 +1,1589 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, replace +from datetime import datetime, timedelta, timezone +from fnmatch import fnmatchcase +from pathlib import Path + +from ...audit import ( + EVENT_BLAST_RADIUS, + EVENT_INTENT_CHECKED, + EVENT_INTENT_CLEARED, + EVENT_INTENT_DECLARED, + EVENT_INTENT_EXPANDED, + EVENT_INTENT_EXPIRED, + EVENT_INTENT_PROMOTED, + EVENT_INTENT_QUEUE_BLOCKED, + EVENT_INTENT_QUEUED, + EVENT_INTENT_RENEWED, + EVENT_INTENT_VIOLATED, + EVENT_WORKSPACE_CONFLICT, + EVENT_WORKSPACE_GC, +) +from . import _session_helpers as _helpers +from ._blast_radius import blast_radius_to_payload +from ._intent import ( + DEFAULT_INTENT_GUARDS, + IntentCheckResult, + IntentRecord, + IntentScope, + IntentStatus, + forbidden_touched, + normalize_expected_effects, + normalize_intent_scope, +) +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, +) +from ._workspace_intents import ( + DEFAULT_LEASE_SECONDS, + MAX_LEASE_SECONDS, + MIN_LEASE_SECONDS, + IntentOwnership, + WorkspaceIntentRecord, + WorkspaceIntentStatus, + classify_intent_ownership, + compute_scope_digest, + detect_conflicts, + detect_workspace_relations, + expires_at, + find_workspace_intent, + format_utc, + gc_workspace, + list_workspace_intents, + remove_workspace_intent, + remove_workspace_record, + renew_workspace_intent_lease, + resolved_lease_seconds, + resolved_ttl_seconds, + stale_reason, + update_workspace_intent_status, + utc_now, + workspace_intent_to_payload, + workspace_status_counts, + write_workspace_intent, + write_workspace_intent_with_existing, +) +from .messages import intent as intent_msgs + + +@dataclass(frozen=True, slots=True) +class _RecoveryTarget: + root_path: Path + workspace_record: WorkspaceIntentRecord + now: datetime + + 
+@dataclass(frozen=True, slots=True) +class _RecoveryRun: + record: MCPRunRecord + report_digest: str + + +class _MCPSessionIntentMixin: + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + _intent_sequence: int + _agent_pid: int + _agent_start_epoch: int + _agent_label: str + + def _audit_emit( + self, + *, + root: Path, + event_type: str, + severity: str, + run_id: str | None = None, + intent_id: str | None = None, + report_digest: str | None = None, + status: str | None = None, + payload: Mapping[str, object] | None = None, + ) -> int | None: + raise NotImplementedError + + def get_blast_radius( + self, + *, + files: Sequence[str], + run_id: str | None = None, + depth: str = "direct", + include: Sequence[str] | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + payload = super().get_blast_radius( + files=files, + run_id=record.run_id, + depth=depth, + include=include, + ) + normalized_payload = _helpers.coerce_object_dict(payload) + self._renew_lease_for_run(record=record) + self._audit_emit( + root=record.root, + event_type=EVENT_BLAST_RADIUS, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + report_digest=self._report_digest_value(record), + status=str(normalized_payload.get("radius_level", "")), + payload=normalized_payload, + ) + return normalized_payload + + def manage_change_intent( + self, + *, + action: str, + run_id: str | None = None, + intent_id: str | None = None, + scope: dict[str, object] | None = None, + intent: str | None = None, + expected_effects: Sequence[str] | None = None, + diff_ref: str | None = None, + changed_files: Sequence[str] | None = None, + root: str | None = None, + ttl_seconds: int | None = None, + lease_seconds: int | None = None, + on_conflict: str | None = None, + ) -> dict[str, object]: + match action: + case "declare": + return self._declare_change_intent( + run_id=run_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + 
ttl_seconds=ttl_seconds, + on_conflict=on_conflict, + ) + case "promote": + return self._promote_queued_intent(intent_id=intent_id) + case "get": + record, active_intent = self._resolve_intent( + run_id=run_id, + intent_id=intent_id, + ) + return self._intent_payload_with_expiry( + record=record, + intent=active_intent, + ) + case "check": + return self._check_change_intent( + run_id=run_id, + intent_id=intent_id, + diff_ref=diff_ref, + changed_files=changed_files, + ) + case "clear": + return self._clear_change_intent(intent_id=intent_id) + case "renew": + return self._renew_change_intent( + intent_id=intent_id, + lease_seconds=lease_seconds, + ) + case "list_workspace": + return self._list_workspace_intents(root=root) + case "gc_workspace": + return self._gc_workspace_intents(root=root) + case "recover": + return self._recover_change_intent( + root=root, + run_id=run_id, + intent_id=intent_id, + ) + case "reset_workspace": + return self._reset_workspace_intent( + root=root, + intent_id=intent_id, + ttl_seconds=ttl_seconds, + ) + case _: + raise MCPServiceContractError( + "Invalid value for action: " + f"{action!r}. Expected one of: check, clear, declare, " + "gc_workspace, get, list_workspace, promote, recover, " + "renew, reset_workspace." 
+ ) + + def _declare_change_intent( + self, + *, + run_id: str | None, + scope: dict[str, object] | None, + intent: str | None, + expected_effects: Sequence[str] | None, + ttl_seconds: int | None, + on_conflict: str | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + try: + normalized_scope = normalize_intent_scope(scope) + normalized_expected_effects = normalize_expected_effects(expected_effects) + except ValueError as exc: + raise MCPServiceContractError(str(exc)) from exc + description = str(intent or "").strip() + if not description: + raise MCPServiceContractError("action='declare' requires intent text.") + blast = self._blast_radius_result( + record=record, + files=normalized_scope.allowed_paths, + depth="direct", + forbidden_patterns=normalized_scope.forbidden, + ) + blast_payload = blast_radius_to_payload(blast) + blast_summary = self._blast_radius_summary( + blast_payload=blast_payload, + scope=normalized_scope, + ) + ttl = resolved_ttl_seconds( + ttl_seconds, + env_value=os.environ.get("CODECLONE_INTENT_TTL_SECONDS"), + ) + replaced_intents: list[IntentRecord] = [] + with self._state_lock: + for existing_id, existing in tuple(self._active_intents.items()): + if existing.run_id == record.run_id: + self._active_intents.pop(existing_id, None) + replaced_intents.append(existing) + self._intent_sequence += 1 + intent_id = ( + f"intent-{_helpers._short_run_id(record.run_id)}-" + f"{self._intent_sequence:03d}" + ) + declared_at = _utc_now() + record_payload = IntentRecord( + intent_id=intent_id, + run_id=record.run_id, + report_digest=self._report_digest_value(record), + status=IntentStatus.ACTIVE, + declared_at_utc=declared_at, + scope=normalized_scope, + intent_description=description, + expected_effects=normalized_expected_effects, + guards=DEFAULT_INTENT_GUARDS, + blast_radius_summary=blast_summary, + ) + self._active_intents[intent_id] = record_payload + self._runs.pin(record.run_id) + workspace_record = 
self._workspace_record_from_intent( + record=record, + intent=record_payload, + ttl_seconds=ttl, + ) + from ._workspace_hygiene import collect_dirty_snapshot + + dirty_snapshot = collect_dirty_snapshot(record.root) + workspace_record = replace( + workspace_record, + dirty_snapshot=dirty_snapshot.to_payload(), + ) + for replaced_intent in replaced_intents: + remove_workspace_intent( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=replaced_intent.intent_id, + ) + ( + workspace_existing, + workspace_registered, + ) = write_workspace_intent_with_existing( + root=record.root, + record=workspace_record, + ) + concurrent_intents = detect_conflicts( + new_scope=normalized_scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) + workspace_relations = detect_workspace_relations( + new_scope=normalized_scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) + # ── Queue branch: downgrade to queued if conflicts block ─── + if on_conflict == "queue" and concurrent_intents: + return self._downgrade_to_queued( + record=record, + intent=record_payload, + workspace_record=workspace_record, + workspace_registered=workspace_registered, + concurrent_intents=concurrent_intents, + workspace_existing=workspace_existing, + blast_payload=blast_payload, + ttl=ttl, + ) + # ── Queued context: advisory info about waiting agents ───── + queued_context = self._queued_context_from_workspace( + scope=normalized_scope, + workspace_existing=workspace_existing, + ) + payload = record_payload.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + _apply_blast_context(payload, blast_payload) + payload["workspace_registered"] = workspace_registered + payload["dirty_snapshot"] = dirty_snapshot.summary_payload() + payload["concurrent_intents"] = concurrent_intents + payload["workspace_relations"] = 
workspace_relations + if queued_context: + payload["queued_context"] = queued_context + payload["ttl_seconds"] = ttl + if concurrent_intents and on_conflict != "queue": + payload["edit_allowed"] = False + payload["user_action_required"] = True + payload["next_step"] = _declare_conflict_next_step(concurrent_intents) + else: + payload["edit_allowed"] = True + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_DECLARED, + severity="warn" if concurrent_intents else "info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=record_payload.intent_id, + report_digest=record_payload.report_digest, + status=record_payload.status.value, + # The human-authored intent description is the single most useful + # forensic field; capture it in audit (the response payload itself + # is unchanged). Compact mode preserves a bounded copy. + payload={**payload, "intent_description": description}, + ) + if concurrent_intents: + self._audit_emit( + root=record.root, + event_type=EVENT_WORKSPACE_CONFLICT, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=record_payload.intent_id, + report_digest=record_payload.report_digest, + status="conflict", + payload={"concurrent_intents": concurrent_intents}, + ) + return payload + + def _downgrade_to_queued( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + workspace_record: WorkspaceIntentRecord, + workspace_registered: bool, + concurrent_intents: list[dict[str, object]], + workspace_existing: tuple[WorkspaceIntentRecord, ...], + blast_payload: dict[str, object], + ttl: int, + ) -> dict[str, object]: + """Downgrade an already-registered active intent to queued.""" + queued_intent = replace(intent, status=IntentStatus.QUEUED) + with self._state_lock: + self._active_intents[intent.intent_id] = queued_intent + self._runs.unpin(record.run_id) + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + 
new_status=IntentStatus.QUEUED.value, + ) + blocked_by = [ + { + "intent_id": conflict.get("intent_id"), + "agent_pid": conflict.get("agent_pid"), + "agent_label": conflict.get("agent_label"), + "ownership": conflict.get("ownership"), + "overlapping_files": sorted( + { + *_as_str_sequence(conflict.get("hard_overlap")), + *_as_str_sequence(conflict.get("soft_overlap")), + } + ), + } + for conflict in concurrent_intents + ] + queue_position = self._compute_queue_position( + intent_id=intent.intent_id, + workspace_records=workspace_existing, + ) + payload = queued_intent.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + _apply_blast_context(payload, blast_payload) + payload["workspace_registered"] = workspace_registered + payload["before_run_pinned"] = False + payload["blocked_by"] = blocked_by + payload["concurrent_intents"] = concurrent_intents + payload["queue_position"] = queue_position + payload["ttl_seconds"] = ttl + payload["message"] = intent_msgs.QUEUED_PROMOTE_BEFORE_EDIT + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_QUEUED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id, + report_digest=intent.report_digest, + status="queued", + payload=payload, + ) + return payload + + def _promote_queued_intent( + self, + *, + intent_id: str | None, + ) -> dict[str, object]: + """Promote a queued intent to active after re-checking conflicts.""" + if intent_id is None: + raise MCPServiceContractError("action='promote' requires intent_id.") + with self._state_lock: + queued_intent = self._active_intents.get(intent_id) + if queued_intent is None: + raise MCPServiceContractError(f"Unknown change intent id: {intent_id}") + if queued_intent.status != IntentStatus.QUEUED: + raise MCPServiceContractError( + f"Intent {intent_id} has status " + f"{queued_intent.status.value!r}, not 'queued'. " + "Only queued intents can be promoted." 
+ ) + # Resolve the before-run — may have been evicted (not pinned). + try: + record = self._runs.get(queued_intent.run_id) + except MCPRunNotFoundError: + return { + "intent_id": intent_id, + "status": "unverified", + "reason": "before_run_evicted", + "next_step": intent_msgs.PROMOTE_BEFORE_RUN_EVICTED_NEXT, + "message": intent_msgs.PROMOTE_BEFORE_RUN_EVICTED, + } + # Re-check workspace conflicts. + workspace_existing = list_workspace_intents(root=record.root) + conflicts = detect_conflicts( + new_scope=queued_intent.scope.to_payload(), + existing=workspace_existing, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + ) + if conflicts: + blocked_by = [ + { + "intent_id": conflict.get("intent_id"), + "ownership": conflict.get("ownership"), + "overlapping_files": sorted( + { + *_as_str_sequence(conflict.get("hard_overlap")), + *_as_str_sequence(conflict.get("soft_overlap")), + } + ), + } + for conflict in conflicts + ] + payload: dict[str, object] = { + "intent_id": intent_id, + "status": "queued", + "blocked_by": blocked_by, + "blocking_count": len(blocked_by), + "message": intent_msgs.PROMOTE_STILL_BLOCKED, + } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_QUEUE_BLOCKED, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent_id, + report_digest=queued_intent.report_digest, + status="queued", + payload=payload, + ) + return payload + # Promote: active status, pin run, renew lease. 
+ promoted = replace(queued_intent, status=IntentStatus.ACTIVE) + with self._state_lock: + self._active_intents[intent_id] = promoted + self._runs.pin(record.run_id) + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + new_status=IntentStatus.ACTIVE.value, + ) + renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + promoted_payload: dict[str, object] = { + "intent_id": intent_id, + "previous_status": "queued", + "status": "active", + "run_id": _helpers._short_run_id(record.run_id), + "message": intent_msgs.PROMOTED_RECHECK, + } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_PROMOTED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent_id, + report_digest=promoted.report_digest, + status="active", + payload=promoted_payload, + ) + return promoted_payload + + def _queued_context_from_workspace( + self, + *, + scope: IntentScope, + workspace_existing: tuple[WorkspaceIntentRecord, ...], + ) -> list[dict[str, object]]: + """Return advisory info about queued intents with overlapping scope.""" + new_allowed = set(scope.allowed_files) + if not new_allowed: + return [] + context: list[dict[str, object]] = [] + for record in workspace_existing: + if record.status != IntentStatus.QUEUED.value or ( + record.agent_pid == self._agent_pid + and record.agent_start_epoch == self._agent_start_epoch + ): + continue + raw_existing = record.scope.get("allowed_files") + existing_allowed = ( + set(raw_existing) if isinstance(raw_existing, list) else set() + ) + overlap = sorted(new_allowed & existing_allowed) + if overlap: + context.append( + { + "intent_id": record.intent_id, + "overlapping_files": overlap, + "message": intent_msgs.QUEUED_SCOPE_WAITING, + } + ) + return context + + @staticmethod + def _compute_queue_position( + *, + intent_id: str, + 
workspace_records: tuple[WorkspaceIntentRecord, ...], + ) -> int: + """Compute advisory queue position among all queued records.""" + queued = sorted( + (r for r in workspace_records if r.status == IntentStatus.QUEUED.value), + key=lambda r: (r.declared_at_utc, r.intent_id), + ) + for i, record in enumerate(queued, start=1): + if record.intent_id == intent_id: + return i + return 1 + + def _check_change_intent( + self, + *, + run_id: str | None, + intent_id: str | None, + diff_ref: str | None, + changed_files: Sequence[str] | None, + ) -> dict[str, object]: + if diff_ref is None and not changed_files: + raise MCPServiceContractError( + "action='check' requires diff_ref or changed_files." + ) + record, active_intent = self._resolve_intent( + run_id=run_id, + intent_id=intent_id, + ) + self._renew_lease_if_active(record=record, intent=active_intent) + if self._is_intent_expired(record=record, intent=active_intent): + expired = replace(active_intent, status=IntentStatus.EXPIRED) + with self._state_lock: + self._active_intents[expired.intent_id] = expired + self._sync_workspace_intent_status(record=record, intent=expired) + payload = expired.to_payload( + short_run_id=_helpers._short_run_id(record.run_id) + ) + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_EXPIRED, + severity="warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=expired.intent_id, + report_digest=expired.report_digest, + status=expired.status.value, + payload=payload, + ) + return payload + actual = ( + self._normalize_changed_paths(root_path=record.root, paths=changed_files) + if changed_files + else self._git_diff_paths(root_path=record.root, git_diff_ref=str(diff_ref)) + ) + check_result = self._intent_check_result(intent=active_intent, actual=actual) + updated = replace( + active_intent, + status=check_result.status, + check_result=check_result, + ) + with self._state_lock: + self._active_intents[updated.intent_id] = updated + 
self._sync_workspace_intent_status(record=record, intent=updated) + payload = check_result.to_payload() + payload["intent_id"] = updated.intent_id + event_type = { + IntentStatus.EXPANDED: EVENT_INTENT_EXPANDED, + IntentStatus.VIOLATED: EVENT_INTENT_VIOLATED, + }.get(check_result.status, EVENT_INTENT_CHECKED) + audit_sequence = self._audit_emit( + root=record.root, + event_type=event_type, + severity="warn" if check_result.status != IntentStatus.CLEAN else "info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=updated.intent_id, + report_digest=updated.report_digest, + status=check_result.status.value, + payload=payload, + ) + if audit_sequence is not None: + payload["_audit_sequence"] = audit_sequence + return payload + + def _clear_change_intent(self, *, intent_id: str | None) -> dict[str, object]: + with self._state_lock: + removed_ids: tuple[str, ...] + removed_intents: tuple[IntentRecord, ...] + if intent_id is not None: + if intent_id not in self._active_intents: + raise MCPServiceContractError( + f"Unknown change intent id: {intent_id}" + ) + removed_ids = (intent_id,) + removed = self._active_intents.pop(intent_id) + removed_intents = (removed,) + else: + removed_ids = tuple(self._active_intents) + removed_intents = tuple(self._active_intents.values()) + self._active_intents.clear() + workspace_targets: tuple[tuple[Path, IntentRecord, str], ...] 
= tuple( + (record.root, removed_intent, self._report_digest_value(record)) + for removed_intent in removed_intents + for record in (self._optional_run_record(removed_intent.run_id),) + if record is not None + ) + for removed_intent in removed_intents: + self._runs.unpin(removed_intent.run_id) + workspace_cleared = True + for root_path, removed_intent, _report_digest in workspace_targets: + workspace_cleared = ( + remove_workspace_intent( + root=root_path, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=removed_intent.intent_id, + ) + and workspace_cleared + ) + payload = { + "cleared": len(removed_ids), + "cleared_intent_ids": list(removed_ids), + "workspace_cleared": workspace_cleared, + } + for root_path, removed_intent, report_digest in workspace_targets: + self._audit_emit( + root=root_path, + event_type=EVENT_INTENT_CLEARED, + severity="info", + run_id=_helpers._short_run_id(removed_intent.run_id), + intent_id=removed_intent.intent_id, + report_digest=report_digest, + status="cleared", + payload=payload, + ) + return payload + + def _resolve_intent( + self, + *, + run_id: str | None, + intent_id: str | None, + ) -> tuple[MCPRunRecord, IntentRecord]: + if intent_id is not None: + with self._state_lock: + active_intent = self._active_intents.get(intent_id) + if active_intent is None: + raise MCPServiceContractError(f"Unknown change intent id: {intent_id}") + return self._runs.get(active_intent.run_id), active_intent + record = self._runs.get(run_id) + with self._state_lock: + matching = [ + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ] + if not matching: + raise MCPServiceContractError("No active change intent is available.") + return record, matching[-1] + + def _intent_payload_with_expiry( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> dict[str, object]: + if self._is_intent_expired(record=record, intent=intent): + intent = replace(intent, 
status=IntentStatus.EXPIRED) + with self._state_lock: + self._active_intents[intent.intent_id] = intent + self._sync_workspace_intent_status(record=record, intent=intent) + else: + self._renew_lease_if_active(record=record, intent=intent) + return intent.to_payload(short_run_id=_helpers._short_run_id(record.run_id)) + + def _is_intent_expired( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> bool: + return intent.report_digest != self._report_digest_value(record) + + def _report_digest_value(self, record: MCPRunRecord) -> str: + integrity = _as_mapping(record.report_document.get("integrity")) + digest = _as_mapping(integrity.get("digest")) + value = str(digest.get("value", "")).strip() + if value: + return value + return record.run_id + + def _workspace_record_from_intent( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ttl_seconds: int, + ) -> WorkspaceIntentRecord: + scope_payload = intent.scope.to_payload() + declared_at = _parse_utc(intent.declared_at_utc) or utc_now() + return WorkspaceIntentRecord( + intent_id=intent.intent_id, + agent_pid=self._agent_pid, + agent_start_epoch=self._agent_start_epoch, + agent_label=self._agent_label, + run_id=record.run_id, + declared_at_utc=format_utc(declared_at), + expires_at_utc=expires_at( + declared_at=declared_at, + ttl_seconds=ttl_seconds, + ), + ttl_seconds=ttl_seconds, + status=intent.status.value, + intent=intent.intent_description, + scope=scope_payload, + scope_digest=compute_scope_digest(scope_payload), + blast_radius_summary=dict(intent.blast_radius_summary or {}), + lease_renewed_at_utc=format_utc(declared_at), + lease_seconds=resolved_lease_seconds( + env_value=os.environ.get("CODECLONE_INTENT_LEASE_SECONDS"), + ), + report_digest=intent.report_digest, + ) + + def _sync_workspace_intent_status( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> None: + update_workspace_intent_status( + root=record.root, + pid=self._agent_pid, + 
start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + new_status=intent.status.value, + ) + + def _renew_lease_if_active( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + ) -> None: + try: + renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent.intent_id, + ) + except Exception: + return + + def _renew_lease_for_run(self, *, record: MCPRunRecord) -> None: + with self._state_lock: + intents = tuple( + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ) + for intent in intents: + self._renew_lease_if_active(record=record, intent=intent) + + def _renew_change_intent( + self, + *, + intent_id: str | None, + lease_seconds: int | None, + ) -> dict[str, object]: + if intent_id is None: + with self._state_lock: + all_intents = list(self._active_intents.values()) + if not all_intents: + raise MCPServiceContractError( + "action='renew' requires intent_id or an active intent." 
+ ) + active_intent = all_intents[-1] + intent_id = active_intent.intent_id + record, active_intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + renewed = renew_workspace_intent_lease( + root=record.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=active_intent.intent_id, + lease_seconds=lease_seconds, + ) + if renewed: + latest_record = find_workspace_intent( + root=record.root, + intent_id=active_intent.intent_id, + ) + else: + latest_record = None + effective_lease = ( + latest_record.lease_seconds + if latest_record is not None + else resolved_lease_seconds(lease_seconds) + ) + payload: dict[str, object] = { + "intent_id": active_intent.intent_id, + "status": active_intent.status.value, + "lease_renewed": renewed, + "lease_seconds": effective_lease, + "lease_expires_at_utc": ( + self._lease_expired_at_utc(latest_record) + if latest_record is not None + else None + ), + "lease_policy": { + "min_seconds": MIN_LEASE_SECONDS, + "default_seconds": DEFAULT_LEASE_SECONDS, + "max_seconds": MAX_LEASE_SECONDS, + }, + } + self._audit_emit( + root=record.root, + event_type=EVENT_INTENT_RENEWED, + severity="info" if renewed else "warn", + run_id=_helpers._short_run_id(record.run_id), + intent_id=active_intent.intent_id, + report_digest=active_intent.report_digest, + status=active_intent.status.value, + payload=payload, + ) + return payload + + def _list_workspace_intents(self, *, root: str | None) -> dict[str, object]: + from ...config.intent_registry import intent_registry_summary + from ._workspace_intents import list_workspace_intent_records_for_recovery + + root_path = self._resolve_workspace_root(root) + counts = workspace_status_counts(root=root_path) + now = utc_now() + recovery_records = list_workspace_intent_records_for_recovery(root=root_path) + records = list_workspace_intents(root=root_path, exclude_stale=False) + recovery_available = self._recovery_available_payload( + records=recovery_records, + now=now, + 
def _gc_workspace_intents(self, *, root: str | None) -> dict[str, object]:
    """Garbage-collect workspace intents and audit the outcome.

    ``root`` is resolved through ``_resolve_workspace_root``. The value
    returned by ``gc_workspace`` is recorded as the payload of an
    ``EVENT_WORKSPACE_GC`` audit event and then returned unchanged.
    """
    workspace_root = self._resolve_workspace_root(root)
    gc_summary = gc_workspace(root=workspace_root)
    audit_fields = {
        "root": workspace_root,
        "event_type": EVENT_WORKSPACE_GC,
        "severity": "info",
        "status": "completed",
        "payload": gc_summary,
    }
    self._audit_emit(**audit_fields)
    return gc_summary
dict): + return workspace_update + recovered_at, previous_removed = workspace_update + return self._recovered_payload( + target=target, + recovery_run=recovery_run, + recovered=recovered, + recovered_at=recovered_at, + previous_removed=previous_removed, + ) + + def _recovery_required_fields_error( + self, + *, + root: str | None, + run_id: str | None, + intent_id: str | None, + ) -> dict[str, object] | None: + if intent_id is None: + return self._recovery_rejected( + intent_id=None, + reason="missing_intent_id", + message="action='recover' requires intent_id.", + ) + if run_id is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="missing_run_id", + message="action='recover' requires run_id.", + ) + if root is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="missing_root", + message="action='recover' requires root.", + ) + return None + + def _recovery_target( + self, + *, + root: str, + intent_id: str, + ) -> _RecoveryTarget | dict[str, object]: + root_path = self._resolve_workspace_root(root) + found = find_workspace_intent( + root=root_path, + intent_id=intent_id, + apply_lazy_close=False, + ) + if found is None: + return self._recovery_rejected( + intent_id=intent_id, + reason="not_found", + message=f"No workspace intent found for intent_id: {intent_id}.", + ) + workspace_record = found + now = utc_now() + ownership = classify_intent_ownership( + workspace_record, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, + ) + if ownership not in {IntentOwnership.RECOVERABLE, IntentOwnership.OWN_STALE}: + return self._recovery_rejected( + intent_id=intent_id, + reason="not_recoverable", + message=self._recovery_rejection_message(ownership), + details={"ownership": ownership.value}, + ) + return _RecoveryTarget( + root_path=root_path, + workspace_record=workspace_record, + now=now, + ) + + def _recovery_run( + self, + *, + run_id: str, + target: _RecoveryTarget, + ) -> _RecoveryRun | dict[str, 
object]: + workspace_record = target.workspace_record + try: + record = self._runs.get(run_id) + except MCPRunNotFoundError: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="run_not_available", + message=( + f"Run {run_id} is not available in this session. " + "Run analyze_repository first." + ), + ) + report_digest = self._report_digest_value(record) + if report_digest != workspace_record.report_digest: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="report_digest_mismatch", + message=( + "Report digest does not match. The analysis run may have " + "changed since the intent was declared." + ), + details={ + "expected": workspace_record.report_digest, + "actual": report_digest, + }, + ) + if ( + compute_scope_digest(workspace_record.scope) + != workspace_record.scope_digest + ): + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="scope_digest_mismatch", + message="Workspace intent scope digest does not match.", + ) + return _RecoveryRun(record=record, report_digest=report_digest) + + def _activate_recovered_intent( + self, + *, + target: _RecoveryTarget, + recovery_run: _RecoveryRun, + ) -> IntentRecord | dict[str, object]: + workspace_record = target.workspace_record + with self._state_lock: + if workspace_record.intent_id in self._active_intents: + return self._recovery_rejected( + intent_id=workspace_record.intent_id, + reason="already_active", + message=( + f"Intent {workspace_record.intent_id} is already active " + "in this session." 
def _rewrite_recovered_workspace_record(
    self,
    *,
    target: _RecoveryTarget,
    recovery_run: _RecoveryRun,
    recovered: IntentRecord,
) -> tuple[str, bool] | dict[str, object]:
    """Re-register a recovered intent's workspace record under this session.

    A copy of ``target.workspace_record`` is written with this session's
    pid, start epoch and label, status ``ACTIVE``, the lease-renewal
    timestamp set to ``target.now`` and the digest from ``recovery_run``.

    * If the write fails, the in-memory activation is rolled back via
      ``_rollback_recovered_intent`` and a ``workspace_rewrite_failed``
      rejection payload is returned.
    * Otherwise returns ``(recovered_at, previous_removed)``. When the prior
      owner's pid or start epoch differs from this session's, the old record
      is passed to ``remove_workspace_record`` and its result is reported;
      when the owner is unchanged the flag is ``True`` without any removal.
    """
    previous = target.workspace_record
    stamp = format_utc(target.now)
    rewritten = write_workspace_intent(
        root=target.root_path,
        record=replace(
            previous,
            agent_pid=self._agent_pid,
            agent_start_epoch=self._agent_start_epoch,
            agent_label=self._agent_label,
            status=WorkspaceIntentStatus.ACTIVE.value,
            lease_renewed_at_utc=stamp,
            report_digest=recovery_run.report_digest,
        ),
    )
    if not rewritten:
        self._rollback_recovered_intent(recovered)
        return self._recovery_rejected(
            intent_id=previous.intent_id,
            reason="workspace_rewrite_failed",
            message="Failed to rewrite workspace intent owner.",
        )
    same_owner = (
        previous.agent_pid == self._agent_pid
        and previous.agent_start_epoch == self._agent_start_epoch
    )
    if same_owner:
        return stamp, True
    return stamp, remove_workspace_record(
        root=target.root_path,
        record=previous,
    )
def _recovered_payload(
    self,
    *,
    target: _RecoveryTarget,
    recovery_run: _RecoveryRun,
    recovered: IntentRecord,
    recovered_at: str,
    previous_removed: bool,
) -> dict[str, object]:
    """Build the response payload for a successful intent recovery.

    The payload reports the recovered intent id and scope, the shortened run
    id, the previous owner (taken from ``target.workspace_record``), this
    session as the new owner, the recovery timestamp, whether the previous
    workspace record was removed, and a fixed list of suggested next steps.
    """
    prior = target.workspace_record
    summary: dict[str, object] = {
        "intent_id": recovered.intent_id,
        "action_taken": "recovered",
    }
    summary["run_id"] = _helpers._short_run_id(recovery_run.record.run_id)
    summary["scope"] = recovered.scope.to_payload()
    summary["previous_owner"] = {
        "agent_pid": prior.agent_pid,
        "agent_start_epoch": prior.agent_start_epoch,
        "agent_label": prior.agent_label,
        "lease_renewed_at_utc": prior.lease_renewed_at_utc,
    }
    summary["new_owner"] = {
        "agent_pid": self._agent_pid,
        "agent_start_epoch": self._agent_start_epoch,
        "agent_label": self._agent_label,
    }
    summary["recovered_at_utc"] = recovered_at
    summary["previous_workspace_record_removed"] = previous_removed
    summary["next_steps"] = [
        "Run manage_change_intent(action='get') to inspect recovered state.",
        "Run check_patch_contract(mode='budget') to verify patch budget.",
        "Continue editing within declared scope.",
    ]
    return summary
+ ) + root_path = self._resolve_workspace_root(root) + found = find_workspace_intent( + root=root_path, + intent_id=intent_id, + apply_lazy_close=False, + ) + if found is None: + raise MCPServiceContractError(f"Unknown workspace intent id: {intent_id}") + workspace_record = found + now = utc_now() + ownership = classify_intent_ownership( + workspace_record, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, + ) + if ownership in {IntentOwnership.EXPIRED, IntentOwnership.RECOVERABLE}: + removed = remove_workspace_record(root=root_path, record=workspace_record) + reason = ( + "expired" + if ownership == IntentOwnership.EXPIRED + else stale_reason(workspace_record) or "recoverable" + ) + return { + "intent_id": workspace_record.intent_id, + "action_taken": "removed" if removed else "failed", + "reason": reason, + } + if ownership in {IntentOwnership.FOREIGN_ACTIVE, IntentOwnership.FOREIGN_STALE}: + hint = ( + ( + "This intent belongs to a live process with a valid lease. " + "Do NOT kill the process. Ask the user to confirm whether " + "this is an abandoned session or a parallel agent." + ) + if ownership == IntentOwnership.FOREIGN_ACTIVE + else ( + "This intent belongs to a live process whose lease has expired. " + "The owner may still be working. Coordinate with the user " + "before resetting." 
+ ) + ) + return { + "intent_id": workspace_record.intent_id, + "action_taken": "rejected", + "reason": ownership.value, + "ownership": ownership.value, + "agent_pid": workspace_record.agent_pid, + "agent_start_epoch": workspace_record.agent_start_epoch, + "agent_label": workspace_record.agent_label, + "escalation_hint": hint, + "message": intent_msgs.RESET_LIVE_FOREIGN, + } + ttl = resolved_ttl_seconds( + ttl_seconds, + env_value=os.environ.get("CODECLONE_INTENT_TTL_SECONDS"), + ) + updated = update_workspace_intent_status( + root=root_path, + pid=workspace_record.agent_pid, + start_epoch=workspace_record.agent_start_epoch, + intent_id=workspace_record.intent_id, + new_status=WorkspaceIntentStatus.ACTIVE.value, + ttl_seconds=ttl, + ) + latest = find_workspace_intent(root=root_path, intent_id=intent_id) + latest_record = latest if latest is not None else workspace_record + return { + "intent_id": workspace_record.intent_id, + "action_taken": "reset" if updated else "failed", + "new_status": latest_record.status, + "new_expires_at_utc": latest_record.expires_at_utc, + } + + def _recovery_available_payload( + self, + *, + records: Sequence[WorkspaceIntentRecord], + now: datetime, + ) -> list[dict[str, object]]: + available: list[dict[str, object]] = [] + for record in records: + ownership = classify_intent_ownership( + record, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + now=now, + ) + if ownership != IntentOwnership.RECOVERABLE: + continue + run_available = self._optional_run_record(record.run_id) is not None + available.append( + { + "intent_id": record.intent_id, + "run_id": _helpers._short_run_id(record.run_id), + "scope_digest": record.scope_digest, + "previous_agent_label": record.agent_label, + "lease_expired_at_utc": self._lease_expired_at_utc(record), + "run_available": run_available, + "hint": ( + intent_msgs.RECOVERY_HINT + if run_available + else intent_msgs.RECOVERY_NEEDS_ANALYSIS_HINT + ), + } + ) + return sorted( + available, 
+ key=lambda item: ( + str(item["previous_agent_label"]), + str(item["intent_id"]), + ), + ) + + def _lease_expired_at_utc(self, record: WorkspaceIntentRecord) -> str | None: + renewed_at = _parse_utc(record.lease_renewed_at_utc) + if renewed_at is None: + return None + return format_utc(renewed_at + timedelta(seconds=record.lease_seconds)) + + def _recovery_rejected( + self, + *, + intent_id: str | None, + reason: str, + message: str, + details: Mapping[str, object] | None = None, + ) -> dict[str, object]: + return { + "intent_id": intent_id, + "action_taken": "recovery_rejected", + "reason": reason, + "message": message, + "details": dict(details or {}), + } + + def _recovery_rejection_message(self, ownership: IntentOwnership) -> str: + if ownership == IntentOwnership.FOREIGN_ACTIVE: + return intent_msgs.RECOVERY_FOREIGN_ACTIVE + if ownership == IntentOwnership.FOREIGN_STALE: + return intent_msgs.RECOVERY_FOREIGN_STALE + if ownership == IntentOwnership.EXPIRED: + return intent_msgs.RECOVERY_EXPIRED + if ownership == IntentOwnership.OWN_ACTIVE: + return "Intent is already actively owned by this session." + return "Intent is not recoverable." + + def _resolve_workspace_root(self, root: str | None) -> Path: + if root is not None: + return _helpers._resolve_root(root) + try: + return self._runs.get(None).root + except MCPRunNotFoundError as exc: + raise MCPServiceContractError( + "Workspace intent actions require root or a latest MCP run." 
def _blast_radius_summary(
    self,
    *,
    blast_payload: Mapping[str, object],
    scope: IntentScope,
) -> dict[str, object]:
    """Condense a blast-radius payload into the summary kept on an intent.

    The summary holds the radius level (default ``"low"``), counts of direct
    dependents, clone-cohort members, ``do_not_touch`` and ``review_context``
    entries, and the boundary paths of the latter two, capped at 200 each.
    ``affected_but_forbidden`` is ``forbidden_touched`` applied to the sorted
    union of direct dependents, transitive dependents and clone-cohort
    members (stringified) against ``scope.forbidden``.
    """
    direct = _as_sequence(blast_payload.get("direct_dependents"))
    transitive = _as_sequence(blast_payload.get("transitive_dependents"))
    cohort = _as_sequence(blast_payload.get("clone_cohort_members"))
    do_not_touch = blast_payload.get("do_not_touch")
    review_context = blast_payload.get("review_context")

    affected_names: set[str] = set()
    for group in (direct, transitive, cohort):
        affected_names.update(str(item) for item in group)
    affected = tuple(sorted(affected_names))

    do_not_touch_paths = _blast_boundary_paths(do_not_touch, limit=200)
    review_context_paths = _blast_boundary_paths(review_context, limit=200)
    forbidden_hits = forbidden_touched(
        changed_files=affected,
        forbidden_patterns=scope.forbidden,
    )
    return {
        "radius_level": str(blast_payload.get("radius_level", "low")),
        "direct_dependents_count": len(direct),
        "clone_cohort_members_count": len(cohort),
        "affected_but_forbidden": list(forbidden_hits),
        "do_not_touch_count": len(_as_sequence(do_not_touch)),
        "review_context_count": len(_as_sequence(review_context)),
        "do_not_touch_declared": list(do_not_touch_paths),
        "review_context_declared": list(review_context_paths),
    }
allowed + and path not in related + and not any( + fnmatchcase(path, pattern) for pattern in intent.scope.forbidden + ) + ) + expanded = tuple(path for path in actual_files if path in related) + if forbidden or unexpected: + status = IntentStatus.VIOLATED + required_action = "human_approval" + message = intent_msgs.SCOPE_CHECK_FORBIDDEN + elif expanded: + status = IntentStatus.EXPANDED + required_action = None + message = intent_msgs.SCOPE_CHECK_RELATED + else: + status = IntentStatus.CLEAN + required_action = None + message = intent_msgs.SCOPE_CHECK_CLEAN + untouched_in_declared = tuple(sorted(set(declared_scope) - set(actual_files))) + return IntentCheckResult( + status=status, + declared_scope=declared_scope, + actual_changed_files=actual_files, + unexpected_files=unexpected, + forbidden_touched=forbidden, + untouched_in_declared=untouched_in_declared, + required_action=required_action, + message=message, + ) + + +def _apply_blast_context( + payload: dict[str, object], + blast_payload: Mapping[str, object], +) -> None: + """Copy blast radius context fields into an intent payload.""" + for key in ( + "do_not_touch", + "do_not_touch_summary", + "review_context", + "review_context_summary", + ): + payload[key] = blast_payload[key] + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _as_str_sequence(value: object) -> tuple[str, ...]: + return tuple(str(item) for item in _as_sequence(value)) + + +def _blast_boundary_paths(value: object, *, limit: int) -> tuple[str, ...]: + paths: list[str] = [] + for item in _as_sequence(value): + if isinstance(item, Mapping): + path = str(item.get("path", "")).strip() + else: + path = str(item).strip() + if path: + paths.append(path.replace("\\", "/")) + unique = tuple(sorted(set(paths))) + 
return unique[:limit] + + +def _utc_now() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +def _parse_utc(value: str) -> datetime | None: + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if parsed.tzinfo is None: + return None + return parsed.astimezone(timezone.utc) + + +def _declare_conflict_next_step( + concurrent_intents: list[dict[str, object]], +) -> str: + ownerships = {str(item.get("ownership", "")) for item in concurrent_intents} + if "foreign_active" in ownerships: + return intent_msgs.DECLARE_FOREIGN_ACTIVE_OVERLAP + if "foreign_stale" in ownerships: + return intent_msgs.DECLARE_FOREIGN_STALE_OVERLAP + return intent_msgs.DECLARE_FOREIGN_OVERLAP + + +__all__ = ["_MCPSessionIntentMixin"] diff --git a/codeclone/surfaces/mcp/_session_memory_mixin.py b/codeclone/surfaces/mcp/_session_memory_mixin.py new file mode 100644 index 00000000..7fadf4f8 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_memory_mixin.py @@ -0,0 +1,684 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import cast + +from ...audit.validation import DEFAULT_AUDIT_PATH, resolve_audit_path +from ...config.memory import MemoryConfig, resolve_memory_config +from ...memory.embedding import resolve_embedding_provider +from ...memory.exceptions import ( + MemoryCapacityError, + MemoryContractError, + MemorySemanticUnavailableError, +) +from ...memory.ide_governance import ( + IdeGovernanceSessionState, + _governance_rejected, + commit_governance, + prepare_governance, + register_ide_governance, +) +from ...memory.ingest.mcp_sync import execute_mcp_memory_sync +from ...memory.models import MemoryProject +from ...memory.paths import normalize_memory_scope_path +from ...memory.project import resolve_memory_db_path, resolve_project_identity +from ...memory.retrieval import get_relevant_memory, query_engineering_memory +from ...memory.semantic import ( + close_semantic_index, + execute_semantic_index_rebuild, + resolve_semantic_index, +) +from ...memory.sqlite_store import SqliteEngineeringMemoryStore +from . 
import _session_helpers as _helpers +from ._intent import IntentRecord +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, +) + + +class _MCPSessionMemoryMixin: + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + _ide_governance: IdeGovernanceSessionState + + def get_relevant_memory( + self, + *, + root: str, + scope: Sequence[str] | None = None, + intent_id: str | None = None, + symbols: Sequence[str] | None = None, + max_records: int = 20, + include_stale: bool = False, + include_drafts: bool = False, + include_routine: bool = False, + detail_level: str = "compact", + ) -> dict[str, object]: + root_path = _helpers._resolve_root(root) + memory_sync = self._maybe_auto_sync_memory(root_path) + if not scope and not intent_id and not symbols: + raise MCPServiceContractError( + "get_relevant_memory requires scope, intent_id, or symbols. " + "Use query_engineering_memory(mode=status|search) for project " + "orientation." 
+ ) + if scope or intent_id: + scope_paths, scope_resolved_from = self._resolve_memory_scope_paths( + scope=scope, + intent_id=intent_id, + ) + else: + scope_paths, scope_resolved_from = (), "symbols" + effective_include_drafts = include_drafts or bool(scope_paths) + store, _db_path, _config, project = self._open_memory_store(root_path) + try: + blast_dependents = self._memory_blast_dependents(root_path, scope_paths) + result = get_relevant_memory( + store, + project_id=project.id, + scope_paths=scope_paths, + symbols=symbols, + blast_dependents=tuple(blast_dependents), + scope_resolved_from=scope_resolved_from, + max_records=max_records, + include_stale=include_stale, + include_drafts=effective_include_drafts, + include_routine=include_routine, + detail_level=detail_level, + ) + if memory_sync is not None: + result = dict(result) + result["memory_sync"] = memory_sync + return result + except MemoryContractError as exc: + raise MCPServiceContractError(str(exc)) from exc + finally: + store.close() + + def query_engineering_memory( + self, + *, + root: str, + mode: str, + record_id: str | None = None, + path: str | None = None, + symbol: str | None = None, + query: str | None = None, + scope: Sequence[str] | None = None, + filters: Mapping[str, object] | None = None, + max_results: int = 20, + include_stale: bool = False, + include_drafts: bool = False, + detail_level: str = "compact", + semantic: bool = False, + ) -> dict[str, object]: + root_path = _helpers._resolve_root(root) + store, db_path, config, project = self._open_memory_store(root_path) + index = resolve_semantic_index(config.semantic) if semantic else None + provider = None + semantic_reason = None + if semantic: + try: + provider = resolve_embedding_provider(config.semantic) + except MemorySemanticUnavailableError as exc: + semantic_reason = str(exc) + audit_path = ( + resolve_audit_path(root_path=root_path, value=DEFAULT_AUDIT_PATH) + if semantic + else None + ) + try: + return 
query_engineering_memory( + store, + project_id=project.id, + root_path=root_path, + backend=config.backend, + db_path=db_path, + mode=mode, + record_id=record_id, + path=path, + symbol=symbol, + query=query, + scope=scope, + filters=filters, + max_results=max_results, + include_stale=include_stale, + include_drafts=include_drafts, + detail_level=detail_level, + semantic=semantic, + semantic_index=index, + embedding_provider=provider, + provider_label=config.semantic.embedding_provider, + semantic_reason=semantic_reason, + audit_db_path=audit_path, + ) + except MemoryContractError as exc: + raise MCPServiceContractError(str(exc)) from exc + finally: + close_semantic_index(index) + store.close() + + def manage_engineering_memory( + self, + *, + root: str, + action: str, + record_type: str | None = None, + statement: str | None = None, + subject_path: str | None = None, + text: str | None = None, + intent_id: str | None = None, + run_id: str | None = None, + record_id: str | None = None, + experience_id: str | None = None, + decision: str | None = None, + ide_governance_key: str | None = None, + client_name: str | None = None, + client_version: str | None = None, + governance_ticket: str | None = None, + confirmation_nonce: str | None = None, + proof: str | None = None, + actor: str | None = None, + protocol: int | None = None, + reject_reason: str | None = None, + ) -> dict[str, object]: + from ...memory.exceptions import MemoryContractError + + root_path = _helpers._resolve_root(root) + try: + normalized = action.strip().lower() + if normalized in {"approve", "reject", "archive"}: + return _governance_rejected(normalized) + if normalized == "register_ide_governance": + if not ide_governance_key or not client_name: + raise MCPServiceContractError( + "register_ide_governance requires ide_governance_key and " + "client_name." 
+ ) + return register_ide_governance( + self._ide_governance, + ide_governance_key=ide_governance_key, + client_name=client_name, + client_version=client_version, + ) + if normalized == "prepare_governance": + if not record_id or not decision: + raise MCPServiceContractError( + "prepare_governance requires record_id and decision." + ) + store, _db_path, _config, project = self._open_memory_store(root_path) + try: + return prepare_governance( + self._ide_governance, + store, + project_id=project.id, + root_path=str(root_path), + record_id=record_id, + decision=decision, + ) + finally: + store.close() + if normalized == "commit_governance": + if ( + not record_id + or not decision + or not governance_ticket + or not confirmation_nonce + or not proof + or protocol is None + ): + raise MCPServiceContractError( + "commit_governance requires record_id, decision, " + "governance_ticket, confirmation_nonce, proof, and protocol." + ) + store, _db_path, _config, project = self._open_memory_store(root_path) + try: + return commit_governance( + self._ide_governance, + store, + project_id=project.id, + root_path=str(root_path), + record_id=record_id, + decision=decision, + governance_ticket=governance_ticket, + confirmation_nonce=confirmation_nonce, + proof=proof, + actor=actor or "", + protocol=protocol, + ) + finally: + store.close() + if normalized == "rebuild_semantic_index": + config = resolve_memory_config(root_path) + return cast( + dict[str, object], + execute_semantic_index_rebuild( + root_path=root_path, + config=config, + ), + ) + if normalized == "rebuild_trajectories": + config = resolve_memory_config(root_path) + from ...memory.trajectory.rebuild_workflow import ( + execute_trajectory_rebuild, + ) + + return cast( + dict[str, object], + execute_trajectory_rebuild( + root_path=root_path, + config=config, + ), + ) + if normalized == "enqueue_projection_rebuild": + from ...memory.jobs import execute_enqueue_projection_rebuild + + return 
execute_enqueue_projection_rebuild( + root_path=root_path, + trigger="explicit", + ) + if normalized == "projection_rebuild_status": + from ...memory.jobs import execute_projection_rebuild_status + + return execute_projection_rebuild_status(root_path=root_path) + if normalized == "run_projection_jobs_once": + from ...memory.jobs import execute_run_projection_jobs_once + + return execute_run_projection_jobs_once(root_path=root_path) + if normalized == "refresh_from_run": + return self._manage_memory_refresh_from_run( + root_path, + run_id=run_id, + ) + if normalized == "record_candidate": + store, _db_path, config, project = self._open_memory_store(root_path) + try: + return self._manage_memory_record_candidate( + store, + project=project, + config=config, + record_type=record_type, + statement=statement, + subject_path=subject_path, + ) + finally: + store.close() + if normalized == "promote_experience": + store, _db_path, config, project = self._open_memory_store(root_path) + try: + return self._manage_memory_promote_experience( + store, + project=project, + config=config, + experience_id=experience_id, + ) + finally: + store.close() + if normalized == "validate_claims": + store, _db_path, _config, project = self._open_memory_store(root_path) + try: + return self._manage_memory_validate_claims( + store, + project=project, + text=text, + ) + finally: + store.close() + if normalized == "propose_from_receipt": + store, _db_path, config, project = self._open_memory_store(root_path) + try: + return self._manage_memory_propose_from_receipt( + store, + project=project, + config=config, + text=text, + intent_id=intent_id, + ) + finally: + store.close() + allowed = ( + "record_candidate", + "promote_experience", + "validate_claims", + "propose_from_receipt", + "refresh_from_run", + "rebuild_semantic_index", + "rebuild_trajectories", + "enqueue_projection_rebuild", + "projection_rebuild_status", + "run_projection_jobs_once", + "register_ide_governance", + 
"prepare_governance", + "commit_governance", + ) + raise MCPServiceContractError( + f"Unknown manage_engineering_memory action: {action!r}. " + f"Allowed: {', '.join(allowed)}" + ) + except MemoryCapacityError as exc: + raise MCPServiceContractError(str(exc)) from exc + except MemoryContractError as exc: + raise MCPServiceContractError(str(exc)) from exc + + def _manage_memory_record_candidate( + self, + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + config: MemoryConfig, + record_type: str | None, + statement: str | None, + subject_path: str | None, + ) -> dict[str, object]: + from ...memory.governance import record_candidate + + if not record_type or not statement: + raise MCPServiceContractError( + "record_candidate requires record_type and statement." + ) + record = record_candidate( + store, + project=project, + record_type=record_type, # type: ignore[arg-type] + statement=statement, + subject_path=subject_path, + max_candidates=config.max_candidates, + max_statement_chars=config.max_statement_chars, + ) + return { + "action": "record_candidate", + "record_id": record.id, + "status": record.status, + "type": record.type, + } + + def _manage_memory_promote_experience( + self, + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + config: MemoryConfig, + experience_id: str | None, + ) -> dict[str, object]: + from ...memory.governance import promote_experience + + if not experience_id: + raise MCPServiceContractError("promote_experience requires experience_id.") + record = promote_experience( + store, + project=project, + experience_id=experience_id, + max_candidates=config.max_candidates, + ) + return { + "action": "promote_experience", + "record_id": record.id, + "status": record.status, + "type": record.type, + "promoted_from_experience": experience_id, + } + + def _manage_memory_validate_claims( + self, + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + text: str | None, + ) -> dict[str, object]: + 
from ...memory.governance import validate_memory_claims + + if not text: + raise MCPServiceContractError("validate_claims requires text.") + result = validate_memory_claims( + store, + project_id=project.id, + text=text, + ) + return { + "action": "validate_claims", + "valid": result.valid, + "warnings": list(result.warnings), + "errors": list(result.errors), + } + + def _manage_memory_propose_from_receipt( + self, + store: SqliteEngineeringMemoryStore, + *, + project: MemoryProject, + config: MemoryConfig, + text: str | None, + intent_id: str | None, + ) -> dict[str, object]: + from ...memory.ingest.receipts import propose_memory_from_finish_payload + + payload: dict[str, object] = { + "claims_text": text, + "scope_check": {}, + } + if intent_id: + intent = self._active_intents.get(intent_id) + if intent is not None: + payload["scope_check"] = { + "declared_scope": list(intent.scope.allowed_files), + } + candidates = propose_memory_from_finish_payload( + store, + project=project, + finish_payload=payload, + max_candidates=config.max_candidates, + max_statement_chars=config.max_statement_chars, + ) + return {"action": "propose_from_receipt", "memory_candidates": candidates} + + def _manage_memory_refresh_from_run( + self, + root_path: Path, + *, + run_id: str | None, + ) -> dict[str, object]: + record = self._memory_run_record(root_path, run_id) + config = resolve_memory_config(root_path) + sync_payload = execute_mcp_memory_sync( + root_path=root_path, + report_document=record.report_document, + config=config, + trigger="explicit", + run_id=record.run_id, + force=True, + ) + return {"action": "refresh_from_run", **sync_payload} + + def _maybe_auto_sync_memory( + self, + root_path: Path, + *, + run_id: str | None = None, + ) -> dict[str, object] | None: + config = resolve_memory_config(root_path) + if config.mcp_sync_policy == "off": + return None + try: + record = self._memory_run_record(root_path, run_id) + except MCPServiceContractError: + return None + 
sync_payload = execute_mcp_memory_sync( + root_path=root_path, + report_document=record.report_document, + config=config, + trigger="auto", + run_id=record.run_id, + force=False, + ) + if sync_payload["status"] == "unchanged": + return None + return sync_payload + + def _memory_run_record( + self, + root_path: Path, + run_id: str | None = None, + ) -> MCPRunRecord: + try: + record = self._runs.get(run_id) + except MCPRunNotFoundError as exc: + raise MCPServiceContractError( + "No MCP analysis run available for this repository. " + "Call analyze_repository first." + ) from exc + if record.root.resolve() != root_path.resolve(): + raise MCPServiceContractError( + "The selected MCP run belongs to a different repository root." + ) + return record + + def finish_propose_memory( + self, + *, + root_path: Path, + changed_files: Sequence[str], + claims_text: str | None, + review_text: str | None, + verification_profile: str | None, + ) -> dict[str, object]: + from ...memory.coverage import compute_scope_coverage, coverage_delta + from ...memory.ingest.receipts import propose_memory_from_changed_paths + from ...memory.staleness import apply_scope_staleness + + try: + store, _db_path, config, project = self._open_memory_store(root_path) + except MCPServiceContractError: + return {} + try: + before = compute_scope_coverage( + store, + project_id=project.id, + scope_paths=changed_files, + ) + candidates = propose_memory_from_changed_paths( + store, + project=project, + changed_paths=changed_files, + claims_text=claims_text, + review_text=review_text, + verification_profile=verification_profile, + max_candidates=config.max_candidates, + max_statement_chars=config.max_statement_chars, + ) + stale_report = apply_scope_staleness( + store, + project_id=project.id, + changed_paths=changed_files, + ) + after = compute_scope_coverage( + store, + project_id=project.id, + scope_paths=changed_files, + ) + delta = coverage_delta(before, after) + return { + "memory_candidates": candidates, 
+ "memory_staleness": { + "records_marked_stale": stale_report.records_marked_stale, + "reasons": stale_report.reasons, + }, + "memory_coverage_delta": delta, + } + finally: + store.close() + + def maybe_auto_enqueue_projection_rebuild( + self, + *, + root_path: Path, + ) -> dict[str, object] | None: + from ...memory.jobs import maybe_auto_enqueue_projection_rebuild + + return maybe_auto_enqueue_projection_rebuild( + root_path=root_path, + trigger="mcp_finish", + ) + + def _open_memory_store( + self, + root_path: Path, + ) -> tuple[SqliteEngineeringMemoryStore, Path, MemoryConfig, MemoryProject]: + config = resolve_memory_config(root_path) + db_path = resolve_memory_db_path(root_path, config) + if not db_path.exists(): + self._maybe_auto_sync_memory(root_path) + if not db_path.exists(): + raise MCPServiceContractError( + "Engineering memory database not found. " + "Call manage_engineering_memory(action='refresh_from_run') after " + "analyze_repository, or run `codeclone memory init`." + ) + project = resolve_project_identity(root_path) + return SqliteEngineeringMemoryStore(db_path), db_path, config, project + + def _resolve_memory_scope_paths( + self, + *, + scope: Sequence[str] | None, + intent_id: str | None, + ) -> tuple[tuple[str, ...], str]: + if scope: + return ( + tuple(normalize_memory_scope_path(path) for path in scope), + "explicit", + ) + if intent_id: + intent = self._active_intents.get(intent_id) + if intent is None: + raise MCPServiceContractError( + f"Intent '{intent_id}' is not active in this MCP session. " + "Pass explicit scope or re-run start_controlled_change." + ) + return ( + tuple( + normalize_memory_scope_path(path) + for path in intent.scope.allowed_files + ), + "intent", + ) + raise MCPServiceContractError( + "get_relevant_memory requires scope or intent_id. " + "Use query_engineering_memory(mode=status|search) for project " + "orientation." 
+ ) + + def _memory_blast_dependents( + self, + root_path: Path, + scope_paths: Sequence[str], + ) -> frozenset[str]: + if not scope_paths: + return frozenset() + try: + record = self._runs.get() + except MCPRunNotFoundError: + return frozenset() + if record.root.resolve() != root_path.resolve(): + return frozenset() + try: + result = self._blast_radius_result( + record=record, + files=list(scope_paths), + depth="direct", + ) + except MCPServiceContractError: + return frozenset() + return frozenset(result.direct_dependents) + + +__all__ = ["_MCPSessionMemoryMixin"] diff --git a/codeclone/surfaces/mcp/_session_patch_contract_mixin.py b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py new file mode 100644 index 00000000..2399f3bc --- /dev/null +++ b/codeclone/surfaces/mcp/_session_patch_contract_mixin.py @@ -0,0 +1,1222 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from fnmatch import fnmatchcase + +from ...audit import ( + EVENT_BASELINE_ABUSE, + EVENT_PATCH_BUDGET, + EVENT_PATCH_EXPIRED, + EVENT_PATCH_VERIFIED, + EVENT_PATCH_VIOLATED, +) +from ...utils.coerce import as_int as _coerce_int +from . 
import _session_helpers as _helpers +from ._intent import IntentCheckResult, IntentRecord, IntentScope, IntentStatus +from ._patch_contract import ( + VALID_PATCH_CONTRACT_MODES, + VALID_STRICTNESS_PROFILES, + PatchBudgets, + PatchContractMode, + PatchContractStatus, + StrictnessProfile, + baseline_status, + budgets_for_strictness, + detect_baseline_abuse, +) +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPGateRequest, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, +) +from ._verification_profile import ( + ClassificationResult, + VerificationProfile, + classify_patch, + profile_accepted_message, + profile_limitations, + profile_unverified_message, +) + +MAX_WORSENED_ITEMS = 20 + + +class _MCPSessionPatchContractMixin: + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + + def check_patch_contract( + self, + *, + mode: str, + run_id: str | None = None, + before_run_id: str | None = None, + after_run_id: str | None = None, + intent_id: str | None = None, + strictness: str = "ci", + diff_ref: str | None = None, + changed_files: Sequence[str] | None = None, + ) -> dict[str, object]: + validated_mode = self._validated_patch_contract_mode(mode) + validated_strictness = self._validated_strictness(strictness) + if validated_mode == "budget": + return self._patch_contract_budget( + run_id=run_id, + intent_id=intent_id, + strictness=validated_strictness, + ) + return self._patch_contract_verify( + before_run_id=before_run_id, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=validated_strictness, + diff_ref=diff_ref, + changed_files=changed_files, + ) + + def _patch_contract_budget( + self, + *, + run_id: str | None, + intent_id: str | None, + strictness: StrictnessProfile, + ) -> dict[str, object]: + record = self._runs.get(run_id) + intent = self._optional_intent(record=record, intent_id=intent_id) + if intent is not None: + self._renew_lease_if_active(record=record, intent=intent) + budgets = 
def _patch_contract_verify(
    self,
    *,
    before_run_id: str | None,
    after_run_id: str | None,
    intent_id: str | None,
    strictness: StrictnessProfile,
    diff_ref: str | None,
    changed_files: Sequence[str] | None,
) -> dict[str, object]:
    """Verify a patch against its contract and return the result payload.

    The checks run in a fixed order and the first one that applies decides
    the outcome: missing before-run, queued intent, state-artifact mutation,
    expired intent, the no-after-run fast path, and finally the full
    structural comparison of the before and after runs.
    """
    # ── 1. Resolve before-run (required for intent binding) ─────
    # When intent_id is provided but before_run_id is not, auto-
    # resolve from the intent's stored run_id.  This removes one
    # mandatory parameter the agent must track across sessions.
    resolved_before_run_id = before_run_id
    if resolved_before_run_id is None and intent_id is not None:
        resolved_before_run_id = self._before_run_id_from_intent(intent_id)
    if resolved_before_run_id is None:
        return self._unverified_patch_contract(reason="no_before_run")
    try:
        before = self._runs.get(resolved_before_run_id)
    except MCPRunNotFoundError:
        return self._unverified_patch_contract(reason="no_before_run")

    # ── 2. Resolve intent ───────────────────────────────────────
    intent = self._optional_intent(record=before, intent_id=intent_id)
    if intent is not None:
        self._renew_lease_if_active(record=before, intent=intent)

    # ── 2b. Queued intents cannot be verified ──────────────────
    if intent is not None and intent.status == IntentStatus.QUEUED:
        return self._unverified_patch_contract(
            reason="intent_not_active",
            before=before,
        )

    # ── 3. Compute actual changed files ─────────────────────────
    actual_changed_files = self._patch_changed_files_flexible(
        before=before,
        after_run_id=after_run_id,
        diff_ref=diff_ref,
        changed_files=changed_files,
    )

    # ── 4. Classify verification profile ────────────────────────
    classification = classify_patch(actual_changed_files)

    # ── 5. Scope/forbidden checks (run whenever an intent exists) ─
    scope_check = (
        self._scope_check_payload(intent=intent, actual=actual_changed_files)
        if intent is not None
        else None
    )

    # ── 6. State artifact → violated early ──────────────────────
    if classification.profile == VerificationProfile.STATE_ARTIFACT_CHANGE:
        return self._state_artifact_violated(
            before=before,
            intent=intent,
            classification=classification,
            scope_check=scope_check,
        )

    # ── 7. Intent expiry check ──────────────────────────────────
    if intent is not None and self._is_intent_expired(record=before, intent=intent):
        after = self._optional_after_run(after_run_id)
        return self._expired_patch_contract(
            before=before,
            # Fall back to the before-run when no after-run can be loaded.
            after=after or before,
            intent=intent,
        )

    # ── 8. Scope violation flag ─────────────────────────────────
    # Only computed here; the fast path in step 9 is the consumer.
    scope_violated = (
        scope_check is not None
        and scope_check.get("status") == IntentStatus.VIOLATED.value
    )

    # ── 9. Profile-based fast path (no after_run needed) ────────
    # Fast path requires explicit changed files evidence.  When
    # neither changed_files nor diff_ref was provided, the caller
    # has no diff evidence and must provide after_run_id.
    if after_run_id is None:
        has_diff_evidence = changed_files is not None or diff_ref is not None
        if not has_diff_evidence:
            return self._unverified_patch_contract(
                reason="no_after_run",
                before=before,
            )
        return self._profile_fast_path(
            before=before,
            intent=intent,
            strictness=strictness,
            classification=classification,
            scope_check=scope_check,
            scope_violated=scope_violated,
        )

    # ── 10. Full structural path (after_run available) ──────────
    try:
        after = self._runs.get(after_run_id)
    except MCPRunNotFoundError:
        return self._unverified_patch_contract(
            reason="no_after_run",
            before=before,
            classification=classification,
        )
    # For these two profiles an after-run identical to the before-run is
    # reported as unverified instead of being compared with itself.
    if before.run_id == after.run_id and classification.profile in {
        VerificationProfile.PYTHON_STRUCTURAL,
        VerificationProfile.GOVERNANCE_CONFIG,
    }:
        return self._unverified_patch_contract(
            reason="after_run_not_new",
            before=before,
            after=after,
            classification=classification,
            scope_check=scope_check,
        )
    return self._full_structural_verify(
        before=before,
        after=after,
        intent=intent,
        strictness=strictness,
        classification=classification,
        scope_check=scope_check,
        actual_changed_files=actual_changed_files,
    )
_claim_validation_recommended( + classification: ClassificationResult | None, + ) -> bool: + """Decide whether claim validation is meaningful for the profile.""" + if classification is None: + return True + return classification.profile in ( + VerificationProfile.PYTHON_STRUCTURAL, + VerificationProfile.GOVERNANCE_CONFIG, + ) + + def _optional_intent( + self, + *, + record: MCPRunRecord, + intent_id: str | None, + ) -> IntentRecord | None: + if intent_id is not None: + _, intent = self._resolve_intent(run_id=None, intent_id=intent_id) + assert isinstance(intent, IntentRecord) + return intent + with self._state_lock: + matching = [ + intent + for intent in self._active_intents.values() + if intent.run_id == record.run_id + ] + return matching[-1] if matching else None + + def _budgets_for_record( + self, + *, + record: MCPRunRecord, + strictness: StrictnessProfile, + ) -> PatchBudgets: + request = record.request + return budgets_for_strictness( + strictness=strictness, + coverage_min=request.coverage_min, + complexity_threshold=request.complexity_threshold, + coupling_threshold=request.coupling_threshold, + cohesion_threshold=request.cohesion_threshold, + ) + + def _gate_request( + self, *, record: MCPRunRecord, budgets: PatchBudgets + ) -> MCPGateRequest: + clone_budget = budgets.clone_regression + return MCPGateRequest( + run_id=record.run_id, + fail_on_new=clone_budget == 0, + fail_threshold=-1, + fail_complexity=budgets.complexity_delta, + fail_coupling=budgets.coupling_delta, + fail_cohesion=budgets.cohesion_delta, + fail_cycles=budgets.dependency_cycle, + fail_dead_code=budgets.dead_code_regression, + fail_health=budgets.health_floor, + fail_on_typing_regression=budgets.typing_regression, + fail_on_docstring_regression=budgets.docstring_regression, + fail_on_api_break=budgets.api_break, + fail_on_untested_hotspots=budgets.coverage_hotspot, + coverage_min=budgets.coverage_min, + ) + + def _gate_preview( + self, + *, + record: MCPRunRecord, + budgets: 
PatchBudgets, + ) -> dict[str, object]: + gate_result = self._evaluate_gate_snapshot( + record=record, + request=self._gate_request(record=record, budgets=budgets), + ) + return { + "would_fail": gate_result.exit_code != 0, + "exit_code": gate_result.exit_code, + "reasons": list(gate_result.reasons), + } + + def _current_state(self, record: MCPRunRecord) -> dict[str, object]: + report_document = record.report_document + return { + "health_score": _helpers._summary_health_score(record.summary), + "complexity_max": self._family_max( + report_document, + family="complexity", + keys=("cyclomatic_complexity", "complexity", "value"), + ), + "coupling_max": self._family_max( + report_document, + family="coupling", + keys=("cbo", "coupling", "value"), + ), + "cohesion_max": self._family_max( + report_document, + family="cohesion", + keys=("lcom4", "cohesion", "value"), + ), + "dependency_cycles": len(self._dependency_cycles(report_document)), + "clone_groups": record.func_clones_count + record.block_clones_count, + "dead_code_high_confidence": self._dead_code_high_confidence( + report_document + ), + } + + def _headroom( + self, + *, + budgets: PatchBudgets, + current_state: Mapping[str, object], + ) -> dict[str, object]: + return { + "complexity_headroom": self._threshold_headroom( + budget=budgets.complexity_delta, + current=_coerce_int(current_state.get("complexity_max")), + ), + "coupling_headroom": self._threshold_headroom( + budget=budgets.coupling_delta, + current=_coerce_int(current_state.get("coupling_max")), + ), + "cohesion_headroom": self._threshold_headroom( + budget=budgets.cohesion_delta, + current=_coerce_int(current_state.get("cohesion_max")), + ), + "health_headroom": ( + _coerce_int(current_state.get("health_score")) - budgets.health_floor + if budgets.health_floor >= 0 + and current_state.get("health_score") is not None + else None + ), + } + + def _patch_changed_files( + self, + *, + after: MCPRunRecord, + diff_ref: str | None, + changed_files: 
def _state_artifact_violated(
    self,
    *,
    before: MCPRunRecord,
    intent: IntentRecord | None,
    classification: ClassificationResult,
    scope_check: dict[str, object] | None,
) -> dict[str, object]:
    """Build, audit and return the violated payload for a state-artifact patch.

    ``scope_violation`` is appended to the violation list when the scope
    check itself reports a violated status.  The same list backs both
    ``contract_violations`` and ``blocking_violations``.
    """
    from .messages import patch_contract as patch_msgs

    classification_fields = classification.to_payload()
    reason = "state_artifact_mutation"
    violations = [reason]
    scope_also_violated = (
        scope_check is not None
        and scope_check.get("status") == IntentStatus.VIOLATED.value
    )
    if scope_also_violated:
        violations.append("scope_violation")
    bound_intent_id = None if intent is None else intent.intent_id

    payload: dict[str, object] = {
        "mode": "verify",
        "status": PatchContractStatus.VIOLATED.value,
        "reason": reason,
        "before": self._run_ref_payload(before),
        "after": None,
        "intent_id": bound_intent_id,
        "scope_check": scope_check,
        "contract_violations": violations,
        "blocking_violations": violations,
    }
    # Classification fields may override the keys above; the three keys set
    # afterwards always win.
    payload.update(classification_fields)
    payload["next_step"] = self._next_step_hint(reason)
    payload["claim_validation_recommended"] = False
    payload["message"] = patch_msgs.STATE_ARTIFACT_VIOLATION_MESSAGE

    self._audit_emit(
        root=before.root,
        event_type=EVENT_PATCH_VIOLATED,
        severity="warn",
        run_id=_helpers._short_run_id(before.run_id),
        intent_id=bound_intent_id,
        report_digest=self._report_digest_value(before),
        status=PatchContractStatus.VIOLATED.value,
        payload=payload,
    )
    return payload
+ if scope_violated and strictness != "relaxed": + reason = "scope_violation" + violations = [reason] + payload: dict[str, object] = { + "mode": "verify", + "status": PatchContractStatus.VIOLATED.value, + "reason": reason, + "before": self._run_ref_payload(before), + "after": None, + "intent_id": (intent.intent_id if intent is not None else None), + "scope_check": scope_check, + "contract_violations": violations, + "blocking_violations": violations, + **profile_payload, + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": False, + "message": self._verify_message( + status=PatchContractStatus.VIOLATED.value, + violations=tuple(violations), + ), + } + self._audit_emit( + root=before.root, + event_type=EVENT_PATCH_VIOLATED, + severity="warn", + run_id=_helpers._short_run_id(before.run_id), + intent_id=(intent.intent_id if intent is not None else None), + report_digest=self._report_digest_value(before), + status=PatchContractStatus.VIOLATED.value, + payload=payload, + ) + return payload + + # Profiles that require after_run return unverified. + matrix = classification.to_payload() + if matrix["after_run_required"]: + reason = ( + "after_run_required_for_governance" + if profile == VerificationProfile.GOVERNANCE_CONFIG + else "no_after_run" + ) + return self._unverified_patch_contract( + reason=reason, + before=before, + classification=classification, + scope_check=scope_check, + ) + + # Documentation-only and non-python: accepted without after_run. 
+ limitations = list(profile_limitations(profile)) + status = PatchContractStatus.ACCEPTED.value + payload = { + "mode": "verify", + "status": status, + "reason": None, + "before": self._run_ref_payload(before), + "after": None, + "intent_id": (intent.intent_id if intent is not None else None), + "strictness": strictness, + "scope_check": scope_check, + "structural_delta": { + "verdict": "not_applicable", + "reason": "no_python_source_files_touched", + "regressions": [], + "improvements": [], + "health_delta": None, + }, + "contract_violations": [], + "blocking_violations": [], + **profile_payload, + "limitations": limitations, + "claim_validation_recommended": self._claim_validation_recommended( + classification + ), + "message": profile_accepted_message(profile), + } + self._audit_emit( + root=before.root, + event_type=EVENT_PATCH_VERIFIED, + severity="info", + run_id=_helpers._short_run_id(before.run_id), + intent_id=(intent.intent_id if intent is not None else None), + report_digest=self._report_digest_value(before), + status=status, + payload=payload, + ) + return payload + + def _full_structural_verify( + self, + *, + before: MCPRunRecord, + after: MCPRunRecord, + intent: IntentRecord | None, + strictness: StrictnessProfile, + classification: ClassificationResult, + scope_check: dict[str, object] | None, + actual_changed_files: tuple[str, ...], + ) -> dict[str, object]: + """Full structural verification path (before + after runs).""" + compare_payload = self.compare_runs( + run_id_before=before.run_id, + run_id_after=after.run_id, + focus="all", + ) + if not bool(compare_payload.get("comparable")): + return self._unverified_patch_contract( + reason="incomparable_runs", + before=before, + after=after, + structural_delta=self._structural_delta(compare_payload), + classification=classification, + ) + budgets = self._budgets_for_record(record=after, strictness=strictness) + before_gate = self._gate_preview(record=before, budgets=budgets) + after_gate = 
self._gate_preview(record=after, budgets=budgets) + structural_delta = self._structural_delta(compare_payload) + regressions = _as_sequence(structural_delta.get("regressions")) + intent_regressions, external_regressions = self._partition_regressions( + after=after, + regressions=regressions, + intent=intent, + ) + worsened = self._worsened_symbols(before=before, after=after) + intent_worsened, external_worsened = self._partition_worsened( + worsened=worsened, + intent=intent, + ) + before_gate_fails = bool(before_gate["would_fail"]) + after_gate_fails = bool(after_gate["would_fail"]) + gate_worsened = not before_gate_fails and after_gate_fails + intent_caused_gate_failure = ( + after_gate_fails + if intent is None + else bool(intent_regressions or intent_worsened) + ) + gate_contract_failure = ( + after_gate_fails + if intent is None + else gate_worsened and intent_caused_gate_failure + ) + external_gate_failure = ( + intent is not None and gate_worsened and not intent_caused_gate_failure + ) + baseline_abuse = detect_baseline_abuse( + before_gate_would_fail=before_gate_fails, + after_gate_would_fail=after_gate_fails, + after_baseline_status=baseline_status(after.report_document), + regressions=len(regressions), + changed_files=len(actual_changed_files), + intent_available=intent is not None, + ) + violations = self._contract_violations( + intent_regressions=intent_regressions, + gate_contract_failure=gate_contract_failure, + scope_check=scope_check, + baseline_abuse=baseline_abuse, + ) + blocking_violations = () if strictness == "relaxed" else violations + external_context = bool(external_regressions or external_gate_failure) + if blocking_violations: + status = PatchContractStatus.VIOLATED.value + elif external_context: + status = PatchContractStatus.ACCEPTED_EXTERNAL.value + else: + status = PatchContractStatus.ACCEPTED.value + profile_payload = classification.to_payload() + violated = status == PatchContractStatus.VIOLATED.value + health_delta_value = 
structural_delta.get("health_delta") + health_regression_advisory: dict[str, object] | None = None + if ( + isinstance(health_delta_value, int) + and health_delta_value < 0 + and status + in { + PatchContractStatus.ACCEPTED.value, + PatchContractStatus.ACCEPTED_EXTERNAL.value, + } + ): + from .messages import patch_contract as patch_msgs + + health_regression_advisory = { + "health_delta": health_delta_value, + "message": patch_msgs.HEALTH_REGRESSION_ADVISORY, + } + payload: dict[str, object] = { + "mode": "verify", + "status": status, + "reason": None, + "before": self._run_ref_payload(before), + "after": self._run_ref_payload(after), + "intent_id": (intent.intent_id if intent is not None else None), + "strictness": strictness, + "structural_delta": structural_delta, + "intent_regressions": intent_regressions, + "external_regressions": external_regressions, + "worsened": worsened, + "intent_worsened": intent_worsened, + "external_worsened": external_worsened, + "scope_check": scope_check, + "before_gate": before_gate, + "gate_preview": after_gate, + "gate_worsened": gate_worsened, + "intent_caused_gate_failure": intent_caused_gate_failure, + "baseline_abuse": baseline_abuse, + "contract_violations": list(violations), + "blocking_violations": list(blocking_violations), + **profile_payload, + "claim_validation_recommended": not violated, + "message": self._verify_message( + status=status, + violations=violations, + health_delta=( + health_delta_value if isinstance(health_delta_value, int) else None + ), + ), + } + if health_regression_advisory is not None: + payload["health_regression_advisory"] = health_regression_advisory + event_type = ( + EVENT_PATCH_VIOLATED + if status == PatchContractStatus.VIOLATED.value + else EVENT_PATCH_VERIFIED + ) + audit_sequence = self._audit_emit( + root=after.root, + event_type=event_type, + severity="warn" if blocking_violations else "info", + run_id=_helpers._short_run_id(after.run_id), + intent_id=(intent.intent_id if intent is 
not None else None), + report_digest=self._report_digest_value(after), + status=status, + payload=payload, + ) + if audit_sequence is not None: + payload["_audit_sequence"] = audit_sequence + if bool(baseline_abuse.get("detected")): + self._audit_emit( + root=after.root, + event_type=EVENT_BASELINE_ABUSE, + severity="error", + run_id=_helpers._short_run_id(after.run_id), + intent_id=(intent.intent_id if intent is not None else None), + report_digest=self._report_digest_value(after), + status="detected", + payload=payload, + ) + return payload + + def _scope_check_payload( + self, + *, + intent: IntentRecord, + actual: Sequence[str], + ) -> dict[str, object]: + check_result = self._intent_check_result(intent=intent, actual=actual) + assert isinstance(check_result, IntentCheckResult) + return check_result.to_payload() + + def _partition_regressions( + self, + *, + after: MCPRunRecord, + regressions: Sequence[object], + intent: IntentRecord | None, + ) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + if intent is None: + return ( + [ + self._regression_card_with_paths(regression, paths=frozenset()) + for regression in regressions + ], + [], + ) + path_index = self._finding_path_index(after) + intent_regressions: list[dict[str, object]] = [] + external_regressions: list[dict[str, object]] = [] + for regression in regressions: + regression_map = _as_mapping(regression) + regression_id = str(regression_map.get("id", "")).strip() + paths = path_index.get(regression_id, frozenset()) + card = self._regression_card_with_paths(regression_map, paths=paths) + if self._paths_in_intent_scope(paths=paths, scope=intent.scope): + intent_regressions.append(card) + else: + external_regressions.append(card) + return intent_regressions, external_regressions + + def _finding_path_index( + self, + record: MCPRunRecord, + ) -> dict[str, frozenset[str]]: + index: dict[str, frozenset[str]] = {} + for finding in self._base_findings(record): + finding_id = str(finding.get("id", 
"")).strip() + if not finding_id: + continue + paths = self._finding_paths(finding) + index[finding_id] = paths + index[self._short_finding_id(record, finding_id)] = paths + return index + + def _finding_paths(self, finding: Mapping[str, object]) -> frozenset[str]: + paths: set[str] = set() + for key in ("locations", "items"): + for item in _as_sequence(finding.get(key)): + item_map = _as_mapping(item) + for path_key in ("file", "relative_path", "path", "filepath"): + path = self._normalized_report_path(item_map.get(path_key)) + if path: + paths.add(path) + for path_key in ("file", "relative_path", "path", "filepath"): + path = self._normalized_report_path(finding.get(path_key)) + if path: + paths.add(path) + return frozenset(sorted(paths)) + + def _regression_card_with_paths( + self, + regression: object, + *, + paths: frozenset[str], + ) -> dict[str, object]: + card = dict(_as_mapping(regression)) + card["paths"] = sorted(paths) + return card + + def _partition_worsened( + self, + *, + worsened: Sequence[Mapping[str, object]], + intent: IntentRecord | None, + ) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + if intent is None: + return ([dict(item) for item in worsened], []) + intent_worsened: list[dict[str, object]] = [] + external_worsened: list[dict[str, object]] = [] + for item in worsened: + item_copy = dict(item) + path = self._normalized_report_path(item.get("path")) + if not path or self._path_in_scope(path=path, scope=intent.scope): + intent_worsened.append(item_copy) + else: + external_worsened.append(item_copy) + return intent_worsened, external_worsened + + def _paths_in_intent_scope( + self, + *, + paths: frozenset[str], + scope: IntentScope, + ) -> bool: + if not paths: + return True + return any(self._path_in_scope(path=path, scope=scope) for path in paths) + + def _path_in_scope(self, *, path: str, scope: IntentScope) -> bool: + patterns = (*scope.allowed_files, *scope.allowed_related) + return any( + path == pattern or 
fnmatchcase(path, pattern) for pattern in patterns + ) + + def _normalized_report_path(self, value: object) -> str: + path = str(value or "").replace("\\", "/").strip() + if path == ".": + return "" + if path.startswith("./"): + path = path[2:] + return path.rstrip("/") + + def _contract_violations( + self, + *, + intent_regressions: Sequence[object], + gate_contract_failure: bool, + scope_check: Mapping[str, object] | None, + baseline_abuse: Mapping[str, object], + ) -> tuple[str, ...]: + violations: list[str] = [] + if intent_regressions: + violations.append("structural_regressions") + if gate_contract_failure: + violations.append("gate_failures") + if ( + scope_check is not None + and scope_check.get("status") == IntentStatus.VIOLATED.value + ): + violations.append("scope_violation") + violations.extend( + f"baseline_abuse:{trigger}" + for trigger in _as_sequence(baseline_abuse.get("triggers")) + ) + return tuple(violations) + + def _structural_delta( + self, compare_payload: Mapping[str, object] + ) -> dict[str, object]: + return { + "regressions": list(_as_sequence(compare_payload.get("regressions"))), + "improvements": list(_as_sequence(compare_payload.get("improvements"))), + "health_delta": compare_payload.get("health_delta"), + "verdict": str(compare_payload.get("verdict", "")), + } + + def _worsened_symbols( + self, + *, + before: MCPRunRecord, + after: MCPRunRecord, + ) -> list[dict[str, object]]: + worsened: list[dict[str, object]] = [] + for family, value_keys in ( + ("complexity", ("cyclomatic_complexity", "complexity", "value")), + ("coupling", ("cbo", "coupling", "value")), + ("cohesion", ("lcom4", "cohesion", "value")), + ): + before_items = self._metric_item_index( + before.report_document, + family=family, + value_keys=value_keys, + ) + after_items = self._metric_item_index( + after.report_document, + family=family, + value_keys=value_keys, + ) + for key, after_value in after_items.items(): + before_value = before_items.get(key) + if before_value 
is not None and after_value > before_value: + path, symbol = key + worsened.append( + { + "family": family, + "path": path, + "symbol": symbol, + "before": before_value, + "after": after_value, + "delta": after_value - before_value, + } + ) + return sorted( + worsened, + key=lambda item: ( + -_coerce_int(item.get("delta")), + str(item.get("family", "")), + str(item.get("path", "")), + str(item.get("symbol", "")), + ), + )[:MAX_WORSENED_ITEMS] + + def _metric_item_index( + self, + report_document: Mapping[str, object], + *, + family: str, + value_keys: Sequence[str], + ) -> dict[tuple[str, str], int]: + result: dict[tuple[str, str], int] = {} + for item in self._metric_family_items(report_document, family=family): + path = self._item_path(item) + symbol = self._item_symbol(item) + value = self._first_int(item, keys=value_keys) + if path or symbol: + result[(path, symbol)] = value + return result + + def _metric_family_items( + self, + report_document: Mapping[str, object], + *, + family: str, + ) -> tuple[Mapping[str, object], ...]: + metrics = _as_mapping(report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + family_payload = _as_mapping(families.get(family)) + return tuple( + _as_mapping(item) for item in _as_sequence(family_payload.get("items")) + ) + + def _family_max( + self, + report_document: Mapping[str, object], + *, + family: str, + keys: Sequence[str], + ) -> int: + values = [ + self._first_int(item, keys=keys) + for item in self._metric_family_items(report_document, family=family) + ] + return max(values, default=0) + + def _dead_code_high_confidence(self, report_document: Mapping[str, object]) -> int: + return sum( + 1 + for item in self._metric_family_items(report_document, family="dead_code") + if str(item.get("confidence", "")).strip().lower() == "high" + ) + + def _dependency_cycles( + self, + report_document: Mapping[str, object], + ) -> tuple[object, ...]: + metrics = _as_mapping(report_document.get("metrics")) + 
families = _as_mapping(metrics.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + return tuple(_as_sequence(dependencies.get("cycles"))) + + def _first_int(self, item: Mapping[str, object], *, keys: Sequence[str]) -> int: + for key in keys: + if key in item: + return _coerce_int(item.get(key)) + return 0 + + def _item_path(self, item: Mapping[str, object]) -> str: + for key in ("relative_path", "path", "filepath", "file"): + value = str(item.get(key, "")).strip() + if value: + return value.replace("\\", "/") + return "" + + def _item_symbol(self, item: Mapping[str, object]) -> str: + for key in ("qualname", "symbol", "name", "class_name", "function"): + value = str(item.get(key, "")).strip() + if value: + return value + return "" + + def _threshold_headroom(self, *, budget: int, current: int) -> int | None: + return budget - current if budget >= 0 else None + + def _run_ref_payload(self, record: MCPRunRecord) -> dict[str, object]: + return { + "run_id": _helpers._short_run_id(record.run_id), + "health": _helpers._summary_health_score(record.summary), + } + + def _unverified_patch_contract( + self, + *, + reason: str, + before: MCPRunRecord | None = None, + after: MCPRunRecord | None = None, + structural_delta: Mapping[str, object] | None = None, + classification: ClassificationResult | None = None, + scope_check: dict[str, object] | None = None, + ) -> dict[str, object]: + from .messages import patch_contract as patch_msgs + + profile_fields: dict[str, object] = ( + classification.to_payload() if classification is not None else {} + ) + message = ( + profile_unverified_message(classification.profile) + if classification is not None + else patch_msgs.VERIFY_UNVERIFIED_PREFIX.format(reason=reason) + ) + return { + "mode": "verify", + "status": PatchContractStatus.UNVERIFIED.value, + "reason": reason, + "before": (self._run_ref_payload(before) if before is not None else None), + "after": (self._run_ref_payload(after) if after is not None 
else None), + "structural_delta": dict(structural_delta or {}), + "scope_check": scope_check, + "contract_violations": [], + **profile_fields, + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": self._claim_validation_recommended( + classification + ), + "message": message, + } + + def _expired_patch_contract( + self, + *, + before: MCPRunRecord, + after: MCPRunRecord, + intent: IntentRecord, + ) -> dict[str, object]: + reason = "report_digest_mismatch" + from .messages import patch_contract as patch_msgs + + payload: dict[str, object] = { + "mode": "verify", + "status": PatchContractStatus.EXPIRED.value, + "reason": reason, + "before": self._run_ref_payload(before), + "after": self._run_ref_payload(after), + "intent_id": intent.intent_id, + "contract_violations": ["intent_expired"], + "next_step": self._next_step_hint(reason), + "claim_validation_recommended": False, + "message": patch_msgs.PATCH_CONTRACT_EXPIRED_MESSAGE, + } + self._audit_emit( + root=after.root, + event_type=EVENT_PATCH_EXPIRED, + severity="warn", + run_id=_helpers._short_run_id(after.run_id), + intent_id=intent.intent_id, + report_digest=self._report_digest_value(after), + status=PatchContractStatus.EXPIRED.value, + payload=payload, + ) + return payload + + def _budget_message( + self, + *, + strictness: StrictnessProfile, + gate_preview: Mapping[str, object], + ) -> str: + from .messages import patch_contract as patch_msgs + + return patch_msgs.budget_message( + relaxed=strictness == "relaxed", + would_fail=bool(gate_preview.get("would_fail")), + ) + + def _verify_message( + self, + *, + status: str, + violations: Sequence[str], + health_delta: int | None = None, + ) -> str: + from .messages import patch_contract as patch_msgs + + return patch_msgs.verify_message( + status=status, + violations=violations, + health_delta=health_delta, + ) + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def 
_as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +__all__ = ["_MCPSessionPatchContractMixin"] diff --git a/codeclone/surfaces/mcp/_session_review_receipt_mixin.py b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py new file mode 100644 index 00000000..b26616de --- /dev/null +++ b/codeclone/surfaces/mcp/_session_review_receipt_mixin.py @@ -0,0 +1,444 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import OrderedDict +from collections.abc import Mapping, Sequence + +from ...audit import EVENT_RECEIPT_CREATED +from ...contracts import REPORT_SCHEMA_VERSION +from ...utils.coerce import as_int as _coerce_int +from . 
import _session_helpers as _helpers +from ._intent import IntentRecord +from ._review_receipt import ( + RECEIPT_VERSION, + VALID_RECEIPT_FORMATS, + derive_baseline_status, + derive_claims_not_made, + derive_human_decision_points, + derive_patch_status, + derive_verification_profile_section, + receipt_verdict, + render_receipt_markdown, +) +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) + + +class _MCPSessionReviewReceiptMixin: + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + _review_state: dict[str, OrderedDict[str, str | None]] + _last_gate_results: dict[str, dict[str, object]] + + def create_review_receipt( + self, + *, + run_id: str | None = None, + intent_id: str | None = None, + format: str = "markdown", + include_blast_radius: bool = True, + include_patch_contract: bool = True, + ) -> dict[str, object]: + output_format = self._validated_receipt_format(format) + record = self._runs.get(run_id) + intent = self._receipt_intent(record=record, intent_id=intent_id) + changed_paths = self._receipt_changed_paths(record=record, intent=intent) + changed_findings = self._receipt_changed_findings( + record=record, + changed_paths=changed_paths, + ) + verification_profile = derive_verification_profile_section(changed_paths) + structural_delta = self._receipt_structural_delta( + record, + structural_checks_applicable=bool( + verification_profile.get("structural_checks_applicable", True) + ), + ) + reviewed_evidence = self._reviewed_evidence(record) + patch_contract = ( + self._receipt_patch_contract( + record=record, + intent=intent, + structural_delta=structural_delta, + changed_paths=changed_paths, + ) + if include_patch_contract + else None + ) + human_decisions = derive_human_decision_points( + changed_findings=changed_findings, + intent_status=self._intent_status(intent), + ) + patch_status = ( + str(patch_contract.get("status", "not_checked")) + if patch_contract is not None + else 
"not_checked" + ) + receipt: dict[str, object] = { + "receipt_version": RECEIPT_VERSION, + "generated_at_utc": self._receipt_generated_at(record), + "provenance": self._receipt_provenance(record), + "verification_profile": verification_profile, + "scope": self._receipt_scope(intent), + "blast_radius": ( + self._receipt_blast_radius(intent) if include_blast_radius else None + ), + "reviewed_evidence": reviewed_evidence, + "patch_contract": patch_contract, + "structural_delta": structural_delta, + "human_decision_points": human_decisions, + "claims_not_made": derive_claims_not_made(record.report_document), + "health": self._receipt_health(record), + "verdict": receipt_verdict( + reviewed_count=_coerce_int(reviewed_evidence.get("reviewed_count")), + gate_relevant_count=_coerce_int( + reviewed_evidence.get("total_gate_relevant") + ), + patch_status=patch_status, + human_decision_count=len(human_decisions), + ), + } + if output_format == "json": + self._audit_emit( + root=record.root, + event_type=EVENT_RECEIPT_CREATED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._receipt_digest(record), + status=str(receipt.get("verdict", "")), + payload={"receipt": receipt, "format": output_format}, + ) + return receipt + payload: dict[str, object] = { + "run_id": _helpers._short_run_id(record.run_id), + "format": output_format, + "content": render_receipt_markdown(receipt), + "receipt": receipt, + } + self._audit_emit( + root=record.root, + event_type=EVENT_RECEIPT_CREATED, + severity="info", + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id if intent is not None else None, + report_digest=self._receipt_digest(record), + status=str(receipt.get("verdict", "")), + payload=payload, + ) + return payload + + def _validated_receipt_format(self, value: str) -> str: + if value not in VALID_RECEIPT_FORMATS: + expected = ", ".join(sorted(VALID_RECEIPT_FORMATS)) + 
raise MCPServiceContractError( + f"Invalid value for format: {value!r}. Expected one of: {expected}." + ) + return "json" if value == "json" else "markdown" + + def _receipt_intent( + self, + *, + record: MCPRunRecord, + intent_id: str | None, + ) -> IntentRecord | None: + intent_record: MCPRunRecord | None = None + intent: IntentRecord | None + if intent_id is not None: + intent_record, intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + else: + intent = self._optional_intent(record=record, intent_id=None) + if intent is not None and intent.run_id != record.run_id: + intent_record = intent_record or self._runs.get(intent.run_id) + if intent_record.root != record.root: + raise MCPServiceContractError( + "Receipt intent must belong to the selected run or the same root." + ) + return intent + + def _receipt_changed_paths( + self, + *, + record: MCPRunRecord, + intent: IntentRecord | None, + ) -> tuple[str, ...]: + if intent is not None and intent.check_result is not None: + return tuple(intent.check_result.actual_changed_files) + return tuple(record.changed_paths) + + def _receipt_changed_findings( + self, + *, + record: MCPRunRecord, + changed_paths: tuple[str, ...], + ) -> list[dict[str, object]]: + if not changed_paths: + return [] + findings = self._base_findings(record) + return [ + finding + for finding in findings + if self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ) + ] + + def _receipt_provenance(self, record: MCPRunRecord) -> dict[str, object]: + return { + "report_digest": self._receipt_digest(record), + "report_schema_version": REPORT_SCHEMA_VERSION, + "baseline_status": derive_baseline_status(record.report_document), + "run_id": _helpers._short_run_id(record.run_id), + "root": str(record.root), + } + + def _receipt_digest(self, record: MCPRunRecord) -> str: + integrity = _helpers._as_mapping(record.report_document.get("integrity")) + digest = _helpers._as_mapping(integrity.get("digest")) + 
algorithm = str(digest.get("algorithm", "sha256")).strip() or "sha256" + return f"{algorithm}:{_helpers._report_digest(record.report_document)}" + + def _receipt_generated_at(self, record: MCPRunRecord) -> str: + meta = _helpers._as_mapping(record.report_document.get("meta")) + value = str(meta.get("report_generated_at_utc", "")).strip() + if value: + return value + runtime = _helpers._as_mapping(meta.get("runtime")) + value = str(runtime.get("report_generated_at_utc", "")).strip() + if value: + return value + return str(record.summary.get("analysis_started_at_utc", "")).strip() + + def _receipt_scope(self, intent: IntentRecord | None) -> dict[str, object] | None: + if intent is None: + return None + check = intent.check_result + scope_payload: dict[str, object] = { + "intent_id": intent.intent_id, + "intent_status": self._intent_status(intent), + "intent_description": intent.intent_description, + "declared_files": list(intent.scope.allowed_files), + "changed_files": list(check.actual_changed_files) if check else [], + "unexpected_files": list(check.unexpected_files) if check else [], + "forbidden_touched": list(check.forbidden_touched) if check else [], + "untouched_files": list(check.untouched_in_declared) if check else [], + } + if check and intent.blast_radius_summary: + summary = intent.blast_radius_summary + changed = set(check.actual_changed_files) + do_not_touch = _coerce_str_list(summary.get("do_not_touch_declared")) + scope_payload["do_not_touch_held"] = [ + path for path in do_not_touch if path not in changed + ] + return scope_payload + + def _intent_status(self, intent: IntentRecord | None) -> str | None: + if intent is None: + return None + if intent.check_result is not None: + return intent.check_result.status.value + return intent.status.value + + def _receipt_blast_radius( + self, + intent: IntentRecord | None, + ) -> dict[str, object] | None: + if intent is None or not intent.blast_radius_summary: + return None + summary = 
intent.blast_radius_summary + return { + "radius_level": summary.get("radius_level", "unknown"), + "direct_dependents_count": _coerce_int( + summary.get("direct_dependents_count") + ), + "clone_cohort_members_count": _coerce_int( + summary.get("clone_cohort_members_count") + ), + "do_not_touch_count": _coerce_int(summary.get("do_not_touch_count")), + } + + def _reviewed_evidence(self, record: MCPRunRecord) -> dict[str, object]: + findings = self._base_findings(record) + gate_relevant = [ + finding + for finding in findings + if str(finding.get("novelty", "")) == "new" + or str(finding.get("severity", "")) in {"critical", "warning"} + ] + with self._state_lock: + review_items = tuple( + self._review_state.get(record.run_id, OrderedDict()).items() + ) + items: list[dict[str, object]] = [] + for canonical_id, note in review_items: + finding = self._finding_by_id(record=record, canonical_id=canonical_id) + if finding is None: + continue + summary = self._finding_summary_card(record, finding) + items.append( + { + "finding_id": self._short_finding_id(record, canonical_id), + "kind": str(summary.get("kind") or "finding"), + "severity": str(summary.get("severity") or "info"), + "note": note, + } + ) + return { + "total_gate_relevant": len(gate_relevant), + "reviewed_count": len(items), + "items": items, + } + + def _finding_by_id( + self, + *, + record: MCPRunRecord, + canonical_id: str, + ) -> dict[str, object] | None: + for finding in self._base_findings(record): + if isinstance(finding, dict) and str(finding.get("id", "")) == canonical_id: + return finding + return None + + def _receipt_structural_delta( + self, + record: MCPRunRecord, + *, + structural_checks_applicable: bool = True, + ) -> dict[str, object]: + if not structural_checks_applicable: + return { + "available": False, + "regressions": 0, + "improvements": 0, + "health_delta": None, + "verdict": "not_applicable", + } + previous = self._previous_run_for_root(record) + if previous is None: + return { + 
"available": False, + "regressions": 0, + "improvements": 0, + "health_delta": None, + "verdict": "not_available", + } + compare_payload = self.compare_runs( + run_id_before=previous.run_id, + run_id_after=record.run_id, + focus="all", + ) + return { + "available": bool(compare_payload.get("comparable")), + "regressions": len( + _helpers._as_sequence(compare_payload.get("regressions")) + ), + "improvements": len( + _helpers._as_sequence(compare_payload.get("improvements")) + ), + "health_delta": compare_payload.get("health_delta"), + "verdict": str(compare_payload.get("verdict", "stable")), + } + + def _receipt_patch_contract( + self, + *, + record: MCPRunRecord, + intent: IntentRecord | None, + structural_delta: Mapping[str, object], + changed_paths: tuple[str, ...], + ) -> dict[str, object]: + with self._state_lock: + gate_result = self._last_gate_results.get(record.run_id) + gate_payload = dict(gate_result) if gate_result is not None else None + regressions = _coerce_int(structural_delta.get("regressions")) + intent_check_status = ( + intent.check_result.status.value + if intent is not None and intent.check_result is not None + else None + ) + baseline_abuse = self._receipt_baseline_abuse_detected( + record=record, + regressions=regressions, + changed_files=len(changed_paths), + ) + contract_violations = self._receipt_contract_violations( + gate_result=gate_payload, + intent_check_status=intent_check_status, + regressions=regressions, + baseline_abuse=baseline_abuse, + ) + return { + "status": derive_patch_status( + gate_result=gate_payload, + intent_check_status=intent_check_status, + regressions=regressions, + has_structural_delta=bool(structural_delta.get("available")), + ), + "regressions": regressions, + "improvements": _coerce_int(structural_delta.get("improvements")), + "health_delta": structural_delta.get("health_delta"), + "contract_violations": contract_violations, + "baseline_abuse_detected": baseline_abuse, + } + + def 
_receipt_baseline_abuse_detected( + self, + *, + record: MCPRunRecord, + regressions: int, + changed_files: int, + ) -> bool: + meta = _helpers._as_mapping(record.report_document.get("meta")) + baseline = _helpers._as_mapping(meta.get("baseline")) + return str(baseline.get("status", "")).strip() == "updated" and ( + regressions > 0 or changed_files > 0 + ) + + def _receipt_contract_violations( + self, + *, + gate_result: Mapping[str, object] | None, + intent_check_status: str | None, + regressions: int, + baseline_abuse: bool, + ) -> list[str]: + violations: list[str] = [] + if regressions > 0: + violations.append("structural_regressions") + if gate_result is not None and bool(gate_result.get("would_fail")): + violations.append("gate_failures") + if intent_check_status == "violated": + violations.append("scope_violation") + if baseline_abuse: + violations.append("baseline_abuse") + return violations + + def _receipt_health(self, record: MCPRunRecord) -> dict[str, object]: + health = _helpers._summary_health_payload(record.summary) + return { + "score": health.get("score"), + "grade": health.get("grade"), + "delta": _helpers._summary_health_delta(record.summary), + } + + +def _coerce_str_list(value: object) -> list[str]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return [] + return [str(item) for item in value if str(item).strip()] + + +__all__ = ["_MCPSessionReviewReceiptMixin"] diff --git a/codeclone/surfaces/mcp/_session_runtime.py b/codeclone/surfaces/mcp/_session_runtime.py index 9b5179dc..63123903 100644 --- a/codeclone/surfaces/mcp/_session_runtime.py +++ b/codeclone/surfaces/mcp/_session_runtime.py @@ -8,6 +8,8 @@ from pathlib import Path +from ...utils.repo_paths import RepoPathPolicy, resolve_under_repo_root + def validate_numeric_args(args: object) -> bool: return bool( @@ -32,8 +34,20 @@ def validate_numeric_args(args: object) -> bool: def resolve_cache_path(*, root_path: Path, args: object) -> Path: raw_value = 
getattr(args, "cache_path", None) if isinstance(raw_value, str) and raw_value.strip(): - return Path(raw_value).expanduser() - return root_path / ".cache" / "codeclone" / "cache.json" + allow_external_artifacts = bool( + getattr(args, "allow_external_artifacts", False) + ) + return resolve_under_repo_root( + root_path, + raw_value, + policy=RepoPathPolicy( + allow_absolute=True, + allow_external=allow_external_artifacts, + ), + ) + from ...paths.workspace import default_cache_path + + return default_cache_path(root_path) def _int_attr(args: object, name: str, default: int = 0) -> int: diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index 97be9961..ba8960e8 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -15,12 +15,13 @@ from json import JSONDecodeError from pathlib import Path from threading import RLock -from typing import Final, Literal, TypeVar +from typing import TYPE_CHECKING, Final, Literal, TypeVar import orjson from ... 
import __version__ from ...baseline import Baseline +from ...cache.entries import FileStat from ...cache.store import Cache from ...cache.versioning import CacheStatus from ...config.pyproject_loader import ( @@ -39,13 +40,11 @@ DEFAULT_SEGMENT_MIN_STMT, ) from ...contracts import ( - BASELINE_SCHEMA_VERSION, DEFAULT_COVERAGE_MIN, DEFAULT_JSON_REPORT_PATH, DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - DOCS_URL, REPORT_SCHEMA_VERSION, ) from ...core._types import OutputPaths @@ -94,7 +93,14 @@ design_group_id, structural_group_id, ) -from ...models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion +from ...models import ( + CoverageJoinResult, + FunctionRelationshipFacts, + MetricsDiff, + ModuleDep, + ProjectMetrics, + Suggestion, +) from ...report.gates.evaluator import GateResult as GatingResult from ...report.gates.evaluator import MetricGateConfig from ...report.gates.evaluator import evaluate_gates as _evaluate_report_gates @@ -102,10 +108,14 @@ from ...utils.coerce import as_float as _as_float from ...utils.coerce import as_int as _as_int from ...utils.git_diff import validate_git_diff_ref +from .messages.help_topics import HELP_TOPIC_SPECS as _HELP_TOPIC_SPECS from .payloads import paginate, resolve_finding_id, short_id +if TYPE_CHECKING: + from ._workspace_hygiene import DirtySnapshot + AnalysisMode = Literal["full", "clones_only"] -CachePolicy = Literal["reuse", "refresh", "off"] +CachePolicy = Literal["reuse", "off"] FreshnessKind = Literal["fresh", "mixed", "reused"] HotlistKind = Literal[ "most_actionable", @@ -129,6 +139,11 @@ "latest_runs", "review_state", "changed_scope", + "change_control", + "trust_boundaries", + "engineering_memory", + "implementation_context", + "verification_profiles", ] HelpDetail = Literal["compact", "normal"] MetricsDetailFamily = Literal[ @@ -164,6 +179,11 @@ _FOCUS_REPOSITORY: Final[SummaryFocus] = "repository" 
_FOCUS_PRODUCTION: Final[SummaryFocus] = "production" _FOCUS_CHANGED_PATHS: Final[SummaryFocus] = "changed_paths" +_MCP_GOVERNANCE_CONFIG_KEYS = frozenset( + { + "golden_fixture_paths", + } +) _MCP_CONFIG_KEYS = frozenset( { "min_loc", @@ -217,7 +237,7 @@ # Canonical report groups use FAMILY_CLONES ("clones"), while individual finding # payloads use FAMILY_CLONE ("clone"). _VALID_ANALYSIS_MODES = frozenset({"full", "clones_only"}) -_VALID_CACHE_POLICIES = frozenset({"reuse", "refresh", "off"}) +_VALID_CACHE_POLICIES = frozenset({"reuse", "off"}) _VALID_FINDING_FAMILIES = frozenset( {"all", "clone", "structural", "dead_code", "design"} ) @@ -236,6 +256,12 @@ "latest_runs", "review_state", "changed_scope", + "change_control", + "trust_boundaries", + "observability", + "engineering_memory", + "implementation_context", + "verification_profiles", } ) _VALID_HELP_DETAILS = frozenset({"compact", "normal"}) @@ -332,407 +358,6 @@ ChoiceT = TypeVar("ChoiceT", bound=str) -@dataclass(frozen=True) -class MCPHelpTopicSpec: - summary: str - key_points: tuple[str, ...] - recommended_tools: tuple[str, ...] - doc_links: tuple[tuple[str, str], ...] - warnings: tuple[str, ...] = () - anti_patterns: tuple[str, ...] 
= () - - -_MCP_BOOK_URL: Final = f"{DOCS_URL}book/" -_MCP_GUIDE_URL: Final = f"{DOCS_URL}mcp/" -_MCP_INTERFACE_DOC_LINK: Final[tuple[str, str]] = ( - "MCP interface contract", - f"{_MCP_BOOK_URL}20-mcp-interface/", -) -_BASELINE_DOC_LINK: Final[tuple[str, str]] = ( - "Baseline contract", - f"{_MCP_BOOK_URL}06-baseline/", -) -_CONFIG_DOC_LINK: Final[tuple[str, str]] = ( - "Config and defaults", - f"{_MCP_BOOK_URL}04-config-and-defaults/", -) -_REPORT_DOC_LINK: Final[tuple[str, str]] = ( - "Report contract", - f"{_MCP_BOOK_URL}08-report/", -) -_CLI_DOC_LINK: Final[tuple[str, str]] = ( - "CLI contract", - f"{_MCP_BOOK_URL}09-cli/", -) -_PIPELINE_DOC_LINK: Final[tuple[str, str]] = ( - "Core pipeline", - f"{_MCP_BOOK_URL}05-core-pipeline/", -) -_SUPPRESSIONS_DOC_LINK: Final[tuple[str, str]] = ( - "Inline suppressions contract", - f"{_MCP_BOOK_URL}19-inline-suppressions/", -) -_MCP_GUIDE_DOC_LINK: Final[tuple[str, str]] = ("MCP usage guide", _MCP_GUIDE_URL) -_HELP_TOPIC_SPECS: Final[dict[str, MCPHelpTopicSpec]] = { - "workflow": MCPHelpTopicSpec( - summary=( - "CodeClone MCP is triage-first and budget-aware. Start with a " - "summary or production triage, then narrow through hotspots or " - "focused checks before opening one finding in detail." - ), - key_points=( - "Recommended first pass: analyze_repository or analyze_changed_paths.", - ( - "Start with default or pyproject-resolved thresholds; lower them " - "only for an explicit higher-sensitivity follow-up pass." - ), - ( - "Use get_run_summary or get_production_triage before broad " - "finding listing." - ), - ( - "Prefer list_hotspots or focused check_* tools over " - "list_findings on noisy repositories." - ), - ("Use get_finding and get_remediation only after selecting an issue."), - ( - "get_report_section(section='all') is an exception path, not " - "a default first step." 
- ), - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "get_production_triage", - "list_hotspots", - "check_clones", - "check_dead_code", - "get_finding", - "get_remediation", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "Broad list_findings calls burn context quickly on large or " - "noisy repositories." - ), - ( - "Prefer generate_pr_summary(format='markdown') unless machine " - "JSON is explicitly required." - ), - ), - anti_patterns=( - "Starting exploration with list_findings on a noisy repository.", - "Using get_report_section(section='all') as the default first step.", - ( - "Escalating detail on larger lists instead of opening one " - "finding with get_finding." - ), - ), - ), - "analysis_profile": MCPHelpTopicSpec( - summary=( - "CodeClone default analysis is intentionally conservative: stable " - "first-pass review, baseline-aware governance, and CI-friendly " - "signal over maximum local sensitivity." - ), - key_points=( - ( - "Default thresholds are intentionally conservative and " - "production-friendly." - ), - ( - "A clean default run does not rule out smaller local " - "duplication or repetition." - ), - ( - "Lowering thresholds increases sensitivity and can surface " - "smaller functions, tighter windows, and finer local signals." - ), - ( - "Lower-threshold runs are best for exploratory local review, " - "not as a silent replacement for the default governance profile." - ), - "Interpret results in the context of the active threshold profile.", - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "compare_runs", - ), - doc_links=( - _CONFIG_DOC_LINK, - _PIPELINE_DOC_LINK, - _MCP_INTERFACE_DOC_LINK, - ), - warnings=( - ( - "Do not treat a default-threshold run as proof that no smaller " - "local clone or repetition exists." 
- ), - ( - "Lower-threshold runs usually increase noise and should be read " - "as higher-sensitivity exploratory passes." - ), - "Run comparisons are most meaningful when profiles are aligned.", - ), - anti_patterns=( - ( - "Assuming a clean default pass means no finer-grained " - "duplication exists anywhere in the repository." - ), - ( - "Lowering thresholds for exploration and then interpreting the " - "result as if it had the same meaning as the conservative " - "default pass." - ), - ( - "Mixing low-threshold exploratory output into baseline or CI " - "reasoning without acknowledging the profile change." - ), - ), - ), - "suppressions": MCPHelpTopicSpec( - summary=( - "CodeClone supports explicit inline suppressions for selected " - "findings. They are local policy, not analysis truth, and should " - "stay narrow and declaration-scoped." - ), - key_points=( - "Current syntax uses codeclone: ignore[rule-id,...].", - "Binding is declaration-scoped: def, async def, or class.", - ( - "Supported placement is the previous line or inline on the " - "declaration or header line." - ), - ( - "Suppressions are target-specific and do not imply file-wide " - "or cascading scope." - ), - ( - "Use suppressions for accepted dynamic or runtime false " - "positives, not to hide broad classes of debt." - ), - ), - recommended_tools=("get_finding", "get_remediation"), - doc_links=(_SUPPRESSIONS_DOC_LINK, _MCP_INTERFACE_DOC_LINK), - warnings=( - ( - "MCP explains suppression semantics but never creates or " - "updates suppressions." - ), - ), - anti_patterns=( - "Treating suppressions as file-wide or inherited state.", - ( - "Using suppressions to hide broad structural debt instead of " - "accepted false positives." - ), - ), - ), - "baseline": MCPHelpTopicSpec( - summary=( - "A baseline is CodeClone's accepted comparison snapshot for clones " - "and optional metrics. It separates known debt from new regressions " - "and is trust-checked before use." 
- ), - key_points=( - ( - f"Canonical baseline schema is v{BASELINE_SCHEMA_VERSION} " - "with meta and clone keys; metrics may be embedded for " - "unified flows." - ), - ( - "Compatibility depends on generator identity, supported " - "schema version, fingerprint version, python tag, and payload " - "integrity." - ), - ( - "Known means already present in the trusted baseline; new " - "means not accepted by baseline." - ), - ( - "In CI and gating contexts, untrusted baseline states are " - "contract errors rather than soft warnings." - ), - "MCP is read-only and does not update or rewrite baselines.", - ), - recommended_tools=("get_run_summary", "evaluate_gates", "compare_runs"), - doc_links=(_BASELINE_DOC_LINK,), - warnings=( - "Baseline trust semantics directly affect new-vs-known classification.", - ), - anti_patterns=( - "Treating baseline as mutable MCP session state.", - "Assuming an untrusted baseline is only cosmetic in CI contexts.", - ), - ), - "coverage": MCPHelpTopicSpec( - summary=( - "Coverage join is an external current-run signal: CodeClone reads " - "an existing Cobertura XML report and joins line hits to risky " - "function spans." - ), - key_points=( - "Use Cobertura XML such as `coverage xml` output from coverage.py.", - "Coverage join does not become baseline truth and does not affect health.", - ( - "Coverage hotspot gating is current-run only and focuses on " - "medium/high-risk functions measured below the configured " - "threshold." - ), - ( - "Functions missing from the supplied coverage.xml are surfaced " - "as scope gaps, not labeled as untested." 
- ), - "Use metrics_detail(family='coverage_join') for bounded drill-down.", - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "get_report_section", - "evaluate_gates", - ), - doc_links=( - _MCP_INTERFACE_DOC_LINK, - _CLI_DOC_LINK, - _REPORT_DOC_LINK, - ), - warnings=( - "Coverage join is only as accurate as the external XML path mapping.", - "It does not infer branch coverage and does not execute tests.", - "Use fail-on-untested-hotspots only with a valid joined coverage input.", - ), - anti_patterns=( - "Treating missing coverage XML as zero coverage without stating it.", - "Reading coverage join as a baseline-aware trend signal.", - "Assuming dynamic runtime dispatch is visible through a static line join.", - ), - ), - "latest_runs": MCPHelpTopicSpec( - summary=( - "latest/* resources point to the most recent analysis run in the " - "current MCP session. They are convenience handles, not persistent " - "truth anchors." - ), - key_points=( - "Run history is in-memory only and bounded by history-limit.", - "The latest pointer moves when a newer analyze_* call registers a run.", - "A fresh repository state requires a fresh analyze run.", - ( - "Short run ids are convenience handles derived from canonical " - "run identity." - ), - ( - "Do not assume latest/* is globally current outside the " - "active MCP session." - ), - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "compare_runs", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "latest/* can point at a different repository after a later " - "analyze call in the same session." - ), - ), - anti_patterns=( - ( - "Assuming latest/* remains tied to one repository across the " - "whole client session." - ), - ( - "Using latest/* as a substitute for starting a fresh run when " - "freshness matters." 
- ), - ), - ), - "review_state": MCPHelpTopicSpec( - summary=( - "Reviewed state in MCP is session-local workflow state. It helps " - "long sessions track review progress without modifying canonical " - "findings, baseline, or persisted artifacts." - ), - key_points=( - "Review markers are in-memory only.", - "They do not change report truth, finding identity, or CI semantics.", - "They are useful for triage workflows across long sessions.", - ( - "They should not be interpreted as acceptance, suppression, " - "or baseline update." - ), - ), - recommended_tools=( - "list_hotspots", - "get_finding", - "mark_finding_reviewed", - "list_reviewed_findings", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - "Reviewed markers disappear when the MCP session is cleared or restarted.", - ), - anti_patterns=( - "Treating reviewed state as a persistent acceptance signal.", - "Assuming reviewed findings are removed from canonical report truth.", - ), - ), - "changed_scope": MCPHelpTopicSpec( - summary=( - "Changed-scope analysis narrows review to findings that touch a " - "selected change set. It is for PR and patch review, not a " - "replacement for full canonical analysis." - ), - key_points=( - ( - "Use analyze_changed_paths with explicit changed_paths or " - "git_diff_ref for review-focused runs." - ), - ( - "Start with the same conservative profile as the default " - "review, then lower thresholds only when you explicitly want " - "a higher-sensitivity changed-files pass." - ), - ( - "Changed-scope is best for asking what new issues touch " - "modified files and whether anything should block CI." 
- ), - "Prefer production triage and hotspot views before broad listing.", - "If repository-wide truth is needed, run full analysis first.", - ), - recommended_tools=( - "analyze_changed_paths", - "get_run_summary", - "get_production_triage", - "evaluate_gates", - "generate_pr_summary", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "Changed-scope narrows review focus; it does not replace the " - "full canonical report for repository-wide truth." - ), - ), - anti_patterns=( - "Using changed-scope as if it were the only source of repository truth.", - ( - "Starting changed-files review with broad listing instead of " - "compact triage." - ), - ), - ), -} - - def _suggestion_finding_id_payload(suggestion: object) -> str: if not hasattr(suggestion, "finding_family"): return "" @@ -971,6 +596,7 @@ class MCPAnalysisRequest: cache_policy: CachePolicy = "reuse" cache_path: str | None = None max_cache_size_mb: int | None = None + allow_external_artifacts: bool = False @dataclass(frozen=True, slots=True) @@ -994,6 +620,14 @@ class MCPGateRequest: coverage_min: int = DEFAULT_COVERAGE_MIN +@dataclass(frozen=True, slots=True) +class MCPUnitLocation: + qualname: str + path: str + start_line: int + end_line: int + + @dataclass(frozen=True, slots=True) class MCPRunRecord: run_id: str @@ -1014,6 +648,11 @@ class MCPRunRecord: new_func: frozenset[str] new_block: frozenset[str] metrics_diff: MetricsDiff | None + manifest: Mapping[str, FileStat] | None = None + dirty_snapshot: DirtySnapshot | None = None + unit_inventory: tuple[MCPUnitLocation, ...] = () + relationship_facts: tuple[FunctionRelationshipFacts, ...] = () + module_imports: tuple[ModuleDep, ...] 
= () class CodeCloneMCPRunStore: @@ -1022,6 +661,7 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._lock = RLock() self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() self._latest_run_id: str | None = None + self._pinned_run_ids: set[str] = set() def register(self, record: MCPRunRecord) -> MCPRunRecord: with self._lock: @@ -1029,8 +669,7 @@ def register(self, record: MCPRunRecord) -> MCPRunRecord: self._records[record.run_id] = record self._records.move_to_end(record.run_id) self._latest_run_id = record.run_id - while len(self._records) > self._history_limit: - self._records.popitem(last=False) + self._prune_unpinned_locked() return record def get(self, run_id: str | None = None) -> MCPRunRecord: @@ -1060,13 +699,44 @@ def records(self) -> tuple[MCPRunRecord, ...]: with self._lock: return tuple(self._records.values()) + def pin(self, run_id: str) -> str: + with self._lock: + resolved_run_id = self._resolve_run_id(run_id) + if resolved_run_id is None: + raise MCPRunNotFoundError("No matching MCP analysis run is available.") + self._pinned_run_ids.add(resolved_run_id) + return resolved_run_id + + def unpin(self, run_id: str) -> None: + with self._lock: + resolved_run_id = self._resolve_run_id(run_id) or run_id + self._pinned_run_ids.discard(resolved_run_id) + self._prune_unpinned_locked() + def clear(self) -> tuple[str, ...]: with self._lock: removed_run_ids = tuple(self._records.keys()) self._records.clear() + self._pinned_run_ids.clear() self._latest_run_id = None return removed_run_ids + def _prune_unpinned_locked(self) -> None: + while self._unpinned_count_locked() > self._history_limit: + for run_id in tuple(self._records): + if run_id in self._pinned_run_ids: + continue + self._records.pop(run_id, None) + if self._latest_run_id == run_id: + self._latest_run_id = next(reversed(self._records), None) + break + else: + break + self._pinned_run_ids.intersection_update(self._records) + + def 
_unpinned_count_locked(self) -> int: + return sum(1 for run_id in self._records if run_id not in self._pinned_run_ids) + __all__ = [ "CATEGORY_CLONE", @@ -1119,6 +789,7 @@ def clear(self) -> tuple[str, ...]: "_HELP_TOPIC_SPECS", "_HOTLIST_REPORT_KEYS", "_MCP_CONFIG_KEYS", + "_MCP_GOVERNANCE_CONFIG_KEYS", "_METRICS_DETAIL_FAMILY_ALIASES", "_NOVELTY_WEIGHT", "_REPORT_DUMMY_PATH", diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py index a8d58e53..da11545e 100644 --- a/codeclone/surfaces/mcp/_session_state_mixin.py +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -8,6 +8,8 @@ from ...baseline.metrics_baseline import probe_metrics_baseline_section from . import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult +from ._intent import IntentRecord from ._session_baseline import ( CloneBaselineState, MetricsBaselineState, @@ -20,6 +22,7 @@ _HEALTH_SCOPE_REPOSITORY, _HELP_TOPIC_SPECS, _MCP_CONFIG_KEYS, + _MCP_GOVERNANCE_CONFIG_KEYS, _METRICS_DETAIL_FAMILY_ALIASES, _VALID_COMPARISON_FOCUS, _VALID_HELP_DETAILS, @@ -53,6 +56,7 @@ Mapping, MCPAnalysisRequest, MCPGateRequest, + MCPRunNotFoundError, MCPRunRecord, MCPServiceContractError, MetricGateConfig, @@ -71,6 +75,8 @@ load_pyproject_config, paginate, ) +from ._workspace_drift import compute_drift +from ._workspace_intents import remove_workspace_intent class _MCPSessionChangedProjectionMixin(_MCPSessionFindingMixin): @@ -79,6 +85,9 @@ class _MCPSessionChangedProjectionMixin(_MCPSessionFindingMixin): _review_state: dict[str, OrderedDict[str, str | None]] _last_gate_results: dict[str, dict[str, object]] _spread_max_cache: dict[str, int] + _active_intents: dict[str, IntentRecord] + _agent_pid: int + _agent_start_epoch: int def _build_changed_projection( self, @@ -229,17 +238,24 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp debug=False, open_html_report=False, timestamped_report_paths=False, + 
allow_external_artifacts=request.allow_external_artifacts, ) + try: + config_values = load_pyproject_config(root_path) + except ConfigValidationError as exc: + raise MCPServiceContractError(str(exc)) from exc if request.respect_pyproject: - try: - config_values = load_pyproject_config(root_path) - except ConfigValidationError as exc: - raise MCPServiceContractError(str(exc)) from exc - for key in sorted(_MCP_CONFIG_KEYS.intersection(config_values)): - setattr(args, key, config_values[key]) + config_keys = _MCP_CONFIG_KEYS + else: + config_keys = _MCP_GOVERNANCE_CONFIG_KEYS + for key in sorted(config_keys.intersection(config_values)): + setattr(args, key, config_values[key]) self._apply_request_overrides(args=args, root_path=root_path, request=request) + if isinstance(args.processes, int): + args.processes = _helpers._cap_mcp_process_count(args.processes) + if request.analysis_mode == "clones_only": args.skip_metrics = True args.skip_dead_code = True @@ -286,22 +302,36 @@ def _apply_request_overrides( if request.baseline_path is not None: args.baseline = str( - _helpers._resolve_optional_path(request.baseline_path, root_path) + _helpers._resolve_optional_path( + request.baseline_path, + root_path, + allow_external_artifacts=request.allow_external_artifacts, + ) ) if request.metrics_baseline_path is not None: args.metrics_baseline = str( _helpers._resolve_optional_path( request.metrics_baseline_path, root_path, + allow_external_artifacts=request.allow_external_artifacts, ) ) if request.cache_path is not None: args.cache_path = str( - _helpers._resolve_optional_path(request.cache_path, root_path) + _helpers._resolve_optional_path( + request.cache_path, + root_path, + allow_external_artifacts=request.allow_external_artifacts, + ) ) if request.coverage_xml is not None: args.coverage_xml = str( - _helpers._resolve_optional_path(request.coverage_xml, root_path) + _helpers._resolve_optional_path( + request.coverage_xml, + root_path, + 
allow_external_artifacts=request.allow_external_artifacts, + allow_repo_absolute=True, + ) ) def _resolve_baseline_inputs( @@ -310,12 +340,22 @@ def _resolve_baseline_inputs( root_path: Path, args: Namespace, ) -> tuple[Path, bool, Path, bool, dict[str, object] | None]: - baseline_path = _helpers._resolve_optional_path(str(args.baseline), root_path) + allow_external_artifacts = bool( + getattr(args, "allow_external_artifacts", False) + ) + baseline_path = _helpers._resolve_optional_path( + str(args.baseline), + root_path, + allow_external_artifacts=allow_external_artifacts, + allow_repo_absolute=True, + ) baseline_exists = baseline_path.exists() metrics_baseline_arg_path = _helpers._resolve_optional_path( str(args.metrics_baseline), root_path, + allow_external_artifacts=allow_external_artifacts, + allow_repo_absolute=True, ) shared_baseline_payload: dict[str, object] | None = None if metrics_baseline_arg_path == baseline_path: @@ -383,6 +423,7 @@ def _changed_analysis_payload( "resolved_findings": 0, "changed_findings": [], "coverage_join": _helpers._summary_coverage_join_payload(record), + "next_tool": "get_report_section", } def _build_run_summary_payload( @@ -533,6 +574,10 @@ def _summary_payload( security_surfaces = _helpers._summary_security_surfaces_payload(record) if security_surfaces: payload["security_surfaces"] = security_surfaces + payload["drifted_files"] = list(compute_drift(record).drifted_files) + payload["next_tool"] = "get_production_triage" + if record is not None: + _helpers.attach_workspace_hygiene_tips(payload, root=record.root) return payload def _summary_baseline_payload( @@ -782,6 +827,9 @@ class _MCPSessionStateMixin(_MCPSessionReportMixin): _review_state: dict[str, OrderedDict[str, str | None]] _last_gate_results: dict[str, dict[str, object]] _spread_max_cache: dict[str, int] + _blast_radius_cache: dict[tuple[str, tuple[str, ...], str], BlastRadiusResult] + _active_intents: dict[str, IntentRecord] + _intent_sequence: int def 
evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: record = self._runs.get(request.run_id) @@ -1008,6 +1056,7 @@ def get_production_triage( security_surfaces = _helpers._summary_security_surfaces_payload(record) if security_surfaces: payload["security_surfaces"] = security_surfaces + _helpers.attach_workspace_hygiene_tips(payload, root=record.root) return payload def get_help( @@ -1033,13 +1082,40 @@ def get_help( {"title": title, "url": url} for title, url in spec.doc_links ], } - if validated_detail == "normal": - if spec.warnings: - payload["warnings"] = list(spec.warnings) - if spec.anti_patterns: - payload["anti_patterns"] = list(spec.anti_patterns) + if spec.anti_patterns: + payload["anti_patterns"] = list(spec.anti_patterns) + if validated_detail == "normal" and spec.warnings: + payload["warnings"] = list(spec.warnings) return payload + def query_platform_observability( + self, + *, + root: str, + section: str, + detail_level: str = "compact", + limit: int = 10, + window: str = "latest", + operation_id: str | None = None, + span_id: str | None = None, + ) -> dict[str, object]: + # Dev-only telemetry slicer; read-only, never touches analysis/memory/ + # audit state. Local import keeps the MCP session import-light and avoids + # shadowing this method name. 
+ from ...observability.query import ( + query_platform_observability as _query_observability, + ) + + return _query_observability( + root=root, + section=section, + detail_level=detail_level, + limit=limit, + window=window, + operation_id=operation_id, + span_id=span_id, + ) + def generate_pr_summary( self, *, @@ -1109,6 +1185,15 @@ def generate_pr_summary( } def clear_session_runs(self) -> dict[str, object]: + workspace_targets: list[tuple[Path, str]] = [] + with self._state_lock: + intent_snapshot = tuple(self._active_intents.values()) + for intent in intent_snapshot: + try: + record = self._runs.get(intent.run_id) + except (MCPRunNotFoundError, MCPServiceContractError): + continue + workspace_targets.append((record.root, intent.intent_id)) removed_run_ids = self._runs.clear() with self._state_lock: cleared_review_entries = sum( @@ -1116,9 +1201,25 @@ def clear_session_runs(self) -> dict[str, object]: ) cleared_gate_results = len(self._last_gate_results) cleared_spread_cache_entries = len(self._spread_max_cache) + cleared_blast_radius_entries = len(self._blast_radius_cache) + cleared_intents = len(self._active_intents) self._review_state.clear() self._last_gate_results.clear() self._spread_max_cache.clear() + self._blast_radius_cache.clear() + self._active_intents.clear() + self._intent_sequence = 0 + workspace_cleared = True + for root_path, intent_id in workspace_targets: + workspace_cleared = ( + remove_workspace_intent( + root=root_path, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + and workspace_cleared + ) return { "cleared_runs": len(removed_run_ids), "cleared_run_ids": [ @@ -1127,6 +1228,9 @@ def clear_session_runs(self) -> dict[str, object]: "cleared_review_entries": cleared_review_entries, "cleared_gate_results": cleared_gate_results, "cleared_spread_cache_entries": cleared_spread_cache_entries, + "cleared_blast_radius_entries": cleared_blast_radius_entries, + "cleared_intents": cleared_intents, + 
"workspace_cleared": workspace_cleared, } def read_resource(self, uri: str) -> str: @@ -1139,7 +1243,7 @@ def read_resource(self, uri: str) -> str: run_prefix = "codeclone://runs/" if uri.startswith(latest_prefix): latest = self._runs.get() - suffix = uri[len(latest_prefix) :] + suffix = _helpers._validate_resource_suffix(uri[len(latest_prefix) :]) return self._render_resource(latest, suffix) if not uri.startswith(run_prefix): raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") @@ -1147,6 +1251,7 @@ def read_resource(self, uri: str) -> str: run_id, sep, suffix = remainder.partition("/") if not sep: raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") + suffix = _helpers._validate_resource_suffix(suffix) record = self._runs.get(run_id) return self._render_resource(record, suffix) @@ -1209,3 +1314,17 @@ def _prune_session_state(self) -> None: ] for run_id in stale_run_ids: state_map.pop(run_id, None) + stale_blast_radius_keys = [ + cache_key + for cache_key in self._blast_radius_cache + if cache_key[0] not in active_run_ids + ] + for cache_key in stale_blast_radius_keys: + self._blast_radius_cache.pop(cache_key, None) + stale_intent_ids = [ + intent_id + for intent_id, intent in self._active_intents.items() + if intent.run_id not in active_run_ids + ] + for intent_id in stale_intent_ids: + self._active_intents.pop(intent_id, None) diff --git a/codeclone/surfaces/mcp/_session_workflow_mixin.py b/codeclone/surfaces/mcp/_session_workflow_mixin.py new file mode 100644 index 00000000..26b4e8d0 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_workflow_mixin.py @@ -0,0 +1,999 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Workflow-level orchestration for agent change control. 
+ +``start_controlled_change`` and ``finish_controlled_change`` aggregate +atomic change-control steps into two workflow calls. They call existing +internal methods only — no new engine logic. + +Design invariants (phase-16 spec): +- No implicit ``analyze_repository``. +- No hidden boundary decisions. +- ``check`` before ``verify`` is mandatory (check writes state). +- Changed files resolved once from exactly one source. +- ``auto_clear`` only on ``accepted`` / ``accepted_with_external_changes``. +- Audit events are emitted by the internal methods, not duplicated here. +""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import Final + +from ...audit.events import EVENT_PATCH_TRAIL_COMPUTED +from ...memory.trajectory.patch_trail import compute_patch_trail +from . import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult, blast_radius_to_payload +from ._intent import IntentRecord, IntentStatus +from ._patch_contract import PatchContractStatus +from ._patch_trail_bridge import build_patch_trail_inputs +from ._session_shared import ( + CodeCloneMCPRunStore, + MCPRunRecord, + MCPServiceContractError, +) +from ._workspace_hygiene import WorkspaceHygieneResult +from .messages import errors as err_msgs +from .messages import workflow as workflow_msgs + +TRANSITIVE_SUMMARY_LIMIT: Final[int] = 10 + +VALID_BLAST_RADIUS_DEPTHS: Final[frozenset[str]] = frozenset( + {"direct", "transitive", "auto"} +) + +_ACCEPTED_STATUSES: Final[frozenset[str]] = frozenset( + { + PatchContractStatus.ACCEPTED.value, + PatchContractStatus.ACCEPTED_EXTERNAL.value, + } +) + + +class _MCPSessionWorkflowMixin: + """Workflow orchestration over atomic change-control primitives.""" + + _runs: CodeCloneMCPRunStore + _active_intents: dict[str, IntentRecord] + + # ------------------------------------------------------------------ + # start_controlled_change + # 
------------------------------------------------------------------ + + def start_controlled_change( + self, + *, + root: str, + scope: dict[str, object], + intent: str, + expected_effects: Sequence[str] | None = None, + on_conflict: str | None = None, + strictness: str = "ci", + ttl_seconds: int | None = None, + blast_radius_depth: str = "auto", + dirty_scope_policy: str = "block", + ) -> dict[str, object]: + validated_depth = _validated_blast_radius_depth(blast_radius_depth) + validated_dirty_scope_policy = _validated_dirty_scope_policy(dirty_scope_policy) + root_path = _helpers._resolve_root(root) + + # 1. Workspace check (lazy close inside list_workspace) + self._list_workspace_intents(root=root) + + # 2. Root-aware run resolution (not _runs.get(None) — multi-repo safe) + record = self._latest_run_for_root(root_path) + if record is None: + return _helpers.attach_workspace_hygiene_tips( + { + "status": "needs_analysis", + "intent_id": None, + "edit_allowed": False, + "root": str(root_path), + "message": workflow_msgs.START_NEEDS_ANALYSIS, + "workspace": _workspace_summary_from_declare({}, {}), + }, + root=root_path, + ) + + # 3. 
Declare intent + declare_payload = self._declare_change_intent( + run_id=record.run_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + ttl_seconds=ttl_seconds, + on_conflict=on_conflict, + ) + + intent_id = str(declare_payload.get("intent_id", "")) + declare_status = str(declare_payload.get("status", "")) + + # Queued: no blast radius or budget + if declare_status == IntentStatus.QUEUED.value: + workspace_after = self._list_workspace_intents(root=root) + queued_payload: dict[str, object] = { + "intent_id": intent_id, + "status": "queued", + "run_id": _helpers._short_run_id(record.run_id), + "blocked_by": declare_payload.get("blocked_by", []), + "queue_position": declare_payload.get("queue_position", 1), + "before_run_pinned": declare_payload.get("before_run_pinned", False), + "edit_allowed": False, + "workspace": _workspace_summary_from_declare( + workspace_after, + declare_payload, + ), + "message": workflow_msgs.START_QUEUED, + } + dirty_snapshot = declare_payload.get("dirty_snapshot") + if isinstance(dirty_snapshot, dict): + queued_payload["dirty_snapshot"] = dirty_snapshot + return _helpers.attach_workspace_hygiene_tips( + queued_payload, + root=root_path, + ) + + # 4. Fresh workspace snapshot after declare + workspace_after = self._list_workspace_intents(root=root) + + with self._state_lock: + active_intent = self._active_intents.get(intent_id) + if active_intent is None: + raise MCPServiceContractError( + f"Intent {intent_id} not found after declare." + ) + + from ._workspace_hygiene import evaluate_scoped_hygiene + from ._workspace_intent_store import get_workspace_intent_store + + hygiene = evaluate_scoped_hygiene( + root=root_path, + allowed_files=active_intent.scope.allowed_files, + allowed_related=active_intent.scope.allowed_related, + store=get_workspace_intent_store(root_path), + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + own_intent_id=intent_id, + ) + + # 5. 
Blast radius (full payload, not just declare's subset) + blast_result = self._blast_radius_result( + record=record, + files=active_intent.scope.allowed_paths, + depth="direct", + forbidden_patterns=active_intent.scope.forbidden, + ) + blast_payload = blast_radius_to_payload(blast_result) + + # 6. Transitive summary (auto-escalated or explicit) + transitive_summary = self._compute_transitive_summary( + record=record, + intent=active_intent, + blast_result=blast_result, + depth=validated_depth, + ) + if transitive_summary is not None: + blast_payload["transitive_summary"] = transitive_summary + + # 7. Budget + budget_payload = self._patch_contract_budget( + run_id=record.run_id, + intent_id=intent_id, + strictness=self._validated_strictness(strictness), + ) + + concurrent_intents = _as_conflict_list( + declare_payload.get("concurrent_intents") + ) + coordination_blocked = bool(concurrent_intents) and on_conflict != "queue" + edit_allowed = _start_edit_allowed( + declare_status=declare_status, + concurrent_intents=concurrent_intents, + on_conflict=on_conflict, + hygiene=hygiene, + dirty_scope_policy=validated_dirty_scope_policy, + ) + workflow_status = _start_workflow_status( + declare_status=declare_status, + coordination_blocked=coordination_blocked, + hygiene=hygiene, + dirty_scope_policy=validated_dirty_scope_policy, + ) + + continuing_own_wip = ( + validated_dirty_scope_policy == "continue_own_wip" + and hygiene.blocks_edit + and not hygiene.foreign_dirty_overlaps + and workflow_status == "active" + ) + + payload: dict[str, object] = { + "intent_id": intent_id, + "status": workflow_status, + "run_id": _helpers._short_run_id(record.run_id), + "dirty_scope_policy": validated_dirty_scope_policy, + "workspace": _workspace_summary_from_declare( + workspace_after, + declare_payload, + ), + "blast_radius": blast_payload, + "budget": _budget_summary(budget_payload), + "scope": active_intent.scope.to_payload(), + "edit_allowed": edit_allowed, + "message": 
self._start_message( + workflow_status=workflow_status, + blast_payload=blast_payload, + budget_payload=budget_payload, + concurrent_intents=concurrent_intents, + hygiene=hygiene, + continuing_own_wip=continuing_own_wip, + ), + } + dirty_snapshot = declare_payload.get("dirty_snapshot") + if isinstance(dirty_snapshot, dict): + payload["dirty_snapshot"] = dirty_snapshot + if hygiene.git_available or hygiene.blocks_edit: + hygiene_payload = hygiene.to_payload() + if continuing_own_wip: + hygiene_payload["continuing_own_wip"] = True + payload["workspace_hygiene"] = hygiene_payload + if not edit_allowed: + payload["user_action_required"] = True + payload["next_step"] = _start_next_step( + concurrent_intents=concurrent_intents, + hygiene=hygiene, + dirty_scope_policy=validated_dirty_scope_policy, + ) + return _helpers.attach_workspace_hygiene_tips(payload, root=root_path) + + # ------------------------------------------------------------------ + # finish_controlled_change + # ------------------------------------------------------------------ + + def finish_controlled_change( + self, + *, + intent_id: str, + changed_files: Sequence[str] | None = None, + diff_ref: str | None = None, + after_run_id: str | None = None, + review_text: str | None = None, + claims_text: str | None = None, + create_receipt: bool = True, + auto_clear: bool = True, + strictness: str = "ci", + propose_memory: bool = False, + detail_level: str = "summary", + patch_trail_detail: str = "summary", + ) -> dict[str, object]: + # 1. 
Resolve intent + record, active_intent = self._resolve_intent( + run_id=None, + intent_id=intent_id, + ) + + # Queued intents cannot be verified + if active_intent.status == IntentStatus.QUEUED: + return { + "intent_id": intent_id, + "status": "unverified", + "reason": "intent_not_active", + "scope_check": None, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": False, + "next_step": workflow_msgs.FINISH_PROMOTE_BEFORE_VERIFY, + "message": workflow_msgs.FINISH_QUEUED_NOT_ACTIVE, + } + + # 2. Resolve changed files — exactly one source + resolved_files = self._resolve_changed_files_once( + root_path=record.root, + changed_files=changed_files, + diff_ref=diff_ref, + ) + + from ._workspace_hygiene import ( + dirty_snapshot_from_payload, + finish_hygiene_check, + workspace_dirty_summary, + ) + from ._workspace_intent_store import get_workspace_intent_store + + intent_store = get_workspace_intent_store(record.root) + workspace_record = intent_store.find_raw(intent_id) + start_dirty_snapshot = dirty_snapshot_from_payload( + workspace_record.dirty_snapshot if workspace_record is not None else None + ) + finish_hygiene = finish_hygiene_check( + root=record.root, + allowed_files=active_intent.scope.allowed_files, + allowed_related=active_intent.scope.allowed_related, + resolved_files=resolved_files, + store=intent_store, + own_pid=self._agent_pid, + own_start_epoch=self._agent_start_epoch, + own_intent_id=intent_id, + start_dirty_snapshot=start_dirty_snapshot, + ) + workspace_hygiene_after = { + **finish_hygiene.to_payload(detail_level=detail_level), + "workspace_dirty_summary": workspace_dirty_summary(root=record.root), + } + if finish_hygiene.blocks_finish: + block_reason = finish_hygiene.finish_block_reason or "" + # Only proven patch/scope conflicts block finish: in-scope dirt + # missing from evidence, or a live foreign intent overlapping the + # declared scope. Out-of-scope unattributed dirt is advisory. 
+ detail_message = { + "missing_evidence": workflow_msgs.FINISH_HYGIENE_MISSING_EVIDENCE, + "foreign_dirty_overlap": workflow_msgs.FINISH_HYGIENE_FOREIGN_DIRTY, + }.get(block_reason, workflow_msgs.FINISH_HYGIENE_BLOCKED) + return { + "intent_id": intent_id, + "status": "unverified", + "reason": "workspace_hygiene", + "scope_check": None, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": True, + "next_step": workflow_msgs.FINISH_HYGIENE_NEXT, + "workspace_hygiene_after": workspace_hygiene_after, + "message": detail_message, + } + + scope_files = ( + finish_hygiene.files_for_scope_check + if finish_hygiene.files_for_scope_check + else resolved_files + ) + + # 3. Check (writes IntentRecord.check_result — required for receipt) + check_payload = self._check_change_intent( + run_id=None, + intent_id=intent_id, + diff_ref=None, + changed_files=scope_files, + ) + check_status = str(check_payload.get("status", "")) + scope_check_audit_sequence = _pop_audit_sequence(check_payload) + + # Expired intent + if check_status == IntentStatus.EXPIRED.value: + return { + "intent_id": intent_id, + "status": "expired", + "reason": "report_digest_mismatch", + "scope_check": check_payload, + "verification": None, + "claims": None, + "receipt": None, + "intent_cleared": False, + "user_action_required": True, + "next_step": workflow_msgs.FINISH_DIGEST_MISMATCH_NEXT, + "message": workflow_msgs.FINISH_DIGEST_MISMATCH, + } + + # 4. 
Scope violation — early exit + if check_status == IntentStatus.VIOLATED.value: + patch_trail_payload = self._finish_patch_trail( + record=record, + intent=active_intent, + check_payload=check_payload, + verify_payload=_NOT_REACHED_VERIFY_PAYLOAD, + finish_hygiene=finish_hygiene, + scope_check_audit_sequence=scope_check_audit_sequence, + patch_verify_audit_sequence=None, + patch_trail_detail=patch_trail_detail, + ) + return { + "intent_id": intent_id, + "status": "violated", + "reason": "scope_violation", + "scope_check": check_payload, + "verification": None, + "claims": None, + "receipt": None, + "patch_trail": patch_trail_payload, + "intent_cleared": False, + "user_action_required": True, + "next_step": workflow_msgs.FINISH_SCOPE_VIOLATION_NEXT, + "message": workflow_msgs.FINISH_SCOPE_VIOLATION, + } + + # 5. Verify (before_run_id auto-resolves from intent) + verify_payload = self._patch_contract_verify( + before_run_id=None, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=self._validated_strictness(strictness), + diff_ref=None, + changed_files=scope_files, + ) + verify_status = str(verify_payload.get("status", "")) + patch_verify_audit_sequence = _pop_audit_sequence(verify_payload) + patch_trail_payload = self._finish_patch_trail( + record=record, + intent=active_intent, + check_payload=check_payload, + verify_payload=verify_payload, + finish_hygiene=finish_hygiene, + scope_check_audit_sequence=scope_check_audit_sequence, + patch_verify_audit_sequence=patch_verify_audit_sequence, + patch_trail_detail=patch_trail_detail, + ) + + # 6. 
Non-accepted verification — return without receipt/clear + if verify_status not in _ACCEPTED_STATUSES: + verify_reason = str(verify_payload.get("reason", "")) + return { + "intent_id": intent_id, + "status": verify_status, + "reason": verify_reason, + "scope_check": check_payload, + "verification": verify_payload, + "claims": None, + "receipt": None, + "patch_trail": patch_trail_payload, + "intent_cleared": False, + "workspace_hygiene_after": workspace_hygiene_after, + "summary": _finish_summary( + verify_status=verify_status, + intent_cleared=False, + check_payload=check_payload, + verify_payload=verify_payload, + claims_payload=None, + receipt_payload=None, + receipt_error=None, + workspace_hygiene_after=workspace_hygiene_after, + review_text_present=bool(review_text), + claims_text_present=bool(claims_text), + ), + "user_action_required": verify_status + == PatchContractStatus.VIOLATED.value, + "next_step": verify_payload.get("next_step"), + "message": str(verify_payload.get("message", "")), + } + + health_regression_advisory = verify_payload.get("health_regression_advisory") + claims_payload = self._conditional_claim_validation( + record=record, + verify_payload=verify_payload, + claims_text=claims_text, + ) + + # 8. Receipt (after claims, before clear) + receipt_payload: dict[str, object] | None = None + receipt_error: str | None = None + if create_receipt: + try: + receipt_payload = self.create_review_receipt( + run_id=record.run_id, + intent_id=intent_id, + ) + except MCPServiceContractError as exc: + receipt_error = str(exc) + + # 9. Auto-clear (only on accepted, only if receipt didn't fail) + intent_cleared = False + if auto_clear and verify_status in _ACCEPTED_STATUSES and receipt_error is None: + self._clear_change_intent(intent_id=intent_id) + intent_cleared = True + + # External workspace changes (dirty outside the declared scope) are + # advisory, never blocking; a clean accepted verdict is elevated to + # accepted_with_external_changes. 
See _external_change_advisory. + effective_status, external_advisory = _external_change_advisory( + verify_status, + finish_hygiene.dirty_paths_outside_scope, + ) + + # 10. Compose response + result: dict[str, object] = { + "intent_id": intent_id, + "status": effective_status, + "reason": verify_payload.get("reason"), + "scope_check": check_payload, + "verification": verify_payload, + "claims": claims_payload, + "receipt": receipt_payload, + "patch_trail": patch_trail_payload, + "intent_cleared": intent_cleared, + "workspace_hygiene_after": workspace_hygiene_after, + "summary": _finish_summary( + verify_status=verify_status, + intent_cleared=intent_cleared, + check_payload=check_payload, + verify_payload=verify_payload, + claims_payload=claims_payload, + receipt_payload=receipt_payload, + receipt_error=receipt_error, + workspace_hygiene_after=workspace_hygiene_after, + review_text_present=bool(review_text), + claims_text_present=bool(claims_text), + ), + "user_action_required": False, + "message": self._finish_message( + verify_status=verify_status, + intent_cleared=intent_cleared, + receipt_error=receipt_error, + ), + } + if receipt_error is not None: + result["receipt_error"] = receipt_error + if external_advisory is not None: + result["external_changes"] = external_advisory + if isinstance(health_regression_advisory, dict): + result["health_regression_advisory"] = health_regression_advisory + if propose_memory and verify_status in _ACCEPTED_STATUSES: + profile = verify_payload.get("verification_profile") + memory_hook = self.finish_propose_memory( + root_path=record.root, + changed_files=resolved_files, + claims_text=claims_text, + review_text=review_text, + verification_profile=(str(profile) if profile is not None else None), + ) + if memory_hook: + result.update(memory_hook) + if verify_status in _ACCEPTED_STATUSES: + projection_hook = self.maybe_auto_enqueue_projection_rebuild( + root_path=record.root, + ) + if projection_hook is not None: + 
result["projection_rebuild"] = projection_hook + return result + + def _finish_patch_trail( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + check_payload: dict[str, object], + verify_payload: dict[str, object], + finish_hygiene: WorkspaceHygieneResult, + scope_check_audit_sequence: int | None, + patch_verify_audit_sequence: int | None, + patch_trail_detail: str, + ) -> dict[str, object]: + detail = "full" if patch_trail_detail == "full" else "summary" + inputs = build_patch_trail_inputs( + root_path=record.root, + intent=intent, + check_payload=check_payload, + verify_payload=verify_payload, + hygiene=finish_hygiene, + report_digest=intent.report_digest, + scope_check_audit_sequence=scope_check_audit_sequence, + patch_verify_audit_sequence=patch_verify_audit_sequence, + ) + trail = compute_patch_trail(inputs) + severity = ( + "warn" + if trail.scope_check_status == IntentStatus.VIOLATED.value + or trail.verification_status + not in { + *_ACCEPTED_STATUSES, + "not_reached", + } + else "info" + ) + patch_trail_audit_sequence = self._audit_emit( + root=record.root, + event_type=EVENT_PATCH_TRAIL_COMPUTED, + severity=severity, + run_id=_helpers._short_run_id(record.run_id), + intent_id=intent.intent_id, + report_digest=intent.report_digest, + status=trail.scope_check_status, + payload=trail.audit_payload(), + ) + payload = trail.to_payload(detail_level=detail) + if patch_trail_audit_sequence is not None: + evidence_raw = payload.get("evidence", {}) + if isinstance(evidence_raw, Mapping): + evidence = dict(evidence_raw) + evidence["patch_trail_audit_sequence"] = patch_trail_audit_sequence + payload["evidence"] = evidence + return payload + + # ------------------------------------------------------------------ + # Internal helpers (no new engine logic) + # ------------------------------------------------------------------ + + def _latest_run_for_root(self, root_path: Path) -> MCPRunRecord | None: + """Find the latest run matching the requested root 
(root-safe).""" + resolved = root_path.resolve() + latest: MCPRunRecord | None = None + for record in self._runs.records(): + if record.root == resolved: + latest = record + return latest + + def _resolve_changed_files_once( + self, + *, + root_path: Path, + changed_files: Sequence[str] | None, + diff_ref: str | None, + ) -> tuple[str, ...]: + """Resolve changed files from exactly one source. + + Contract: providing both or neither is a contract error. + ``diff_ref`` is resolved here and never passed further. + """ + has_files = changed_files is not None and len(changed_files) > 0 + has_ref = diff_ref is not None and str(diff_ref).strip() != "" + if has_files and has_ref: + raise MCPServiceContractError(workflow_msgs.FINISH_EVIDENCE_XOR) + if has_ref: + return _require_non_empty_changed_evidence( + self._git_diff_paths( + root_path=root_path, + git_diff_ref=str(diff_ref), + ) + ) + if has_files: + assert changed_files is not None + return _require_non_empty_changed_evidence( + self._normalize_changed_paths( + root_path=root_path, + paths=changed_files, + ) + ) + raise MCPServiceContractError(workflow_msgs.FINISH_EVIDENCE_REQUIRED) + + def _compute_transitive_summary( + self, + *, + record: MCPRunRecord, + intent: IntentRecord, + blast_result: BlastRadiusResult, + depth: str, + ) -> dict[str, object] | None: + """Compute bounded transitive summary when appropriate.""" + needs_transitive = depth == "transitive" or ( + depth == "auto" and blast_result.radius_level == "high" + ) + if not needs_transitive: + return None + + transitive_result = self._blast_radius_result( + record=record, + files=intent.scope.allowed_paths, + depth="transitive", + forbidden_patterns=intent.scope.forbidden, + ) + all_transitive = transitive_result.transitive_dependents + shown = min(len(all_transitive), TRANSITIVE_SUMMARY_LIMIT) + return { + "total": len(all_transitive), + "shown": shown, + "truncated": shown < len(all_transitive), + "top_paths": 
list(all_transitive[:TRANSITIVE_SUMMARY_LIMIT]), + } + + def _conditional_claim_validation( + self, + *, + record: MCPRunRecord, + verify_payload: dict[str, object], + claims_text: str | None, + ) -> dict[str, object] | None: + """Run claim validation only when both conditions are met.""" + if not claims_text: + return None + if not verify_payload.get("claim_validation_recommended"): + return None + structural_delta = verify_payload.get("structural_delta") + patch_health_delta: int | None = None + if isinstance(structural_delta, dict): + health_delta = structural_delta.get("health_delta") + if isinstance(health_delta, int): + patch_health_delta = health_delta + return _helpers.coerce_object_dict( + self.validate_review_claims( + text=claims_text, + run_id=record.run_id, + patch_health_delta=patch_health_delta, + ) + ) + + @staticmethod + def _start_message( + *, + workflow_status: str, + blast_payload: dict[str, object], + budget_payload: dict[str, object], + concurrent_intents: list[dict[str, object]], + hygiene: object, + continuing_own_wip: bool = False, + ) -> str: + if workflow_status == "blocked": + return _start_next_step( + concurrent_intents=concurrent_intents, + hygiene=hygiene, + dirty_scope_policy="block", + ) + gate = budget_payload.get("gate_preview") + return workflow_msgs.start_controlled_change_message( + radius_level=str(blast_payload.get("radius_level", "low")), + budget_would_fail=(isinstance(gate, dict) and bool(gate.get("would_fail"))), + continuing_own_wip=continuing_own_wip, + ) + + @staticmethod + def _finish_message( + *, + verify_status: str, + intent_cleared: bool, + receipt_error: str | None, + ) -> str: + return workflow_msgs.finish_controlled_change_message( + verify_status=verify_status, + intent_cleared=intent_cleared, + receipt_error=receipt_error, + ) + + +def _validated_blast_radius_depth(value: str) -> str: + if value not in VALID_BLAST_RADIUS_DEPTHS: + raise MCPServiceContractError( + err_msgs.invalid_choice( + 
"blast_radius_depth", + value, + VALID_BLAST_RADIUS_DEPTHS, + ) + ) + return value + + +def _workspace_summary_from_declare( + workspace: dict[str, object], + declare_payload: dict[str, object], +) -> dict[str, object]: + """Merge fresh workspace counts with declare conflict context.""" + concurrent_intents = declare_payload.get("concurrent_intents") + if not concurrent_intents: + blocked_by = declare_payload.get("blocked_by", []) + if isinstance(blocked_by, list) and blocked_by: + concurrent_intents = blocked_by + return { + "concurrent_intents": concurrent_intents or [], + "workspace_relations": declare_payload.get("workspace_relations", []), + "queued_context": declare_payload.get("queued_context", []), + "total_agents": workspace.get("total_agents", 0), + "stale_count": workspace.get("stale_count", 0), + } + + +def _as_conflict_list(value: object) -> list[dict[str, object]]: + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, dict)] + + +def _require_non_empty_changed_evidence(paths: Sequence[str]) -> tuple[str, ...]: + resolved = _helpers.coerce_repo_path_tuple(paths) + if not resolved: + raise MCPServiceContractError(workflow_msgs.FINISH_EVIDENCE_REQUIRED) + return resolved + + +def _external_change_advisory( + verify_status: str, + external_paths: Sequence[str], +) -> tuple[str, dict[str, object] | None]: + """Elevate a clean accepted verdict when external workspace dirt exists. + + ``external_paths`` are dirty paths outside the declared scope — advisory + only, never blocking. Returns ``(effective_status, advisory_or_None)``: a + plain ``accepted`` becomes ``accepted_with_external_changes`` and a compact + advisory is produced; any other status is returned unchanged. 
+ """ + external = list(external_paths) + if not external: + return verify_status, None + effective_status = verify_status + if verify_status == PatchContractStatus.ACCEPTED.value: + effective_status = PatchContractStatus.ACCEPTED_EXTERNAL.value + advisory = { + "count": len(external), + "sample": external[:10], + "truncated": len(external) > 10, + } + return effective_status, advisory + + +def _finish_summary( + *, + verify_status: str, + intent_cleared: bool, + check_payload: dict[str, object], + verify_payload: dict[str, object], + claims_payload: dict[str, object] | None, + receipt_payload: dict[str, object] | None, + receipt_error: str | None, + workspace_hygiene_after: dict[str, object], + review_text_present: bool, + claims_text_present: bool, +) -> dict[str, object]: + structural_delta = _helpers._as_mapping(verify_payload.get("structural_delta")) + dirty_summary = _helpers._as_mapping( + workspace_hygiene_after.get("workspace_dirty_summary") + ) + return { + "status": verify_status, + "scope_status": str(check_payload.get("status", "")), + "verification_profile": verify_payload.get("verification_profile"), + "structural_verdict": structural_delta.get("verdict"), + "health_delta": structural_delta.get("health_delta"), + "regressions": len(_helpers._as_sequence(structural_delta.get("regressions"))), + "worsened_symbols": len(_helpers._as_sequence(verify_payload.get("worsened"))), + "claims": _finish_claim_status( + claims_payload=claims_payload, + claims_text_present=claims_text_present, + ), + "review_note_present": review_text_present, + "receipt": _finish_receipt_status( + receipt_payload=receipt_payload, + receipt_error=receipt_error, + ), + "intent_cleared": intent_cleared, + "workspace_dirty_paths": _helpers._as_int( + dirty_summary.get("dirty_paths_count"), + 0, + ), + "workspace_hygiene_blocked": bool(workspace_hygiene_after.get("blocks_finish")), + } + + +def _finish_claim_status( + *, + claims_payload: dict[str, object] | None, + 
claims_text_present: bool, +) -> str: + if not claims_text_present: + return "skipped_no_claims_text" + if claims_payload is None: + return "skipped_not_recommended" + return "valid" if claims_payload.get("valid") is True else "violated" + + +def _finish_receipt_status( + *, + receipt_payload: dict[str, object] | None, + receipt_error: str | None, +) -> str: + if receipt_error is not None: + return "failed" + if receipt_payload is None: + return "skipped" + return "created" + + +def _validated_dirty_scope_policy(value: str) -> str: + from ._workspace_hygiene import VALID_DIRTY_SCOPE_POLICIES + + if value not in VALID_DIRTY_SCOPE_POLICIES: + raise MCPServiceContractError( + err_msgs.invalid_choice( + "dirty_scope_policy", + value, + VALID_DIRTY_SCOPE_POLICIES, + ) + ) + return value + + +def _start_edit_allowed( + *, + declare_status: str, + concurrent_intents: list[dict[str, object]], + on_conflict: str | None, + hygiene: object, + dirty_scope_policy: str, +) -> bool: + from ._workspace_hygiene import WorkspaceHygieneResult, hygiene_blocks_start_edit + + if declare_status != IntentStatus.ACTIVE.value: + return False + if concurrent_intents and on_conflict != "queue": + return False + return not ( + isinstance(hygiene, WorkspaceHygieneResult) + and hygiene_blocks_start_edit( + hygiene, + dirty_scope_policy=dirty_scope_policy, + ) + ) + + +def _start_workflow_status( + *, + declare_status: str, + coordination_blocked: bool, + hygiene: object, + dirty_scope_policy: str, +) -> str: + from ._workspace_hygiene import WorkspaceHygieneResult, hygiene_blocks_start_edit + + if declare_status == IntentStatus.QUEUED.value: + return "queued" + if coordination_blocked: + return "blocked" + if isinstance(hygiene, WorkspaceHygieneResult) and hygiene_blocks_start_edit( + hygiene, + dirty_scope_policy=dirty_scope_policy, + ): + return "blocked" + return "active" + + +def _start_next_step( + *, + concurrent_intents: list[dict[str, object]], + hygiene: object, + dirty_scope_policy: 
str = "block", +) -> str: + from ._workspace_hygiene import WorkspaceHygieneResult, hygiene_blocks_start_edit + + parts: list[str] = [] + if concurrent_intents: + ownerships = {str(item.get("ownership", "")) for item in concurrent_intents} + if "foreign_active" in ownerships: + parts.append(workflow_msgs.START_FOREIGN_ACTIVE_OVERLAP) + elif "foreign_stale" in ownerships: + parts.append(workflow_msgs.START_FOREIGN_STALE_OVERLAP) + else: + parts.append(workflow_msgs.START_FOREIGN_ACTIVE_OVERLAP) + if isinstance(hygiene, WorkspaceHygieneResult) and hygiene_blocks_start_edit( + hygiene, + dirty_scope_policy=dirty_scope_policy, + ): + if concurrent_intents: + parts = [workflow_msgs.START_COMBINED_BLOCK] + elif hygiene.foreign_dirty_overlaps: + parts.append(workflow_msgs.START_FOREIGN_DIRTY_OVERLAP) + elif dirty_scope_policy == "continue_own_wip": + parts.append(workflow_msgs.START_CONTINUE_OWN_WIP) + else: + parts.append(workflow_msgs.START_DIRTY_SCOPE) + return " ".join(parts) + + +_NOT_REACHED_VERIFY_PAYLOAD: Final[dict[str, object]] = { + "status": "not_reached", + "verification_profile": "unknown", + "checks_not_applicable": [], + "contract_violations": [], +} + + +def _pop_audit_sequence(payload: dict[str, object]) -> int | None: + value = payload.pop("_audit_sequence", None) + return value if isinstance(value, int) and not isinstance(value, bool) else None + + +def _budget_summary(budget_payload: dict[str, object]) -> dict[str, object]: + """Extract budget-relevant fields for the start response.""" + return { + "strictness": budget_payload.get("strictness"), + "budgets": budget_payload.get("budgets"), + "current_state": budget_payload.get("current_state"), + "headroom": budget_payload.get("headroom"), + "gate_preview": budget_payload.get("gate_preview"), + "message": budget_payload.get("message"), + } + + +__all__ = ["_MCPSessionWorkflowMixin"] diff --git a/codeclone/surfaces/mcp/_tool_param_docs.py b/codeclone/surfaces/mcp/_tool_param_docs.py new file mode 100644 
index 00000000..0dacd752 --- /dev/null +++ b/codeclone/surfaces/mcp/_tool_param_docs.py @@ -0,0 +1,10 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Backward-compatible re-export of MCP tool parameter annotations.""" + +from __future__ import annotations + +from .messages.params import * # noqa: F403 diff --git a/codeclone/surfaces/mcp/_verification_profile.py b/codeclone/surfaces/mcp/_verification_profile.py new file mode 100644 index 00000000..7eaa5fce --- /dev/null +++ b/codeclone/surfaces/mcp/_verification_profile.py @@ -0,0 +1,345 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Verification profile classifier for the patch contract. + +Classifies a patch by its changed file types to determine which structural +checks are applicable. The profile is **derived** from actual changed files, +never declared by the agent. + +Priority chain (highest wins): +1. State artifact patterns → STATE_ARTIFACT_CHANGE +2. Python source extensions → PYTHON_STRUCTURAL +3. Governance config → GOVERNANCE_CONFIG +4. Documentation patterns → DOCUMENTATION_ONLY +5. Fallback → NON_PYTHON_PATCH + +Invariants: +- ``classify_patch`` is a pure function: same input always yields same profile. +- A single file from a higher-priority category overrides the rest. +- Scope/forbidden checks are **not** gated by profile — they always run. 
class VerificationProfile(str, Enum):
    """Verification depth derived from the patch diff.

    Members mix in ``str``, so each member compares equal to its string
    value. ``ClassificationResult.to_payload`` emits ``.value`` as the
    ``verification_profile`` field.
    """

    PYTHON_STRUCTURAL = "python_structural"
    DOCUMENTATION_ONLY = "documentation_only"
    GOVERNANCE_CONFIG = "governance_config"
    NON_PYTHON_PATCH = "non_python_patch"
    STATE_ARTIFACT_CHANGE = "state_artifact_change"


# ── pattern sets ────────────────────────────────────────────────────

# Checked first by ``classify_patch``: one matching path is enough to select
# STATE_ARTIFACT_CHANGE. The forbidden workspace globs are spliced in from
# the shared workspace-path module.
STATE_ARTIFACT_PATTERNS: Final[tuple[str, ...]] = (
    "codeclone.baseline.json",
    *FORBIDDEN_WORKSPACE_GLOBS,
)

# Suffixes tested with ``str.endswith`` by ``_is_python_source``.
PYTHON_SOURCE_EXTENSIONS: Final[tuple[str, ...]] = (".py", ".pyi")

# Matched through ``_matches_any`` (``fnmatchcase``). Entries without "/" or
# "*" are additionally tried against the path's basename, so "pyproject.toml"
# matches in any directory; glob entries such as "docker-compose*.yml" are
# only compared with the full repo-relative path.
# NOTE(review): fnmatch's "*" is not path-separator aware, so the "/*" and
# "/**" pairs presumably overlap - confirm before pruning either form.
GOVERNANCE_CONFIG_PATTERNS: Final[tuple[str, ...]] = (
    "pyproject.toml",
    "setup.cfg",
    "tox.ini",
    "pytest.ini",
    "mypy.ini",
    "ruff.toml",
    ".coveragerc",
    "coverage.toml",
    ".pre-commit-config.yaml",
    ".github/workflows/*",
    ".github/workflows/**",
    ".github/actions/*",
    ".github/actions/**",
    "py.typed",
    "Makefile",
    "Dockerfile",
    "docker-compose*.yml",
)

# Suffixes tested with ``str.endswith`` by ``_is_documentation``.
# NOTE(review): ".txt" also covers files such as requirements.txt, which are
# not listed as governance config and would therefore classify as
# documentation - confirm this is intended.
DOCUMENTATION_EXTENSIONS: Final[tuple[str, ...]] = (
    ".md",
    ".rst",
    ".txt",
    ".adoc",
    ".textile",
)
+ "THANKS*", + "SECURITY*", + "CODE_OF_CONDUCT*", +) + + +# ── check names ───────────────────────────────────────────────────── + +CHECK_PROFILE_CLASSIFICATION: Final = "verification_profile_classification" +CHECK_SCOPE: Final = "scope_check" +CHECK_FORBIDDEN: Final = "forbidden_paths_check" +CHECK_STRUCTURAL_DELTA: Final = "python_structural_delta" +CHECK_GATE_COMPARISON: Final = "gate_comparison" +CHECK_WORSENED_SYMBOLS: Final = "worsened_symbols" + +_ALL_STRUCTURAL_CHECKS: Final[tuple[str, ...]] = ( + CHECK_STRUCTURAL_DELTA, + CHECK_GATE_COMPARISON, + CHECK_WORSENED_SYMBOLS, +) + +_ALWAYS_PERFORMED: Final[tuple[str, ...]] = ( + CHECK_PROFILE_CLASSIFICATION, + CHECK_SCOPE, + CHECK_FORBIDDEN, +) + + +# ── check matrix ──────────────────────────────────────────────────── + + +@dataclass(frozen=True, slots=True) +class CheckMatrix: + """Deterministic check matrix for a verification profile.""" + + profile: VerificationProfile + after_run_required: bool + structural_checks_applicable: bool + + @property + def checks_performed(self) -> tuple[str, ...]: + if self.structural_checks_applicable: + return (*_ALWAYS_PERFORMED, *_ALL_STRUCTURAL_CHECKS) + return _ALWAYS_PERFORMED + + @property + def checks_not_applicable(self) -> tuple[str, ...]: + if self.structural_checks_applicable: + return () + return _ALL_STRUCTURAL_CHECKS + + +_MATRICES: Final[dict[VerificationProfile, CheckMatrix]] = { + VerificationProfile.PYTHON_STRUCTURAL: CheckMatrix( + profile=VerificationProfile.PYTHON_STRUCTURAL, + after_run_required=True, + structural_checks_applicable=True, + ), + VerificationProfile.GOVERNANCE_CONFIG: CheckMatrix( + profile=VerificationProfile.GOVERNANCE_CONFIG, + after_run_required=True, + structural_checks_applicable=False, + ), + VerificationProfile.DOCUMENTATION_ONLY: CheckMatrix( + profile=VerificationProfile.DOCUMENTATION_ONLY, + after_run_required=False, + structural_checks_applicable=False, + ), + VerificationProfile.NON_PYTHON_PATCH: CheckMatrix( + 
profile=VerificationProfile.NON_PYTHON_PATCH, + after_run_required=False, + structural_checks_applicable=False, + ), + VerificationProfile.STATE_ARTIFACT_CHANGE: CheckMatrix( + profile=VerificationProfile.STATE_ARTIFACT_CHANGE, + after_run_required=False, + structural_checks_applicable=False, + ), +} + + +def check_matrix(profile: VerificationProfile) -> CheckMatrix: + """Return the deterministic check matrix for *profile*.""" + return _MATRICES[profile] + + +@dataclass(frozen=True, slots=True) +class ClassificationResult: + """Immutable result of ``classify_patch``.""" + + profile: VerificationProfile + reason: str + python_source_touched: bool + state_artifact_touched: bool + governance_config_touched: bool + + def to_payload(self) -> dict[str, object]: + matrix = check_matrix(self.profile) + return { + "verification_profile": self.profile.value, + "profile_reason": self.reason, + "python_source_touched": self.python_source_touched, + "after_run_required": matrix.after_run_required, + "checks_performed": list(matrix.checks_performed), + "checks_not_applicable": list(matrix.checks_not_applicable), + } + + +def classify_patch( + changed_files: Sequence[str], +) -> ClassificationResult: + """Classify a patch by its changed file set. + + Pure function — deterministic for the same input. Priority: + state artifact > Python source > governance config > docs > fallback. + + When *changed_files* is empty, returns ``NON_PYTHON_PATCH`` with a + dedicated reason. 
+ """ + if not changed_files: + return ClassificationResult( + profile=VerificationProfile.NON_PYTHON_PATCH, + reason=EMPTY_PROFILE_REASON, + python_source_touched=False, + state_artifact_touched=False, + governance_config_touched=False, + ) + + has_state_artifact, has_python_source, has_governance_config = False, False, False + all_documentation = True + normalized_paths = filter(None, (_normalize(p) for p in changed_files)) + + for normalized in normalized_paths: + if _matches_any(normalized, STATE_ARTIFACT_PATTERNS): + has_state_artifact = True + elif _is_python_source(normalized): + has_python_source = True + elif _matches_any(normalized, GOVERNANCE_CONFIG_PATTERNS): + has_governance_config = True + elif _is_documentation(normalized): + continue + + all_documentation = False + + # Priority chain: state artifact > python > governance > docs > fallback + if has_state_artifact: + profile = VerificationProfile.STATE_ARTIFACT_CHANGE + elif has_python_source: + profile = VerificationProfile.PYTHON_STRUCTURAL + elif has_governance_config: + profile = VerificationProfile.GOVERNANCE_CONFIG + elif all_documentation: + profile = VerificationProfile.DOCUMENTATION_ONLY + else: + profile = VerificationProfile.NON_PYTHON_PATCH + + return ClassificationResult( + profile=profile, + reason=PROFILE_REASONS[profile.value], + python_source_touched=has_python_source, + state_artifact_touched=has_state_artifact, + governance_config_touched=has_governance_config, + ) + + +def profile_limitations(profile: VerificationProfile) -> tuple[str, ...]: + """Return human-readable limitations for *profile*.""" + return _profile_limitations(profile.value) + + +def profile_accepted_message(profile: VerificationProfile) -> str: + """Return the accepted message for a lightweight-verified profile.""" + return _profile_accepted_message(profile.value) + + +def profile_unverified_message(profile: VerificationProfile) -> str: + """Return the unverified message when after_run is missing.""" + return 
_profile_unverified_message(profile.value) + + +# ── internals ─────────────────────────────────────────────────────── + + +def _normalize(path: str) -> str: + text = path.replace("\\", "/").strip() + if text.startswith("./"): + text = text[2:] + return text.rstrip("/") + + +def _is_python_source(path: str) -> bool: + return any(path.endswith(ext) for ext in PYTHON_SOURCE_EXTENSIONS) + + +def _is_documentation(path: str) -> bool: + if any(path.endswith(ext) for ext in DOCUMENTATION_EXTENSIONS): + return True + return _matches_any(path, DOCUMENTATION_PATTERNS) + + +def _matches_any(path: str, patterns: Sequence[str]) -> bool: + for pattern in patterns: + if fnmatchcase(path, pattern): + return True + # Also match the basename for non-glob patterns (e.g. "pyproject.toml"). + if "/" not in pattern and "*" not in pattern: + basename = path.rsplit("/", 1)[-1] if "/" in path else path + if fnmatchcase(basename, pattern): + return True + return False + + +__all__ = [ + "CHECK_FORBIDDEN", + "CHECK_GATE_COMPARISON", + "CHECK_PROFILE_CLASSIFICATION", + "CHECK_SCOPE", + "CHECK_STRUCTURAL_DELTA", + "CHECK_WORSENED_SYMBOLS", + "DOCUMENTATION_EXTENSIONS", + "DOCUMENTATION_PATTERNS", + "GOVERNANCE_CONFIG_PATTERNS", + "PYTHON_SOURCE_EXTENSIONS", + "STATE_ARTIFACT_PATTERNS", + "CheckMatrix", + "ClassificationResult", + "VerificationProfile", + "check_matrix", + "classify_patch", + "profile_accepted_message", + "profile_limitations", + "profile_unverified_message", +] diff --git a/codeclone/surfaces/mcp/_workspace_drift.py b/codeclone/surfaces/mcp/_workspace_drift.py new file mode 100644 index 00000000..5cc82ecd --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_drift.py @@ -0,0 +1,224 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Deterministic workspace drift projection for in-memory MCP runs.""" + +from __future__ import annotations + +from collections.abc import Iterable, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from ...cache.entries import FileStat +from ...cache.store import file_stat_signature +from ...contracts.errors import ValidationError +from ...scanner import iter_py_files +from ._session_shared import MCPRunRecord +from ._workspace_hygiene import DirtySnapshot, collect_dirty_snapshot + +WorkspaceDriftStatus = Literal["fresh", "drifted", "unknown"] +WorkspaceDriftStrength = Literal["mtime_size", "mtime_size_plus_git"] + + +@dataclass(frozen=True, slots=True) +class WorkspaceDrift: + status: WorkspaceDriftStatus + drifted_files: tuple[str, ...] + added_files: tuple[str, ...] + deleted_files: tuple[str, ...] + topology_drift: bool + strength: WorkspaceDriftStrength + + +def build_run_manifest( + *, + root: Path, + filepaths: Iterable[str], +) -> dict[str, FileStat]: + """Capture repo-relative source signatures for one completed discovery.""" + manifest: dict[str, FileStat] = {} + for filepath in sorted(set(filepaths)): + relative_path = _repo_relative_path(root, filepath) + if relative_path is None: + continue + try: + manifest[relative_path] = file_stat_signature(filepath) + except OSError: + continue + return dict(sorted(manifest.items())) + + +def compute_drift( + record: MCPRunRecord, + paths: Sequence[str] | None = None, +) -> WorkspaceDrift: + """Compare a run's source snapshot with current stat, topology, and git state.""" + manifest = record.manifest + if manifest is None: + return WorkspaceDrift( + status="unknown", + drifted_files=(), + added_files=(), + deleted_files=(), + topology_drift=False, + strength=_drift_strength(record.dirty_snapshot, None), + ) + + selected_paths = _selected_paths(paths) + manifest_paths = 
frozenset(manifest) + current_paths = _current_source_paths(record.root) + topology_known = current_paths is not None + current_source_paths = current_paths or frozenset() + + deleted_files = ( + tuple( + sorted( + path + for path in manifest_paths - current_source_paths + if _path_selected(path, selected_paths) + ) + ) + if topology_known + else () + ) + added_files = ( + tuple( + sorted( + path + for path in current_source_paths - manifest_paths + if _path_selected(path, selected_paths) + ) + ) + if topology_known + else () + ) + + drifted: set[str] = set() + for path in sorted(manifest_paths): + if not _path_selected(path, selected_paths): + continue + try: + live_stat = file_stat_signature(str(record.root / path)) + except OSError: + if not topology_known: + drifted.add(path) + continue + if live_stat != manifest[path]: + drifted.add(path) + + current_dirty_snapshot = collect_dirty_snapshot(record.root) + git_drifted = _dirty_snapshot_delta( + before=record.dirty_snapshot, + after=current_dirty_snapshot, + ) + source_universe = manifest_paths | current_source_paths + drifted.update( + path + for path in git_drifted + if path in source_universe and _path_selected(path, selected_paths) + ) + drifted.difference_update(deleted_files) + drifted.difference_update(added_files) + + has_drift = bool(drifted or added_files or deleted_files) + status: WorkspaceDriftStatus + if has_drift: + status = "drifted" + elif topology_known: + status = "fresh" + else: + status = "unknown" + return WorkspaceDrift( + status=status, + drifted_files=tuple(sorted(drifted)), + added_files=added_files, + deleted_files=deleted_files, + topology_drift=bool(added_files or deleted_files), + strength=_drift_strength(record.dirty_snapshot, current_dirty_snapshot), + ) + + +def _current_source_paths(root: Path) -> frozenset[str] | None: + try: + return frozenset( + relative_path + for filepath in iter_py_files(str(root)) + if (relative_path := _repo_relative_path(root, filepath)) is not None + ) 
+ except (OSError, RuntimeError, ValidationError): + return None + + +def _repo_relative_path(root: Path, filepath: str) -> str | None: + root_path = root.resolve() + candidate = Path(filepath) + if not candidate.is_absolute(): + candidate = root_path / candidate + try: + relative = candidate.relative_to(root_path) + except ValueError: + return None + normalized = relative.as_posix().strip("/") + return normalized or None + + +def _selected_paths(paths: Sequence[str] | None) -> frozenset[str] | None: + if paths is None: + return None + return frozenset( + normalized + for path in paths + if (normalized := path.strip().replace("\\", "/").strip("/")) + ) + + +def _path_selected(path: str, selected_paths: frozenset[str] | None) -> bool: + if selected_paths is None: + return True + return any( + path == selected or path.startswith(f"{selected}/") + for selected in selected_paths + ) + + +def _dirty_snapshot_delta( + *, + before: DirtySnapshot | None, + after: DirtySnapshot, +) -> frozenset[str]: + if before is None or not before.git_available or not after.git_available: + return frozenset() + before_entries = before.entry_map() + after_entries = after.entry_map() + return frozenset( + path + for path in before_entries.keys() | after_entries.keys() + if before_entries.get(path) != after_entries.get(path) + ) + + +def _drift_strength( + before: DirtySnapshot | None, + after: DirtySnapshot | None, +) -> WorkspaceDriftStrength: + if ( + before is not None + and after is not None + and before.git_available + and after.git_available + ): + return "mtime_size_plus_git" + return "mtime_size" + + +__all__ = [ + "WorkspaceDrift", + "WorkspaceDriftStatus", + "WorkspaceDriftStrength", + "build_run_manifest", + "compute_drift", +] diff --git a/codeclone/surfaces/mcp/_workspace_hygiene.py b/codeclone/surfaces/mcp/_workspace_hygiene.py new file mode 100644 index 00000000..282b54a2 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_hygiene.py @@ -0,0 +1,1022 @@ +# This Source Code 
Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Git working-tree hygiene evaluation for workspace change control.""" + +from __future__ import annotations + +import hashlib +import os +import subprocess +from collections.abc import Iterator, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import Final + +from ._workspace_intent_lifecycle import ( + WorkspaceIntentStatus, + is_terminal_workspace_intent_status, +) +from ._workspace_intent_store import WorkspaceIntentStore +from ._workspace_intents import ( + IntentOwnership, + WorkspaceIntentRecord, + _scope_all_sets, + classify_intent_ownership, + format_utc, + utc_now, +) + +_FOREIGN_DIRTY_OWNERSHIP: frozenset[IntentOwnership] = frozenset( + { + IntentOwnership.FOREIGN_ACTIVE, + IntentOwnership.FOREIGN_STALE, + } +) + +_DIRTY_SUMMARY_SAMPLE_LIMIT = 10 +_BASE_DIRTY_SCOPE_MESSAGE = "Uncommitted changes overlap your declared scope." +STRICT_FINISH_ENV: Final = "CODECLONE_STRICT_FINISH" +_TRUTHY_ENV_VALUES: Final[frozenset[str]] = frozenset({"1", "true", "yes", "on"}) + +DIRTY_SCOPE_POLICY_BLOCK: Final = "block" +DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP: Final = "continue_own_wip" +VALID_DIRTY_SCOPE_POLICIES: frozenset[str] = frozenset( + { + DIRTY_SCOPE_POLICY_BLOCK, + DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + } +) + + +def _attach_optional_path_lists( + payload: dict[str, object], + fields: Sequence[tuple[str, tuple[str, ...]]], +) -> None: + for key, paths in fields: + if paths: + payload[key] = list(paths) + + +@dataclass(frozen=True, slots=True) +class DirtyPathsResult: + git_available: bool + dirty_paths: tuple[str, ...] 
+ + +@dataclass(frozen=True, slots=True) +class DirtySnapshotEntry: + path: str + status_xy: str + digest: str | None + digest_status: str + + def to_payload(self) -> dict[str, object]: + return { + "status_xy": self.status_xy, + "digest": self.digest, + "digest_status": self.digest_status, + } + + +@dataclass(frozen=True, slots=True) +class DirtySnapshot: + git_available: bool + captured_at_utc: str + entries: tuple[DirtySnapshotEntry, ...] + + @property + def paths(self) -> tuple[str, ...]: + return tuple(entry.path for entry in self.entries) + + def entry_map(self) -> dict[str, DirtySnapshotEntry]: + return {entry.path: entry for entry in self.entries} + + def to_payload(self) -> dict[str, object]: + return { + "git_available": self.git_available, + "captured_at_utc": self.captured_at_utc, + "entries": { + entry.path: entry.to_payload() + for entry in sorted(self.entries, key=lambda item: item.path) + }, + } + + def summary_payload(self) -> dict[str, object]: + digest_counts: dict[str, int] = {} + for entry in self.entries: + digest_counts[entry.digest_status] = ( + digest_counts.get( + entry.digest_status, + 0, + ) + + 1 + ) + return { + "git_available": self.git_available, + "captured_at_utc": self.captured_at_utc, + "paths_count": len(self.entries), + "digest_status_counts": dict(sorted(digest_counts.items())), + } + + +@dataclass(frozen=True, slots=True) +class DirtyAttribution: + path: str + scope_relation: str + evidence: str + start_state: str + intent_attribution: str + classification: str + blocking: bool + + def to_payload(self) -> dict[str, object]: + return { + "path": self.path, + "scope_relation": self.scope_relation, + "evidence": self.evidence, + "start_state": self.start_state, + "intent_attribution": self.intent_attribution, + "classification": self.classification, + "blocking": self.blocking, + } + + +@dataclass(frozen=True, slots=True) +class ForeignDirtyOverlap: + path: str + foreign_intent_id: str + foreign_persisted_status: str + 
foreign_ownership: str + foreign_agent_label: str + message: str + + def to_payload(self) -> dict[str, object]: + return { + "path": self.path, + "foreign_intent_id": self.foreign_intent_id, + "foreign_persisted_status": self.foreign_persisted_status, + "foreign_ownership": self.foreign_ownership, + "foreign_agent_label": self.foreign_agent_label, + "message": self.message, + } + + +@dataclass(frozen=True, slots=True) +class WorkspaceHygieneResult: + git_available: bool + dirty_paths: tuple[str, ...] + dirty_paths_in_scope: tuple[str, ...] + dirty_paths_outside_scope: tuple[str, ...] + foreign_dirty_overlaps: tuple[ForeignDirtyOverlap, ...] + blocks_edit: bool + unacknowledged_dirty_in_scope: tuple[str, ...] = () + own_unscoped_dirty: tuple[str, ...] = () + unattributed_unscoped_dirty: tuple[str, ...] = () + preexisting_unscoped_dirty: tuple[str, ...] = () + new_unattributed_unscoped_dirty: tuple[str, ...] = () + modified_unattributed_unscoped_dirty: tuple[str, ...] = () + unknown_unattributed_unscoped_dirty: tuple[str, ...] = () + foreign_attributed_outside_scope: tuple[str, ...] = () + dirty_attribution: tuple[DirtyAttribution, ...] = () + dirty_snapshot: DirtySnapshot | None = None + dirty_snapshot_status: str | None = None + files_for_scope_check: tuple[str, ...] 
= () + blocks_finish: bool = False + finish_block_reason: str | None = None + + def _counts(self) -> dict[str, int]: + return { + "in_scope": len(self.dirty_paths_in_scope), + "outside_scope": len(self.dirty_paths_outside_scope), + "missing_evidence": len(self.unacknowledged_dirty_in_scope), + "preexisting_unscoped": len(self.preexisting_unscoped_dirty), + "new_unattributed_unscoped": len(self.new_unattributed_unscoped_dirty), + "modified_unattributed_unscoped": len( + self.modified_unattributed_unscoped_dirty + ), + "unknown_unattributed_unscoped": len( + self.unknown_unattributed_unscoped_dirty + ), + "foreign_attributed_outside_scope": len( + self.foreign_attributed_outside_scope + ), + } + + def to_payload(self, *, detail_level: str = "summary") -> dict[str, object]: + # Summary-first: the agent acts on the blocking subset and counts. + # Full per-path attribution + the derived classification arrays are + # available with detail_level="full" (they are all derivable from + # dirty_attribution, so they are not emitted twice by default). 
+ payload: dict[str, object] = { + "git_available": self.git_available, + "blocks_edit": self.blocks_edit, + "counts": self._counts(), + "foreign_dirty_overlaps": [ + item.to_payload() for item in self.foreign_dirty_overlaps + ], + } + if self.unacknowledged_dirty_in_scope: + payload["unacknowledged_dirty_in_scope"] = list( + self.unacknowledged_dirty_in_scope + ) + if self.dirty_snapshot is not None: + payload["dirty_snapshot"] = self.dirty_snapshot.summary_payload() + if self.dirty_snapshot_status is not None: + payload["dirty_snapshot_status"] = self.dirty_snapshot_status + if self.blocks_finish: + payload["blocks_finish"] = True + if self.finish_block_reason is not None: + payload["finish_block_reason"] = self.finish_block_reason + if detail_level != "full": + return payload + payload["dirty_paths_in_scope"] = list(self.dirty_paths_in_scope) + payload["dirty_paths_outside_scope"] = list(self.dirty_paths_outside_scope) + _attach_optional_path_lists( + payload, + ( + ("own_unscoped_dirty", self.own_unscoped_dirty), + ("unattributed_unscoped_dirty", self.unattributed_unscoped_dirty), + ("preexisting_unscoped_dirty", self.preexisting_unscoped_dirty), + ( + "new_unattributed_unscoped_dirty", + self.new_unattributed_unscoped_dirty, + ), + ( + "modified_unattributed_unscoped_dirty", + self.modified_unattributed_unscoped_dirty, + ), + ( + "unknown_unattributed_unscoped_dirty", + self.unknown_unattributed_unscoped_dirty, + ), + ( + "foreign_attributed_outside_scope", + self.foreign_attributed_outside_scope, + ), + ), + ) + if self.dirty_attribution: + payload["dirty_attribution"] = [ + item.to_payload() for item in self.dirty_attribution + ] + if self.files_for_scope_check: + payload["files_for_scope_check"] = list(self.files_for_scope_check) + return payload + + +def collect_dirty_paths( + root: Path, + *, + scoped_paths: Sequence[str] | None = None, +) -> DirtyPathsResult: + """Collect repo-relative dirty paths from the git working tree.""" + if not 
_git_available(root): + return DirtyPathsResult(git_available=False, dirty_paths=()) + try: + completed = subprocess.run( + ["git", "status", "--porcelain"], + cwd=root, + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return DirtyPathsResult(git_available=False, dirty_paths=()) + dirty = _dirty_paths_from_porcelain(completed.stdout) + if scoped_paths is not None: + scope_set = {_normalize_path(path) for path in scoped_paths if path.strip()} + dirty = tuple(sorted(path for path in dirty if _path_in_scope(path, scope_set))) + return DirtyPathsResult(git_available=True, dirty_paths=dirty) + + +def collect_dirty_snapshot(root: Path) -> DirtySnapshot: + """Collect full git dirty state with stable per-path digests when available.""" + captured_at = format_utc(utc_now()) + if not _git_available(root): + return DirtySnapshot( + git_available=False, + captured_at_utc=captured_at, + entries=(), + ) + try: + completed = subprocess.run( + ["git", "status", "--porcelain=v1"], + cwd=root, + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return DirtySnapshot( + git_available=False, + captured_at_utc=captured_at, + entries=(), + ) + entries = tuple( + DirtySnapshotEntry( + path=path, + status_xy=status_xy, + digest=digest, + digest_status=digest_status, + ) + for path, status_xy in _dirty_entries_from_porcelain(completed.stdout) + for digest, digest_status in (_dirty_entry_digest(root, path, status_xy),) + ) + return DirtySnapshot( + git_available=True, + captured_at_utc=captured_at, + entries=tuple(sorted(entries, key=lambda entry: entry.path)), + ) + + +def dirty_snapshot_from_payload(payload: object) -> DirtySnapshot | None: + """Decode a stored dirty snapshot. 
Invalid legacy/corrupt data is ignored.""" + if not isinstance(payload, dict): + return None + git_available = payload.get("git_available") + captured_at = payload.get("captured_at_utc") + raw_entries = payload.get("entries") + if not isinstance(git_available, bool) or not isinstance(captured_at, str): + return None + if not isinstance(raw_entries, dict): + return None + entries: list[DirtySnapshotEntry] = [] + for raw_path, raw_entry in raw_entries.items(): + if not isinstance(raw_path, str) or not isinstance(raw_entry, dict): + return None + try: + path = _normalize_path(raw_path) + except ValueError: + return None + status_xy = raw_entry.get("status_xy") + digest = raw_entry.get("digest") + digest_status = raw_entry.get("digest_status") + if not isinstance(status_xy, str) or not isinstance(digest_status, str): + return None + if digest is not None and not isinstance(digest, str): + return None + entries.append( + DirtySnapshotEntry( + path=path, + status_xy=status_xy[:2].ljust(2), + digest=digest, + digest_status=digest_status, + ) + ) + return DirtySnapshot( + git_available=git_available, + captured_at_utc=captured_at, + entries=tuple(sorted(entries, key=lambda entry: entry.path)), + ) + + +def workspace_dirty_summary(*, root: Path) -> dict[str, object]: + """Repo-level dirty summary for list_workspace (no scoped blocking).""" + dirty_result = collect_dirty_paths(root) + if not dirty_result.git_available: + return { + "git_available": False, + "dirty_paths_count": 0, + "dirty_paths_sample": [], + "sample_truncated": False, + } + sample, truncated = _bounded_sample(dirty_result.dirty_paths) + return { + "git_available": True, + "dirty_paths_count": len(dirty_result.dirty_paths), + "dirty_paths_sample": list(sample), + "sample_truncated": truncated, + } + + +def _declared_scope_sets( + allowed_files: Sequence[str], + allowed_related: Sequence[str] | None, +) -> tuple[set[str], set[str], set[str]]: + blocking_scope = {_normalize_path(path) for path in 
allowed_files if path.strip()} + related_scope = { + _normalize_path(path) for path in (allowed_related or ()) if path.strip() + } - blocking_scope + return blocking_scope, related_scope, blocking_scope | related_scope + + +def _iter_foreign_intent_scope_matches( + *, + dirty_paths: Sequence[str], + store: WorkspaceIntentStore, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None, +) -> Iterator[tuple[WorkspaceIntentRecord, str, tuple[str, ...]]]: + if not dirty_paths: + return + now = utc_now() + for record in store.list_records_for_hygiene(): + if _skip_foreign_dirty_record( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + own_intent_id=own_intent_id, + ): + continue + foreign_allowed, _, _ = _scope_all_sets(record.scope) + ownership = classify_intent_ownership( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + now=now, + ) + if ownership not in _FOREIGN_DIRTY_OWNERSHIP: + continue + matched = tuple( + sorted( + path for path in dirty_paths if _path_in_scope(path, foreign_allowed) + ) + ) + if matched: + yield record, ownership.value, matched + + +def evaluate_scoped_hygiene( + *, + root: Path, + allowed_files: Sequence[str], + allowed_related: Sequence[str] | None = None, + store: WorkspaceIntentStore, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None = None, +) -> WorkspaceHygieneResult: + """Evaluate scoped hygiene for start/finish workflow responses.""" + blocking_scope, _, evaluation_scope = _declared_scope_sets( + allowed_files, + allowed_related, + ) + dirty_result = collect_dirty_paths( + root, + scoped_paths=tuple(sorted(evaluation_scope)) if evaluation_scope else None, + ) + if not dirty_result.git_available: + return WorkspaceHygieneResult( + git_available=False, + dirty_paths=(), + dirty_paths_in_scope=(), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=False, + ) + dirty_in_blocking = tuple( + sorted( + path + for path in dirty_result.dirty_paths + if 
def finish_hygiene_check(
    *,
    root: Path,
    allowed_files: Sequence[str],
    allowed_related: Sequence[str] | None,
    resolved_files: Sequence[str],
    store: WorkspaceIntentStore,
    own_pid: int,
    own_start_epoch: int,
    own_intent_id: str,
    start_dirty_snapshot: DirtySnapshot | None = None,
    strict_finish: bool | None = None,
) -> WorkspaceHygieneResult:
    """Finish-time hygiene gate against declared scope and git evidence.

    Args:
        root: Repository root passed to the git collectors.
        allowed_files: Declared (blocking) scope of the intent.
        allowed_related: Optional related scope; together with
            ``allowed_files`` it forms the declared scope.
        resolved_files: Files the agent reports as its patch; after
            normalization they are the "evidence" set.
        store: Intent store consulted for foreign intents.
        own_pid, own_start_epoch, own_intent_id: Identity of the calling
            intent, forwarded to the foreign-intent helpers.
        start_dirty_snapshot: Dirty snapshot taken at start, if any; forwarded
            to the attribution helper and to the snapshot-status helper.
        strict_finish: Forwarded unchanged to ``_finish_block_reason``.
            NOTE(review): presumably enables blocking on unattributed
            out-of-scope dirt - confirm against that helper.

    Returns:
        A ``WorkspaceHygieneResult``. When git is unavailable for either the
        scoped evaluation or the fresh snapshot, the scoped evaluation result
        is returned as-is, without finish-specific fields.
    """
    hygiene = evaluate_scoped_hygiene(
        root=root,
        allowed_files=allowed_files,
        allowed_related=allowed_related,
        store=store,
        own_pid=own_pid,
        own_start_epoch=own_start_epoch,
        own_intent_id=own_intent_id,
    )
    if not hygiene.git_available:
        return hygiene
    # A second, independent git query: the scoped evaluation above collected
    # its own dirty paths; the snapshot supplies the full path list.
    current_snapshot = collect_dirty_snapshot(root)
    if not current_snapshot.git_available:
        return hygiene
    all_dirty_paths = current_snapshot.paths
    evidence = {_normalize_path(path) for path in resolved_files if path.strip()}
    blocking_scope, related_scope, declared_scope = _declared_scope_sets(
        allowed_files,
        allowed_related,
    )
    # Partition every dirty path by membership in the declared scope.
    dirty_in_declared = tuple(
        sorted(path for path in all_dirty_paths if _path_in_scope(path, declared_scope))
    )
    dirty_outside_declared = tuple(
        sorted(
            path for path in all_dirty_paths if not _path_in_scope(path, declared_scope)
        )
    )
    foreign_attributed_outside = _foreign_attributed_dirty_paths(
        dirty_paths=dirty_outside_declared,
        store=store,
        own_pid=own_pid,
        own_start_epoch=own_start_epoch,
        own_intent_id=own_intent_id,
    )
    attribution = _dirty_attribution(
        dirty_paths=all_dirty_paths,
        evidence=evidence,
        blocking_scope=blocking_scope,
        related_scope=related_scope,
        declared_scope=declared_scope,
        current_snapshot=current_snapshot,
        start_dirty_snapshot=start_dirty_snapshot,
        foreign_attributed_outside=foreign_attributed_outside,
    )
    # Derive the per-classification path lists from the attribution records.
    new_unattributed = _classified_paths(attribution, "new_unattributed_unscoped_dirty")
    modified_unattributed = _classified_paths(
        attribution,
        "modified_unattributed_unscoped_dirty",
    )
    unknown_unattributed = _classified_paths(
        attribution,
        "unknown_unattributed_unscoped_dirty",
    )
    preexisting_unscoped = _classified_paths(attribution, "preexisting_unscoped_dirty")
    unattributed_unscoped = tuple(
        sorted(new_unattributed + modified_unattributed + unknown_unattributed)
    )
    # In-scope dirt the agent did not list among its resolved files.
    unacknowledged = tuple(sorted(set(dirty_in_declared) - evidence))
    # Scope check covers only the agent's declared patch (its evidence).
    # Out-of-scope unattributed dirt is external context, not part of this
    # patch's scope assertion, so it must not be fed into the scope check
    # (doing so would mislabel a peer's dirt as a scope violation).
    files_for_scope_check = tuple(sorted(evidence))
    # Finish blocks ONLY on proven problems with the agent's own patch:
    # in-scope dirt missing from evidence, or a live foreign intent
    # overlapping the declared scope. New/modified/unknown unattributed dirt
    # outside the declared scope is non-blocking advisory (surfaced via
    # dirty_paths_outside_scope and the attribution detail).
    # NOTE(review): unattributed_unscoped and strict_finish are still passed
    # below, so the helper may block on them in strict mode - confirm.
    finish_block_reason = _finish_block_reason(
        unacknowledged=unacknowledged,
        foreign_dirty_overlaps=hygiene.foreign_dirty_overlaps,
        unattributed_unscoped=unattributed_unscoped,
        strict_finish=strict_finish,
    )
    return WorkspaceHygieneResult(
        git_available=hygiene.git_available,
        dirty_paths=all_dirty_paths,
        dirty_paths_in_scope=dirty_in_declared,
        dirty_paths_outside_scope=dirty_outside_declared,
        foreign_dirty_overlaps=hygiene.foreign_dirty_overlaps,
        blocks_edit=hygiene.blocks_edit,
        unacknowledged_dirty_in_scope=unacknowledged,
        # Legacy alias retained for one contract cycle. These paths are
        # unattributed, not proven to be owned by the current agent.
        own_unscoped_dirty=unattributed_unscoped,
        unattributed_unscoped_dirty=unattributed_unscoped,
        preexisting_unscoped_dirty=preexisting_unscoped,
        new_unattributed_unscoped_dirty=new_unattributed,
        modified_unattributed_unscoped_dirty=modified_unattributed,
        unknown_unattributed_unscoped_dirty=unknown_unattributed,
        foreign_attributed_outside_scope=tuple(sorted(foreign_attributed_outside)),
        dirty_attribution=attribution,
        dirty_snapshot=current_snapshot,
        dirty_snapshot_status=_snapshot_status(start_dirty_snapshot),
        files_for_scope_check=files_for_scope_check,
        blocks_finish=finish_block_reason is not None,
        finish_block_reason=finish_block_reason,
    )
related_scope=related_scope, + declared_scope=declared_scope, + ) + evidence_state = "present" if path in evidence else "absent" + start_state = _dirty_start_state( + current_entries.get(path), + start_entries.get(path), + snapshot=start_dirty_snapshot, + ) + intent_attribution = ( + "foreign_active_or_stale" if path in foreign_attributed_outside else "none" + ) + classification, blocking = _dirty_classification( + scope_relation=scope_relation, + evidence_state=evidence_state, + start_state=start_state, + intent_attribution=intent_attribution, + ) + items.append( + DirtyAttribution( + path=path, + scope_relation=scope_relation, + evidence=evidence_state, + start_state=start_state, + intent_attribution=intent_attribution, + classification=classification, + blocking=blocking, + ) + ) + return tuple(items) + + +def _scope_relation( + path: str, + *, + blocking_scope: set[str], + related_scope: set[str], + declared_scope: set[str], +) -> str: + if _path_in_scope(path, blocking_scope): + return "own_allowed" + if _path_in_scope(path, related_scope): + return "own_related" + if _path_in_scope(path, declared_scope): + return "declared" + return "outside" + + +def _dirty_start_state( + current: DirtySnapshotEntry | None, + start: DirtySnapshotEntry | None, + *, + snapshot: DirtySnapshot | None, +) -> str: + if snapshot is None: + return "unknown" + if start is None: + return "absent" + if current is None: + return "cleaned" + if start.digest_status != "ok" or current.digest_status != "ok": + return "unknown" + if start.digest == current.digest and start.status_xy == current.status_xy: + return "present_same" + return "present_changed" + + +def _dirty_classification( + *, + scope_relation: str, + evidence_state: str, + start_state: str, + intent_attribution: str, +) -> tuple[str, bool]: + if scope_relation != "outside": + if evidence_state == "absent": + return "missing_evidence", True + return "declared_scope_dirty", False + # Scope-aware finish hygiene: finish verifies 
the agent's OWN patch + # (declared scope ∩ evidence), it does not police the whole dirty tree. + # Dirt outside the declared scope with no foreign-intent attribution is + # surfaced as a non-blocking advisory, not a block — otherwise a peer + # agent's undeclared concurrent edits (or undigestible directory paths) + # would fail an innocent finisher. The write-gate is the primary defense + # against unmanaged writes; finish hygiene is only the backstop. + if intent_attribution == "foreign_active_or_stale": + return "foreign_attributed_outside_scope", False + if start_state == "present_same": + return "preexisting_unscoped_dirty", False + if start_state == "absent": + return "new_unattributed_unscoped_dirty", False + if start_state == "present_changed": + return "modified_unattributed_unscoped_dirty", False + return "unknown_unattributed_unscoped_dirty", False + + +def _finish_block_reason( + *, + unacknowledged: Sequence[str], + foreign_dirty_overlaps: Sequence[ForeignDirtyOverlap], + unattributed_unscoped: Sequence[str], + strict_finish: bool | None, +) -> str | None: + if unacknowledged: + return "missing_evidence" + if foreign_dirty_overlaps: + return "foreign_dirty_overlap" + if _strict_finish_enabled(strict_finish) and unattributed_unscoped: + return "own_unscoped_dirty" + return None + + +def _strict_finish_enabled(value: bool | None) -> bool: + if value is not None: + return value + return os.environ.get(STRICT_FINISH_ENV, "").strip().lower() in _TRUTHY_ENV_VALUES + + +def _classified_paths( + attribution: Sequence[DirtyAttribution], + classification: str, +) -> tuple[str, ...]: + return tuple( + sorted( + item.path for item in attribution if item.classification == classification + ) + ) + + +def _snapshot_status(snapshot: DirtySnapshot | None) -> str: + if snapshot is None: + return "missing_legacy_conservative" + if not snapshot.git_available: + return "git_unavailable" + return "available" + + +def _skip_foreign_dirty_record( + record: 
WorkspaceIntentRecord, + *, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None, +) -> bool: + if ( + record.agent_pid == own_pid and record.agent_start_epoch == own_start_epoch + ) or (own_intent_id is not None and record.intent_id == own_intent_id): + return True + if is_terminal_workspace_intent_status(record.status): + return True + return record.status == WorkspaceIntentStatus.QUEUED.value + + +def _foreign_attributed_dirty_paths( + *, + dirty_paths: Sequence[str], + store: WorkspaceIntentStore, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None, +) -> frozenset[str]: + """Dirty paths outside own scope that belong to a foreign active/stale intent.""" + attributed: set[str] = set() + for _record, _ownership, matched in _iter_foreign_intent_scope_matches( + dirty_paths=dirty_paths, + store=store, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + own_intent_id=own_intent_id, + ): + attributed.update(matched) + return frozenset(attributed) + + +def _foreign_dirty_overlaps( + *, + dirty_paths: Sequence[str], + store: WorkspaceIntentStore, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None, +) -> tuple[ForeignDirtyOverlap, ...]: + overlaps: list[ForeignDirtyOverlap] = [] + for record, ownership, matched in _iter_foreign_intent_scope_matches( + dirty_paths=dirty_paths, + store=store, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + own_intent_id=own_intent_id, + ): + overlaps.extend( + ForeignDirtyOverlap( + path=path, + foreign_intent_id=record.intent_id, + foreign_persisted_status=record.status, + foreign_ownership=ownership, + foreign_agent_label=record.agent_label, + message=( + f"{_BASE_DIRTY_SCOPE_MESSAGE} Foreign intent " + f"{record.intent_id} previously declared this path." 
+ ), + ) + for path in matched + ) + return tuple(sorted(overlaps, key=lambda item: (item.path, item.foreign_intent_id))) + + +def _dirty_paths_from_porcelain(output: str) -> tuple[str, ...]: + paths: set[str] = set() + for line in output.splitlines(): + if len(line) < 3: + continue + entry = line[3:].strip() + if not entry: + continue + if " -> " in entry: + old_path, new_path = entry.split(" -> ", 1) + paths.add(_normalize_path(old_path)) + paths.add(_normalize_path(new_path)) + continue + paths.add(_normalize_path(entry)) + return tuple(sorted(paths)) + + +def _dirty_entries_from_porcelain(output: str) -> tuple[tuple[str, str], ...]: + entries: dict[str, str] = {} + for line in output.splitlines(): + if len(line) < 3: + continue + status_xy = line[:2] + entry = line[3:].strip() + if not entry: + continue + if " -> " in entry: + old_path, new_path = entry.split(" -> ", 1) + entries[_normalize_path(old_path)] = status_xy + entries[_normalize_path(new_path)] = status_xy + continue + entries[_normalize_path(entry)] = status_xy + return tuple(sorted(entries.items())) + + +def _dirty_entry_digest( + root: Path, + path: str, + status_xy: str, +) -> tuple[str | None, str]: + """Return a stable digest for the dirty content, or mark it unavailable.""" + if status_xy == "??": + return _untracked_file_digest(root, path) + cached = _git_diff_bytes(root, ["diff", "--cached", "--binary", "--", path]) + worktree = _git_diff_bytes(root, ["diff", "--binary", "--", path]) + if cached is None or worktree is None: + return None, "unavailable" + digest = hashlib.sha256() + digest.update(status_xy.encode("utf-8", "surrogateescape")) + digest.update(b"\0") + digest.update(path.encode("utf-8", "surrogateescape")) + digest.update(b"\0cached\0") + digest.update(cached) + digest.update(b"\0worktree\0") + digest.update(worktree) + return digest.hexdigest(), "ok" + + +def _git_diff_bytes(root: Path, args: Sequence[str]) -> bytes | None: + try: + completed = subprocess.run( + ["git", *args], 
+ cwd=root, + check=True, + capture_output=True, + timeout=30, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return None + stdout = completed.stdout + if isinstance(stdout, bytes): + return stdout + if isinstance(stdout, str): + return stdout.encode("utf-8", "surrogateescape") + return None + + +def _untracked_file_digest(root: Path, path: str) -> tuple[str | None, str]: + target = (root / path).resolve() + try: + target.relative_to(root.resolve()) + except ValueError: + return None, "unavailable" + if not target.is_file(): + return None, "unavailable" + digest = hashlib.sha256() + digest.update(b"untracked\0") + digest.update(path.encode("utf-8", "surrogateescape")) + digest.update(b"\0") + try: + with target.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + except OSError: + return None, "unavailable" + return digest.hexdigest(), "ok" + + +def _git_available(root: Path) -> bool: + try: + completed = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + cwd=root, + check=True, + capture_output=True, + text=True, + timeout=10, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired): + return False + return completed.stdout.strip().lower() == "true" + + +def _normalize_path(path: str) -> str: + cleaned = path.strip().replace("\\", "/") + if cleaned.startswith("./"): + cleaned = cleaned[2:] + cleaned = cleaned.rstrip("/") + if cleaned == ".": + return "" + if ".." 
in Path(cleaned).parts: + from .messages import errors as err_msgs + + raise ValueError(err_msgs.PATH_TRAVERSAL.format(path=path)) + return cleaned + + +def _path_in_scope(path: str, scope_paths: set[str]) -> bool: + return any( + path == candidate or path.startswith(f"{candidate}/") + for candidate in scope_paths + ) + + +def _bounded_sample( + paths: Sequence[str], + *, + limit: int = _DIRTY_SUMMARY_SAMPLE_LIMIT, +) -> tuple[tuple[str, ...], bool]: + if len(paths) <= limit: + return tuple(paths), False + return tuple(paths[:limit]), True + + +def hygiene_blocks_start_edit( + hygiene: WorkspaceHygieneResult, + *, + dirty_scope_policy: str, +) -> bool: + """Return whether scoped hygiene blocks edit permission at start.""" + return hygiene.blocks_edit and not ( + dirty_scope_policy == DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP + and not hygiene.foreign_dirty_overlaps + ) + + +__all__ = [ + "DIRTY_SCOPE_POLICY_BLOCK", + "DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP", + "STRICT_FINISH_ENV", + "VALID_DIRTY_SCOPE_POLICIES", + "DirtyAttribution", + "DirtyPathsResult", + "DirtySnapshot", + "DirtySnapshotEntry", + "ForeignDirtyOverlap", + "WorkspaceHygieneResult", + "collect_dirty_paths", + "collect_dirty_snapshot", + "dirty_snapshot_from_payload", + "evaluate_scoped_hygiene", + "finish_hygiene_check", + "hygiene_blocks_start_edit", + "workspace_dirty_summary", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_contract.py b/codeclone/surfaces/mcp/_workspace_intent_contract.py new file mode 100644 index 00000000..f85bae64 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_contract.py @@ -0,0 +1,116 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import hmac +import re +from collections.abc import Mapping +from dataclasses import dataclass +from typing import Final + +from ...cache.integrity import canonical_json + +LEGACY_REGISTRY_VERSION: Final = "1" +REGISTRY_VERSION: Final = "2" +DEFAULT_TTL_SECONDS: Final = 3600 +MIN_TTL_SECONDS: Final = 60 +MAX_TTL_SECONDS: Final = 86400 +DEFAULT_LEASE_SECONDS: Final = 300 +MIN_LEASE_SECONDS: Final = 60 +MAX_LEASE_SECONDS: Final = 600 +_HEX_DIGEST_LENGTH: Final = 64 +_SAFE_INTENT_ID_RE: Final = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$") + + +@dataclass(frozen=True, slots=True) +class WorkspaceIntentRecord: + intent_id: str + agent_pid: int + agent_start_epoch: int + agent_label: str + run_id: str + declared_at_utc: str + expires_at_utc: str + ttl_seconds: int + status: str + intent: str + scope: dict[str, object] + scope_digest: str + blast_radius_summary: dict[str, object] + lease_renewed_at_utc: str + lease_seconds: int + report_digest: str + dirty_snapshot: dict[str, object] | None = None + + def unsigned_payload(self) -> dict[str, object]: + payload: dict[str, object] = { + "registry_version": REGISTRY_VERSION, + "intent_id": self.intent_id, + "agent_pid": self.agent_pid, + "agent_start_epoch": self.agent_start_epoch, + "agent_label": self.agent_label, + "run_id": self.run_id, + "declared_at_utc": self.declared_at_utc, + "expires_at_utc": self.expires_at_utc, + "ttl_seconds": self.ttl_seconds, + "status": self.status, + "intent": self.intent, + "scope": self.scope, + "scope_digest": self.scope_digest, + "blast_radius_summary": self.blast_radius_summary, + "lease_renewed_at_utc": self.lease_renewed_at_utc, + "lease_seconds": self.lease_seconds, + "report_digest": self.report_digest, + } + if self.dirty_snapshot is not None: + payload["dirty_snapshot"] = self.dirty_snapshot + return payload + + +def compute_scope_digest(scope: 
Mapping[str, object]) -> str: + return hashlib.sha256(canonical_json(dict(scope)).encode("utf-8")).hexdigest() + + +def compute_intent_digest(data: Mapping[str, object]) -> str: + digestable = {key: value for key, value in data.items() if key != "integrity"} + return hashlib.sha256(canonical_json(digestable).encode("utf-8")).hexdigest() + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _is_hex_digest(value: object) -> bool: + if not isinstance(value, str) or len(value) != _HEX_DIGEST_LENGTH: + return False + return all(char in "0123456789abcdef" for char in value.lower()) + + +def verify_intent_integrity(data: Mapping[str, object]) -> bool: + integrity = _as_mapping(data.get("integrity")) + stored = integrity.get("payload_sha256") + if not _is_hex_digest(stored): + return False + expected = compute_intent_digest(data) + return hmac.compare_digest(str(stored), expected) + + +__all__ = [ + "DEFAULT_LEASE_SECONDS", + "DEFAULT_TTL_SECONDS", + "LEGACY_REGISTRY_VERSION", + "MAX_LEASE_SECONDS", + "MAX_TTL_SECONDS", + "MIN_LEASE_SECONDS", + "MIN_TTL_SECONDS", + "REGISTRY_VERSION", + "WorkspaceIntentRecord", + "compute_intent_digest", + "compute_scope_digest", + "verify_intent_integrity", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_lifecycle.py b/codeclone/surfaces/mcp/_workspace_intent_lifecycle.py new file mode 100644 index 00000000..1cc1ad4f --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_lifecycle.py @@ -0,0 +1,112 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from datetime import datetime, timedelta, timezone +from enum import Enum + +from ._workspace_intent_contract import WorkspaceIntentRecord + + +class WorkspaceIntentStatus(str, Enum): + ACTIVE = "active" + QUEUED = "queued" + CLEAN = "clean" + EXPANDED = "expanded" + VIOLATED = "violated" + EXPIRED = "expired" + ORPHANED = "orphaned" + + +class PidLiveness(str, Enum): + ALIVE = "alive" + DEAD = "dead" + UNKNOWN = "unknown" + + +TERMINAL_WORKSPACE_INTENT_STATUSES: frozenset[str] = frozenset( + { + WorkspaceIntentStatus.CLEAN.value, + WorkspaceIntentStatus.EXPIRED.value, + WorkspaceIntentStatus.ORPHANED.value, + } +) + + +def is_terminal_workspace_intent_status(status: str) -> bool: + return status in TERMINAL_WORKSPACE_INTENT_STATUSES + + +def gc_status_for_reason(reason: str) -> str: + if reason == "orphaned": + return WorkspaceIntentStatus.ORPHANED.value + return WorkspaceIntentStatus.EXPIRED.value + + +def utc_now() -> datetime: + return datetime.now(timezone.utc).replace(microsecond=0) + + +def parse_utc(value: str) -> datetime | None: + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if parsed.tzinfo is None: + return None + return parsed.astimezone(timezone.utc) + + +def pid_liveness(pid: int) -> PidLiveness: + if pid <= 0: + return PidLiveness.DEAD + try: + os.kill(pid, 0) + except ProcessLookupError: + return PidLiveness.DEAD + except PermissionError: + return PidLiveness.UNKNOWN + except OSError: + return PidLiveness.ALIVE + return PidLiveness.ALIVE + + +def is_pid_alive(pid: int) -> bool: + return pid_liveness(pid) == PidLiveness.ALIVE + + +def is_orphaned(record: WorkspaceIntentRecord) -> bool: + return pid_liveness(record.agent_pid) == PidLiveness.DEAD + + +def lease_expiry(record: WorkspaceIntentRecord) -> datetime | None: + renewed_at = 
parse_utc(record.lease_renewed_at_utc) + if renewed_at is None: + return None + return renewed_at + timedelta(seconds=record.lease_seconds) + + +def is_lease_expired(record: WorkspaceIntentRecord) -> bool: + expiry = lease_expiry(record) + return expiry is None or expiry <= utc_now() + + +__all__ = [ + "TERMINAL_WORKSPACE_INTENT_STATUSES", + "PidLiveness", + "WorkspaceIntentStatus", + "gc_status_for_reason", + "is_lease_expired", + "is_orphaned", + "is_pid_alive", + "is_terminal_workspace_intent_status", + "lease_expiry", + "parse_utc", + "pid_liveness", + "utc_now", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_models.py b/codeclone/surfaces/mcp/_workspace_intent_models.py new file mode 100644 index 00000000..5cd94263 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_models.py @@ -0,0 +1,443 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import Annotated, Literal + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + PositiveInt, + ValidationError, + field_validator, + model_validator, +) +from typing_extensions import Self + +from ._workspace_intent_contract import ( + DEFAULT_LEASE_SECONDS, + LEGACY_REGISTRY_VERSION, + MAX_LEASE_SECONDS, + MIN_LEASE_SECONDS, + WorkspaceIntentRecord, + compute_intent_digest, + compute_scope_digest, +) + +_HEX_DIGEST_LENGTH = 64 +_SAFE_INTENT_ID_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$") +_VALID_STATUSES = frozenset( + { + "active", + "queued", + "clean", + "expanded", + "violated", + "expired", + "orphaned", + } +) +_VALID_DIRTY_DIGEST_STATUSES = frozenset({"ok", "unavailable"}) + + +def _scope_path_violation(path: str) -> str | None: + if Path(path).is_absolute() or ".." 
in Path(path).parts: + return "scope paths must be repo-relative without traversal" + return None + + +def _normalize_path_list(value: list[str], *, required: bool) -> list[str]: + paths: list[str] = [] + for item in value: + path = item.replace("\\", "/").strip() + if not path: + continue + violation = _scope_path_violation(path) + if violation is not None: + raise ValueError(violation) + paths.append(path.rstrip("/")) + deduped = sorted(set(paths)) + if required and not deduped: + raise ValueError("allowed_files must not be empty") + return deduped + + +def _parse_utc(value: str) -> datetime | None: + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if parsed.tzinfo is None: + return None + return parsed.astimezone(timezone.utc) + + +def _is_hex_digest(value: str) -> bool: + if len(value) != _HEX_DIGEST_LENGTH: + return False + return all(char in "0123456789abcdef" for char in value.lower()) + + +def _validate_dirty_snapshot_payload( + value: dict[str, object] | None, +) -> dict[str, object] | None: + if value is None: + return None + git_available = value.get("git_available") + captured_at = value.get("captured_at_utc") + entries = value.get("entries") + if not isinstance(git_available, bool): + raise ValueError("dirty_snapshot.git_available must be boolean") + if not isinstance(captured_at, str) or _parse_utc(captured_at) is None: + raise ValueError("dirty_snapshot.captured_at_utc must be valid UTC ISO-8601") + if not isinstance(entries, dict): + raise ValueError("dirty_snapshot.entries must be an object") + for raw_path, raw_entry in entries.items(): + if not isinstance(raw_path, str) or not raw_path: + raise ValueError("dirty_snapshot entry path must be a non-empty string") + violation = _scope_path_violation(raw_path) + if violation is not None: + raise ValueError(violation) + if not isinstance(raw_entry, dict): + raise ValueError("dirty_snapshot entry must be an object") + status_xy = 
raw_entry.get("status_xy") + digest = raw_entry.get("digest") + digest_status = raw_entry.get("digest_status") + if not isinstance(status_xy, str) or len(status_xy) != 2: + raise ValueError("dirty_snapshot.status_xy must be two characters") + if digest is not None and ( + not isinstance(digest, str) or not _is_hex_digest(digest) + ): + raise ValueError("dirty_snapshot.digest must be null or 64-char hex") + if ( + not isinstance(digest_status, str) + or digest_status not in _VALID_DIRTY_DIGEST_STATUSES + ): + raise ValueError("dirty_snapshot.digest_status is invalid") + if digest_status == "ok" and digest is None: + raise ValueError("dirty_snapshot.digest is required when status is ok") + return value + + +class IntentScopeModel(BaseModel): + model_config = ConfigDict(extra="forbid", frozen=True) + + allowed_files: list[str] + allowed_related: list[str] = Field(default_factory=list) + forbidden: list[str] = Field(default_factory=list) + + @field_validator("allowed_files") + @classmethod + def validate_allowed_files(cls, value: list[str]) -> list[str]: + return _normalize_path_list(value, required=True) + + @field_validator("allowed_related", "forbidden") + @classmethod + def validate_optional_paths(cls, value: list[str]) -> list[str]: + return _normalize_path_list(value, required=False) + + +class IntentIntegrityModel(BaseModel): + model_config = ConfigDict(extra="forbid", frozen=True) + + payload_sha256: str + + @field_validator("payload_sha256") + @classmethod + def validate_digest(cls, value: str) -> str: + if not _is_hex_digest(value): + msg = "payload_sha256 must be a 64-char hex digest" + raise ValueError(msg) + return value.lower() + + +class WorkspaceIntentDocument(BaseModel): + """Integrity-protected on-disk / SQLite JSON payload (registry v1/v2).""" + + model_config = ConfigDict(extra="forbid") + + registry_version: Literal["1", "2"] + intent_id: Annotated[str, Field(min_length=1, max_length=128)] + agent_pid: PositiveInt + agent_start_epoch: PositiveInt + 
agent_label: str = "" + run_id: Annotated[str, Field(min_length=1)] + declared_at_utc: Annotated[str, Field(min_length=1)] + expires_at_utc: Annotated[str, Field(min_length=1)] + ttl_seconds: PositiveInt + status: str + intent: Annotated[str, Field(min_length=1)] + scope: IntentScopeModel + scope_digest: str + blast_radius_summary: dict[str, object] + lease_renewed_at_utc: str | None = None + lease_seconds: PositiveInt | None = None + report_digest: str | None = None + dirty_snapshot: dict[str, object] | None = None + integrity: IntentIntegrityModel + + @field_validator("dirty_snapshot") + @classmethod + def validate_dirty_snapshot( + cls, + value: dict[str, object] | None, + ) -> dict[str, object] | None: + return _validate_dirty_snapshot_payload(value) + + def _contract_violations(self) -> tuple[str, ...]: + violations: list[str] = [] + if _SAFE_INTENT_ID_RE.match(self.intent_id) is None: + violations.append("intent_id contains unsafe characters") + if not _is_hex_digest(self.scope_digest): + violations.append("scope_digest must be a 64-char hex digest") + if self.status not in _VALID_STATUSES: + violations.append(f"invalid workspace intent status: {self.status}") + if self.registry_version != LEGACY_REGISTRY_VERSION and ( + self.lease_renewed_at_utc is None + or self.lease_seconds is None + or self.report_digest is None + ): + violations.append( + "v2 registry records require lease and report_digest fields" + ) + + lease_renewed_at, lease_seconds, _report_digest = self.normalized_lease_fields() + if self.registry_version != LEGACY_REGISTRY_VERSION and ( + lease_seconds < MIN_LEASE_SECONDS or lease_seconds > MAX_LEASE_SECONDS + ): + violations.append("lease_seconds out of allowed range") + + for timestamp in ( + self.declared_at_utc, + self.expires_at_utc, + lease_renewed_at, + ): + if _parse_utc(timestamp) is None: + violations.append("timestamp fields must be valid UTC ISO-8601") + break + + scope_payload = self.scope.model_dump(mode="json") + if 
compute_scope_digest(scope_payload) != self.scope_digest.lower(): + violations.append("scope_digest does not match scope payload") + + expected = compute_intent_digest(unsigned_document_payload(self)) + if expected != self.integrity.payload_sha256: + violations.append("integrity.payload_sha256 mismatch") + return tuple(violations) + + @model_validator(mode="after") + def validate_contract(self) -> Self: + violations = self._contract_violations() + if violations: + raise ValueError(violations[0]) + return self + + def normalized_lease_fields(self) -> tuple[str, int, str]: + if self.registry_version == LEGACY_REGISTRY_VERSION: + return ( + self.lease_renewed_at_utc or self.declared_at_utc, + int(self.lease_seconds or DEFAULT_LEASE_SECONDS), + self.report_digest or "", + ) + assert self.lease_renewed_at_utc is not None + assert self.lease_seconds is not None + assert self.report_digest is not None + return self.lease_renewed_at_utc, self.lease_seconds, self.report_digest + + +class WorkspaceIntentRowModel(BaseModel): + """Typed SQLite row for workspace_intents.""" + + model_config = ConfigDict(extra="forbid", frozen=True) + + agent_pid: PositiveInt + agent_start_epoch: PositiveInt + intent_id: Annotated[str, Field(min_length=1, max_length=128)] + declared_at_utc: Annotated[str, Field(min_length=1)] + payload_json: Annotated[str, Field(min_length=2)] + updated_at_utc: Annotated[str, Field(min_length=1)] + closed_at_utc: str | None = None + + @field_validator("intent_id") + @classmethod + def validate_intent_id(cls, value: str) -> str: + if _SAFE_INTENT_ID_RE.match(value) is None: + msg = "intent_id contains unsafe characters" + raise ValueError(msg) + return value + + @field_validator("payload_json") + @classmethod + def validate_payload_json(cls, value: str) -> str: + if parse_workspace_document_json(value) is None: + msg = "payload_json is not a valid workspace intent document" + raise ValueError(msg) + return value + + @classmethod + def from_record_fields( + cls, + 
*, + agent_pid: int, + agent_start_epoch: int, + intent_id: str, + declared_at_utc: str, + payload_json: str, + updated_at_utc: str, + closed_at_utc: str | None = None, + ) -> WorkspaceIntentRowModel: + return cls( + agent_pid=agent_pid, + agent_start_epoch=agent_start_epoch, + intent_id=intent_id, + declared_at_utc=declared_at_utc, + payload_json=payload_json, + updated_at_utc=updated_at_utc, + closed_at_utc=closed_at_utc, + ) + + +def unsigned_document_payload(document: WorkspaceIntentDocument) -> dict[str, object]: + """Build the integrity-signed payload shape for registry v1/v2 wire records.""" + scope_payload = document.scope.model_dump(mode="json") + payload: dict[str, object] = { + "registry_version": document.registry_version, + "intent_id": document.intent_id, + "agent_pid": document.agent_pid, + "agent_start_epoch": document.agent_start_epoch, + "agent_label": document.agent_label, + "run_id": document.run_id, + "declared_at_utc": document.declared_at_utc, + "expires_at_utc": document.expires_at_utc, + "ttl_seconds": document.ttl_seconds, + "status": document.status, + "intent": document.intent, + "scope": scope_payload, + "scope_digest": document.scope_digest, + "blast_radius_summary": document.blast_radius_summary, + } + if document.registry_version != LEGACY_REGISTRY_VERSION: + lease_renewed_at_utc, lease_seconds, report_digest = ( + document.normalized_lease_fields() + ) + payload["lease_renewed_at_utc"] = lease_renewed_at_utc + payload["lease_seconds"] = lease_seconds + payload["report_digest"] = report_digest + if document.dirty_snapshot is not None: + payload["dirty_snapshot"] = document.dirty_snapshot + return payload + + +def parse_workspace_document(data: object) -> WorkspaceIntentDocument | None: + if not isinstance(data, dict): + return None + try: + return WorkspaceIntentDocument.model_validate(data) + except (ValidationError, TypeError, ValueError): + return None + + +def parse_workspace_document_json(payload_json: str) -> 
WorkspaceIntentDocument | None: + try: + payload = json.loads(payload_json) + except json.JSONDecodeError: + return None + return parse_workspace_document(payload) + + +def document_to_record_fields(document: WorkspaceIntentDocument) -> dict[str, object]: + lease_renewed_at_utc, lease_seconds, report_digest = ( + document.normalized_lease_fields() + ) + scope_payload = document.scope.model_dump(mode="json") + return { + "intent_id": document.intent_id, + "agent_pid": document.agent_pid, + "agent_start_epoch": document.agent_start_epoch, + "agent_label": document.agent_label, + "run_id": document.run_id, + "declared_at_utc": document.declared_at_utc, + "expires_at_utc": document.expires_at_utc, + "ttl_seconds": document.ttl_seconds, + "status": document.status, + "intent": document.intent, + "scope": scope_payload, + "scope_digest": document.scope_digest, + "blast_radius_summary": document.blast_radius_summary, + "lease_renewed_at_utc": lease_renewed_at_utc, + "lease_seconds": lease_seconds, + "report_digest": report_digest, + "dirty_snapshot": document.dirty_snapshot, + } + + +def record_from_document(document: WorkspaceIntentDocument) -> WorkspaceIntentRecord: + lease_renewed_at_utc, lease_seconds, report_digest = ( + document.normalized_lease_fields() + ) + scope_payload = document.scope.model_dump(mode="json") + return WorkspaceIntentRecord( + intent_id=document.intent_id, + agent_pid=document.agent_pid, + agent_start_epoch=document.agent_start_epoch, + agent_label=document.agent_label, + run_id=document.run_id, + declared_at_utc=document.declared_at_utc, + expires_at_utc=document.expires_at_utc, + ttl_seconds=document.ttl_seconds, + status=document.status, + intent=document.intent, + scope=scope_payload, + scope_digest=document.scope_digest, + blast_radius_summary=document.blast_radius_summary, + lease_renewed_at_utc=lease_renewed_at_utc, + lease_seconds=lease_seconds, + report_digest=report_digest, + dirty_snapshot=document.dirty_snapshot, + ) + + +def 
signed_payload_dict_from_record(record: object) -> dict[str, object]: + if not isinstance(record, WorkspaceIntentRecord): + msg = "record must be a WorkspaceIntentRecord" + raise TypeError(msg) + unsigned = record.unsigned_payload() + return { + **unsigned, + "integrity": {"payload_sha256": compute_intent_digest(unsigned)}, + } + + +def signed_payload_json_from_record(record: object) -> str: + payload = signed_payload_dict_from_record(record) + return json.dumps( + payload, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + default=str, + ) + + +__all__ = [ + "IntentIntegrityModel", + "IntentScopeModel", + "WorkspaceIntentDocument", + "WorkspaceIntentRowModel", + "document_to_record_fields", + "parse_workspace_document", + "parse_workspace_document_json", + "record_from_document", + "signed_payload_dict_from_record", + "signed_payload_json_from_record", + "unsigned_document_payload", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_paths.py b/codeclone/surfaces/mcp/_workspace_intent_paths.py new file mode 100644 index 00000000..f4bdf81b --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_paths.py @@ -0,0 +1,134 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Final + +from ...paths.workspace import REGISTRY_DIR_PARTS +from ...utils.json_io import read_json_object +from ._workspace_intent_contract import WorkspaceIntentRecord + +_SAFE_INTENT_ID_RE: Final = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$") + + +def registry_dir(root: Path) -> Path: + return root.joinpath(*REGISTRY_DIR_PARTS) + + +def intent_filename(*, pid: int, start_epoch: int, intent_id: str) -> str: + return f"{pid}-{start_epoch}-{intent_id}.json" + + +def intent_path( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> Path: + return registry_dir(root) / intent_filename( + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + + +def registry_files(root: Path) -> tuple[Path, ...]: + directory = registry_dir(root) + try: + return tuple( + path + for path in sorted(directory.glob("*.json")) + if is_safe_intent_path(path, directory) + ) + except OSError: + return () + + +def read_payload(path: Path) -> dict[str, object] | None: + try: + return read_json_object(path) + except (OSError, TypeError, ValueError): + return None + + +def unlink(path: Path) -> bool: + try: + path.unlink(missing_ok=True) + except OSError: + return False + return True + + +def record_sort_key(record: WorkspaceIntentRecord) -> tuple[str, int, str]: + return (record.declared_at_utc, record.agent_pid, record.intent_id) + + +def is_safe_intent_id(value: object) -> bool: + return isinstance(value, str) and _SAFE_INTENT_ID_RE.match(value) is not None + + +def is_safe_intent_path(expected: Path, registry: Path) -> bool: + try: + if not expected.is_absolute(): + return False + resolved = expected.resolve(strict=False) + resolved_registry = registry.resolve(strict=False) + if resolved != expected: + return False + if not resolved.is_relative_to(resolved_registry): + return False + name = 
expected.name + if not name.endswith(".json") or name.count("-") < 2: + return False + if expected.exists() and not expected.is_file(): + return False + except (OSError, ValueError): + return False + return True + + +def safe_remove_own_intent( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, +) -> bool: + try: + if not root.is_absolute(): + return False + registry = registry_dir(root) + expected = intent_path( + root=root, + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + if not is_safe_intent_path(expected, registry): + return False + expected.unlink(missing_ok=True) + except Exception: + return False + return True + + +__all__ = [ + "REGISTRY_DIR_PARTS", + "intent_filename", + "intent_path", + "is_safe_intent_id", + "is_safe_intent_path", + "read_payload", + "record_sort_key", + "registry_dir", + "registry_files", + "safe_remove_own_intent", + "unlink", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_pid.py b/codeclone/surfaces/mcp/_workspace_intent_pid.py new file mode 100644 index 00000000..3d8266e0 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_pid.py @@ -0,0 +1,38 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Process liveness checks for workspace intent coordination (leaf module).""" + +from __future__ import annotations + +from ._workspace_intent_lifecycle import ( + PidLiveness, +) +from ._workspace_intent_lifecycle import ( + is_pid_alive as _lifecycle_is_pid_alive, +) +from ._workspace_intent_lifecycle import ( + pid_liveness as _lifecycle_pid_liveness, +) + + +def is_agent_pid_alive(pid: int) -> bool: + return _lifecycle_is_pid_alive(pid) + + +_DEFAULT_IS_AGENT_PID_ALIVE = is_agent_pid_alive + + +def agent_pid_liveness(pid: int) -> PidLiveness: + # Existing tests and downstream shims sometimes monkeypatch the legacy + # boolean probe. Preserve that compatibility while production uses + # tri-state liveness. + if is_agent_pid_alive is not _DEFAULT_IS_AGENT_PID_ALIVE: + return PidLiveness.ALIVE if is_agent_pid_alive(pid) else PidLiveness.DEAD + return _lifecycle_pid_liveness(pid) + + +__all__ = ["agent_pid_liveness", "is_agent_pid_alive"] diff --git a/codeclone/surfaces/mcp/_workspace_intent_registry_lock.py b/codeclone/surfaces/mcp/_workspace_intent_registry_lock.py new file mode 100644 index 00000000..bd6745e9 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_registry_lock.py @@ -0,0 +1,46 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Cross-process exclusive lock for workspace intent registry I/O.""" + +from __future__ import annotations + +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path +from typing import Final + +from ...utils.file_lock import advisory_file_lock + +DEFAULT_REGISTRY_LOCK_TIMEOUT_SECONDS: Final[float] = 5.0 + + +class WorkspaceRegistryLockError(OSError): + """Raised when the workspace registry lock cannot be acquired in time.""" + + +@contextmanager +def workspace_registry_lock( + lock_path: Path, + *, + timeout_seconds: float = DEFAULT_REGISTRY_LOCK_TIMEOUT_SECONDS, +) -> Iterator[None]: + """Acquire an exclusive cross-process lock for registry mutations.""" + with advisory_file_lock( + lock_path, + timeout_seconds=timeout_seconds, + timeout_error=lambda path: WorkspaceRegistryLockError( + f"Timed out acquiring workspace registry lock at {path}" + ), + ): + yield + + +__all__ = [ + "DEFAULT_REGISTRY_LOCK_TIMEOUT_SECONDS", + "WorkspaceRegistryLockError", + "workspace_registry_lock", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_schema.py b/codeclone/surfaces/mcp/_workspace_intent_schema.py new file mode 100644 index 00000000..146d50ef --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_schema.py @@ -0,0 +1,171 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from ... 
import __version__ +from ...report.meta import current_report_timestamp_utc +from ...utils.sqlite_store import ( + get_meta_value, + initialize_schema_v1, +) + +INTENT_REGISTRY_SCHEMA_VERSION = "2" + +_INTENT_META_TABLE = "intent_registry_meta" + +_CREATE_INTENTS_SQL = """ +CREATE TABLE IF NOT EXISTS workspace_intents ( + agent_pid INTEGER NOT NULL, + agent_start_epoch INTEGER NOT NULL, + intent_id TEXT NOT NULL, + declared_at_utc TEXT NOT NULL, + payload_json TEXT NOT NULL, + closed_at_utc TEXT, + updated_at_utc TEXT NOT NULL, + PRIMARY KEY (agent_pid, agent_start_epoch, intent_id) +) +""" + +_CREATE_META_SQL = """ +CREATE TABLE IF NOT EXISTS intent_registry_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) +""" + +_INDEX_SQL = ( + "CREATE INDEX IF NOT EXISTS idx_workspace_intents_intent_id " + "ON workspace_intents(intent_id)", + "CREATE INDEX IF NOT EXISTS idx_workspace_intents_declared " + "ON workspace_intents(declared_at_utc, agent_pid, intent_id)", + "CREATE INDEX IF NOT EXISTS idx_workspace_intents_closed " + "ON workspace_intents(closed_at_utc)", +) + +_REQUIRED_INTENT_COLUMNS = frozenset( + { + "agent_pid", + "agent_start_epoch", + "intent_id", + "declared_at_utc", + "payload_json", + "closed_at_utc", + "updated_at_utc", + } +) + + +class IntentRegistrySchemaError(RuntimeError): + """Raised for unsupported or corrupt intent registry database schemas.""" + + +def open_intent_registry_db(path: Path) -> sqlite3.Connection: + from ...observability.sqlite_access import open_instrumented_sqlite_db + + return open_instrumented_sqlite_db(path, ensure_schema=ensure_schema) + + +def open_intent_registry_db_readonly(path: Path) -> sqlite3.Connection: + from ...observability.sqlite_access import open_instrumented_sqlite_db_readonly + + return open_instrumented_sqlite_db_readonly( + path, + validate_schema=_validate_readonly_schema, + ) + + +def ensure_schema(conn: sqlite3.Connection) -> None: + current = get_meta(conn, "schema_version") + if current is None: + 
create_schema_v1(conn) + elif current == "1": + _migrate_v1_to_v2(conn) + elif current != INTENT_REGISTRY_SCHEMA_VERSION: + raise IntentRegistrySchemaError( + f"Unsupported intent registry schema version: {current}" + ) + + +def create_schema_v1(conn: sqlite3.Connection) -> None: + initialize_schema_v1( + conn, + ddl_statements=(_CREATE_INTENTS_SQL, _CREATE_META_SQL), + index_statements=_INDEX_SQL, + meta_table=_INTENT_META_TABLE, + seed_meta={ + "schema_version": INTENT_REGISTRY_SCHEMA_VERSION, + "generator": "codeclone", + "codeclone_version": __version__, + "created_at_utc": current_report_timestamp_utc(), + }, + ) + + +def _migrate_v1_to_v2(conn: sqlite3.Connection) -> None: + existing = { + row[1] + for row in conn.execute("PRAGMA table_info(workspace_intents)").fetchall() + } + if "closed_at_utc" not in existing: + conn.execute("ALTER TABLE workspace_intents ADD COLUMN closed_at_utc TEXT") + if "updated_at_utc" not in existing: + conn.execute( + "ALTER TABLE workspace_intents " + "ADD COLUMN updated_at_utc TEXT NOT NULL DEFAULT ''" + ) + conn.execute( + """ + UPDATE workspace_intents + SET updated_at_utc = declared_at_utc + WHERE updated_at_utc = '' + """ + ) + conn.execute( + f"UPDATE {_INTENT_META_TABLE} SET value = ? 
WHERE key = 'schema_version'", + (INTENT_REGISTRY_SCHEMA_VERSION,), + ) + for statement in _INDEX_SQL: + conn.execute(statement) + conn.commit() + + +def _validate_readonly_schema(conn: sqlite3.Connection) -> None: + current = get_meta(conn, "schema_version") + if current != INTENT_REGISTRY_SCHEMA_VERSION: + rendered = current if current is not None else "missing" + raise IntentRegistrySchemaError( + "Intent registry requires writable schema initialization or migration: " + f"found {rendered}, expected {INTENT_REGISTRY_SCHEMA_VERSION}" + ) + columns = { + str(row[1]) + for row in conn.execute("PRAGMA table_info(workspace_intents)").fetchall() + if len(row) > 1 + } + missing = sorted(_REQUIRED_INTENT_COLUMNS - columns) + if missing: + raise IntentRegistrySchemaError( + "Intent registry is missing required columns: " + ", ".join(missing) + ) + + +def get_meta(conn: sqlite3.Connection, key: str) -> str | None: + return get_meta_value(conn, meta_table=_INTENT_META_TABLE, key=key) + + +__all__ = [ + "INTENT_REGISTRY_SCHEMA_VERSION", + "IntentRegistrySchemaError", + "create_schema_v1", + "ensure_schema", + "get_meta", + "open_intent_registry_db", + "open_intent_registry_db_readonly", +] diff --git a/codeclone/surfaces/mcp/_workspace_intent_staleness.py b/codeclone/surfaces/mcp/_workspace_intent_staleness.py new file mode 100644 index 00000000..ac2cefc3 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_staleness.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared workspace intent staleness predicates (leaf module).""" + +from __future__ import annotations + +from ._workspace_intent_contract import WorkspaceIntentRecord +from ._workspace_intent_lifecycle import ( + PidLiveness, + WorkspaceIntentStatus, + utc_now, +) +from ._workspace_intent_lifecycle import ( + is_lease_expired as _is_lease_expired, +) +from ._workspace_intent_lifecycle import ( + parse_utc as _parse_utc, +) + + +def stale_reason(record: WorkspaceIntentRecord) -> str | None: + from . import _workspace_intent_pid as pid_mod + + if record.status == WorkspaceIntentStatus.EXPIRED.value: + return "expired" + if record.status == WorkspaceIntentStatus.ORPHANED.value: + return "orphaned" + expires = _parse_utc(record.expires_at_utc) + if expires is None or expires <= utc_now(): + return "expired" + if pid_mod.agent_pid_liveness(record.agent_pid) == PidLiveness.DEAD: + return "orphaned" + if _is_lease_expired(record): + return "lease_expired" + return None + + +def ttl_expired(record: WorkspaceIntentRecord) -> bool: + expires = _parse_utc(record.expires_at_utc) + return expires is None or expires <= utc_now() + + +def gc_removal_reason( + record: WorkspaceIntentRecord, + *, + for_lazy_close: bool = False, +) -> str | None: + reason = stale_reason(record) + if reason == "lease_expired" and not ttl_expired(record): + return None + if for_lazy_close and reason == "orphaned": + return None + return reason + + +def is_stale(record: WorkspaceIntentRecord) -> bool: + return stale_reason(record) is not None + + +__all__ = ["gc_removal_reason", "is_stale", "stale_reason", "ttl_expired"] diff --git a/codeclone/surfaces/mcp/_workspace_intent_store.py b/codeclone/surfaces/mcp/_workspace_intent_store.py new file mode 100644 index 00000000..5eecbc3b --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intent_store.py @@ -0,0 +1,680 @@ +# This Source Code Form is subject to the terms of the 
Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +import threading +from collections.abc import Callable, Iterable, Iterator +from contextlib import contextmanager +from dataclasses import dataclass, replace +from datetime import datetime, timedelta, timezone +from pathlib import Path + +from ...config.intent_registry import ( + IntentRegistryConfig, + resolve_intent_registry_config, +) +from ...report.meta import current_report_timestamp_utc +from ...utils.json_io import write_json_document_atomically +from ._workspace_intent_contract import WorkspaceIntentRecord +from ._workspace_intent_lifecycle import ( + WorkspaceIntentStatus, + gc_status_for_reason, + is_terminal_workspace_intent_status, +) +from ._workspace_intent_models import ( + WorkspaceIntentRowModel, + parse_workspace_document, + parse_workspace_document_json, + record_from_document, + signed_payload_dict_from_record, + signed_payload_json_from_record, +) +from ._workspace_intent_paths import ( + intent_path, + is_safe_intent_id, + read_payload, + record_sort_key, + registry_dir, + registry_files, + safe_remove_own_intent, + unlink, +) +from ._workspace_intent_registry_lock import workspace_registry_lock +from ._workspace_intent_schema import open_intent_registry_db +from ._workspace_intent_staleness import gc_removal_reason + +_STORE_CACHE: dict[ + tuple[str, str, str], FileWorkspaceIntentStore | SqliteWorkspaceIntentStore +] = {} +_FILE_STORE_LOCKS: dict[str, threading.Lock] = {} +_STORE_CACHE_LOCK = threading.Lock() +_FILE_STORE_LOCKS_LOCK = threading.Lock() + + +def _file_store_process_lock(root: Path) -> threading.Lock: + key = str(root.resolve()) + with _FILE_STORE_LOCKS_LOCK: + lock = _FILE_STORE_LOCKS.get(key) + if lock is None: + lock = threading.Lock() + 
_FILE_STORE_LOCKS[key] = lock + return lock + + +@contextmanager +def registry_transaction( + store: FileWorkspaceIntentStore | SqliteWorkspaceIntentStore, +) -> Iterator[None]: + """Cross-process and in-process lock for registry read/write transactions.""" + with workspace_registry_lock(store.registry_lock_path), store.in_process_lock(): + yield + + +@dataclass(frozen=True, slots=True) +class FileWorkspaceIntentStore: + root: Path + + @property + def backend(self) -> str: + return "file" + + @property + def storage_path(self) -> Path: + return registry_dir(self.root) + + @property + def registry_lock_path(self) -> Path: + return registry_dir(self.root) / ".registry.lock" + + @contextmanager + def in_process_lock(self) -> Iterator[None]: + with _file_store_process_lock(self.root): + yield + + def write(self, record: WorkspaceIntentRecord) -> bool: + with registry_transaction(self): + return self.write_unlocked(record) + + def write_unlocked(self, record: WorkspaceIntentRecord) -> bool: + path = intent_path( + root=self.root, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + try: + path.parent.mkdir(parents=True, exist_ok=True) + write_json_document_atomically( + path=path, + document=signed_payload_dict_from_record(record), + sort_keys=True, + trailing_newline=True, + ) + except OSError: + return False + return True + + def list_records(self) -> tuple[WorkspaceIntentRecord, ...]: + return self.list_records_current() + + def list_records_raw(self) -> tuple[WorkspaceIntentRecord, ...]: + with self.in_process_lock(): + return self._all_valid_records_unlocked() + + def list_records_current(self) -> tuple[WorkspaceIntentRecord, ...]: + with registry_transaction(self): + _lazy_close_eligible_records_unlocked(self) + return self._active_records_unlocked() + + def list_records_for_hygiene(self) -> tuple[WorkspaceIntentRecord, ...]: + with registry_transaction(self): + return self._all_valid_records_unlocked() + + def 
_all_valid_records_unlocked(self) -> tuple[WorkspaceIntentRecord, ...]: + records = [record for _, record in self._valid_entries_unlocked()] + return tuple(sorted(records, key=record_sort_key)) + + def _active_records_unlocked(self) -> tuple[WorkspaceIntentRecord, ...]: + records = [ + record + for record in self._all_valid_records_unlocked() + if not is_terminal_workspace_intent_status(record.status) + ] + return tuple(sorted(records, key=record_sort_key)) + + def find(self, intent_id: str) -> WorkspaceIntentRecord | None: + with registry_transaction(self): + _lazy_close_eligible_records_unlocked(self) + return self.find_raw_unlocked(intent_id) + + def find_current_unlocked(self, intent_id: str) -> WorkspaceIntentRecord | None: + _lazy_close_eligible_records_unlocked(self) + return self.find_raw_unlocked(intent_id) + + def find_raw_unlocked(self, intent_id: str) -> WorkspaceIntentRecord | None: + matches = [ + record + for _, record in self._valid_entries_unlocked() + if record.intent_id == intent_id + and not is_terminal_workspace_intent_status(record.status) + ] + if not matches: + return None + return sorted(matches, key=record_sort_key)[-1] + + def find_raw(self, intent_id: str) -> WorkspaceIntentRecord | None: + with registry_transaction(self): + return self.find_raw_unlocked(intent_id) + + def remove(self, *, pid: int, start_epoch: int, intent_id: str) -> bool: + with registry_transaction(self): + return safe_remove_own_intent( + root=self.root, + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + + def gc(self) -> dict[str, object]: + with registry_transaction(self): + result = _gc_eligible_records_unlocked(self) + remaining = len(self._active_records_unlocked()) + return { + **result.to_gc_fragment(), + "retention_purged": 0, + "remaining": remaining, + } + + def _valid_entries_unlocked(self) -> tuple[tuple[Path, WorkspaceIntentRecord], ...]: + entries: list[tuple[Path, WorkspaceIntentRecord]] = [] + for path in registry_files(self.root): + 
payload = read_payload(path) + record = _record_from_json(payload) if payload is not None else None + if record is not None: + entries.append((path, record)) + return tuple(entries) + + +class SqliteWorkspaceIntentStore: + def __init__(self, *, db_path: Path, retention_days: int) -> None: + self._db_path = db_path + self._retention_days = retention_days + self._lock = threading.Lock() + self._conn = open_intent_registry_db(db_path) + + @property + def backend(self) -> str: + return "sqlite" + + @property + def storage_path(self) -> Path: + return self._db_path + + @property + def registry_lock_path(self) -> Path: + return Path(f"{self._db_path}.lock") + + @contextmanager + def in_process_lock(self) -> Iterator[None]: + with self._lock: + yield + + def close(self) -> None: + with self._lock: + self._conn.close() + + def write(self, record: WorkspaceIntentRecord) -> bool: + with registry_transaction(self): + return self.write_unlocked(record) + + def write_unlocked(self, record: WorkspaceIntentRecord) -> bool: + try: + payload_json = signed_payload_json_from_record(record) + now_text = current_report_timestamp_utc() + closed_at = ( + now_text if is_terminal_workspace_intent_status(record.status) else None + ) + row = WorkspaceIntentRowModel.from_record_fields( + agent_pid=record.agent_pid, + agent_start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + declared_at_utc=record.declared_at_utc, + payload_json=payload_json, + updated_at_utc=now_text, + closed_at_utc=closed_at, + ) + except (TypeError, ValueError): + return False + try: + self._conn.execute( + """ + INSERT INTO workspace_intents( + agent_pid, + agent_start_epoch, + intent_id, + declared_at_utc, + payload_json, + closed_at_utc, + updated_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(agent_pid, agent_start_epoch, intent_id) DO UPDATE SET + declared_at_utc = excluded.declared_at_utc, + payload_json = excluded.payload_json, + updated_at_utc = excluded.updated_at_utc, + closed_at_utc = excluded.closed_at_utc + """, + ( + row.agent_pid, + row.agent_start_epoch, + row.intent_id, + row.declared_at_utc, + row.payload_json, + row.closed_at_utc, + row.updated_at_utc, + ), + ) + self._conn.commit() + except sqlite3.Error: + return False + return True + + def list_records(self) -> tuple[WorkspaceIntentRecord, ...]: + return self.list_records_current() + + def list_records_raw(self) -> tuple[WorkspaceIntentRecord, ...]: + with self.in_process_lock(): + return self._list_records_raw_unlocked() + + def list_records_current(self) -> tuple[WorkspaceIntentRecord, ...]: + with registry_transaction(self): + _lazy_close_eligible_records_unlocked(self) + return self._active_records_unlocked() + + def list_records_for_hygiene(self) -> tuple[WorkspaceIntentRecord, ...]: + with registry_transaction(self): + return self._load_all_records_unlocked() + + def _list_records_raw_unlocked(self) -> tuple[WorkspaceIntentRecord, ...]: + records = [ + record + for record in self._load_all_records_unlocked() + if not is_terminal_workspace_intent_status(record.status) + ] + return tuple(sorted(records, key=record_sort_key)) + + def _active_records_unlocked(self) -> tuple[WorkspaceIntentRecord, ...]: + return self._list_records_raw_unlocked() + + def find(self, intent_id: str) -> WorkspaceIntentRecord | None: + with registry_transaction(self): + _lazy_close_eligible_records_unlocked(self) + return self.find_raw_unlocked(intent_id) + + def find_current_unlocked(self, intent_id: str) -> WorkspaceIntentRecord | None: + _lazy_close_eligible_records_unlocked(self) + return self.find_raw_unlocked(intent_id) + + def find_raw_unlocked(self, intent_id: str) -> WorkspaceIntentRecord | None: + records = [ + record + for record in self._load_all_records_unlocked() + if 
record.intent_id == intent_id + and not is_terminal_workspace_intent_status(record.status) + ] + if not records: + return None + return sorted(records, key=record_sort_key)[-1] + + def find_raw(self, intent_id: str) -> WorkspaceIntentRecord | None: + with registry_transaction(self): + return self.find_raw_unlocked(intent_id) + + def remove(self, *, pid: int, start_epoch: int, intent_id: str) -> bool: + if not is_safe_intent_id(intent_id): + return False + with registry_transaction(self): + record = self._fetch_record_unlocked( + pid=pid, + start_epoch=start_epoch, + intent_id=intent_id, + ) + if record is None or is_terminal_workspace_intent_status(record.status): + return False + return self.write_unlocked( + replace(record, status=WorkspaceIntentStatus.CLEAN.value), + ) + + def gc(self) -> dict[str, object]: + with registry_transaction(self): + result = _gc_eligible_records_unlocked(self) + retention_purged = self._purge_retention_rows_unlocked() + remaining = len(self._active_records_unlocked()) + return { + **result.to_gc_fragment(), + "retention_purged": retention_purged, + "remaining": remaining, + } + + def iter_rows(self) -> tuple[tuple[int, int, str, str], ...]: + try: + rows = self._conn.execute( + """ + SELECT agent_pid, agent_start_epoch, intent_id, payload_json + FROM workspace_intents + ORDER BY declared_at_utc, agent_pid, intent_id + """ + ).fetchall() + except sqlite3.Error: + return () + return tuple( + (int(agent_pid), int(agent_start_epoch), str(intent_id), str(payload_json)) + for agent_pid, agent_start_epoch, intent_id, payload_json in rows + ) + + def delete_row_unlocked( + self, + *, + pid: int, + start_epoch: int, + intent_id: str, + ) -> bool: + try: + cursor = self._conn.execute( + """ + DELETE FROM workspace_intents + WHERE agent_pid = ? AND agent_start_epoch = ? AND intent_id = ? 
+ """, + (pid, start_epoch, intent_id), + ) + self._conn.commit() + except sqlite3.Error: + return False + return cursor.rowcount > 0 + + def _load_all_records_unlocked(self) -> tuple[WorkspaceIntentRecord, ...]: + try: + rows = self._conn.execute( + """ + SELECT payload_json + FROM workspace_intents + ORDER BY declared_at_utc, agent_pid, intent_id + """ + ).fetchall() + except sqlite3.Error: + return () + return tuple( + record + for record in (_record_from_json(row[0]) for row in rows) + if record is not None + ) + + def _fetch_record_unlocked( + self, + *, + pid: int, + start_epoch: int, + intent_id: str, + ) -> WorkspaceIntentRecord | None: + try: + row = self._conn.execute( + """ + SELECT payload_json + FROM workspace_intents + WHERE agent_pid = ? AND agent_start_epoch = ? AND intent_id = ? + """, + (pid, start_epoch, intent_id), + ).fetchone() + except sqlite3.Error: + return None + if row is None: + return None + return _record_from_json(row[0]) + + def _purge_retention_rows_unlocked(self) -> int: + cutoff = datetime.now(timezone.utc) - timedelta(days=self._retention_days) + cutoff_text = cutoff.replace(microsecond=0).isoformat().replace("+00:00", "Z") + try: + cursor = self._conn.execute( + """ + DELETE FROM workspace_intents + WHERE closed_at_utc IS NOT NULL AND closed_at_utc < ? 
+ """, + (cutoff_text,), + ) + self._conn.commit() + except sqlite3.Error: + return 0 + return int(cursor.rowcount) + + +WorkspaceIntentStore = FileWorkspaceIntentStore | SqliteWorkspaceIntentStore + + +def get_workspace_intent_store(root: Path) -> WorkspaceIntentStore: + root_path = root.resolve() + config = resolve_intent_registry_config(root_path) + cache_key = ( + str(root_path), + config.backend, + str(config.storage_path), + ) + with _STORE_CACHE_LOCK: + cached = _STORE_CACHE.get(cache_key) + if cached is not None: + return cached + store = _build_store(root_path=root_path, config=config) + _STORE_CACHE[cache_key] = store + return store + + +def write_workspace_intent_with_existing( + *, + root: Path, + record: WorkspaceIntentRecord, +) -> tuple[tuple[WorkspaceIntentRecord, ...], bool]: + """Atomically snapshot active records and write ``record``. + + The returned existing records are the pre-write active registry view. This + closes the list-then-write race for declare conflict evaluation while + preserving the current advisory conflict semantics. 
+ """ + + store = get_workspace_intent_store(root) + with registry_transaction(store): + _lazy_close_eligible_records_unlocked(store) + existing = store._active_records_unlocked() + registered = store.write_unlocked(record) + return existing, registered + + +def clear_workspace_intent_store_cache() -> None: + with _STORE_CACHE_LOCK: + stores = tuple(_STORE_CACHE.values()) + _STORE_CACHE.clear() + for store in stores: + close = getattr(store, "close", None) + if callable(close): + close() + + +def _build_store( + *, root_path: Path, config: IntentRegistryConfig +) -> WorkspaceIntentStore: + if config.backend == "file": + return FileWorkspaceIntentStore(root=root_path) + return SqliteWorkspaceIntentStore( + db_path=config.storage_path, + retention_days=config.retention_days, + ) + + +def _record_from_json(payload: object) -> WorkspaceIntentRecord | None: + if isinstance(payload, str): + document = parse_workspace_document_json(payload) + elif isinstance(payload, dict): + document = parse_workspace_document(payload) + else: + return None + if document is None: + return None + return record_from_document(document) + + +def _sqlite_storage_key(*, pid: int, start_epoch: int, intent_id: str) -> str: + return f"{pid}-{start_epoch}-{intent_id}.sqlite" + + +@dataclass(frozen=True, slots=True) +class LazyCloseResult: + closed_ids: tuple[str, ...] + closed_reasons: dict[str, str] + corrupted_removed: tuple[str, ...] 
+ + def to_gc_fragment(self) -> dict[str, object]: + return { + "removed": len(self.closed_ids), + "removed_intent_ids": list(self.closed_ids), + "removed_reasons": dict(self.closed_reasons), + "corrupted_removed": len(self.corrupted_removed), + "corrupted_filenames": list(self.corrupted_removed), + } + + +def lazy_close_eligible_records( + store: WorkspaceIntentStore, +) -> LazyCloseResult: + """Close/delete records where ``gc_removal_reason()`` is not None.""" + with registry_transaction(store): + return _lazy_close_eligible_records_unlocked(store) + + +def _lazy_close_eligible_records_unlocked( + store: WorkspaceIntentStore, +) -> LazyCloseResult: + return _close_eligible_records_unlocked(store, for_lazy_close=True) + + +def _gc_eligible_records_unlocked( + store: WorkspaceIntentStore, +) -> LazyCloseResult: + return _close_eligible_records_unlocked(store, for_lazy_close=False) + + +def _close_eligible_records_unlocked( + store: WorkspaceIntentStore, + *, + for_lazy_close: bool, +) -> LazyCloseResult: + if isinstance(store, FileWorkspaceIntentStore): + return _close_file_store(store, for_lazy_close=for_lazy_close) + if isinstance(store, SqliteWorkspaceIntentStore): + return _close_sqlite_store(store, for_lazy_close=for_lazy_close) + raise TypeError(f"Unsupported workspace intent store: {type(store)!r}") + + +def lazy_close_eligible_records_unlocked( + store: WorkspaceIntentStore, +) -> LazyCloseResult: + return _lazy_close_eligible_records_unlocked(store) + + +def _lazy_close_from_entries( + entries: Iterable[tuple[str, WorkspaceIntentRecord | None]], + *, + for_lazy_close: bool, + remove_corrupted: Callable[[str], bool], + close_active: Callable[[WorkspaceIntentRecord, str], bool], +) -> LazyCloseResult: + closed_ids: list[str] = [] + closed_reasons: dict[str, str] = {} + corrupted: list[str] = [] + for storage_key, record in entries: + if record is None: + if remove_corrupted(storage_key): + corrupted.append(storage_key) + continue + reason = ( + None + if 
def _close_file_store(
    store: FileWorkspaceIntentStore,
    *,
    for_lazy_close: bool,
) -> LazyCloseResult:
    """Sweep the file-backed registry under ``store.root``.

    Every registry file becomes one ``(filename, record)`` entry; a file whose
    payload cannot be read or parsed is passed on with a ``None`` record.
    Both corrupted files and records selected for closing are removed by
    unlinking the file.
    """
    files_by_name: dict[str, Path] = {}
    files_by_intent: dict[str, Path] = {}
    scanned: list[tuple[str, WorkspaceIntentRecord | None]] = []
    for registry_file in registry_files(store.root):
        raw = read_payload(registry_file)
        parsed = None if raw is None else _record_from_json(raw)
        files_by_name[registry_file.name] = registry_file
        if parsed is not None:
            files_by_intent[parsed.intent_id] = registry_file
        scanned.append((registry_file.name, parsed))

    def _drop_corrupted(key: str) -> bool:
        # Corrupted entries are addressed by filename.
        return unlink(files_by_name[key])

    def _drop_closed(record: WorkspaceIntentRecord, _reason: str) -> bool:
        # Parsed records are addressed by intent id; the reason is not stored.
        return unlink(files_by_intent[record.intent_id])

    return _lazy_close_from_entries(
        scanned,
        for_lazy_close=for_lazy_close,
        remove_corrupted=_drop_corrupted,
        close_active=_drop_closed,
    )
close_active=lambda record, reason: store.write_unlocked( + replace(record, status=gc_status_for_reason(reason)), + ), + ) + + +__all__ = [ + "FileWorkspaceIntentStore", + "LazyCloseResult", + "SqliteWorkspaceIntentStore", + "WorkspaceIntentStore", + "clear_workspace_intent_store_cache", + "get_workspace_intent_store", + "lazy_close_eligible_records", + "lazy_close_eligible_records_unlocked", + "registry_transaction", + "write_workspace_intent_with_existing", +] diff --git a/codeclone/surfaces/mcp/_workspace_intents.py b/codeclone/surfaces/mcp/_workspace_intents.py new file mode 100644 index 00000000..a6e63162 --- /dev/null +++ b/codeclone/surfaces/mcp/_workspace_intents.py @@ -0,0 +1,742 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import replace +from datetime import datetime, timedelta, timezone +from enum import Enum +from fnmatch import fnmatchcase +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._workspace_intent_store import WorkspaceIntentStore + +from ._workspace_intent_contract import ( + DEFAULT_LEASE_SECONDS, + DEFAULT_TTL_SECONDS, + LEGACY_REGISTRY_VERSION, + MAX_LEASE_SECONDS, + MAX_TTL_SECONDS, + MIN_LEASE_SECONDS, + MIN_TTL_SECONDS, + REGISTRY_VERSION, + WorkspaceIntentRecord, + compute_intent_digest, + compute_scope_digest, + verify_intent_integrity, +) +from ._workspace_intent_lifecycle import ( + PidLiveness, + WorkspaceIntentStatus, + utc_now, +) +from ._workspace_intent_lifecycle import ( + lease_expiry as _lease_expiry, +) +from ._workspace_intent_lifecycle import ( + parse_utc as _parse_utc, +) +from ._workspace_intent_paths import ( + intent_filename, + intent_path, + 
class IntentOwnership(str, Enum):
    """Relationship between a registry record and the calling agent.

    Values are assigned by ``classify_intent_ownership``, which checks TTL
    expiry first, then whether the record's pid/start epoch match the caller,
    then the liveness of a foreign pid, and finally lease validity.
    """

    # Caller's own record (pid and start epoch match) with a valid lease.
    OWN_ACTIVE = "own_active"
    # Caller's own record whose lease is missing or has run out.
    OWN_STALE = "own_stale"
    # Another agent's record, process not reported dead, lease still valid.
    FOREIGN_ACTIVE = "foreign_active"
    # Another agent's record, process not reported dead, lease no longer valid.
    FOREIGN_STALE = "foreign_stale"
    # Another agent's record whose process is reported dead.
    RECOVERABLE = "recoverable"
    # Record whose expiry timestamp is unparseable or not in the future.
    EXPIRED = "expired"
def format_utc(value: datetime) -> str:
    """Format ``value`` as a second-precision UTC timestamp ending in ``Z``."""
    # Convert to UTC first, then drop sub-second precision.
    in_utc = value.astimezone(timezone.utc)
    whole_seconds = in_utc.replace(microsecond=0)
    iso_text = whole_seconds.isoformat()
    # isoformat() renders the UTC offset as "+00:00"; use the "Z" shorthand.
    return iso_text.replace("+00:00", "Z")
object = None, *, env_value: object = None) -> int: + return _resolved_seconds( + value=value, + env_value=env_value, + default=DEFAULT_TTL_SECONDS, + minimum=MIN_TTL_SECONDS, + maximum=MAX_TTL_SECONDS, + ) + + +def _resolved_seconds( + *, + value: object, + env_value: object, + default: int, + minimum: int, + maximum: int, +) -> int: + raw = value if value is not None else env_value + if raw is None: + return default + if isinstance(raw, bool): + return default + try: + parsed = int(str(raw).strip()) + except ValueError: + return default + return min(maximum, max(minimum, parsed)) + + +def expires_at(*, declared_at: datetime, ttl_seconds: int) -> str: + return format_utc(declared_at + timedelta(seconds=ttl_seconds)) + + +def validate_workspace_record(data: object) -> WorkspaceIntentRecord | None: + from ._workspace_intent_models import parse_workspace_document, record_from_document + + document = parse_workspace_document(data) + if document is None: + return None + return record_from_document(document) + + +def write_workspace_intent(*, root: Path, record: WorkspaceIntentRecord) -> bool: + return bool(_intent_store(root).write(record)) + + +def write_workspace_intent_with_existing( + *, + root: Path, + record: WorkspaceIntentRecord, +) -> tuple[tuple[WorkspaceIntentRecord, ...], bool]: + from ._workspace_intent_store import write_workspace_intent_with_existing as _write + + return _write(root=root, record=record) + + +def update_workspace_intent_status( + *, + root: Path, + pid: int, + start_epoch: int, + intent_id: str, + new_status: str, + ttl_seconds: int | None = None, +) -> bool: + from ._workspace_intent_store import registry_transaction + + store = _intent_store(root) + with registry_transaction(store): + record = store.find_current_unlocked(intent_id) + if record is None: + return False + if record.agent_pid != pid or record.agent_start_epoch != start_epoch: + return False + updated = _updated_record( + record, + new_status=new_status, + 
def renew_workspace_intent_lease(
    *,
    root: Path,
    pid: int,
    start_epoch: int,
    intent_id: str,
    lease_seconds: int | None = None,
) -> bool:
    """Refresh the lease timestamp of the caller's own intent.

    Returns ``False`` when the intent is not found, belongs to a different
    pid/start epoch, or its expiry timestamp is unparseable or already
    passed. Otherwise the record is rewritten with the current time as
    ``lease_renewed_at_utc`` and the result of the write is returned. When
    ``lease_seconds`` is given it is passed through
    ``resolved_lease_seconds``; when omitted the stored lease length is kept.
    """
    from ._workspace_intent_store import registry_transaction

    intent_store = _intent_store(root)
    # Lookup, checks and write run inside one registry transaction.
    with registry_transaction(intent_store):
        current = intent_store.find_current_unlocked(intent_id)
        if current is None:
            return False
        owned_by_caller = (
            current.agent_pid == pid and current.agent_start_epoch == start_epoch
        )
        if not owned_by_caller:
            return False
        renewed_at = utc_now()
        ttl_deadline = _parse_utc(current.expires_at_utc)
        if ttl_deadline is None or ttl_deadline <= renewed_at:
            return False
        if lease_seconds is None:
            effective_lease = current.lease_seconds
        else:
            effective_lease = resolved_lease_seconds(lease_seconds)
        refreshed = replace(
            current,
            lease_renewed_at_utc=format_utc(renewed_at),
            lease_seconds=effective_lease,
        )
        return bool(intent_store.write_unlocked(refreshed))
def workspace_status_counts(*, root: Path) -> dict[str, int]:
    """Summarize the store's current records as three counters.

    ``stale_count`` counts records with a non-``None`` ``stale_reason``,
    ``orphaned_count`` counts records whose pid is reported ``DEAD``, and
    ``total_agents`` counts distinct agent pids.
    """
    current = list(_intent_store(root).list_records_current())
    stale_total = sum(1 for entry in current if stale_reason(entry) is not None)
    orphaned = [
        entry
        for entry in current
        if _pid_liveness(entry.agent_pid) == PidLiveness.DEAD
    ]
    distinct_pids = {entry.agent_pid for entry in current}
    return {
        "stale_count": stale_total,
        "orphaned_count": len(orphaned),
        "total_agents": len(distinct_pids),
    }
def _detect_scope_state(
    *,
    new_scope: Mapping[str, object],
    existing: Sequence[WorkspaceIntentRecord],
    own_pid: int,
    own_start_epoch: int,
) -> tuple[list[dict[str, object]], list[dict[str, object]]]:
    """Compare ``new_scope`` against existing records and classify each one.

    Returns ``(conflicts, relations)``, both sorted with
    ``_scope_state_sort_key``. Every conflict also appears in ``relations``
    with ``relation="edit_overlap"``; ``relations`` additionally carries the
    forbidden-pattern relations. Each record contributes at most one entry:
    edit overlap takes precedence over ``foreign_excludes_target``, which in
    turn takes precedence over ``target_excludes_foreign``.
    """
    new_allowed, new_related, new_forbidden = _scope_all_sets(new_scope)
    conflicts: list[dict[str, object]] = []
    relations: list[dict[str, object]] = []
    # One timestamp for the whole pass so all records are classified
    # against the same instant.
    now = utc_now()
    for record in existing:
        ownership = classify_intent_ownership(
            record,
            own_pid=own_pid,
            own_start_epoch=own_start_epoch,
            now=now,
        )
        # Only non-queued records classified as FOREIGN_ACTIVE or
        # FOREIGN_STALE are considered; everything else is skipped.
        if (
            record.status == WorkspaceIntentStatus.QUEUED.value
            or ownership not in _CONFLICT_OWNERSHIP
        ):
            continue
        existing_allowed, existing_related, existing_forbidden = _scope_all_sets(
            record.scope
        )
        # Hard overlap: the same path is in both allowed_files sets.
        hard_overlap = tuple(sorted(new_allowed.intersection(existing_allowed)))
        # Soft overlap: one side's allowed file is the other side's
        # allowed_related file (checked in both directions).
        soft_overlap = tuple(
            sorted(
                new_allowed.intersection(existing_related).union(
                    new_related.intersection(existing_allowed)
                )
            )
        )
        if hard_overlap or soft_overlap:
            conflict = _edit_overlap_payload(
                record=record,
                ownership=ownership,
                hard_overlap=hard_overlap,
                soft_overlap=soft_overlap,
            )
            conflicts.append(conflict)
            relations.append(
                {
                    **conflict,
                    "relation": "edit_overlap",
                    "message": "Foreign agent has overlapping editable scope.",
                }
            )
            # An overlapping record is not also checked for forbidden patterns.
            continue
        # The foreign record's forbidden patterns match files we want to edit.
        foreign_excludes = _forbidden_matches(
            files=new_allowed,
            patterns=existing_forbidden,
        )
        if foreign_excludes:
            relations.append(
                _forbidden_relation_payload(
                    record=record,
                    ownership=ownership,
                    relation="foreign_excludes_target",
                    matching_patterns=foreign_excludes,
                    message=(
                        "Foreign agent explicitly excludes files in current scope."
                    ),
                )
            )
            continue
        # Our forbidden patterns match files the foreign record may edit.
        target_excludes = _forbidden_matches(
            files=existing_allowed,
            patterns=new_forbidden,
        )
        if target_excludes:
            relations.append(
                _forbidden_relation_payload(
                    record=record,
                    ownership=ownership,
                    relation="target_excludes_foreign",
                    matching_patterns=target_excludes,
                    message=(
                        "Current scope explicitly excludes files in foreign scope."
                    ),
                )
            )
    return (
        sorted(conflicts, key=_scope_state_sort_key),
        sorted(relations, key=_scope_state_sort_key),
    )
pattern + for pattern in patterns + for path in files + if fnmatchcase(path, pattern) + } + ) + ) + + +def gc_workspace(*, root: Path) -> dict[str, object]: + store = _intent_store(root) + payload = dict(store.gc()) + payload["raw_active_count"] = len(store.list_records_raw()) + return payload + + +def _updated_record( + record: WorkspaceIntentRecord, + *, + new_status: str, + ttl_seconds: int | None, +) -> WorkspaceIntentRecord: + if ttl_seconds is None: + return replace(record, status=new_status) + declared_at = utc_now() + return replace( + record, + declared_at_utc=format_utc(declared_at), + expires_at_utc=expires_at(declared_at=declared_at, ttl_seconds=ttl_seconds), + ttl_seconds=ttl_seconds, + lease_renewed_at_utc=format_utc(declared_at), + status=new_status, + ) + + +def _intent_store(root: Path) -> WorkspaceIntentStore: + from ._workspace_intent_store import get_workspace_intent_store + + return get_workspace_intent_store(root) + + +def _valid_path_list(value: object, *, required: bool) -> list[str] | None: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return None + paths: list[str] = [] + for item in value: + if not isinstance(item, str): + return None + path = item.replace("\\", "/").strip() + if not path: + continue + if Path(path).is_absolute() or ".." 
in Path(path).parts: + return None + paths.append(path.rstrip("/")) + deduped = sorted(set(paths)) + if required and not deduped: + return None + return deduped + + +def _scope_all_sets( + scope: Mapping[str, object], +) -> tuple[set[str], set[str], tuple[str, ...]]: + allowed = set(_valid_path_list(scope.get("allowed_files"), required=False) or []) + related = set( + _valid_path_list(scope.get("allowed_related", ()), required=False) or [] + ) + forbidden = tuple( + _valid_path_list(scope.get("forbidden", ()), required=False) or [] + ) + return allowed, related, forbidden + + +def _sort_agent_pid(value: object) -> int: + return value if isinstance(value, int) and not isinstance(value, bool) else 0 + + +def _overlap_type(*, hard: bool, soft: bool) -> str: + if hard and soft: + return "both" + return "hard" if hard else "soft" + + +__all__ = [ + "DEFAULT_LEASE_SECONDS", + "DEFAULT_TTL_SECONDS", + "LEGACY_REGISTRY_VERSION", + "MAX_LEASE_SECONDS", + "MAX_TTL_SECONDS", + "MIN_LEASE_SECONDS", + "MIN_TTL_SECONDS", + "REGISTRY_VERSION", + "IntentOwnership", + "PidLiveness", + "WorkspaceIntentRecord", + "WorkspaceIntentStatus", + "_is_pid_alive", + "_is_safe_intent_id", + "_is_safe_intent_path", + "_lease_expiry", + "_parse_utc", + "_pid_liveness", + "_read_payload", + "_ttl_expired", + "_unlink", + "classify_intent_ownership", + "compute_intent_digest", + "compute_scope_digest", + "detect_conflicts", + "detect_workspace_relations", + "expires_at", + "find_workspace_intent", + "format_utc", + "gc_workspace", + "intent_filename", + "intent_path", + "is_orphaned", + "is_stale", + "list_workspace_intent_records_for_recovery", + "list_workspace_intent_records_raw", + "list_workspace_intents", + "registry_dir", + "remove_workspace_intent", + "remove_workspace_record", + "renew_workspace_intent_lease", + "resolved_lease_seconds", + "resolved_ttl_seconds", + "safe_remove_own_intent", + "signed_payload", + "stale_reason", + "update_workspace_intent_status", + "utc_now", + 
class StaticBearerTokenVerifier:
    """FastMCP TokenVerifier backed by one local bearer token."""

    def __init__(self, token: str) -> None:
        # Raises MCPAuthConfigurationError when the token is too short.
        self._token = validated_mcp_auth_token(token)

    # FastMCP calls TokenVerifier implementations dynamically.
    # codeclone: ignore[dead-code]
    async def verify_token(self, token: str) -> AccessToken | None:
        """Check a presented bearer token against the configured one.

        Returns ``None`` when the tokens differ. On a match, returns an
        ``AccessToken`` for client ``codeclone-local-http`` carrying the
        ``MCP_AUTH_SCOPE`` scope.
        """
        # hmac.compare_digest only accepts ASCII-only ``str`` and raises
        # TypeError otherwise, so a token with non-ASCII characters would crash
        # the request instead of being rejected. Comparing the encoded bytes
        # keeps the constant-time check and works for any string.
        presented = token.encode("utf-8", "surrogatepass")
        expected = self._token.encode("utf-8", "surrogatepass")
        if not hmac.compare_digest(presented, expected):
            return None

        # Imported lazily, and only once a token has been accepted.
        from mcp.server.auth.provider import AccessToken

        return AccessToken(
            token=token,
            client_id="codeclone-local-http",
            scopes=[MCP_AUTH_SCOPE],
        )
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""User-facing MCP copy: tool descriptions, help topics, workflow messages.""" + +from __future__ import annotations diff --git a/codeclone/surfaces/mcp/messages/blast_radius.py b/codeclone/surfaces/mcp/messages/blast_radius.py new file mode 100644 index 00000000..9fb32f76 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/blast_radius.py @@ -0,0 +1,42 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Blast-radius boundary reasons and guardrails.""" + +from __future__ import annotations + +from typing import Final + +from codeclone.analysis.blast_radius import ( + BOUNDARY_REASON_AFFECTED_NOT_ALLOWED, + BOUNDARY_REASON_BASELINE_OR_STATE, + BOUNDARY_REASON_EXPLICIT_FORBIDDEN, + GUARDRAIL_CLONE_COHORT_CONTEXT, + GUARDRAIL_DO_NOT_TOUCH_APPROVAL, + GUARDRAIL_HIGH_RADIUS_APPROVAL, + GUARDRAIL_REVIEW_DEPENDENTS, + REVIEW_REASON_GOLDEN_FIXTURE_SURFACE, + REVIEW_REASON_KNOWN_BASELINE_DEBT, + REVIEW_REASON_REPORT_ONLY_DESIGN, + REVIEW_REASON_SECURITY_BOUNDARY, +) + +BLAST_SUMMARY_UNKNOWN: Final = "unknown" + +__all__ = [ + "BLAST_SUMMARY_UNKNOWN", + "BOUNDARY_REASON_AFFECTED_NOT_ALLOWED", + "BOUNDARY_REASON_BASELINE_OR_STATE", + "BOUNDARY_REASON_EXPLICIT_FORBIDDEN", + "GUARDRAIL_CLONE_COHORT_CONTEXT", + "GUARDRAIL_DO_NOT_TOUCH_APPROVAL", + "GUARDRAIL_HIGH_RADIUS_APPROVAL", + "GUARDRAIL_REVIEW_DEPENDENTS", + "REVIEW_REASON_GOLDEN_FIXTURE_SURFACE", + "REVIEW_REASON_KNOWN_BASELINE_DEBT", + "REVIEW_REASON_REPORT_ONLY_DESIGN", + "REVIEW_REASON_SECURITY_BOUNDARY", +] diff --git a/codeclone/surfaces/mcp/messages/claims.py 
b/codeclone/surfaces/mcp/messages/claims.py new file mode 100644 index 00000000..bd9400cb --- /dev/null +++ b/codeclone/surfaces/mcp/messages/claims.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Claim-guard validation and violation messages.""" + +from __future__ import annotations + +from typing import Final + +ERR_TEXT_NOT_STRING: Final = "text must be a string." +ERR_TEXT_EMPTY: Final = "text must not be empty." +ERR_TEXT_TOO_LONG: Final = ( + "text exceeds the maximum supported length ({max_chars} characters)." +) + +VIOLATION_REASON_SECURITY_NOT_VULNERABILITY: Final = ( + "Security Surfaces are report-only trust-boundary inventory, " + "not vulnerability claims." +) +VIOLATION_REASON_REPORT_ONLY_GATE: Final = ( + "'{family}' is a report-only signal (gate_keys=()). " + "It cannot fail CI or block a pipeline." +) +VIOLATION_REASON_KNOWN_DEBT_OVERCLAIM: Final = ( + "This finding has novelty='known'; it is accepted baseline debt. " + "Do not describe it as new relative to the baseline. Patch-local " + "introduction requires before-run to after-run verification evidence." +) +VIOLATION_REASON_DEAD_CODE_REACHABILITY: Final = ( + "'{qualname}' has runtime reachability evidence; it must not be claimed " + "as definitely dead code." +) +VIOLATION_REASON_FIX_WITHOUT_VERIFICATION: Final = ( + "Fix claimed but no post-patch analysis run is available. " + "Run analysis after editing and verify the patch contract." +) + +WARN_NO_CITATIONS: Final = ( + "No known CodeClone finding IDs or metric family citations were found in the text." +) +WARN_UNKNOWN_FINDING: Final = ( + "Finding citation '{cited_id}' is not present in this run." 
+) +WARN_STRUCTURAL_CHECKS_NOT_APPLICABLE: Final = ( + "Review references structural verification, but the verification profile " + "is '{profile}' — structural checks were not applicable for this patch." +) +WARN_HEALTH_REGRESSION_OVERCLAIM: Final = ( + "Patch verify reported a negative health delta ({health_delta:+d}) between " + "before-run and after-run. Do not claim regression-free or fully clean " + "verification." +) + +VIOLATION_REASON_HEALTH_REGRESSION_OVERCLAIM: Final = ( + "Review claims no regressions, but patch verify reported health delta " + "{health_delta:+d} between before-run and after-run." +) diff --git a/codeclone/surfaces/mcp/messages/errors.py b/codeclone/surfaces/mcp/messages/errors.py new file mode 100644 index 00000000..140ff86a --- /dev/null +++ b/codeclone/surfaces/mcp/messages/errors.py @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP contract error messages.""" + +from __future__ import annotations + +from collections.abc import Collection +from typing import Final + +ROOT_REQUIRED_ABSOLUTE: Final = ( + "CodeClone MCP analyze_repository requires an absolute repository root." +) + +PATH_TRAVERSAL: Final = "path traversal not allowed: {path}" + +ROOT_RESOLVE_FAILED: Final = "Unable to resolve repository root '{root}': {error}" + +ROOT_NOT_EXISTS: Final = "Repository root '{root}' does not exist." + +ROOT_NOT_DIRECTORY: Final = "Repository root '{root}' is not a directory." + +CACHE_POLICY_CLI_ONLY: Final = ( + "cache_policy='refresh' is CLI-only. MCP accepts: reuse, off." 
+) +INVALID_RELATIVE_PATH: Final = "Invalid path '{value}' relative to '{root}': {error}" + + +def invalid_choice(name: str, value: object, allowed: Collection[str]) -> str: + allowed_list = ", ".join(sorted(allowed)) + return f"Invalid value for {name}: {value!r}. Expected one of: {allowed_list}." diff --git a/codeclone/surfaces/mcp/messages/facts.py b/codeclone/surfaces/mcp/messages/facts.py new file mode 100644 index 00000000..83fc068e --- /dev/null +++ b/codeclone/surfaces/mcp/messages/facts.py @@ -0,0 +1,15 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Report-facing MCP summary notes.""" + +from __future__ import annotations + +from typing import Final + +SECURITY_SURFACES_SUMMARY_NOTE: Final = ( + "report_only inventory; not a vulnerability scan" +) diff --git a/codeclone/surfaces/mcp/messages/help_topics.py b/codeclone/surfaces/mcp/messages/help_topics.py new file mode 100644 index 00000000..c4b35501 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/help_topics.py @@ -0,0 +1,779 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP help topic copy and doc links.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final + +from ....contracts import BASELINE_SCHEMA_VERSION, DOCS_URL + + +@dataclass(frozen=True) +class MCPHelpTopicSpec: + summary: str + key_points: tuple[str, ...] + recommended_tools: tuple[str, ...] + doc_links: tuple[tuple[str, str], ...] + warnings: tuple[str, ...] = () + anti_patterns: tuple[str, ...] 
= () + + +MCP_BOOK_URL: Final = f"{DOCS_URL}book/" +MCP_GUIDE_URL: Final = f"{DOCS_URL}guide/mcp/" +MCP_INTERFACE_DOC_LINK: Final[tuple[str, str]] = ( + "MCP interface contract", + f"{MCP_BOOK_URL}25-mcp-interface/", +) +BASELINE_DOC_LINK: Final[tuple[str, str]] = ( + "Baseline contract", + f"{MCP_BOOK_URL}07-baseline/", +) +CONFIG_DOC_LINK: Final[tuple[str, str]] = ( + "Config and defaults", + f"{MCP_BOOK_URL}10-config-and-defaults/", +) +REPORT_DOC_LINK: Final[tuple[str, str]] = ( + "Report contract", + f"{MCP_BOOK_URL}05-report/", +) +CLI_DOC_LINK: Final[tuple[str, str]] = ( + "CLI contract", + f"{MCP_BOOK_URL}11-cli/", +) +PIPELINE_DOC_LINK: Final[tuple[str, str]] = ( + "Core pipeline", + f"{MCP_BOOK_URL}03-core-pipeline/", +) +SUPPRESSIONS_DOC_LINK: Final[tuple[str, str]] = ( + "Inline suppressions contract", + f"{MCP_BOOK_URL}19-inline-suppressions/", +) +MCP_GUIDE_DOC_LINK: Final[tuple[str, str]] = ("MCP usage guide", MCP_GUIDE_URL) +CHANGE_CONTROL_DOC_LINK: Final[tuple[str, str]] = ( + "Structural change controller", + f"{MCP_BOOK_URL}12-structural-change-controller/", +) +ENGINEERING_MEMORY_DOC_LINK: Final[tuple[str, str]] = ( + "Engineering Memory", + f"{MCP_BOOK_URL}13-engineering-memory/", +) +HELP_TOPIC_SPECS: Final[dict[str, MCPHelpTopicSpec]] = { + "workflow": MCPHelpTopicSpec( + summary=( + "CodeClone MCP is triage-first and budget-aware. Start with a " + "summary or production triage, then narrow through hotspots or " + "focused checks before opening one finding in detail." + ), + key_points=( + "Recommended first pass: analyze_repository or analyze_changed_paths.", + ( + "Start with default or pyproject-resolved thresholds; lower them " + "only for an explicit higher-sensitivity follow-up pass." + ), + ( + "Use get_run_summary or get_production_triage before broad " + "finding listing." + ), + ( + "Prefer list_hotspots or focused check_* tools over " + "list_findings on noisy repositories." 
+ ), + ("Use get_finding and get_remediation only after selecting an issue."), + ( + "get_report_section(section='all') is an exception path, not " + "a default first step." + ), + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "get_production_triage", + "list_hotspots", + "check_clones", + "check_dead_code", + "get_finding", + "get_remediation", + ), + doc_links=(MCP_INTERFACE_DOC_LINK, MCP_GUIDE_DOC_LINK), + warnings=( + ( + "Broad list_findings calls burn context quickly on large or " + "noisy repositories." + ), + ( + "Prefer generate_pr_summary(format='markdown') unless machine " + "JSON is explicitly required." + ), + ), + anti_patterns=( + "Starting exploration with list_findings on a noisy repository.", + "Using get_report_section(section='all') as the default first step.", + ( + "Escalating detail on larger lists instead of opening one " + "finding with get_finding." + ), + ), + ), + "analysis_profile": MCPHelpTopicSpec( + summary=( + "CodeClone default analysis is intentionally conservative: stable " + "first-pass review, baseline-aware governance, and CI-friendly " + "signal over maximum local sensitivity." + ), + key_points=( + ( + "Default thresholds are intentionally conservative and " + "production-friendly." + ), + ( + "A clean default run does not rule out smaller local " + "duplication or repetition." + ), + ( + "Lowering thresholds increases sensitivity and can surface " + "smaller functions, tighter windows, and finer local signals." + ), + ( + "Lower-threshold runs are best for exploratory local review, " + "not as a silent replacement for the default governance profile." 
+ ), + "Interpret results in the context of the active threshold profile.", + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "compare_runs", + ), + doc_links=( + CONFIG_DOC_LINK, + PIPELINE_DOC_LINK, + MCP_INTERFACE_DOC_LINK, + ), + warnings=( + ( + "Do not treat a default-threshold run as proof that no smaller " + "local clone or repetition exists." + ), + ( + "Lower-threshold runs usually increase noise and should be read " + "as higher-sensitivity exploratory passes." + ), + "Run comparisons are most meaningful when profiles are aligned.", + ), + anti_patterns=( + ( + "Assuming a clean default pass means no finer-grained " + "duplication exists anywhere in the repository." + ), + ( + "Lowering thresholds for exploration and then interpreting the " + "result as if it had the same meaning as the conservative " + "default pass." + ), + ( + "Mixing low-threshold exploratory output into baseline or CI " + "reasoning without acknowledging the profile change." + ), + ), + ), + "suppressions": MCPHelpTopicSpec( + summary=( + "CodeClone supports explicit inline suppressions for selected " + "findings. They are local policy, not analysis truth, and should " + "stay narrow and declaration-scoped." + ), + key_points=( + "Current syntax uses codeclone: ignore[rule-id,...].", + "Binding is declaration-scoped: def, async def, or class.", + ( + "Supported placement is the previous line or inline on the " + "declaration or header line." + ), + ( + "Suppressions are target-specific and do not imply file-wide " + "or cascading scope." + ), + ( + "Use suppressions for accepted dynamic or runtime false " + "positives, not to hide broad classes of debt." + ), + ), + recommended_tools=("get_finding", "get_remediation"), + doc_links=(SUPPRESSIONS_DOC_LINK, MCP_INTERFACE_DOC_LINK), + warnings=( + ( + "MCP explains suppression semantics but never creates or " + "updates suppressions." 
+ ), + ), + anti_patterns=( + "Treating suppressions as file-wide or inherited state.", + ( + "Using suppressions to hide broad structural debt instead of " + "accepted false positives." + ), + ), + ), + "baseline": MCPHelpTopicSpec( + summary=( + "A baseline is CodeClone's accepted comparison snapshot for clones " + "and optional metrics. It separates accepted debt from " + "baseline-relative new findings and is trust-checked before use." + ), + key_points=( + ( + f"Canonical baseline schema is v{BASELINE_SCHEMA_VERSION} " + "with meta and clone keys; metrics may be embedded for " + "unified flows." + ), + ( + "Compatibility depends on generator identity, supported " + "schema version, fingerprint version, python tag, and payload " + "integrity." + ), + ( + "Known means already present in the trusted baseline; new " + "means not accepted by baseline. This is baseline-relative, " + "not proof that a patch did or did not introduce the finding." + ), + ( + "Patch-local regressions require clean before-run to after-run " + "comparison evidence from compare_runs or patch contract verify." + ), + ( + "In CI and gating contexts, untrusted baseline states are " + "contract errors rather than soft warnings." + ), + "MCP is read-only and does not update or rewrite baselines.", + ), + recommended_tools=("get_run_summary", "evaluate_gates", "compare_runs"), + doc_links=(BASELINE_DOC_LINK,), + warnings=( + "Baseline trust semantics directly affect new-vs-known classification.", + "Do not use baseline novelty alone for patch-local regression claims.", + ), + anti_patterns=( + "Treating baseline as mutable MCP session state.", + "Assuming an untrusted baseline is only cosmetic in CI contexts.", + ), + ), + "coverage": MCPHelpTopicSpec( + summary=( + "Coverage join is an external current-run signal: CodeClone reads " + "an existing Cobertura XML report and joins line hits to risky " + "function spans." 
+ ), + key_points=( + "Use Cobertura XML such as `coverage xml` output from coverage.py.", + "Coverage join does not become baseline truth and does not affect health.", + ( + "Coverage hotspot gating is current-run only and focuses on " + "medium/high-risk functions measured below the configured " + "threshold." + ), + ( + "Functions missing from the supplied coverage.xml are surfaced " + "as scope gaps, not labeled as untested." + ), + "Use metrics_detail(family='coverage_join') for bounded drill-down.", + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "get_report_section", + "evaluate_gates", + ), + doc_links=( + MCP_INTERFACE_DOC_LINK, + CLI_DOC_LINK, + REPORT_DOC_LINK, + ), + warnings=( + "Coverage join is only as accurate as the external XML path mapping.", + "It does not infer branch coverage and does not execute tests.", + "Use fail-on-untested-hotspots only with a valid joined coverage input.", + ), + anti_patterns=( + "Treating missing coverage XML as zero coverage without stating it.", + "Reading coverage join as a baseline-aware trend signal.", + "Assuming dynamic runtime dispatch is visible through a static line join.", + ), + ), + "latest_runs": MCPHelpTopicSpec( + summary=( + "latest/* resources point to the most recent analysis run in the " + "current MCP session. They are convenience handles, not persistent " + "truth anchors." + ), + key_points=( + "Run history is in-memory only and bounded by history-limit.", + "The latest pointer moves when a newer analyze_* call registers a run.", + "A fresh repository state requires a fresh analyze run.", + ( + "Short run ids are convenience handles derived from canonical " + "run identity." + ), + ( + "Do not assume latest/* is globally current outside the " + "active MCP session." 
+ ), + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "compare_runs", + ), + doc_links=(MCP_INTERFACE_DOC_LINK, MCP_GUIDE_DOC_LINK), + warnings=( + ( + "latest/* can point at a different repository after a later " + "analyze call in the same session." + ), + ), + anti_patterns=( + ( + "Assuming latest/* remains tied to one repository across the " + "whole client session." + ), + ( + "Using latest/* as a substitute for starting a fresh run when " + "freshness matters." + ), + ), + ), + "review_state": MCPHelpTopicSpec( + summary=( + "Reviewed state in MCP is session-local workflow state. It helps " + "long sessions track review progress without modifying canonical " + "findings, baseline, or persisted artifacts." + ), + key_points=( + "Review markers are in-memory only.", + "They do not change report truth, finding identity, or CI semantics.", + "They are useful for triage workflows across long sessions.", + ( + "They should not be interpreted as acceptance, suppression, " + "or baseline update." + ), + ), + recommended_tools=( + "list_hotspots", + "get_finding", + "mark_finding_reviewed", + "list_reviewed_findings", + ), + doc_links=(MCP_INTERFACE_DOC_LINK, MCP_GUIDE_DOC_LINK), + warnings=( + "Reviewed markers disappear when the MCP session is cleared or restarted.", + ), + anti_patterns=( + "Treating reviewed state as a persistent acceptance signal.", + "Assuming reviewed findings are removed from canonical report truth.", + ), + ), + "changed_scope": MCPHelpTopicSpec( + summary=( + "Changed-scope analysis narrows review to findings that touch a " + "selected change set. It is for PR and patch review, not a " + "replacement for full canonical analysis." + ), + key_points=( + ( + "Use analyze_changed_paths with explicit changed_paths or " + "git_diff_ref for review-focused runs." 
+ ), + ( + "Start with the same conservative profile as the default " + "review, then lower thresholds only when you explicitly want " + "a higher-sensitivity changed-files pass." + ), + ( + "Changed-scope is best for asking what new issues touch " + "modified files and whether anything should block CI." + ), + "Prefer production triage and hotspot views before broad listing.", + "If repository-wide truth is needed, run full analysis first.", + ), + recommended_tools=( + "analyze_changed_paths", + "get_run_summary", + "get_production_triage", + "evaluate_gates", + "generate_pr_summary", + ), + doc_links=(MCP_INTERFACE_DOC_LINK, MCP_GUIDE_DOC_LINK), + warnings=( + ( + "Changed-scope narrows review focus; it does not replace the " + "full canonical report for repository-wide truth." + ), + ), + anti_patterns=( + "Using changed-scope as if it were the only source of repository truth.", + ( + "Starting changed-files review with broad listing instead of " + "compact triage." + ), + ), + ), + "change_control": MCPHelpTopicSpec( + summary=( + "Edit-time workflow: declare scope, edit inside it, finish with " + "evidence. Prefer start_controlled_change and finish_controlled_change." + ), + key_points=( + ( + "Cycle: analyze_repository → start_controlled_change → " + "get_relevant_memory → edit → analyze (if after_run required) → " + "finish_controlled_change." + ), + ( + "Requires edit_allowed=true from start; queue via " + "start(on_conflict=queue) then manage_change_intent(promote)." + ), + ( + "Multi-agent: manage_change_intent(list_workspace|renew|recover|" + "gc_workspace) — registry is advisory under .codeclone/intents/." + ), + ( + "Finish: changed_files XOR diff_ref; after_run_id when " + "verification.after_run_required " + "(help(topic=verification_profiles))." + ), + ( + "finish detail_level=full adds hygiene path attribution; " + "patch_trail_detail summary|full on patch_trail (forensics only)." + ), + ( + "Blocks finish: missing_evidence, foreign_dirty_overlap. 
" + "Out-of-scope dirt is advisory — may yield " + "accepted_with_external_changes." + ), + ("Optional CODECLONE_STRICT_FINISH env may block own_unscoped_dirty."), + ("patch_trail + audit patch_trail.computed do not authorize edits."), + ( + "Atomic declare/check/verify/clear is legacy/debug only when " + "start/finish unavailable." + ), + ), + recommended_tools=( + "analyze_repository", + "start_controlled_change", + "get_relevant_memory", + "finish_controlled_change", + "manage_change_intent", + ), + doc_links=(CHANGE_CONTROL_DOC_LINK, MCP_INTERFACE_DOC_LINK), + warnings=( + "Workspace registry is coordination state, not analysis truth.", + "review_context is information, not an edit ban.", + ), + anti_patterns=( + "Editing before start_controlled_change with edit_allowed=true.", + "Mixing start/finish with atomic verify/clear in one cycle.", + "Resetting a foreign live intent instead of coordinating.", + ), + ), + "trust_boundaries": MCPHelpTopicSpec( + summary=( + "Documented MCP trust limits: read-only analysis, advisory " + "workspace intents, strict artifact paths with opt-in external " + "resolution, and optional Bearer auth on streamable-http." + ), + key_points=( + "MCP never mutates source, baseline, cache.json, or canonical reports.", + ( + "baseline_path, cache_path, coverage_xml resolve under the scan " + "root by default; pass allow_external_artifacts=true for " + "absolute or out-of-repo paths (privileged)." + ), + ( + "Workspace intents under .codeclone/intents/ are " + "advisory same-UID coordination, not signed proof." + ), + ( + "Cache signatures and baseline payload_sha256 detect " + "corruption, not hostile same-UID writers." + ), + ( + "streamable-http: set CODECLONE_MCP_AUTH_TOKEN (>=32 chars) for " + "Bearer auth; --allow-remote is separate loopback guard." + ), + ( + "security_surfaces in responses is report-only inventory, " + "not a vulnerability scan." 
+ ), + ), + recommended_tools=("help", "analyze_repository", "start_controlled_change"), + doc_links=(MCP_INTERFACE_DOC_LINK,), + warnings=( + "Do not treat advisory intent files as cryptographic agent identity.", + ), + anti_patterns=( + "Calling Security Surfaces a vulnerability audit.", + "Passing allow_external_artifacts=true without treating paths " + "as privileged.", + ), + ), + "implementation_context": MCPHelpTopicSpec( + summary=( + "get_implementation_context projects bounded, deterministic context " + "from one existing analysis run. It does not re-analyze, authorize " + "edits, or replace start_controlled_change." + ), + key_points=( + ( + "Explicit repo-relative paths or exact qualnames return " + "established module, import, importer, API-surface, blast-radius, " + "cache-origin, and workspace-freshness facts." + ), + ( + "Symbol subjects use module:symbol qualnames (colon separator, " + "for example pkg.mod:func). They resolve against a deterministic " + "off-report Unit plus API-surface location index. Dot notation is " + "not accepted. Unknown qualnames are reported in " + "subject.unresolved_symbols and are never guessed." + ), + ( + "Subject precedence is explicit paths/symbols, then active " + "intent allowed_files, then the bounded live git-dirty set. A " + "clean tree with no subject returns no_current_work." + ), + ( + "An active intent adds the declared allowed files/related paths, " + "review context, do-not-touch boundaries, and guards. This block " + "mirrors start_controlled_change; it does not create permission." + ), + ( + "mode=impact expands transitive dependency context and projects " + "baseline-sensitive findings. Scoped memory, test anchors, docs, " + "trajectories, and Experiences stay in separate evidence lanes." + ), + ( + "mode=contract returns a truth-map: definition_sites, " + "version_constants, contract_tests, and memory_conflicts. 
" + "Persistence and serialization path callers are emitted only with " + "a typed or memory-backed anchor; without one they are marked " + "not_available rather than guessed from names or directories, and " + "true field readers/writers stay deferred to dataflow." + ), + ( + "analysis.freshness compares the run manifest with live " + "mtime+size and, when available, the git DirtySnapshot delta." + ), + ( + "context_artifact_digest authenticates the off-report context " + "artifact; context_projection_digest authenticates this bounded " + "request and response projection." + ), + ( + "Import, importer, and test-importer roles collapse into " + "related_modules entries with explicit relations. One global " + "budget bounds evidence and reports every omission." + ), + ( + "Safety context consumes budget first. If safety entries exceed " + "the hard cap, status=safety_context_overflow makes the omission " + "explicit." + ), + ( + "call_context projects callers, callees, references, and " + "test_callers from run-bound relationship facts. Each edge is " + "tagged relation_kind x resolution_status; production callers and " + "test-origin callers stay in separate lanes and test edges never " + "make production code live. Unresolved calls are observations " + "(target=null), and analysis.call_graph_status reports " + "complete/partial/unavailable." 
+ ), + ), + recommended_tools=( + "analyze_repository", + "get_implementation_context", + "start_controlled_change", + ), + doc_links=(MCP_INTERFACE_DOC_LINK, MCP_GUIDE_DOC_LINK), + warnings=( + "freshness strength is mtime+size plus optional git delta, not a " + "content hash of every source file.", + "The tool is context evidence only; edit_allowed remains authoritative.", + ), + anti_patterns=( + "Treating implementation context as edit authorization.", + "Ignoring freshness.status=drifted and editing against a stale run.", + "Describing unresolved or unavailable relationship evidence as fact.", + "Assuming a truncated collection is complete without reading its summary.", + ), + ), + "observability": MCPHelpTopicSpec( + summary=( + "query_platform_observability: read-only, dev-only diagnostics over " + "CodeClone's OWN runtime telemetry (Phase 29). A sectioned slicer, " + "not a trace export API — for building CodeClone itself, never a " + "user-facing repository signal." + ), + key_points=( + "Dev-only: never affects reports, gates, baselines, memory facts, " + "or edit authorization; numeric metrics only, no raw SQL/payloads.", + ( + "Sections: summary | slow_operations | memory_pipeline_cost | " + "db_cost | agent_context | mcp_tool_matrix | correlated_chains " + "| costly_noops | pipeline. Start at summary, then follow " + "recommended_next_sections." + ), + ( + "detail_level compact|normal; full is reserved for future " + "by-id detail sections and downgrades to normal here. limit " + "clamps to [1, 50]." + ), + ( + "Anti-inference: this is CodeClone's runtime, not the user " + "repo. High DB queries != repository bad; high MCP payload != " + "code quality low; hot semantic reindex != unsafe change." + ), + ( + "Absent/disabled telemetry returns an inert " + "status=disabled|no_store envelope, never an error; the " + "branded HTML cockpit stays the humans' everything-view." 
+ ), + ), + recommended_tools=("query_platform_observability",), + doc_links=(MCP_INTERFACE_DOC_LINK,), + warnings=( + "Sections return status=disabled unless " + "CODECLONE_OBSERVABILITY_ENABLED was set for the producing process.", + ), + anti_patterns=( + "Using runtime telemetry to make user-facing quality claims about " + "a repository.", + "Reading db_queries or payload sizes as a code-quality verdict.", + ), + ), + "engineering_memory": MCPHelpTopicSpec( + summary=( + "Ranked scope context before edits, FTS search, optional semantic " + "sidecar (off by default), trajectory forensics, draft-only writes." + ), + key_points=( + ( + "After edit_allowed=true: get_relevant_memory(root=abs, " + "scope|intent_id|symbols). root is required." + ), + ( + "Bootstrap: mcp_sync_policy default bootstrap_if_missing; " + "refresh_from_run for explicit ingest." + ), + ( + "Query: for_path, for_symbol, search (filters.match_mode), get, " + "status, stale; trajectory_status|trajectory_search|" + "trajectory_get|trajectory_anomalies|trajectory_agents|" + "trajectory_dashboard after rebuild_trajectories." + ), + ( + "Scoped response lanes: records[]=durable assertions, " + "experiences[]=advisory patterns, trajectories[]=bounded examples, " + "coverage=availability/trust context. Scores are lane-local: " + "never compare relevance_score across lanes; for_path and plain " + "(non-semantic) search are unranked." + ), + ( + "compact (default): record/trajectory subjects are bounded with " + "subject_count+subjects_truncated; experiences expose multi_agent " + "+ dominant_agent_facet; no quality_contract, steps, evidence ids, " + "or payload. patch_trail_summary rides each trajectory, never " + "duplicated at the payload root. Use full/get for drill-down." + ), + ( + "Semantic (off by default): enable sidecar, rebuild_semantic_index, " + "then search with semantic=true." 
+ ), + ( + "Projections: rebuild_trajectories; jobs via " + "enqueue_projection_rebuild, projection_rebuild_status, " + "run_projection_jobs_once (or finish hook when policy on)." + ), + ( + "Agent writes draft-only: record_candidate, validate_claims, " + "finish(propose_memory=true). Approve via VS Code Memory view." + ), + ( + "Never use project root as scope; one fact per record_candidate " + "(target <=300 chars). detail_level=full or mode=get for full text." + ), + ), + recommended_tools=( + "get_relevant_memory", + "query_engineering_memory", + "manage_engineering_memory", + "start_controlled_change", + ), + doc_links=(ENGINEERING_MEMORY_DOC_LINK, MCP_INTERFACE_DOC_LINK), + warnings=( + "Draft, inferred, and stale records are not established policy.", + "trajectories[] and Patch Trail context do not override findings.", + "Truncation metadata means more evidence exists; it is not evidence loss.", + ), + anti_patterns=( + "Using memory to justify do_not_touch edits or scope expansion.", + "get_relevant_memory without root, scope, intent_id, or symbols.", + "approve/reject/archive via MCP — use VS Code Memory view.", + ), + ), + "verification_profiles": MCPHelpTopicSpec( + summary=( + "finish_controlled_change derives verification_profile from changed " + "files — controls after_run requirements and structural checks." + ), + key_points=( + ( + "Read verification.verification_profile and after_run_required " + "from finish — do not guess." + ), + ( + "python_structural (.py/.pyi) and governance_config need a new " + "after_run_id." + ), + ( + "documentation_only and non_python_patch may verify from " + "changed_files without after_run." + ), + ( + "state_artifact_change (codeclone.baseline.json, .codeclone/**, " + ".cache/codeclone/**) is violated, not verified." + ), + ( + "after_run_not_new when before and after runs match for " + "structural profiles." 
+ ), + ( + "accepted means patch contract passed for scope — not unchanged " + "health or repo-wide cleanliness." + ), + ( + "Read verification.structural_delta and health_regression_advisory " + "on accept." + ), + "Skipped receipt checks are not applicable, never passed.", + ), + recommended_tools=( + "finish_controlled_change", + "analyze_repository", + "check_patch_contract", + ), + doc_links=(CHANGE_CONTROL_DOC_LINK,), + warnings=("Do not claim full structural verification for docs-only patches.",), + anti_patterns=( + "Skipping after_run_id for Python patches.", + "Treating documentation_only accepted as no regressions repo-wide.", + ), + ), +} diff --git a/codeclone/surfaces/mcp/messages/instructions.py b/codeclone/surfaces/mcp/messages/instructions.py new file mode 100644 index 00000000..ffe17472 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/instructions.py @@ -0,0 +1,45 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""FastMCP server instructions and install hints.""" + +from __future__ import annotations + +from typing import Final + +SERVER_INSTRUCTIONS: Final = ( + "CodeClone MCP is a deterministic, baseline-aware, read-only analysis server " + "for Python repositories. Use analyze_repository first for full runs or " + "analyze_changed_paths for PR-style review, then prefer get_run_summary or " + "get_production_triage for the first pass. After analysis, use " + "get_implementation_context for bounded structural, call-graph, contract, and " + "change-control evidence from one stored run (help(topic=implementation_context); " + "codeclone-implementation-context skill). 
Use list_hotspots or focused " + "check_* tools before broader list_findings calls, then drill into one " + "finding with get_finding or get_remediation. Use " + "help(topic=...) when workflow or contract semantics are unclear. Use " + "default or pyproject-resolved thresholds for the first pass, and lower " + "them only for an explicit higher-sensitivity follow-up when needed. Use " + "get_report_section(section='metrics_detail', family=..., limit=...) for " + "bounded metrics drill-down, and prefer generate_pr_summary(format='markdown') " + "unless machine JSON is required. Coverage join accepts external Cobertura " + "XML as a current-run signal and does not become baseline truth. Pass an " + "absolute repository root to analysis tools. For file edits, prefer " + "start_controlled_change and finish_controlled_change for the complete " + "edit cycle. Use manage_change_intent for queue/promote/recover " + "operations. Atomic tools (get_blast_radius, check_patch_contract, " + "validate_review_claims, create_review_receipt) remain available for " + "advanced inspection and diagnostic use. " + "If concurrent intents overlap, narrow scope or coordinate. This server never " + "updates baselines and never mutates source files, analysis cache, or reports; " + "it may write ephemeral workspace coordination state under " + ".codeclone/intents/." +) + +MCP_INSTALL_HINT: Final = ( + "CodeClone MCP support requires the optional 'mcp' extra. " + "Install it with: pip install 'codeclone[mcp]'" +) diff --git a/codeclone/surfaces/mcp/messages/intent.py b/codeclone/surfaces/mcp/messages/intent.py new file mode 100644 index 00000000..564fc9ce --- /dev/null +++ b/codeclone/surfaces/mcp/messages/intent.py @@ -0,0 +1,76 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Change-intent lifecycle user messages.""" + +from __future__ import annotations + +from typing import Final + +QUEUED_PROMOTE_BEFORE_EDIT: Final = "Queued. Promote before editing." + +PROMOTE_BEFORE_RUN_EVICTED_NEXT: Final = ( + "Run analyze_repository to create a fresh before-run, then redeclare the intent." +) + +PROMOTE_BEFORE_RUN_EVICTED: Final = ( + "Before-run was evicted from bounded history. Re-analyze and redeclare the intent." +) + +PROMOTE_STILL_BLOCKED: Final = "Intent is still blocked by active workspace intents." + +PROMOTED_RECHECK: Final = ( + "Queued intent promoted. Re-check blast radius and patch budget before editing." +) + +QUEUED_SCOPE_WAITING: Final = "Another agent is waiting for this scope." + +RESET_LIVE_FOREIGN: Final = ( + "Intent belongs to a live process. Coordinate " + "with the owning agent or user before resetting it." +) + +RECOVERY_HINT: Final = "Use action='recover' with matching run_id to reclaim." +RECOVERY_NEEDS_ANALYSIS_HINT: Final = ( + "Recoverable intent found. Run analyze_repository in this MCP session, " + "then use action='recover' with the matching run_id." +) +RECOVERY_LIST_NEXT_STEP: Final = ( + "Recovery candidates may require a fresh analyze_repository run after " + "MCP restart before recover succeeds." +) + +SCOPE_CHECK_FORBIDDEN: Final = "Patch touched forbidden or out-of-scope files." +SCOPE_CHECK_RELATED: Final = ( + "Patch touched allowed related files outside primary scope." +) +SCOPE_CHECK_CLEAN: Final = "Patch stayed inside declared scope." + +RECOVERY_FOREIGN_ACTIVE: Final = ( + "Intent has a valid lease from a live process. Cannot recover. " + "Use action='list_workspace' to inspect, then coordinate with the user." +) + +RECOVERY_FOREIGN_STALE: Final = ( + "Intent belongs to a live process with an expired lease. " + "The owner may still be working. Coordinate with the user before recovering." 
+) + +RECOVERY_EXPIRED: Final = "Intent has expired (TTL). Declare a new intent instead." + +DECLARE_FOREIGN_ACTIVE_OVERLAP: Final = ( + "Foreign active intent overlaps your scope. Ask the user, narrow scope, " + 'or restart with on_conflict="queue".' +) + +DECLARE_FOREIGN_STALE_OVERLAP: Final = ( + "Foreign stale intent overlaps your scope. Coordinate with the user or " + "recover the foreign intent before editing." +) + +DECLARE_FOREIGN_OVERLAP: Final = ( + "Foreign intent overlaps your scope. Ask the user before editing." +) diff --git a/codeclone/surfaces/mcp/messages/params.py b/codeclone/surfaces/mcp/messages/params.py new file mode 100644 index 00000000..270b1ed1 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/params.py @@ -0,0 +1,668 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""MCP tool parameter Field descriptions for JSON Schema export.""" + +from __future__ import annotations + +from typing import Annotated, Literal, get_args + +from pydantic import Field + +RootParam = Annotated[str, Field(description="Absolute repository root path.")] +OptionalRootParam = Annotated[ + str | None, + Field(description="Absolute repository root when resolving run by root."), +] +RunIdParam = Annotated[ + str | None, + Field(description="8-char or full run id; latest run when omitted."), +] +RunIdRequiredParam = Annotated[ + str, + Field(description="8-char or full run id from analyze response."), +] +Facet = Literal[ + "module_role", + "imports", + "importers", + "callers", + "callees", + "references", + "public_surface", + "blast_radius", + "tests", + "contract_tests", + "test_callers", + "docs", + "memory", + "trajectories", + "experiences", + "memory_conflicts", + "definition_sites", + "persistence_path_callers", + 
"serialization_path_callers", + "deserialization_path_callers", + "store_api_consumers", + "scope", + "review_context", + "baseline_sensitive_findings", + "version_constants", + "dataflow", +] +VALID_FACETS = frozenset(get_args(Facet)) +ContextPathsParam = Annotated[ + list[str] | None, + Field(description="Repo-relative implementation-context subject paths."), +] +ContextSymbolsParam = Annotated[ + list[str] | None, + Field( + description=( + "module:symbol qualnames to resolve as context subjects " + "(colon separator, for example pkg.mod:func)." + ), + ), +] +ChangedScopeParam = Annotated[ + bool, + Field(description="Use the current git-dirty scope as the context subject."), +] +ContextQueryParam = Annotated[ + str | None, + Field( + description=( + "Name search query across analyzed definitions, call targets, and " + "imports; mutually exclusive with paths, symbols, and changed_scope." + ) + ), +] +ContextModeParam = Annotated[ + Literal["implementation", "impact", "contract"], + Field(description="implementation, impact, or contract context mode."), +] +FacetIncludeParam = Annotated[ + list[Facet] | None, + Field(description="Optional closed set of implementation-context facets."), +] +ContextDepthParam = Annotated[ + int, + Field( + ge=0, + le=3, + description="Bounded structural traversal depth from 0 through 3.", + ), +] +ContextDetailLevelParam = Annotated[ + Literal["compact", "normal", "full"], + Field(description="compact, normal, or full context detail level."), +] +ContextBudgetParam = Annotated[ + int, + Field( + ge=1, + le=200, + description="Global maximum emitted context entries (1-200).", + ), +] +AnalysisModeParam = Annotated[ + str, + Field(description="full: clones+metrics. 
clones_only: clones without metrics."), +] +RespectPyprojectParam = Annotated[ + bool, + Field(description="Load [tool.codeclone] thresholds from pyproject when true."), +] +ChangedPathsParam = Annotated[ + list[str] | None, + Field(description="Repo-relative paths; mutually exclusive with git_diff_ref."), +] +GitDiffRefParam = Annotated[ + str | None, + Field(description="Safe git revision for changed files; not with changed_paths."), +] +ProcessesParam = Annotated[ + int | None, + Field(description="Parallel workers override; capped by host and 64."), +] +ThresholdIntParam = Annotated[ + int | None, + Field(description="Optional analysis threshold override."), +] +ApiSurfaceParam = Annotated[ + bool | None, + Field(description="Enable API surface metrics when true."), +] +CoverageXmlParam = Annotated[ + str | None, + Field( + description=( + "Cobertura XML path. Absolute/out-of-repo paths require " + "allow_external_artifacts=true." + ) + ), +] +CoverageMinParam = Annotated[ + int | None, + Field(description="Coverage gate minimum percent (0-100)."), +] +OptionalPathParam = Annotated[ + str | None, + Field( + description=( + "Repo-relative artifact path. Absolute/out-of-repo paths require " + "allow_external_artifacts=true." + ) + ), +] +AllowExternalArtifactsParam = Annotated[ + bool, + Field( + description=( + "Allow optional artifact paths (baseline_path, metrics_baseline_path, " + "cache_path, coverage_xml) to be absolute or outside the repository." + ) + ), +] +MaxSizeMbParam = Annotated[ + int | None, + Field(description="Max artifact size in megabytes."), +] +CachePolicyParam = Annotated[ + str, + Field(description="reuse: read cache. off: skip cache. 
MCP read-only."), +] +FilesParam = Annotated[ + list[str], + Field(description="Repo-relative files to inspect for blast radius."), +] +BlastDepthParam = Annotated[ + str, + Field(description="direct, transitive, or auto blast-radius depth."), +] +IncludeParam = Annotated[ + list[str] | None, + Field(description="Optional blast-radius include filters."), +] +PatchModeParam = Annotated[ + str, + Field(description="budget: pre-edit gate preview. verify: post-edit check."), +] +StrictnessParam = Annotated[ + str, + Field(description="ci, relaxed, or strict patch-contract profile."), +] +DiffRefParam = Annotated[ + str | None, + Field(description="Git revision for scope or verify evidence."), +] +ChangedFilesParam = Annotated[ + list[str] | None, + Field(description="Repo-relative changed files for scope or verify."), +] +IntentIdParam = Annotated[str, Field(description="Intent id from start or declare.")] +OptionalIntentIdParam = Annotated[ + str | None, + Field(description="Active or queued intent id when required by action."), +] +ReceiptFormatParam = Annotated[ + str, + Field(description="markdown or json receipt output."), +] +ReviewTextParam = Annotated[ + str, + Field(description="Review claims text to validate against the run."), +] +FinishReviewTextParam = Annotated[ + str | None, + Field( + description=( + "Optional human review note for finish output; not claim-validated. " + "Use claims_text for text that should be checked." + ), + ), +] +ClaimsTextParam = Annotated[ + str | None, + Field(description="Optional claims text to validate against the run."), +] +RequireCitationsParam = Annotated[ + bool, + Field(description="Require finding ids or metric citations in text."), +] +PatchHealthDeltaParam = Annotated[ + int | None, + Field( + description=( + "Optional health delta from check_patch_contract verify " + "(after minus before). Enables health-regression overclaim checks." 
+ ), + ), +] +HelpTopicParam = Annotated[ + str, + Field( + description=( + "workflow, analysis_profile, suppressions, baseline, coverage, " + "latest_runs, review_state, changed_scope, change_control, " + "trust_boundaries, engineering_memory, implementation_context, " + "verification_profiles, observability" + ) + ), +] +HelpDetailParam = Annotated[ + str, + Field(description="compact includes anti_patterns; normal adds warnings."), +] +GateIntParam = Annotated[ + int, + Field(description="Gate threshold; -1 disables that gate."), +] +GateBoolParam = Annotated[bool, Field(description="Enable this gate when true.")] +ReportSectionParam = Annotated[ + str, + Field( + description=( + "meta, inventory, findings, metrics, metrics_detail, changed, " + "derived, integrity, or all." + ) + ), +] +FamilyParam = Annotated[ + str | None, + Field(description="Metrics or finding family filter."), +] +PathFilterParam = Annotated[ + str | None, + Field(description="Repo-relative module or file path filter."), +] +OffsetParam = Annotated[int, Field(description="Pagination offset.")] +LimitParam = Annotated[int, Field(description="Pagination limit.")] +FindingFamilyParam = Annotated[ + str, + Field(description="all, clone, structural, dead_code, or design."), +] +CategoryParam = Annotated[str | None, Field(description="Finding category filter.")] +SeverityParam = Annotated[str | None, Field(description="critical, warning, or info.")] +SourceKindParam = Annotated[ + str | None, + Field(description="production, tests, fixtures, mixed, or other."), +] +NoveltyParam = Annotated[str, Field(description="all, new, or known vs baseline.")] +SortByParam = Annotated[ + str, + Field(description="default, priority, severity, or spread."), +] +DetailLevelParam = Annotated[ + str, + Field(description="summary, normal, or full detail level."), +] +PatchTrailDetailParam = Annotated[ + str, + Field(description="summary or full patch_trail payload on finish."), +] +ExcludeReviewedParam = Annotated[ 
+ bool, + Field(description="Omit session-marked reviewed findings when true."), +] +MaxResultsParam = Annotated[ + int | None, + Field(description="Optional hard cap on returned items."), +] +FindingIdParam = Annotated[ + str, Field(description="Short or full canonical finding id.") +] +HotspotKindParam = Annotated[ + str, + Field( + description=( + "most_actionable, highest_spread, highest_priority, " + "production_hotspots, or test_fixture_hotspots." + ) + ), +] +CompareFocusParam = Annotated[ + str, + Field(description="all, clones, structural, or metrics comparison focus."), +] +MaxHotspotsParam = Annotated[ + int, Field(description="Max production hotspots returned.") +] +MaxSuggestionsParam = Annotated[ + int, Field(description="Max production suggestions returned.") +] +CloneTypeParam = Annotated[ + str | None, Field(description="function, block, or segment.") +] +MinComplexityParam = Annotated[ + int | None, + Field(description="Minimum cyclomatic complexity filter."), +] +MinSeverityParam = Annotated[ + str | None, + Field(description="Minimum dead-code severity filter."), +] +PrFormatParam = Annotated[ + str, + Field(description="markdown (preferred) or json PR summary."), +] +ReviewNoteParam = Annotated[ + str | None, + Field(description="Optional session-local review note."), +] +ScopeParam = Annotated[ + dict[str, object], + Field( + description=( + "Scope object with allowed_files, optional allowed_related, forbidden." 
+ ) + ), +] +IntentTextParam = Annotated[ + str, Field(description="Short description of planned edit.") +] +ExpectedEffectsParam = Annotated[ + list[str] | None, + Field(description="Optional expected patch effects for review."), +] +OnConflictParam = Annotated[ + str | None, + Field(description="queue to wait on overlapping workspace intents."), +] +TtlSecondsParam = Annotated[ + int | None, + Field(description="Intent TTL seconds; default 3600."), +] +BlastRadiusDepthParam = Annotated[ + str, + Field(description="auto, direct, or transitive pre-edit blast radius."), +] +DirtyScopePolicyParam = Annotated[ + str, + Field( + description=( + "block (default) or continue_own_wip when uncommitted changes " + "already overlap declared scope." + ), + ), +] +AfterRunIdParam = Annotated[ + str | None, + Field(description="Post-edit analyze run id for structural verify."), +] +BeforeRunIdParam = Annotated[ + str | None, + Field(description="Pre-edit analyze run id or intent-resolved before run."), +] +CreateReceiptParam = Annotated[ + bool, + Field(description="Generate review receipt on accepted finish."), +] +AutoClearParam = Annotated[ + bool, + Field(description="Clear intent after accepted finish when true."), +] +IncludeBlastRadiusParam = Annotated[ + bool, + Field(description="Include blast radius section in receipt."), +] +IncludePatchContractParam = Annotated[ + bool, + Field(description="Include patch contract section in receipt."), +] +OptionalScopeParam = Annotated[ + dict[str, object] | None, + Field(description="Scope for declare: allowed_files, allowed_related, forbidden."), +] +OptionalIntentTextParam = Annotated[ + str | None, + Field(description="Intent description for declare action."), +] +ManageActionParam = Annotated[ + str, + Field( + description=( + "list_workspace, declare, get, check, clear, renew, promote, " + "recover, gc_workspace, reset_workspace." 
+ ) + ), +] +LeaseSecondsParam = Annotated[ + int | None, + Field(description="Lease renewal seconds for renew action."), +] +MemoryDetailLevelParam = Annotated[ + str, + Field( + description=( + "compact (default) returns statement previews without payload; " + "full returns complete statement and payload. mode=get always returns full." + ), + ), +] +MemoryScopeListParam = Annotated[ + list[str] | None, + Field(description="Repo-relative scope paths for engineering memory retrieval."), +] +MemorySymbolsParam = Annotated[ + list[str] | None, + Field(description="Optional symbol keys for engineering memory retrieval."), +] +MemoryQueryModeParam = Annotated[ + str, + Field( + description=( + "search, get, for_path, for_symbol, stale, drafts, coverage, status, " + "trajectory_status, trajectory_search, trajectory_get, " + "trajectory_anomalies, trajectory_agents, or trajectory_dashboard." + ), + ), +] +MemorySearchQueryParam = Annotated[ + str | None, + Field(description="Keyword query for mode=search or mode=trajectory_search."), +] +MemoryRecordIdParam = Annotated[ + str | None, + Field( + description=( + "Record id for mode=get or IDE governance actions; trajectory id for " + "mode=trajectory_get." + ), + ), +] +MemoryPathParam = Annotated[ + str | None, + Field( + description=( + "Repo-relative subject path for manage_engineering_memory " + "action=record_candidate (required for record_candidate)." + ), + ), +] +MemorySymbolParam = Annotated[ + str | None, + Field(description="Symbol qualname for mode=for_symbol."), +] +MemoryFiltersParam = Annotated[ + dict[str, object] | None, + Field( + description=( + "Optional filters: types, statuses, confidences, match_mode " + "(any|all, search mode only), include_routine (trajectory_search, " + "trajectory_anomalies, trajectory_agents, trajectory_dashboard; " + "default false excludes run:* routine workflows)." 
+ ), + ), +] +IncludeStaleParam = Annotated[ + bool, + Field(description="Include stale engineering memory records."), +] +SemanticParam = Annotated[ + bool, + Field( + description=( + "Blend semantic recall into mode=search (FTS plus semantic, " + "re-ranked); audit incidents and trajectory precedents are returned " + "typed-separate. Requires the optional index; falls back to FTS-only " + "when unavailable." + ) + ), +] +IncludeDraftsParam = Annotated[ + bool, + Field( + description=( + "Include draft engineering memory records. Defaults false for search; " + "get_relevant_memory with scope or intent_id includes drafts " + "automatically; query_engineering_memory for_path/for_symbol includes " + "drafts without setting this flag." + ), + ), +] +MemoryMaxRecordsParam = Annotated[ + int, + Field(description="Maximum engineering memory records to return."), +] +AuditTrailLimitParam = Annotated[ + int, + Field( + description=( + "Maximum recent audit events for IDE-only get_controller_audit_trail." + ), + ), +] +AuditPathOverrideParam = Annotated[ + str | None, + Field( + description=( + "Optional audit database path override for IDE-only " + "get_controller_audit_trail." + ), + ), +] +ManageMemoryActionParam = Annotated[ + str, + Field( + description=( + "Agent: record_candidate, promote_experience, validate_claims, " + "propose_from_receipt, " + "refresh_from_run, rebuild_semantic_index, rebuild_trajectories, " + "enqueue_projection_rebuild, projection_rebuild_status, " + "run_projection_jobs_once. " + "IDE channel only (VS Code): " + "register_ide_governance, " + "prepare_governance, commit_governance. approve/reject/archive are not " + "available through MCP." + ), + ), +] +ManageMemoryExperienceIdParam = Annotated[ + str | None, + Field( + description=( + "Experience id for manage_engineering_memory " + "action=promote_experience (required for that action)." 
+ ), + ), +] +GovernanceDecisionParam = Annotated[ + str | None, + Field(description="IDE governance decision: approve, reject, or archive."), +] +IdeGovernanceKeyParam = Annotated[ + str | None, + Field( + description=( + "Session-bound IDE governance key (hex, >=32 bytes). VS Code only." + ), + ), +] +IdeGovernanceClientNameParam = Annotated[ + str | None, + Field(description="IDE client name for register_ide_governance."), +] +IdeGovernanceClientVersionParam = Annotated[ + str | None, + Field(description="IDE client version for register_ide_governance."), +] +GovernanceTicketParam = Annotated[ + str | None, + Field(description="Single-use governance ticket from prepare_governance."), +] +ConfirmationNonceParam = Annotated[ + str | None, + Field(description="Nonce from prepare_governance; required for commit."), +] +GovernanceProofParam = Annotated[ + str | None, + Field(description="HMAC proof for commit_governance (protocol v2)."), +] +GovernanceActorParam = Annotated[ + str | None, + Field(description="Human actor label stored on the memory revision."), +] +GovernanceProtocolParam = Annotated[ + int | None, + Field(description="IDE attestation protocol version (currently 2)."), +] +MemoryRecordTypeParam = Annotated[ + str | None, + Field(description="Memory record type for record_candidate."), +] +MemoryStatementParam = Annotated[ + str | None, + Field(description="Candidate statement for record_candidate."), +] +MemoryClaimsTextParam = Annotated[ + str | None, + Field(description="Claims text for validate_claims."), +] +ProposeMemoryParam = Annotated[ + bool, + Field( + description=( + "When true on accepted finish, propose draft memory candidates " + "and mark scope-linked records stale." + ), + ), +] + +ObservabilitySectionParam = Annotated[ + str, + Field( + description=( + "Telemetry section to project: summary | slow_operations | " + "memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | " + "correlated_chains | costly_noops | pipeline." 
+ ), + ), +] +ObservabilityDetailParam = Annotated[ + str, + Field( + description=( + "compact (bounded top rows) or normal (rows up to limit); full " + "downgrades to normal for aggregate sections." + ), + ), +] +ObservabilityLimitParam = Annotated[ + int, + Field(description="Row cap per section; clamped to [1, 50], else 10."), +] +ObservabilityWindowParam = Annotated[ + str, + Field(description="'latest' for the recent window, or a correlation_id."), +] +ObservabilityOperationIdParam = Annotated[ + str | None, + Field(description="Reserved for detail sections; echoed in ignored_parameters."), +] +ObservabilitySpanIdParam = Annotated[ + str | None, + Field(description="Reserved for detail sections; echoed in ignored_parameters."), +] diff --git a/codeclone/surfaces/mcp/messages/patch_contract.py b/codeclone/surfaces/mcp/messages/patch_contract.py new file mode 100644 index 00000000..8773cd28 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/patch_contract.py @@ -0,0 +1,115 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Patch-contract next_step hints and status messages.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Final + +NEXT_STEP_HINTS: Final[dict[str, str]] = { + "no_before_run": ( + "Run analyze_repository, then pass the run_id as" + " before_run_id — or pass intent_id to auto-resolve." + ), + "no_after_run": ( + "Run analyze_repository after editing, then pass the" + " new run_id as after_run_id." + ), + "after_run_not_new": ( + "After-run matches the intent before-run. Run analyze_repository " + "after editing and pass the new run_id as after_run_id." + ), + "after_run_required_for_governance": ( + "Governance config changes require a post-edit analysis." 
+ " Run analyze_repository and pass after_run_id." + ), + "incomparable_runs": ( + "Before and after runs are not comparable." + " Re-run analyze_repository with the same settings." + ), + "intent_not_active": ( + "Queued intent must be promoted before editing or" + " verification. Call" + " manage_change_intent(action='promote')." + ), + "report_digest_mismatch": ( + "Intent was declared against a different report." + " Do not redeclare on the after-run — use the original" + " intent_id with the original before_run_id." + ), + "state_artifact_mutation": ( + "Baseline, cache, or generated state was touched." + " Remove those files from the patch and use a separate" + " workflow." + ), + "scope_violation": ( + "Patch touched files outside declared scope." + " Redeclare intent with expanded scope, or remove the" + " out-of-scope changes." + ), +} + + +def next_step_hint(reason: str) -> str | None: + return NEXT_STEP_HINTS.get(reason) + + +QUEUED_BUDGET_MESSAGE: Final = ( + "Budget computed for queued intent. Do not edit until promoted." +) + +STATE_ARTIFACT_VIOLATION_MESSAGE: Final = ( + "Patch touched CodeClone generated state. " + "This requires a separate explicit workflow." +) + +PATCH_CONTRACT_EXPIRED_MESSAGE: Final = ( + "Patch contract expired: intent was declared for another report digest." +) + +BUDGET_RELAXED_ADVISORY: Final = ( + "Relaxed patch budget is advisory; gate failures are not blocking." +) +BUDGET_OUTSIDE: Final = "Current run is already outside the selected patch budget." +BUDGET_INSIDE: Final = "Current run is inside the selected patch budget." + +VERIFY_ACCEPTED: Final = "Patch contract accepted." +VERIFY_ACCEPTED_EXTERNAL: Final = ( + "Patch contract accepted; external workspace changes detected." +) +HEALTH_REGRESSION_ADVISORY: Final = ( + "Patch accepted, but repository health changed negatively between " + "before-run and after-run. Report this as advisory context, not as " + "regression-free verification." 
+) +VERIFY_UNVERIFIED_PREFIX: Final = "Patch contract unverified: {reason}." + + +def verify_message( + *, + status: str, + violations: Sequence[str], + health_delta: int | None = None, +) -> str: + if status == "accepted": + message = VERIFY_ACCEPTED + elif status == "accepted_with_external_changes": + message = VERIFY_ACCEPTED_EXTERNAL + else: + return "Patch contract violated: " + ", ".join(violations) + if health_delta is not None and health_delta < 0: + return f"{message} {HEALTH_REGRESSION_ADVISORY}" + return message + + +def budget_message(*, relaxed: bool, would_fail: bool) -> str: + if relaxed: + return BUDGET_RELAXED_ADVISORY + if would_fail: + return BUDGET_OUTSIDE + return BUDGET_INSIDE diff --git a/codeclone/surfaces/mcp/messages/receipt.py b/codeclone/surfaces/mcp/messages/receipt.py new file mode 100644 index 00000000..037954a3 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/receipt.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Review receipt markdown and claims-not-made copy.""" + +from __future__ import annotations + +from typing import Final + +CLAIM_REASON_SECURITY_NOT_VULNERABILITY: Final = ( + "Security Surfaces are report-only trust-boundary inventory, " + "not vulnerability claims." +) +CLAIM_REASON_BASELINE_DEBT_NOT_REGRESSION: Final = ( + "Known baseline debt was not treated as new relative to the baseline; " + "patch-local regression claims require before-run to after-run evidence." +) +CLAIM_REASON_REPORT_ONLY_NOT_CI_FAILURE: Final = ( + "Report-only signals were not treated as CI gate failures." +) +CLAIM_REASON_SUPPRESSED_CLONE_NOT_REGRESSION: Final = ( + "Suppressed clone groups were not counted as active new regressions." 
+) + +DECISION_REASON_CLONE_DIVERGENCE: Final = ( + "Clone cohort member was in changed scope; confirm divergence is intentional." +) +DECISION_REASON_BASELINE_DEBT_TOUCHED: Final = ( + "Known baseline finding was in changed scope; confirm whether the patch " + "addresses or preserves it." +) +DECISION_REASON_SCOPE_EXPANSION: Final = ( + "Edit scope expanded beyond declared files; human confirmation is required." +) + +RECEIPT_MD_TITLE: Final = "## CodeClone Agent Review Receipt" +RECEIPT_MD_SECTION_SCOPE: Final = "### Scope" +RECEIPT_MD_SECTION_BLAST_RADIUS: Final = "### Blast Radius" +RECEIPT_MD_SECTION_REVIEWED_EVIDENCE: Final = "### Reviewed Evidence" +RECEIPT_MD_SECTION_PATCH_CONTRACT: Final = "### Patch Contract" +RECEIPT_MD_SECTION_STRUCTURAL_DELTA: Final = "### Structural Delta" +RECEIPT_MD_SECTION_HUMAN_DECISIONS: Final = "### Human Decisions Requested" +RECEIPT_MD_SECTION_CLAIMS_NOT_MADE: Final = "### Claims Not Made" +RECEIPT_MD_SECTION_VERIFICATION_PROFILE: Final = "### Verification Profile" +RECEIPT_MD_NO_INTENT: Final = "No intent declared." +RECEIPT_MD_NOT_AVAILABLE: Final = "Not available." +RECEIPT_MD_LIST_NONE: Final = "- none" +RECEIPT_MD_UNKNOWN: Final = "unknown" +RECEIPT_MD_NONE: Final = "none" +RECEIPT_MD_REVIEW_CONTRACT: Final = "**Review contract:** v1" +RECEIPT_MD_FOOTER: Final = "*Generated by CodeClone | run: `{run_id}` | {timestamp}*" diff --git a/codeclone/surfaces/mcp/messages/remediation.py b/codeclone/surfaces/mcp/messages/remediation.py new file mode 100644 index 00000000..5be6bf27 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/remediation.py @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Remediation shape guidance for MCP get_remediation.""" + +from __future__ import annotations + +from typing import Final + +from ....domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, +) + +REMEDIATION_CLONE_TYPE1: Final = ( + "Keep one canonical implementation and route callers through it." +) +REMEDIATION_CLONE_TYPE2: Final = ( + "Extract shared implementation with explicit parameters." +) +REMEDIATION_CLONE_BLOCK: Final = ( + "Extract the repeated statement sequence into a helper." +) +REMEDIATION_STRUCTURAL: Final = ( + "Extract the repeated branch family into a named helper." +) +REMEDIATION_COMPLEXITY: Final = "Split the function into smaller named steps." +REMEDIATION_COUPLING: Final = ( + "Isolate responsibilities and invert unnecessary dependencies." +) +REMEDIATION_COHESION: Final = "Split the class by responsibility boundary." +REMEDIATION_DEAD_CODE: Final = ( + "Delete the unused symbol or document intentional reachability." +) +REMEDIATION_DEPENDENCY: Final = ( + "Break the cycle by moving shared abstractions to a lower layer." +) +REMEDIATION_DEFAULT: Final = ( + "Extract the repeated logic into a shared, named abstraction." 
+) + + +def safe_refactor_shape( + *, + category: str, + clone_type: str, + title: str, +) -> str: + if category == CATEGORY_CLONE and clone_type == "Type-1": + return REMEDIATION_CLONE_TYPE1 + if category == CATEGORY_CLONE and clone_type == "Type-2": + return REMEDIATION_CLONE_TYPE2 + if category == CATEGORY_CLONE and "Block" in title: + return REMEDIATION_CLONE_BLOCK + if category == CATEGORY_STRUCTURAL: + return REMEDIATION_STRUCTURAL + if category == CATEGORY_COMPLEXITY: + return REMEDIATION_COMPLEXITY + if category == CATEGORY_COUPLING: + return REMEDIATION_COUPLING + if category == CATEGORY_COHESION: + return REMEDIATION_COHESION + if category == CATEGORY_DEAD_CODE: + return REMEDIATION_DEAD_CODE + if category == CATEGORY_DEPENDENCY: + return REMEDIATION_DEPENDENCY + return REMEDIATION_DEFAULT diff --git a/codeclone/surfaces/mcp/messages/resources.py b/codeclone/surfaces/mcp/messages/resources.py new file mode 100644 index 00000000..8bafb84b --- /dev/null +++ b/codeclone/surfaces/mcp/messages/resources.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP resource descriptions for FastMCP registration.""" + +from __future__ import annotations + +from typing import Final + +LATEST_SUMMARY: Final = "Canonical JSON summary for the latest run in this MCP session." +LATEST_REPORT: Final = "Canonical JSON report for the latest run in this MCP session." +LATEST_HEALTH: Final = "Health snapshot for the latest run in this MCP session." +LATEST_GATES: Final = "Gate evaluation for the latest run in this MCP session." +LATEST_CHANGED: Final = ( + "Changed-files projection for the latest diff-aware run in this session." +) +LATEST_TRIAGE: Final = "Production triage for the latest run in this MCP session." 
+REPORT_SCHEMA: Final = ( + "JSON schema-style descriptor for the canonical CodeClone report." +) +RUN_SUMMARY: Final = "Canonical JSON summary for a specific CodeClone MCP run." +RUN_REPORT: Final = "Canonical JSON report for a specific CodeClone MCP run." +RUN_FINDING: Final = "Canonical JSON finding group for a specific CodeClone MCP run." + +TITLE_LATEST_SUMMARY: Final = "Latest Run Summary" +TITLE_LATEST_REPORT: Final = "Latest Canonical Report" +TITLE_LATEST_HEALTH: Final = "Latest Health Snapshot" +TITLE_LATEST_GATES: Final = "Latest Gate Evaluation" +TITLE_LATEST_CHANGED: Final = "Latest Changed Findings" +TITLE_LATEST_TRIAGE: Final = "Latest Production Triage" +TITLE_REPORT_SCHEMA: Final = "CodeClone Report Schema" +TITLE_RUN_SUMMARY: Final = "Run Summary" +TITLE_RUN_REPORT: Final = "Run Canonical Report" +TITLE_RUN_FINDING: Final = "Run Finding" diff --git a/codeclone/surfaces/mcp/messages/tips.py b/codeclone/surfaces/mcp/messages/tips.py new file mode 100644 index 00000000..24508ed7 --- /dev/null +++ b/codeclone/surfaces/mcp/messages/tips.py @@ -0,0 +1,26 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Structured MCP workspace tips for agent-facing hygiene guidance.""" + +from __future__ import annotations + +from ....paths.gitignore import ( + GITIGNORE_CODECLONE_CACHE_TIP_ID, + WORKSPACE_HYGIENE_CATEGORY, + gitignore_codeclone_cache_tip_payload, +) + + +def gitignore_codeclone_cache_tip() -> dict[str, object]: + return gitignore_codeclone_cache_tip_payload() + + +__all__ = [ + "GITIGNORE_CODECLONE_CACHE_TIP_ID", + "WORKSPACE_HYGIENE_CATEGORY", + "gitignore_codeclone_cache_tip", +] diff --git a/codeclone/surfaces/mcp/messages/tools.py b/codeclone/surfaces/mcp/messages/tools.py new file mode 100644 index 00000000..4b7640de --- /dev/null +++ b/codeclone/surfaces/mcp/messages/tools.py @@ -0,0 +1,325 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""MCP tool descriptions for FastMCP registration.""" + +from __future__ import annotations + +from typing import Final + +ANALYZE_REPOSITORY: Final = ( + "Run a deterministic CodeClone analysis and register it as the " + "latest MCP run. Pass an absolute repository root; relative roots " + "like '.' are rejected in MCP. MCP cache_policy accepts reuse or " + "off only. Start with get_production_triage." +) + +ANALYZE_CHANGED_PATHS: Final = ( + "Run changed-files analysis from explicit paths or git diff ref. " + "Absolute root required. MCP cache_policy: reuse or off. " + "Response includes next_tool hint." +) + +GET_RUN_SUMMARY: Final = ( + "Compact run snapshot for latest or specified run. run_id accepts " + "8-char short id or full digest." 
+) + +GET_PRODUCTION_TRIAGE: Final = ( + "Return a production-first triage view over a stored run: health, " + "cache freshness, production hotspots, and production suggestions, " + "while keeping global source-kind counters visible. Use this as the " + "default first-pass review on noisy repositories." +) + +GET_BLAST_RADIUS: Final = ( + "Return the deterministic structural risk boundary for changing " + "the given files. Shows direct dependents, clone cohort members, " + "coverage gaps, actionable do-not-touch paths, and review-only " + "context. Derived from the canonical report; no new analysis is " + "performed." +) + +GET_IMPLEMENTATION_CONTEXT: Final = ( + "Return deterministic, bounded implementation context from one existing " + "analysis run. Resolves explicit repo-relative paths and module:symbol " + "qualnames, then projects module, dependency, API-surface, call/reference, " + "blast-radius, cache-origin, and workspace-freshness facts without " + "re-analysis or edit authorization." +) + +GET_RELEVANT_MEMORY: Final = ( + "Return ranked, evidence-linked engineering memory for the declared " + "edit scope. Requires absolute root (same as analyze_repository). " + "Pass scope paths and/or an active intent_id from start_controlled_change; " + "symbols-only retrieval is also supported. Unscoped project-wide retrieval " + "is rejected — use query_engineering_memory(mode=status|search) instead. " + "List responses default to compact statement previews; pass detail_level=full " + "for complete statements. Scoped responses may also include typed " + "trajectory precedents in trajectories[]; records[] remains memory records " + "only. Read-only; does not mutate the memory database." +) + +QUERY_ENGINEERING_MEMORY: Final = ( + "Mode-based engineering memory inspection router. 
Modes: search, get, " + "for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, " + "trajectory_search, trajectory_get, trajectory_anomalies, trajectory_agents, " + "and trajectory_dashboard. List modes default to compact " + "previews; mode=get and detail_level=full return complete statements. " + "mode=trajectory_get uses record_id as the trajectory id. Project root is " + "not a valid path or coverage scope. Read-only." +) + +MANAGE_ENGINEERING_MEMORY: Final = ( + "Engineering memory governance. Agent actions: refresh_from_run, " + "record_candidate, promote_experience, validate_claims, propose_from_receipt, " + "rebuild_semantic_index, rebuild_trajectories, enqueue_projection_rebuild, " + "projection_rebuild_status, run_projection_jobs_once. " + "promote_experience(experience_id) turns a distilled experience into a " + "human-approvable draft. " + "approve/reject/archive are not available to agents — use VS Code Memory view." +) + +CHECK_PATCH_CONTRACT: Final = ( + "Pre-edit budget query (mode='budget') or post-edit structural " + "verification (mode='verify'). Composes stored runs, gate " + "evaluation, run comparison, and session-local change intent " + "without running analysis or mutating repository state." +) + +CREATE_REVIEW_RECEIPT: Final = ( + "Generate a deterministic, auditable review receipt from stored " + "MCP state: report provenance, intent scope, blast radius, " + "reviewed findings, patch contract status, human decision points, " + "and claims-not-made. Output markdown or JSON without mutating " + "repository state." +) + +VALIDATE_REVIEW_CLAIMS: Final = ( + "Validate cited review text against canonical report semantics. 
" + "Detects deterministic mischaracterizations: Security Surfaces " + "called vulnerabilities, report-only signals called CI failures, " + "known baseline debt called new relative to baseline, patch-local " + "regression claims without before/after evidence, dead code claimed " + "where runtime reachability evidence exists, fixes claimed " + "without post-patch verification, and regression-free claims when " + "patch_health_delta is negative. Pass patch_health_delta from " + "check_patch_contract verify or finish verification.structural_delta. " + "Structural citation matching; not NLP." +) + +HELP: Final = ( + "Bounded workflow/contract guidance with doc links. compact adds " + "anti_patterns; normal adds warnings. Topics: workflow, analysis_profile, " + "suppressions, baseline, coverage, latest_runs, review_state, " + "changed_scope, change_control, trust_boundaries, engineering_memory, " + "implementation_context, verification_profiles, observability." +) + +QUERY_PLATFORM_OBSERVABILITY: Final = ( + "Read-only sectioned diagnostics over CodeClone's own runtime telemetry " + "(Phase 29). Observability is for CodeClone self-development and " + "diagnostics. It is NOT part of user-facing CodeClone analysis. It MUST " + "NOT affect reports, gates, baselines, memory facts, or edit " + "authorization. A slicer, not a trace export API: each call returns one " + "bounded section, never the full trace, numeric metrics only (no raw SQL " + "or payloads). Anti-inference guard: this describes the runtime of " + "CodeClone itself, not the user repository — high DB queries != repository " + "bad; high MCP payload != code quality low; hot semantic reindex != unsafe " + "change. Sections: summary, slow_operations, memory_pipeline_cost, " + "db_cost, agent_context, mcp_tool_matrix, correlated_chains, costly_noops, " + "pipeline. detail_level compact|normal (full downgrades to normal for " + "aggregate sections). 
Intended for CodeClone maintainers and development " + "agents; do not use it to make user-facing quality claims about a repo." +) + +EVALUATE_GATES: Final = ( + "Evaluate CodeClone gate conditions against an existing MCP run without " + "modifying baselines or exiting the process." +) + +GET_REPORT_SECTION: Final = ( + "Return one canonical report section. Prefer metrics, metrics_detail, " + "changed, findings over all unless necessary." +) + +LIST_FINDINGS: Final = ( + "List canonical finding groups with deterministic ordering, optional " + "filters, pagination, and compact summary cards by default. Prefer " + "list_hotspots or focused check_* tools for first-pass triage; use " + "this when you need a broader filtered list." +) + +GET_FINDING: Final = ( + "Return a single canonical finding group by short or full id. " + "Normal detail is the default. Use this after list_hotspots, " + "list_findings, or check_* instead of requesting larger lists at " + "higher detail." +) + +GET_REMEDIATION: Final = ( + "Return actionable remediation guidance for a single finding. " + "Normal detail is the default. Use this when you need the fix packet " + "for one finding without pulling larger detail lists." +) + +LIST_HOTSPOTS: Final = ( + "Return one of the derived CodeClone hotlists for the latest or " + "specified MCP run, using compact summary cards by default. Prefer " + "this for first-pass triage before broader list_findings calls." +) + +COMPARE_RUNS: Final = ( + "Compare two runs by finding ids. run_id accepts short or full ids. " + "Returns incomparable when roots or settings differ." +) + +CHECK_COMPLEXITY: Final = ( + "Return complexity hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available. When " + "filtering by root without run_id, pass an absolute root. Prefer " + "this narrower tool instead of list_findings when you only need " + "complexity hotspots." 
+) + +CHECK_CLONES: Final = ( + "Return clone findings from a compatible stored run. " + "Use analyze_repository first if no compatible run is available. " + "When filtering by root without run_id, pass an absolute root. " + "Prefer this narrower tool instead of list_findings when you only need " + "clone findings." +) + +CHECK_COUPLING: Final = ( + "Return coupling hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available. When " + "filtering by root without run_id, pass an absolute root. Prefer " + "this narrower tool instead of list_findings when you only need " + "coupling hotspots." +) + +CHECK_COHESION: Final = ( + "Return cohesion hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available. When " + "filtering by root without run_id, pass an absolute root. Prefer " + "this narrower tool instead of list_findings when you only need " + "cohesion hotspots." +) + +CHECK_DEAD_CODE: Final = ( + "Return dead-code findings from a compatible stored run. " + "Use analyze_repository first if no full run is available. When " + "filtering by root without run_id, pass an absolute root. Prefer " + "this narrower tool instead of list_findings when you only need " + "dead-code findings." +) + +GENERATE_PR_SUMMARY: Final = ( + "Generate a PR-friendly CodeClone summary for changed files. Prefer " + "format='markdown' for compact LLM-facing output; use 'json' only " + "for machine post-processing." +) + +MARK_FINDING_REVIEWED: Final = ( + "Mark finding reviewed in this MCP session only; cleared on " + "process restart or clear_session_runs." +) + +LIST_REVIEWED_FINDINGS: Final = ( + "List in-memory reviewed findings for the current or specified run." 
+) + +START_CONTROLLED_CHANGE: Final = ( + "Pre-edit workflow: check workspace for concurrent intents, " + "declare change intent with scope, compute blast radius " + "(direct + bounded transitive for high-radius changes), and " + "return patch budget — all in one call. Requires an existing " + "analysis run for the given root; call analyze_repository " + "first if needed. Returns intent_id for finish_controlled_change. " + "Use dirty_scope_policy=continue_own_wip to resume known " + "uncommitted work in declared scope when no foreign dirty overlap " + "exists; finish must still prove scope via changed_files or diff_ref. " + "Does not run analysis implicitly." +) + +FINISH_CONTROLLED_CHANGE: Final = ( + "Post-edit pipeline: hygiene, scope check, verify, patch_trail, optional " + "claims, receipt, clear. Pass after_run_id when " + "verification.after_run_required. Use detail_level=full for hygiene path " + "attribution; patch_trail_detail summary|full. Set propose_memory=true " + "for draft memory candidates on accept." +) + +MANAGE_CHANGE_INTENT: Final = ( + "Manage the agent change intent lifecycle for the current MCP " + "session and optional workspace registry. Actions: 'list_workspace' " + "to inspect concurrent workspace intents, 'declare' to declare " + "intended scope before editing, 'get' to retrieve active intent, " + "'check' to verify actual diff against declared scope, 'promote' to " + "activate a queued intent, 'clear' to remove intent, 'renew' to " + "refresh the active lease before long edits or test runs, " + "'gc_workspace' to clean stale registry files, 'recover' to " + "explicitly reclaim a recoverable intent, and 'reset_workspace' for " + "interrupted-session recovery. In-memory intent state remains " + "session-local; workspace coordination state is ephemeral under " + ".codeclone/intents/." +) + +CLEAR_SESSION_RUNS: Final = ( + "Clear all in-memory MCP analysis runs and ephemeral session state " + "for this server process." 
+) + +GET_WORKSPACE_SESSION_STATS: Final = ( + "IDE-only workspace session dashboard: active agents, change intents, " + "lease health, latest cached run summary, and audit token footprint. " + "Mirrors CLI --session-stats. Registered only when the MCP server is " + "launched with --ide-governance-channel (CodeClone VS Code). Not exposed " + "to agent clients on the default launcher." +) + +GET_CONTROLLER_AUDIT_TRAIL: Final = ( + "IDE-only controller audit trail summary with recent events and optional " + "payload token footprint. Mirrors CLI --audit and requires " + "audit_enabled=true. Registered only with --ide-governance-channel. " + "Not for agent MCP clients." +) + +TITLE_ANALYZE_REPOSITORY: Final = "Analyze Repository" +TITLE_ANALYZE_CHANGED_PATHS: Final = "Analyze Changed Paths" +TITLE_GET_RUN_SUMMARY: Final = "Get Run Summary" +TITLE_GET_PRODUCTION_TRIAGE: Final = "Get Production Triage" +TITLE_GET_BLAST_RADIUS: Final = "Get Blast Radius" +TITLE_GET_IMPLEMENTATION_CONTEXT: Final = "Get Implementation Context" +TITLE_GET_RELEVANT_MEMORY: Final = "Get Relevant Memory" +TITLE_QUERY_ENGINEERING_MEMORY: Final = "Query Engineering Memory" +TITLE_MANAGE_ENGINEERING_MEMORY: Final = "Manage Engineering Memory" +TITLE_CHECK_PATCH_CONTRACT: Final = "Check Patch Contract" +TITLE_CREATE_REVIEW_RECEIPT: Final = "Create Review Receipt" +TITLE_VALIDATE_REVIEW_CLAIMS: Final = "Validate Review Claims" +TITLE_HELP: Final = "Help" +TITLE_QUERY_PLATFORM_OBSERVABILITY: Final = "Query Platform Observability" +TITLE_EVALUATE_GATES: Final = "Evaluate Gates" +TITLE_GET_REPORT_SECTION: Final = "Get Report Section" +TITLE_LIST_FINDINGS: Final = "List Findings" +TITLE_GET_FINDING: Final = "Get Finding" +TITLE_GET_REMEDIATION: Final = "Get Remediation" +TITLE_LIST_HOTSPOTS: Final = "List Hotspots" +TITLE_COMPARE_RUNS: Final = "Compare Runs" +TITLE_CHECK_COMPLEXITY: Final = "Check Complexity" +TITLE_CHECK_CLONES: Final = "Check Clones" +TITLE_CHECK_COUPLING: Final = "Check Coupling" 
+TITLE_CHECK_COHESION: Final = "Check Cohesion" +TITLE_CHECK_DEAD_CODE: Final = "Check Dead Code" +TITLE_GENERATE_PR_SUMMARY: Final = "Generate PR Summary" +TITLE_MARK_FINDING_REVIEWED: Final = "Mark Finding Reviewed" +TITLE_LIST_REVIEWED_FINDINGS: Final = "List Reviewed Findings" +TITLE_START_CONTROLLED_CHANGE: Final = "Start Controlled Change" +TITLE_FINISH_CONTROLLED_CHANGE: Final = "Finish Controlled Change" +TITLE_MANAGE_CHANGE_INTENT: Final = "Manage Change Intent" +TITLE_CLEAR_SESSION_RUNS: Final = "Clear Session Runs" +TITLE_GET_WORKSPACE_SESSION_STATS: Final = "Get Workspace Session Stats" +TITLE_GET_CONTROLLER_AUDIT_TRAIL: Final = "Get Controller Audit Trail" diff --git a/codeclone/surfaces/mcp/messages/verification.py b/codeclone/surfaces/mcp/messages/verification.py new file mode 100644 index 00000000..c15308bb --- /dev/null +++ b/codeclone/surfaces/mcp/messages/verification.py @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# Reason text per verification profile id.
PROFILE_REASONS: Final[dict[str, str]] = {
    "state_artifact_change": "changed files include CodeClone state artifacts",
    "python_structural": "changed files include Python source",
    "governance_config": "changed files include governance or analysis configuration",
    "documentation_only": "all changed files match documentation patterns",
    "non_python_patch": (
        "changed files are outside Python source and documentation patterns"
    ),
}

# Reason text used when there are no changed files at all.
EMPTY_PROFILE_REASON: Final = "no changed files detected"

# Limitation notes per profile id; an empty tuple means "no limitations".
PROFILE_LIMITATIONS: Final[dict[str, tuple[str, ...]]] = {
    "non_python_patch": (
        "Patch did not touch Python source files, so Python structural "
        "regressions were not checked.",
        "Changed files are not classified as documentation-only; "
        "review non-Python side effects manually.",
    ),
    "documentation_only": (),
    "governance_config": (),
    "python_structural": (),
    "state_artifact_change": (),
}

# Profile-specific "accepted" messages; other profiles use the default below.
PROFILE_ACCEPTED_MESSAGES: Final[dict[str, str]] = {
    "documentation_only": (
        "Patch contract accepted. No Python source files touched; "
        "structural checks not applicable."
    ),
    "non_python_patch": (
        "Patch scope accepted. No Python source files were touched; "
        "Python structural checks were not applicable. "
        "Changed files are outside the documentation-only profile, "
        "so review limitations apply."
    ),
}

# Profile-specific "unverified" messages; other profiles use the default below.
PROFILE_UNVERIFIED_MESSAGES: Final[dict[str, str]] = {
    "python_structural": (
        "Python source files were changed; after_run_id is required "
        "for structural verification."
    ),
    "governance_config": (
        "Configuration that may affect analysis or CI gates was changed; "
        "after_run_id is required for verification."
    ),
}

PROFILE_ACCEPTED_DEFAULT: Final = "Patch contract accepted."
PROFILE_UNVERIFIED_DEFAULT: Final = "after_run_id is required for verification."


def profile_accepted_message(profile: str) -> str:
    """Return the "accepted" message for ``profile``.

    Profiles without a dedicated entry in ``PROFILE_ACCEPTED_MESSAGES`` get
    ``PROFILE_ACCEPTED_DEFAULT``.
    """
    if profile in PROFILE_ACCEPTED_MESSAGES:
        return PROFILE_ACCEPTED_MESSAGES[profile]
    return PROFILE_ACCEPTED_DEFAULT


def profile_unverified_message(profile: str) -> str:
    """Return the "unverified" message for ``profile``.

    Profiles without a dedicated entry in ``PROFILE_UNVERIFIED_MESSAGES`` get
    ``PROFILE_UNVERIFIED_DEFAULT``.
    """
    if profile in PROFILE_UNVERIFIED_MESSAGES:
        return PROFILE_UNVERIFIED_MESSAGES[profile]
    return PROFILE_UNVERIFIED_DEFAULT


def profile_limitations(profile: str) -> tuple[str, ...]:
    """Return the limitation notes for ``profile``.

    Unknown profiles yield an empty tuple, the same value the table stores for
    profiles that have no limitations.
    """
    if profile in PROFILE_LIMITATIONS:
        return PROFILE_LIMITATIONS[profile]
    return ()
+) + +START_INTENT_ACTIVE: Final = "Intent active." +START_HIGH_BLAST_RADIUS: Final = "Blast radius is high — review transitive summary." +START_BUDGET_OUTSIDE_CI: Final = "Budget is already outside CI thresholds." +START_BUDGET_WITHIN_CI: Final = "Budget is within CI thresholds." +START_BUDGET_PREVIEW_ADVISORY: Final = ( + "Budget preview already fails; edit is allowed, but final verification " + "may not be accepted." +) + +START_CONTINUE_OWN_WIP: Final = ( + "Continuing own uncommitted work in declared scope. Finish must cover all " + "dirty paths via changed_files or diff_ref." +) + +FINISH_RECEIPT_FAILED: Final = ( + "Change verified but receipt creation failed. Intent not cleared for retry." +) + +FINISH_DONE: Final = "Done. Intent cleared." + +FINISH_EVIDENCE_XOR: Final = ( + "finish_controlled_change requires exactly one of " + "changed_files or diff_ref, not both." +) +FINISH_EVIDENCE_REQUIRED: Final = ( + "finish_controlled_change requires changed_files or diff_ref." +) + +START_FOREIGN_ACTIVE_OVERLAP: Final = ( + "Foreign active intent overlaps your scope. Ask the user, narrow scope, " + 'or restart with on_conflict="queue".' +) + +START_FOREIGN_STALE_OVERLAP: Final = ( + "Foreign stale intent overlaps your scope. Coordinate with the user or " + "recover the foreign intent before editing." +) + +START_DIRTY_SCOPE: Final = ( + "Uncommitted changes overlap your declared scope. Ask the user before " + "editing; inspect diff, then commit, stash, revert, or narrow scope." +) + +START_FOREIGN_DIRTY_OVERLAP: Final = ( + "Uncommitted changes overlap your declared scope and a foreign intent " + "previously declared overlapping paths. Ask the user before editing." +) + +START_COMBINED_BLOCK: Final = ( + "Foreign active intent overlaps your scope and dirty files exist in " + "declared scope. Ask the user before editing; queue or narrow scope, " + "or reconcile the working tree." +) + +FINISH_HYGIENE_BLOCKED: Final = "Finish blocked by workspace hygiene." 
+ +FINISH_HYGIENE_NEXT: Final = ( + "Finish evidence does not match dirty paths in declared scope, or new/modified " + "unattributed dirty paths appeared outside declared scope after intent start. " + "List every touched path in changed_files, redeclare scope, or reconcile the tree." +) + +FINISH_HYGIENE_MISSING_EVIDENCE: Final = ( + "Git shows dirty paths inside declared scope that are missing from finish evidence." +) + +FINISH_HYGIENE_OWN_UNSCOPED: Final = ( + "Git shows unattributed uncommitted changes outside declared scope. " + "Redeclare scope to include them or revert those edits." +) + +FINISH_HYGIENE_NEW_UNATTRIBUTED: Final = ( + "Git shows new unattributed dirty paths outside declared scope since intent start. " + "Redeclare scope to include them or revert those edits." +) + +FINISH_HYGIENE_MODIFIED_UNATTRIBUTED: Final = ( + "Git shows preexisting dirty paths outside declared scope changed since intent " + "start. Redeclare scope to include them or reconcile those edits." +) + +FINISH_HYGIENE_UNKNOWN_UNATTRIBUTED: Final = ( + "Git shows unattributed dirty paths outside declared scope, but their start-time " + "snapshot is unavailable. Reconcile the tree or redeclare scope." +) + +FINISH_HYGIENE_UNACKNOWLEDGED_DIRTY: Final = ( + "Finish evidence does not cover all dirty paths in declared scope." +) + +FINISH_HYGIENE_FOREIGN_DIRTY: Final = "Foreign dirty overlap remains in declared scope." 
def start_controlled_change_message(
    *,
    radius_level: str,
    budget_would_fail: bool,
    continuing_own_wip: bool = False,
) -> str:
    """Compose the status message returned when a controlled change starts.

    The message always opens with ``START_INTENT_ACTIVE``. Further sentences
    are appended in a fixed order and joined with single spaces:

    1. the continue-own-WIP notice, when ``continuing_own_wip`` is truthy;
    2. the high-blast-radius notice, when ``radius_level`` equals ``"high"``;
    3. the budget sentence(s): "outside CI" plus the preview advisory when
       ``budget_would_fail`` is truthy, otherwise "within CI".
    """
    # Optional sentences, each paired with the condition that enables it.
    conditional_notes = (
        (continuing_own_wip, START_CONTINUE_OWN_WIP),
        (radius_level == "high", START_HIGH_BLAST_RADIUS),
    )
    # Exactly one budget variant is always present, and it comes last.
    budget_notes = (
        (START_BUDGET_OUTSIDE_CI, START_BUDGET_PREVIEW_ADVISORY)
        if budget_would_fail
        else (START_BUDGET_WITHIN_CI,)
    )

    sentences: list[str] = [START_INTENT_ACTIVE]
    sentences.extend(text for enabled, text in conditional_notes if enabled)
    sentences.extend(budget_notes)
    return " ".join(sentences)


def finish_controlled_change_message(
    *,
    verify_status: str,
    intent_cleared: bool,
    receipt_error: str | None,
) -> str:
    """Compose the status message returned when a controlled change finishes.

    Precedence: a receipt error (any non-``None`` value) wins and yields
    ``FINISH_RECEIPT_FAILED``; otherwise a cleared intent yields
    ``FINISH_DONE``; otherwise the verification status is reported and the
    intent is described as still active.
    """
    if receipt_error is None and not intent_cleared:
        return f"Verified ({verify_status}). Intent still active."
    return FINISH_DONE if receipt_error is None else FINISH_RECEIPT_FAILED
def measure_payload(payload: Mapping[str, object]) -> tuple[int, int]:
    """Return ``(byte_size, token_estimate)`` for a payload's canonical JSON.

    ``byte_size`` is the UTF-8 length of the canonical JSON (sorted keys,
    compact separators, ``str`` fallback for non-JSON values); tokens reuse the
    shared budget estimator (chars-approx default — no ``tiktoken`` import).

    Never raises: payload measurement must never break the tool call it wraps.

    * Any failure while serializing returns ``(0, 0)``.
    * Any failure inside the estimator falls back to ``ceil(len(text) / 4)``.

    Returns:
        A ``(byte_size, token_estimate)`` pair of non-negative integers.
    """
    try:
        text = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str)
    except Exception:
        # Deliberately broad: besides TypeError/ValueError (bad keys, circular
        # references), serialization can raise RecursionError on deep nesting,
        # and ``default=str`` runs arbitrary ``__str__`` implementations.
        # Measurement is best-effort, so none of these may reach the caller.
        return 0, 0
    byte_size = len(text.encode("utf-8"))
    try:
        tokens = estimate_payload(payload).tokens
    except Exception:
        # The estimator is an external component; whatever it raises, fall
        # back to the chars/4 approximation instead of failing the tool call.
        tokens = ceil(len(text) / 4)
    return byte_size, tokens
+ CloneTypeParam, + CompareFocusParam, + ConfirmationNonceParam, + ContextBudgetParam, + ContextDepthParam, + ContextDetailLevelParam, + ContextModeParam, + ContextPathsParam, + ContextQueryParam, + ContextSymbolsParam, + CoverageMinParam, + CoverageXmlParam, + CreateReceiptParam, + DetailLevelParam, + DiffRefParam, + DirtyScopePolicyParam, + ExcludeReviewedParam, + ExpectedEffectsParam, + FacetIncludeParam, + FamilyParam, + FilesParam, + FindingFamilyParam, + FindingIdParam, + FinishReviewTextParam, + GateBoolParam, + GateIntParam, + GitDiffRefParam, + GovernanceActorParam, + GovernanceDecisionParam, + GovernanceProofParam, + GovernanceProtocolParam, + GovernanceTicketParam, + HelpDetailParam, + HelpTopicParam, + HotspotKindParam, + IdeGovernanceClientNameParam, + IdeGovernanceClientVersionParam, + IdeGovernanceKeyParam, + IncludeBlastRadiusParam, + IncludeDraftsParam, + IncludeParam, + IncludePatchContractParam, + IncludeStaleParam, + IntentIdParam, + IntentTextParam, + LeaseSecondsParam, + LimitParam, + ManageActionParam, + ManageMemoryActionParam, + ManageMemoryExperienceIdParam, + MaxHotspotsParam, + MaxResultsParam, + MaxSizeMbParam, + MaxSuggestionsParam, + MemoryClaimsTextParam, + MemoryDetailLevelParam, + MemoryFiltersParam, + MemoryMaxRecordsParam, + MemoryPathParam, + MemoryQueryModeParam, + MemoryRecordIdParam, + MemoryRecordTypeParam, + MemoryScopeListParam, + MemorySearchQueryParam, + MemoryStatementParam, + MemorySymbolParam, + MemorySymbolsParam, + MinComplexityParam, + MinSeverityParam, + NoveltyParam, + ObservabilityDetailParam, + ObservabilityLimitParam, + ObservabilityOperationIdParam, + ObservabilitySectionParam, + ObservabilitySpanIdParam, + ObservabilityWindowParam, + OffsetParam, + OnConflictParam, + OptionalIntentIdParam, + OptionalIntentTextParam, + OptionalPathParam, + OptionalRootParam, + OptionalScopeParam, + PatchHealthDeltaParam, + PatchModeParam, + PatchTrailDetailParam, + PathFilterParam, + PrFormatParam, + ProcessesParam, + 
ProposeMemoryParam, + ReceiptFormatParam, + ReportSectionParam, + RequireCitationsParam, + RespectPyprojectParam, + ReviewNoteParam, + ReviewTextParam, + RootParam, + RunIdParam, + RunIdRequiredParam, + ScopeParam, + SemanticParam, + SeverityParam, + SortByParam, + SourceKindParam, + StrictnessParam, + ThresholdIntParam, + TtlSecondsParam, +) +from .payloads import measure_payload from .service import CodeCloneMCPService from .session import ( DEFAULT_MCP_HISTORY_LIMIT, @@ -30,27 +183,6 @@ from mcp.server.fastmcp import FastMCP from mcp.types import ToolAnnotations -_SERVER_INSTRUCTIONS = ( - "CodeClone MCP is a deterministic, baseline-aware, read-only analysis server " - "for Python repositories. Use analyze_repository first for full runs or " - "analyze_changed_paths for PR-style review, then prefer get_run_summary or " - "get_production_triage for the first pass. Use list_hotspots or focused " - "check_* tools before broader list_findings calls, then drill into one " - "finding with get_finding or get_remediation. Use " - "help(topic=...) when workflow or contract semantics are unclear. Use " - "default or pyproject-resolved thresholds for the first pass, and lower " - "them only for an explicit higher-sensitivity follow-up when needed. Use " - "get_report_section(section='metrics_detail', family=..., limit=...) for " - "bounded metrics drill-down, and prefer generate_pr_summary(format='markdown') " - "unless machine JSON is required. Coverage join accepts external Cobertura " - "XML as a current-run signal and does not become baseline truth. Pass an " - "absolute repository root to analysis tools. This server never updates " - "baselines and never mutates source files." -) -_MCP_INSTALL_HINT = ( - "CodeClone MCP support requires the optional 'mcp' extra. 
def _observability_session_id() -> str:
    """Build the id that groups every operation recorded by this MCP server.

    The id has the form ``mcp:<pid>:<epoch-seconds>``. The timestamp is sampled
    when the function runs, so the value is only stable if it is computed once
    per process and then reused.
    """
    pid = os.getpid()
    started_at = int(time.time())
    return f"mcp:{pid}:{started_at}"


def _instrument_tool(func: Callable[..., object]) -> Callable[..., object]:
    """Wrap an MCP tool handler so each call is recorded as an observability
    operation, including request/response payload sizes (bytes + tokens).

    With observability disabled the wrapper calls ``func`` directly and records
    nothing. The wrapper keeps the handler's metadata (``functools.wraps``) and
    carries an explicit ``__signature__`` so FastMCP schema introspection — and
    the tool-schema snapshot — see the same signature as the bare handler.

    Args:
        func: The tool handler being registered.

    Returns:
        A callable with the same signature as ``func``.
    """
    tool_name = getattr(func, "__name__", "tool")
    operation_name = f"mcp.{tool_name}"

    @functools.wraps(func)
    def wrapper(*args: object, **kwargs: object) -> object:
        # Fast path: nothing is measured or recorded when observability is off.
        if not is_observability_enabled():
            return func(*args, **kwargs)

        # Only a non-empty string passed as the ``root`` keyword is bound.
        root_arg = kwargs.get("root")
        if isinstance(root_arg, str) and root_arg:
            bind_root(Path(root_arg))

        with operation(name=operation_name, surface="mcp") as recorded:
            # Request size is measured from keyword arguments only.
            if payload_capture_enabled():
                size, tokens = measure_payload(kwargs)
                recorded.set_request(request_bytes=size, request_tokens=tokens)
            # The handler runs inside a root span because record_db_query
            # attributes SQL to the active span, not to the operation. Without
            # it, every DB-touching tool (start/finish/get_relevant_memory/
            # manage_*) would record zero db_queries.
            with span(name=operation_name):
                outcome = func(*args, **kwargs)
            # Only mapping results are measured; other return types are
            # passed back without a response size.
            if payload_capture_enabled() and isinstance(outcome, Mapping):
                size, tokens = measure_payload(outcome)
                recorded.set_response(response_bytes=size, response_tokens=tokens)
            return outcome

    # eval_str resolves the handler's string annotations (PEP 563) into real
    # types so FastMCP/Pydantic build the same input schema as for ``func``.
    resolved_signature = inspect.signature(func, eval_str=True)
    wrapper.__signature__ = resolved_signature  # type: ignore[attr-defined]
    return wrapper
+ err_msgs.invalid_choice("cache_policy", value, ("off", "reuse")) ) @@ -136,14 +319,37 @@ def build_mcp_server( log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = ( DEFAULT_MCP_LOG_LEVEL ), + ide_governance_channel: bool = False, + auth_token: str | None = None, ) -> FastMCP: """Build and register the local read-only CodeClone FastMCP server.""" runtime_fastmcp, read_only_tool, analysis_tool, session_tool = _load_mcp_runtime() - service = CodeCloneMCPService(history_limit=_validated_history_limit(history_limit)) + service = CodeCloneMCPService( + history_limit=_validated_history_limit(history_limit), + ide_governance_channel=ide_governance_channel, + ) + # Freeze the env-resolved observability decision for this server process + # (default OFF). Root-less here; the registrar binds the store to the first + # tool call that carries a root. + bootstrap(resolve_observability_config(), session_id=_observability_session_id()) + + @asynccontextmanager + async def _lifespan(_app: FastMCP) -> AsyncIterator[dict[str, object]]: + yield {} + service.shutdown_cleanup() + shutdown() + + token_verifier = None + auth_settings = None + if auth_token is not None: + token_verifier = StaticBearerTokenVerifier(auth_token) + auth_settings = build_http_auth_settings(host=host, port=port) + mcp = runtime_fastmcp( name="CodeClone", - instructions=_SERVER_INSTRUCTIONS, + instructions=mcp_instructions.SERVER_INSTRUCTIONS, + lifespan=_lifespan, website_url=DOCS_URL, host=host, port=port, @@ -152,15 +358,20 @@ def build_mcp_server( debug=debug, log_level=log_level, dependencies=(f"codeclone=={__version__}",), + token_verifier=token_verifier, + auth=auth_settings, ) # FastMCP otherwise reports the `mcp` package version in initialize/serverInfo. mcp._mcp_server.version = __version__ + # Inject FastMCP reference so the service can lazily resolve the MCP + # clientInfo (name/version) for workspace intent agent_label fields. 
+ service._fastmcp = mcp def tool(*args: object, **kwargs: object) -> Callable[[MCPCallable], MCPCallable]: decorator = mcp.tool(*args, **kwargs) # type: ignore[arg-type] def register(func: MCPCallable) -> MCPCallable: - decorator(func) + decorator(_instrument_tool(func)) return func return register @@ -178,43 +389,37 @@ def register(func: MCPCallable) -> MCPCallable: return register @tool( - title="Analyze Repository", - description=( - "Run a deterministic CodeClone analysis and register it as the " - "latest MCP run. Pass an absolute repository root; relative roots " - "like '.' are rejected in MCP. Start with get_run_summary or " - "get_production_triage. Tip: set cache_policy='off' for a fully " - "fresh run. Defaults are the conservative first pass; lower " - "thresholds only for an explicit deeper review." - ), + title=mcp_tools.TITLE_ANALYZE_REPOSITORY, + description=mcp_tools.ANALYZE_REPOSITORY, annotations=analysis_tool, structured_output=True, ) def analyze_repository( - root: str, - analysis_mode: str = "full", - respect_pyproject: bool = True, - changed_paths: list[str] | None = None, - git_diff_ref: str | None = None, - processes: int | None = None, - min_loc: int | None = None, - min_stmt: int | None = None, - block_min_loc: int | None = None, - block_min_stmt: int | None = None, - segment_min_loc: int | None = None, - segment_min_stmt: int | None = None, - api_surface: bool | None = None, - coverage_xml: str | None = None, - coverage_min: int | None = None, - complexity_threshold: int | None = None, - coupling_threshold: int | None = None, - cohesion_threshold: int | None = None, - baseline_path: str | None = None, - metrics_baseline_path: str | None = None, - max_baseline_size_mb: int | None = None, - cache_policy: str = "reuse", - cache_path: str | None = None, - max_cache_size_mb: int | None = None, + root: RootParam, + analysis_mode: AnalysisModeParam = "full", + respect_pyproject: RespectPyprojectParam = True, + changed_paths: ChangedPathsParam 
= None, + git_diff_ref: GitDiffRefParam = None, + processes: ProcessesParam = None, + min_loc: ThresholdIntParam = None, + min_stmt: ThresholdIntParam = None, + block_min_loc: ThresholdIntParam = None, + block_min_stmt: ThresholdIntParam = None, + segment_min_loc: ThresholdIntParam = None, + segment_min_stmt: ThresholdIntParam = None, + api_surface: ApiSurfaceParam = None, + coverage_xml: CoverageXmlParam = None, + coverage_min: CoverageMinParam = None, + complexity_threshold: ThresholdIntParam = None, + coupling_threshold: ThresholdIntParam = None, + cohesion_threshold: ThresholdIntParam = None, + baseline_path: OptionalPathParam = None, + metrics_baseline_path: OptionalPathParam = None, + max_baseline_size_mb: MaxSizeMbParam = None, + cache_policy: CachePolicyParam = "reuse", + cache_path: OptionalPathParam = None, + max_cache_size_mb: MaxSizeMbParam = None, + allow_external_artifacts: AllowExternalArtifactsParam = False, ) -> dict[str, object]: return service.analyze_repository( MCPAnalysisRequest( @@ -242,49 +447,42 @@ def analyze_repository( cache_policy=_validated_cache_policy(cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, + allow_external_artifacts=allow_external_artifacts, ) ) @tool( - title="Analyze Changed Paths", - description=( - "Run a deterministic CodeClone analysis and return a changed-files " - "projection from explicit paths or a git diff ref. Pass an absolute " - "repository root; relative roots like '.' are rejected in MCP. " - "Start with get_report_section(section='changed') or " - "get_production_triage before broader finding lists. Tip: set " - "cache_policy='off' for a fully fresh run. Start with the " - "conservative profile first; lower thresholds only for an " - "explicit higher-sensitivity pass." 
- ), + title=mcp_tools.TITLE_ANALYZE_CHANGED_PATHS, + description=mcp_tools.ANALYZE_CHANGED_PATHS, annotations=analysis_tool, structured_output=True, ) def analyze_changed_paths( - root: str, - changed_paths: list[str] | None = None, - git_diff_ref: str | None = None, - analysis_mode: str = "full", - respect_pyproject: bool = True, - processes: int | None = None, - min_loc: int | None = None, - min_stmt: int | None = None, - block_min_loc: int | None = None, - block_min_stmt: int | None = None, - segment_min_loc: int | None = None, - segment_min_stmt: int | None = None, - api_surface: bool | None = None, - coverage_xml: str | None = None, - coverage_min: int | None = None, - complexity_threshold: int | None = None, - coupling_threshold: int | None = None, - cohesion_threshold: int | None = None, - baseline_path: str | None = None, - metrics_baseline_path: str | None = None, - max_baseline_size_mb: int | None = None, - cache_policy: str = "reuse", - cache_path: str | None = None, - max_cache_size_mb: int | None = None, + root: RootParam, + changed_paths: ChangedPathsParam = None, + git_diff_ref: GitDiffRefParam = None, + analysis_mode: AnalysisModeParam = "full", + respect_pyproject: RespectPyprojectParam = True, + processes: ProcessesParam = None, + min_loc: ThresholdIntParam = None, + min_stmt: ThresholdIntParam = None, + block_min_loc: ThresholdIntParam = None, + block_min_stmt: ThresholdIntParam = None, + segment_min_loc: ThresholdIntParam = None, + segment_min_stmt: ThresholdIntParam = None, + api_surface: ApiSurfaceParam = None, + coverage_xml: CoverageXmlParam = None, + coverage_min: CoverageMinParam = None, + complexity_threshold: ThresholdIntParam = None, + coupling_threshold: ThresholdIntParam = None, + cohesion_threshold: ThresholdIntParam = None, + baseline_path: OptionalPathParam = None, + metrics_baseline_path: OptionalPathParam = None, + max_baseline_size_mb: MaxSizeMbParam = None, + cache_policy: CachePolicyParam = "reuse", + cache_path: 
OptionalPathParam = None, + max_cache_size_mb: MaxSizeMbParam = None, + allow_external_artifacts: AllowExternalArtifactsParam = False, ) -> dict[str, object]: return service.analyze_changed_paths( MCPAnalysisRequest( @@ -312,36 +510,29 @@ def analyze_changed_paths( cache_policy=_validated_cache_policy(cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, + allow_external_artifacts=allow_external_artifacts, ) ) @tool( - title="Get Run Summary", - description=( - "Return the stored compact MCP summary for the latest or specified " - "run. Start here when you want the cheapest run-level snapshot." - ), + title=mcp_tools.TITLE_GET_RUN_SUMMARY, + description=mcp_tools.GET_RUN_SUMMARY, annotations=read_only_tool, structured_output=True, ) - def get_run_summary(run_id: str | None = None) -> dict[str, object]: + def get_run_summary(run_id: RunIdParam = None) -> dict[str, object]: return service.get_run_summary(run_id) @tool( - title="Get Production Triage", - description=( - "Return a production-first triage view over a stored run: health, " - "cache freshness, production hotspots, and production suggestions, " - "while keeping global source-kind counters visible. Use this as the " - "default first-pass review on noisy repositories." - ), + title=mcp_tools.TITLE_GET_PRODUCTION_TRIAGE, + description=mcp_tools.GET_PRODUCTION_TRIAGE, annotations=read_only_tool, structured_output=True, ) def get_production_triage( - run_id: str | None = None, - max_hotspots: int = 3, - max_suggestions: int = 3, + run_id: RunIdParam = None, + max_hotspots: MaxHotspotsParam = 3, + max_suggestions: MaxSuggestionsParam = 3, ) -> dict[str, object]: return service.get_production_triage( run_id=run_id, @@ -350,21 +541,248 @@ def get_production_triage( ) @tool( - title="Help", - description=( - "Explain a supported CodeClone workflow or contract topic and " - "suggest the safest next step. Return compact guidance with " - "canonical doc links. 
Use this when workflow or contract meaning " - "is unclear. This is bounded guidance, not a full manual. " - "Supported topics: workflow, analysis_profile, suppressions, " - "baseline, coverage, latest_runs, review_state, changed_scope." - ), + title=mcp_tools.TITLE_GET_BLAST_RADIUS, + description=mcp_tools.GET_BLAST_RADIUS, + annotations=read_only_tool, + structured_output=True, + ) + def get_blast_radius( + files: FilesParam, + run_id: RunIdParam = None, + depth: BlastDepthParam = "direct", + include: IncludeParam = None, + ) -> dict[str, object]: + return service.get_blast_radius( + files=files, + run_id=run_id, + depth=depth, + include=include, + ) + + @tool( + title=mcp_tools.TITLE_GET_IMPLEMENTATION_CONTEXT, + description=mcp_tools.GET_IMPLEMENTATION_CONTEXT, + annotations=read_only_tool, + structured_output=True, + ) + def get_implementation_context( + root: RootParam, + paths: ContextPathsParam = None, + symbols: ContextSymbolsParam = None, + intent_id: OptionalIntentIdParam = None, + changed_scope: ChangedScopeParam = False, + mode: ContextModeParam = "implementation", + include: FacetIncludeParam = None, + depth: ContextDepthParam = 1, + detail_level: ContextDetailLevelParam = "compact", + budget: ContextBudgetParam = 50, + run_id: RunIdParam = None, + query: ContextQueryParam = None, + ) -> dict[str, object]: + return service.get_implementation_context( + root=root, + paths=paths, + symbols=symbols, + intent_id=intent_id, + changed_scope=changed_scope, + mode=mode, + include=include, + depth=depth, + detail_level=detail_level, + budget=budget, + run_id=run_id, + query=query, + ) + + @tool( + title=mcp_tools.TITLE_GET_RELEVANT_MEMORY, + description=mcp_tools.GET_RELEVANT_MEMORY, + annotations=read_only_tool, + structured_output=True, + ) + def get_relevant_memory( + root: RootParam, + scope: MemoryScopeListParam = None, + intent_id: OptionalIntentIdParam = None, + symbols: MemorySymbolsParam = None, + max_records: MemoryMaxRecordsParam = 20, + 
include_stale: IncludeStaleParam = False, + include_drafts: IncludeDraftsParam = False, + detail_level: MemoryDetailLevelParam = "compact", + ) -> dict[str, object]: + return service.get_relevant_memory( + root=root, + scope=scope, + intent_id=intent_id, + symbols=symbols, + max_records=max_records, + include_stale=include_stale, + include_drafts=include_drafts, + detail_level=detail_level, + ) + + @tool( + title=mcp_tools.TITLE_QUERY_ENGINEERING_MEMORY, + description=mcp_tools.QUERY_ENGINEERING_MEMORY, + annotations=read_only_tool, + structured_output=True, + ) + def query_engineering_memory( + root: RootParam, + mode: MemoryQueryModeParam, + record_id: MemoryRecordIdParam = None, + path: MemoryPathParam = None, + symbol: MemorySymbolParam = None, + query: MemorySearchQueryParam = None, + scope: MemoryScopeListParam = None, + filters: MemoryFiltersParam = None, + max_results: MemoryMaxRecordsParam = 20, + include_stale: IncludeStaleParam = False, + include_drafts: IncludeDraftsParam = False, + detail_level: MemoryDetailLevelParam = "compact", + semantic: SemanticParam = False, + ) -> dict[str, object]: + return service.query_engineering_memory( + root=root, + mode=mode, + record_id=record_id, + path=path, + symbol=symbol, + query=query, + scope=scope, + filters=filters, + max_results=max_results, + include_stale=include_stale, + include_drafts=include_drafts, + detail_level=detail_level, + semantic=semantic, + ) + + @tool( + title=mcp_tools.TITLE_MANAGE_ENGINEERING_MEMORY, + description=mcp_tools.MANAGE_ENGINEERING_MEMORY, + annotations=session_tool, + structured_output=True, + ) + def manage_engineering_memory( + root: RootParam, + action: ManageMemoryActionParam, + record_type: MemoryRecordTypeParam = None, + statement: MemoryStatementParam = None, + subject_path: MemoryPathParam = None, + text: MemoryClaimsTextParam = None, + intent_id: OptionalIntentIdParam = None, + run_id: RunIdParam = None, + record_id: MemoryRecordIdParam = None, + experience_id: 
ManageMemoryExperienceIdParam = None, + decision: GovernanceDecisionParam = None, + ide_governance_key: IdeGovernanceKeyParam = None, + client_name: IdeGovernanceClientNameParam = None, + client_version: IdeGovernanceClientVersionParam = None, + governance_ticket: GovernanceTicketParam = None, + confirmation_nonce: ConfirmationNonceParam = None, + proof: GovernanceProofParam = None, + actor: GovernanceActorParam = None, + protocol: GovernanceProtocolParam = None, + ) -> dict[str, object]: + return service.manage_engineering_memory( + root=root, + action=action, + record_type=record_type, + statement=statement, + subject_path=subject_path, + text=text, + intent_id=intent_id, + run_id=run_id, + record_id=record_id, + experience_id=experience_id, + decision=decision, + ide_governance_key=ide_governance_key, + client_name=client_name, + client_version=client_version, + governance_ticket=governance_ticket, + confirmation_nonce=confirmation_nonce, + proof=proof, + actor=actor, + protocol=protocol, + ) + + @tool( + title=mcp_tools.TITLE_CHECK_PATCH_CONTRACT, + description=mcp_tools.CHECK_PATCH_CONTRACT, + annotations=read_only_tool, + structured_output=True, + ) + def check_patch_contract( + mode: PatchModeParam, + run_id: RunIdParam = None, + before_run_id: BeforeRunIdParam = None, + after_run_id: AfterRunIdParam = None, + intent_id: OptionalIntentIdParam = None, + strictness: StrictnessParam = "ci", + diff_ref: DiffRefParam = None, + changed_files: ChangedFilesParam = None, + ) -> dict[str, object]: + return service.check_patch_contract( + mode=mode, + run_id=run_id, + before_run_id=before_run_id, + after_run_id=after_run_id, + intent_id=intent_id, + strictness=strictness, + diff_ref=diff_ref, + changed_files=changed_files, + ) + + @tool( + title=mcp_tools.TITLE_CREATE_REVIEW_RECEIPT, + description=mcp_tools.CREATE_REVIEW_RECEIPT, + annotations=read_only_tool, + structured_output=True, + ) + def create_review_receipt( + run_id: RunIdParam = None, + intent_id: 
OptionalIntentIdParam = None, + format: ReceiptFormatParam = "markdown", + include_blast_radius: IncludeBlastRadiusParam = True, + include_patch_contract: IncludePatchContractParam = True, + ) -> dict[str, object]: + return service.create_review_receipt( + run_id=run_id, + intent_id=intent_id, + format=format, + include_blast_radius=include_blast_radius, + include_patch_contract=include_patch_contract, + ) + + @tool( + title=mcp_tools.TITLE_VALIDATE_REVIEW_CLAIMS, + description=mcp_tools.VALIDATE_REVIEW_CLAIMS, + annotations=read_only_tool, + structured_output=True, + ) + def validate_review_claims( + text: ReviewTextParam, + run_id: RunIdParam = None, + require_citations: RequireCitationsParam = True, + patch_health_delta: PatchHealthDeltaParam = None, + ) -> dict[str, object]: + return service.validate_review_claims( + text=text, + run_id=run_id, + require_citations=require_citations, + patch_health_delta=patch_health_delta, + ) + + @tool( + title=mcp_tools.TITLE_HELP, + description=mcp_tools.HELP, annotations=read_only_tool, structured_output=True, ) def help( - topic: str, - detail: str = "compact", + topic: HelpTopicParam, + detail: HelpDetailParam = "compact", ) -> dict[str, object]: return service.get_help( topic=topic, @@ -372,32 +790,29 @@ def help( ) @tool( - title="Evaluate Gates", - description=( - "Evaluate CodeClone gate conditions against an existing MCP run without " - "modifying baselines or exiting the process." 
- ), + title=mcp_tools.TITLE_EVALUATE_GATES, + description=mcp_tools.EVALUATE_GATES, annotations=read_only_tool, structured_output=True, ) def evaluate_gates( - run_id: str | None = None, - fail_on_new: bool = False, - fail_threshold: int = -1, - fail_complexity: int = -1, - fail_coupling: int = -1, - fail_cohesion: int = -1, - fail_cycles: bool = False, - fail_dead_code: bool = False, - fail_health: int = -1, - fail_on_new_metrics: bool = False, - fail_on_typing_regression: bool = False, - fail_on_docstring_regression: bool = False, - fail_on_api_break: bool = False, - fail_on_untested_hotspots: bool = False, - min_typing_coverage: int = -1, - min_docstring_coverage: int = -1, - coverage_min: int = DEFAULT_COVERAGE_MIN, + run_id: RunIdParam = None, + fail_on_new: GateBoolParam = False, + fail_threshold: GateIntParam = -1, + fail_complexity: GateIntParam = -1, + fail_coupling: GateIntParam = -1, + fail_cohesion: GateIntParam = -1, + fail_cycles: GateBoolParam = False, + fail_dead_code: GateBoolParam = False, + fail_health: GateIntParam = -1, + fail_on_new_metrics: GateBoolParam = False, + fail_on_typing_regression: GateBoolParam = False, + fail_on_docstring_regression: GateBoolParam = False, + fail_on_api_break: GateBoolParam = False, + fail_on_untested_hotspots: GateBoolParam = False, + min_typing_coverage: GateIntParam = -1, + min_docstring_coverage: GateIntParam = -1, + coverage_min: GateIntParam = DEFAULT_COVERAGE_MIN, ) -> dict[str, object]: return service.evaluate_gates( MCPGateRequest( @@ -422,24 +837,18 @@ def evaluate_gates( ) @tool( - title="Get Report Section", - description=( - "Return a canonical CodeClone report section for the latest or " - "specified MCP run. Prefer specific sections instead of 'all' unless " - "you truly need the full canonical report. The 'metrics' section " - "returns only the summary, while 'metrics_detail' returns paginated " - "item slices or summary+hint when unfiltered." 
- ), + title=mcp_tools.TITLE_GET_REPORT_SECTION, + description=mcp_tools.GET_REPORT_SECTION, annotations=read_only_tool, structured_output=True, ) def get_report_section( - run_id: str | None = None, - section: str = "all", - family: str | None = None, - path: str | None = None, - offset: int = 0, - limit: int = 50, + run_id: RunIdParam = None, + section: ReportSectionParam = "all", + family: FamilyParam = None, + path: PathFilterParam = None, + offset: OffsetParam = 0, + limit: LimitParam = 50, ) -> dict[str, object]: return service.get_report_section( run_id=run_id, @@ -451,31 +860,51 @@ def get_report_section( ) @tool( - title="List Findings", - description=( - "List canonical finding groups with deterministic ordering, optional " - "filters, pagination, and compact summary cards by default. Prefer " - "list_hotspots or focused check_* tools for first-pass triage; use " - "this when you need a broader filtered list." - ), + title=mcp_tools.TITLE_QUERY_PLATFORM_OBSERVABILITY, + description=mcp_tools.QUERY_PLATFORM_OBSERVABILITY, + annotations=read_only_tool, + structured_output=True, + ) + def query_platform_observability( + root: RootParam, + section: ObservabilitySectionParam, + detail_level: ObservabilityDetailParam = "compact", + limit: ObservabilityLimitParam = 10, + window: ObservabilityWindowParam = "latest", + operation_id: ObservabilityOperationIdParam = None, + span_id: ObservabilitySpanIdParam = None, + ) -> dict[str, object]: + return service.query_platform_observability( + root=root, + section=section, + detail_level=detail_level, + limit=limit, + window=window, + operation_id=operation_id, + span_id=span_id, + ) + + @tool( + title=mcp_tools.TITLE_LIST_FINDINGS, + description=mcp_tools.LIST_FINDINGS, annotations=read_only_tool, structured_output=True, ) def list_findings( - run_id: str | None = None, - family: str = "all", - category: str | None = None, - severity: str | None = None, - source_kind: str | None = None, - novelty: str = "all", - 
sort_by: str = "default", - detail_level: str = "summary", - changed_paths: list[str] | None = None, - git_diff_ref: str | None = None, - exclude_reviewed: bool = False, - offset: int = 0, - limit: int = 50, - max_results: int | None = None, + run_id: RunIdParam = None, + family: FindingFamilyParam = "all", + category: CategoryParam = None, + severity: SeverityParam = None, + source_kind: SourceKindParam = None, + novelty: NoveltyParam = "all", + sort_by: SortByParam = "default", + detail_level: DetailLevelParam = "summary", + changed_paths: ChangedPathsParam = None, + git_diff_ref: GitDiffRefParam = None, + exclude_reviewed: ExcludeReviewedParam = False, + offset: OffsetParam = 0, + limit: LimitParam = 50, + max_results: MaxResultsParam = None, ) -> dict[str, object]: return service.list_findings( run_id=run_id, @@ -495,20 +924,15 @@ def list_findings( ) @tool( - title="Get Finding", - description=( - "Return a single canonical finding group by short or full id. " - "Normal detail is the default. Use this after list_hotspots, " - "list_findings, or check_* instead of requesting larger lists at " - "higher detail." - ), + title=mcp_tools.TITLE_GET_FINDING, + description=mcp_tools.GET_FINDING, annotations=read_only_tool, structured_output=True, ) def get_finding( - finding_id: str, - run_id: str | None = None, - detail_level: str = "normal", + finding_id: FindingIdParam, + run_id: RunIdParam = None, + detail_level: DetailLevelParam = "normal", ) -> dict[str, object]: return service.get_finding( finding_id=finding_id, @@ -517,19 +941,15 @@ def get_finding( ) @tool( - title="Get Remediation", - description=( - "Return actionable remediation guidance for a single finding. " - "Normal detail is the default. Use this when you need the fix packet " - "for one finding without pulling larger detail lists." 
- ), + title=mcp_tools.TITLE_GET_REMEDIATION, + description=mcp_tools.GET_REMEDIATION, annotations=read_only_tool, structured_output=True, ) def get_remediation( - finding_id: str, - run_id: str | None = None, - detail_level: str = "normal", + finding_id: FindingIdParam, + run_id: RunIdParam = None, + detail_level: DetailLevelParam = "normal", ) -> dict[str, object]: return service.get_remediation( finding_id=finding_id, @@ -538,24 +958,20 @@ def get_remediation( ) @tool( - title="List Hotspots", - description=( - "Return one of the derived CodeClone hotlists for the latest or " - "specified MCP run, using compact summary cards by default. Prefer " - "this for first-pass triage before broader list_findings calls." - ), + title=mcp_tools.TITLE_LIST_HOTSPOTS, + description=mcp_tools.LIST_HOTSPOTS, annotations=read_only_tool, structured_output=True, ) def list_hotspots( - kind: str, - run_id: str | None = None, - detail_level: str = "summary", - changed_paths: list[str] | None = None, - git_diff_ref: str | None = None, - exclude_reviewed: bool = False, - limit: int = 10, - max_results: int | None = None, + kind: HotspotKindParam, + run_id: RunIdParam = None, + detail_level: DetailLevelParam = "summary", + changed_paths: ChangedPathsParam = None, + git_diff_ref: GitDiffRefParam = None, + exclude_reviewed: ExcludeReviewedParam = False, + limit: LimitParam = 10, + max_results: MaxResultsParam = None, ) -> dict[str, object]: return service.list_hotspots( kind=kind, @@ -569,19 +985,15 @@ def list_hotspots( ) @tool( - title="Compare Runs", - description=( - "Compare two registered CodeClone MCP runs by finding ids and " - "run-to-run health. Returns 'incomparable' when roots or effective " - "analysis settings differ." 
- ), + title=mcp_tools.TITLE_COMPARE_RUNS, + description=mcp_tools.COMPARE_RUNS, annotations=read_only_tool, structured_output=True, ) def compare_runs( - run_id_before: str, - run_id_after: str | None = None, - focus: str = "all", + run_id_before: RunIdRequiredParam, + run_id_after: RunIdParam = None, + focus: CompareFocusParam = "all", ) -> dict[str, object]: return service.compare_runs( run_id_before=run_id_before, @@ -590,24 +1002,18 @@ def compare_runs( ) @tool( - title="Check Complexity", - description=( - "Return complexity hotspots from a compatible stored run. " - "Use analyze_repository first if no full run is available. When " - "filtering by root without run_id, pass an absolute root. Prefer " - "this narrower tool instead of list_findings when you only need " - "complexity hotspots." - ), + title=mcp_tools.TITLE_CHECK_COMPLEXITY, + description=mcp_tools.CHECK_COMPLEXITY, annotations=read_only_tool, structured_output=True, ) def check_complexity( - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - min_complexity: int | None = None, - max_results: int = 10, - detail_level: str = "summary", + run_id: RunIdParam = None, + root: OptionalRootParam = None, + path: PathFilterParam = None, + min_complexity: MinComplexityParam = None, + max_results: LimitParam = 10, + detail_level: DetailLevelParam = "summary", ) -> dict[str, object]: return service.check_complexity( run_id=run_id, @@ -619,25 +1025,19 @@ def check_complexity( ) @tool( - title="Check Clones", - description=( - "Return clone findings from a compatible stored run. " - "Use analyze_repository first if no compatible run is available. " - "When filtering by root without run_id, pass an absolute root. " - "Prefer this narrower tool instead of list_findings when you only " - "need clone findings." 
- ), + title=mcp_tools.TITLE_CHECK_CLONES, + description=mcp_tools.CHECK_CLONES, annotations=read_only_tool, structured_output=True, ) def check_clones( - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - clone_type: str | None = None, - source_kind: str | None = None, - max_results: int = 10, - detail_level: str = "summary", + run_id: RunIdParam = None, + root: OptionalRootParam = None, + path: PathFilterParam = None, + clone_type: CloneTypeParam = None, + source_kind: SourceKindParam = None, + max_results: LimitParam = 10, + detail_level: DetailLevelParam = "summary", ) -> dict[str, object]: return service.check_clones( run_id=run_id, @@ -650,23 +1050,17 @@ def check_clones( ) @tool( - title="Check Coupling", - description=( - "Return coupling hotspots from a compatible stored run. " - "Use analyze_repository first if no full run is available. When " - "filtering by root without run_id, pass an absolute root. Prefer " - "this narrower tool instead of list_findings when you only need " - "coupling hotspots." - ), + title=mcp_tools.TITLE_CHECK_COUPLING, + description=mcp_tools.CHECK_COUPLING, annotations=read_only_tool, structured_output=True, ) def check_coupling( - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - max_results: int = 10, - detail_level: str = "summary", + run_id: RunIdParam = None, + root: OptionalRootParam = None, + path: PathFilterParam = None, + max_results: LimitParam = 10, + detail_level: DetailLevelParam = "summary", ) -> dict[str, object]: return service.check_coupling( run_id=run_id, @@ -677,23 +1071,17 @@ def check_coupling( ) @tool( - title="Check Cohesion", - description=( - "Return cohesion hotspots from a compatible stored run. " - "Use analyze_repository first if no full run is available. When " - "filtering by root without run_id, pass an absolute root. Prefer " - "this narrower tool instead of list_findings when you only need " - "cohesion hotspots." 
- ), + title=mcp_tools.TITLE_CHECK_COHESION, + description=mcp_tools.CHECK_COHESION, annotations=read_only_tool, structured_output=True, ) def check_cohesion( - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - max_results: int = 10, - detail_level: str = "summary", + run_id: RunIdParam = None, + root: OptionalRootParam = None, + path: PathFilterParam = None, + max_results: LimitParam = 10, + detail_level: DetailLevelParam = "summary", ) -> dict[str, object]: return service.check_cohesion( run_id=run_id, @@ -704,24 +1092,18 @@ def check_cohesion( ) @tool( - title="Check Dead Code", - description=( - "Return dead-code findings from a compatible stored run. " - "Use analyze_repository first if no full run is available. When " - "filtering by root without run_id, pass an absolute root. Prefer " - "this narrower tool instead of list_findings when you only need " - "dead-code findings." - ), + title=mcp_tools.TITLE_CHECK_DEAD_CODE, + description=mcp_tools.CHECK_DEAD_CODE, annotations=read_only_tool, structured_output=True, ) def check_dead_code( - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - min_severity: str | None = None, - max_results: int = 10, - detail_level: str = "normal", + run_id: RunIdParam = None, + root: OptionalRootParam = None, + path: PathFilterParam = None, + min_severity: MinSeverityParam = None, + max_results: LimitParam = 10, + detail_level: DetailLevelParam = "normal", ) -> dict[str, object]: return service.check_dead_code( run_id=run_id, @@ -733,20 +1115,16 @@ def check_dead_code( ) @tool( - title="Generate PR Summary", - description=( - "Generate a PR-friendly CodeClone summary for changed files. Prefer " - "format='markdown' for compact LLM-facing output; use 'json' only " - "for machine post-processing." 
- ), + title=mcp_tools.TITLE_GENERATE_PR_SUMMARY, + description=mcp_tools.GENERATE_PR_SUMMARY, annotations=read_only_tool, structured_output=True, ) def generate_pr_summary( - run_id: str | None = None, - changed_paths: list[str] | None = None, - git_diff_ref: str | None = None, - format: str = "markdown", + run_id: RunIdParam = None, + changed_paths: ChangedPathsParam = None, + git_diff_ref: GitDiffRefParam = None, + format: PrFormatParam = "markdown", ) -> dict[str, object]: return service.generate_pr_summary( run_id=run_id, @@ -756,15 +1134,15 @@ def generate_pr_summary( ) @tool( - title="Mark Finding Reviewed", - description="Mark a finding as reviewed in the current in-memory MCP session.", + title=mcp_tools.TITLE_MARK_FINDING_REVIEWED, + description=mcp_tools.MARK_FINDING_REVIEWED, annotations=session_tool, structured_output=True, ) def mark_finding_reviewed( - finding_id: str, - run_id: str | None = None, - note: str | None = None, + finding_id: FindingIdParam, + run_id: RunIdParam = None, + note: ReviewNoteParam = None, ) -> dict[str, object]: return service.mark_finding_reviewed( finding_id=finding_id, @@ -773,32 +1151,154 @@ def mark_finding_reviewed( ) @tool( - title="List Reviewed Findings", - description=( - "List in-memory reviewed findings for the current or specified run." - ), + title=mcp_tools.TITLE_LIST_REVIEWED_FINDINGS, + description=mcp_tools.LIST_REVIEWED_FINDINGS, annotations=read_only_tool, structured_output=True, ) - def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: + def list_reviewed_findings(run_id: RunIdParam = None) -> dict[str, object]: return service.list_reviewed_findings(run_id=run_id) @tool( - title="Clear Session Runs", - description=( - "Clear all in-memory MCP analysis runs and ephemeral session state " - "for this server process." 
- ), + title=mcp_tools.TITLE_START_CONTROLLED_CHANGE, + description=mcp_tools.START_CONTROLLED_CHANGE, + annotations=session_tool, + structured_output=True, + ) + def start_controlled_change( + root: RootParam, + scope: ScopeParam, + intent: IntentTextParam, + expected_effects: ExpectedEffectsParam = None, + on_conflict: OnConflictParam = None, + strictness: StrictnessParam = "ci", + ttl_seconds: TtlSecondsParam = None, + blast_radius_depth: BlastRadiusDepthParam = "auto", + dirty_scope_policy: DirtyScopePolicyParam = "block", + ) -> dict[str, object]: + return service.start_controlled_change( + root=root, + scope=scope, + intent=intent, + expected_effects=expected_effects, + on_conflict=on_conflict, + strictness=strictness, + ttl_seconds=ttl_seconds, + blast_radius_depth=blast_radius_depth, + dirty_scope_policy=dirty_scope_policy, + ) + + @tool( + title=mcp_tools.TITLE_FINISH_CONTROLLED_CHANGE, + description=mcp_tools.FINISH_CONTROLLED_CHANGE, + annotations=session_tool, + structured_output=True, + ) + def finish_controlled_change( + intent_id: IntentIdParam, + changed_files: ChangedFilesParam = None, + diff_ref: DiffRefParam = None, + after_run_id: AfterRunIdParam = None, + review_text: FinishReviewTextParam = None, + claims_text: ClaimsTextParam = None, + create_receipt: CreateReceiptParam = True, + auto_clear: AutoClearParam = True, + strictness: StrictnessParam = "ci", + propose_memory: ProposeMemoryParam = False, + detail_level: DetailLevelParam = "summary", + patch_trail_detail: PatchTrailDetailParam = "summary", + ) -> dict[str, object]: + return service.finish_controlled_change( + intent_id=intent_id, + changed_files=changed_files, + diff_ref=diff_ref, + after_run_id=after_run_id, + review_text=review_text, + claims_text=claims_text, + create_receipt=create_receipt, + auto_clear=auto_clear, + strictness=strictness, + propose_memory=propose_memory, + detail_level=detail_level, + patch_trail_detail=patch_trail_detail, + ) + + @tool( + 
title=mcp_tools.TITLE_MANAGE_CHANGE_INTENT, + description=mcp_tools.MANAGE_CHANGE_INTENT, + annotations=session_tool, + structured_output=True, + ) + def manage_change_intent( + action: ManageActionParam, + run_id: RunIdParam = None, + intent_id: OptionalIntentIdParam = None, + scope: OptionalScopeParam = None, + intent: OptionalIntentTextParam = None, + expected_effects: ExpectedEffectsParam = None, + diff_ref: DiffRefParam = None, + changed_files: ChangedFilesParam = None, + root: OptionalRootParam = None, + ttl_seconds: TtlSecondsParam = None, + lease_seconds: LeaseSecondsParam = None, + on_conflict: OnConflictParam = None, + ) -> dict[str, object]: + return service.manage_change_intent( + action=action, + run_id=run_id, + intent_id=intent_id, + scope=scope, + intent=intent, + expected_effects=expected_effects, + diff_ref=diff_ref, + changed_files=changed_files, + root=root, + ttl_seconds=ttl_seconds, + lease_seconds=lease_seconds, + on_conflict=on_conflict, + ) + + @tool( + title=mcp_tools.TITLE_CLEAR_SESSION_RUNS, + description=mcp_tools.CLEAR_SESSION_RUNS, annotations=session_tool, structured_output=True, ) def clear_session_runs() -> dict[str, object]: return service.clear_session_runs() + if ide_governance_channel: + + @tool( + title=mcp_tools.TITLE_GET_WORKSPACE_SESSION_STATS, + description=mcp_tools.GET_WORKSPACE_SESSION_STATS, + annotations=read_only_tool, + structured_output=True, + ) + def get_workspace_session_stats(root: RootParam) -> dict[str, object]: + return service.get_workspace_session_stats(root=root) + + @tool( + title=mcp_tools.TITLE_GET_CONTROLLER_AUDIT_TRAIL, + description=mcp_tools.GET_CONTROLLER_AUDIT_TRAIL, + annotations=read_only_tool, + structured_output=True, + ) + def get_controller_audit_trail( + root: RootParam, + limit: AuditTrailLimitParam = 50, + audit_path: AuditPathOverrideParam = None, + ) -> dict[str, object]: + return service.get_controller_audit_trail( + root=root, + limit=limit, + audit_path=audit_path, + ) + @resource( 
"codeclone://latest/summary", - title="Latest Run Summary", - description="Canonical JSON summary for the latest CodeClone MCP run.", + title=mcp_resources.TITLE_LATEST_SUMMARY, + description=mcp_resources.LATEST_SUMMARY, mime_type="application/json", ) def latest_summary_resource() -> str: @@ -806,8 +1306,8 @@ def latest_summary_resource() -> str: @resource( "codeclone://latest/report.json", - title="Latest Canonical Report", - description="Canonical JSON report for the latest CodeClone MCP run.", + title=mcp_resources.TITLE_LATEST_REPORT, + description=mcp_resources.LATEST_REPORT, mime_type="application/json", ) def latest_report_resource() -> str: @@ -815,8 +1315,8 @@ def latest_report_resource() -> str: @resource( "codeclone://latest/health", - title="Latest Health Snapshot", - description="Health score and dimensions for the latest CodeClone MCP run.", + title=mcp_resources.TITLE_LATEST_HEALTH, + description=mcp_resources.LATEST_HEALTH, mime_type="application/json", ) def latest_health_resource() -> str: @@ -824,8 +1324,8 @@ def latest_health_resource() -> str: @resource( "codeclone://latest/gates", - title="Latest Gate Evaluation", - description="Last gate evaluation result produced by this MCP session.", + title=mcp_resources.TITLE_LATEST_GATES, + description=mcp_resources.LATEST_GATES, mime_type="application/json", ) def latest_gates_resource() -> str: @@ -833,10 +1333,8 @@ def latest_gates_resource() -> str: @resource( "codeclone://latest/changed", - title="Latest Changed Findings", - description=( - "Changed-files projection for the latest diff-aware CodeClone MCP run." 
- ), + title=mcp_resources.TITLE_LATEST_CHANGED, + description=mcp_resources.LATEST_CHANGED, mime_type="application/json", ) def latest_changed_resource() -> str: @@ -844,8 +1342,8 @@ def latest_changed_resource() -> str: @resource( "codeclone://latest/triage", - title="Latest Production Triage", - description=("Production-first triage view for the latest CodeClone MCP run."), + title=mcp_resources.TITLE_LATEST_TRIAGE, + description=mcp_resources.LATEST_TRIAGE, mime_type="application/json", ) def latest_triage_resource() -> str: @@ -853,8 +1351,8 @@ def latest_triage_resource() -> str: @resource( "codeclone://schema", - title="CodeClone Report Schema", - description="JSON schema-style descriptor for the canonical CodeClone report.", + title=mcp_resources.TITLE_REPORT_SCHEMA, + description=mcp_resources.REPORT_SCHEMA, mime_type="application/json", ) def schema_resource() -> str: @@ -862,8 +1360,8 @@ def schema_resource() -> str: @resource( "codeclone://runs/{run_id}/summary", - title="Run Summary", - description="Canonical JSON summary for a specific CodeClone MCP run.", + title=mcp_resources.TITLE_RUN_SUMMARY, + description=mcp_resources.RUN_SUMMARY, mime_type="application/json", ) def run_summary_resource(run_id: str) -> str: @@ -871,8 +1369,8 @@ def run_summary_resource(run_id: str) -> str: @resource( "codeclone://runs/{run_id}/report.json", - title="Run Canonical Report", - description="Canonical JSON report for a specific CodeClone MCP run.", + title=mcp_resources.TITLE_RUN_REPORT, + description=mcp_resources.RUN_REPORT, mime_type="application/json", ) def run_report_resource(run_id: str) -> str: @@ -880,8 +1378,8 @@ def run_report_resource(run_id: str) -> str: @resource( "codeclone://runs/{run_id}/findings/{finding_id}", - title="Run Finding", - description="Canonical JSON finding group for a specific CodeClone MCP run.", + title=mcp_resources.TITLE_RUN_FINDING, + description=mcp_resources.RUN_FINDING, mime_type="application/json", ) def 
run_finding_resource(run_id: str, finding_id: str) -> str: @@ -928,7 +1426,7 @@ def build_parser() -> argparse.ArgumentParser: default=False, help=( "Allow binding streamable-http to a non-loopback host. " - "Disabled by default because CodeClone MCP has no built-in authentication." + f"HTTP still requires {MCP_AUTH_TOKEN_ENV}." ), ) parser.add_argument( @@ -970,6 +1468,16 @@ def build_parser() -> argparse.ArgumentParser: default=DEFAULT_MCP_LOG_LEVEL, help="FastMCP server log level.", ) + parser.add_argument( + "--ide-governance-channel", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Enable the VS Code IDE governance channel for human " + "approve/reject/archive via manage_engineering_memory. " + "Agent launchers must not pass this flag." + ), + ) return parser @@ -985,7 +1493,30 @@ def _host_is_loopback(host: str) -> bool: return False +def _install_sigterm_handler() -> None: + """Convert SIGTERM to SystemExit so async teardown runs. + + Python's default SIGTERM handler (SIG_DFL) terminates the process + immediately — no ``finally`` blocks, no ``atexit``, no async + context manager teardown. By raising :class:`SystemExit`, the + event loop unwinds normally and the FastMCP lifespan teardown + (which cleans workspace intent files) gets a chance to execute. + + Only installed on platforms that support SIGTERM (not Windows). + """ + import signal as _signal + + if not hasattr(_signal, "SIGTERM"): + return # pragma: no cover + + def _handler(_signum: int, _frame: object) -> None: + raise SystemExit(0) + + _signal.signal(_signal.SIGTERM, _handler) + + def main() -> None: + _install_sigterm_handler() args = build_parser().parse_args() if ( args.transport == "streamable-http" @@ -996,11 +1527,18 @@ def main() -> None: ( "Refusing to bind CodeClone MCP streamable-http to non-loopback " f"host '{args.host}' without --allow-remote. " - "The server has no built-in authentication." + f"Set {MCP_AUTH_TOKEN_ENV} and pass --allow-remote explicitly." 
), file=sys.stderr, ) raise SystemExit(2) + auth_token = None + if args.transport == "streamable-http": + try: + auth_token = validated_mcp_auth_token(os.environ.get(MCP_AUTH_TOKEN_ENV)) + except MCPAuthConfigurationError as exc: + print(str(exc), file=sys.stderr) + raise SystemExit(2) from exc try: server = build_mcp_server( history_limit=args.history_limit, @@ -1010,6 +1548,8 @@ def main() -> None: stateless_http=args.stateless_http, debug=args.debug, log_level=args.log_level, + ide_governance_channel=args.ide_governance_channel, + auth_token=auth_token, ) except MCPDependencyError as exc: print(str(exc), file=sys.stderr) diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index adca1d4d..63167cb6 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -8,6 +8,7 @@ import inspect from typing import Protocol +from ._workspace_intents import safe_remove_own_intent from .session import ( DEFAULT_MCP_HISTORY_LIMIT, MCPAnalysisRequest, @@ -52,6 +53,89 @@ def get_production_triage( def get_help(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_help", **params) + def query_platform_observability( + self: _RunDictService, **params: object + ) -> dict[str, object]: + return self._run_dict("query_platform_observability", **params) + + def get_blast_radius( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("get_blast_radius", **params) + + def get_implementation_context( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("get_implementation_context", **params) + + def get_workspace_session_stats( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("get_workspace_session_stats", **params) + + def get_controller_audit_trail( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return 
self._run_dict("get_controller_audit_trail", **params) + + def get_relevant_memory( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("get_relevant_memory", **params) + + def query_engineering_memory( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("query_engineering_memory", **params) + + def manage_engineering_memory( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("manage_engineering_memory", **params) + + def manage_change_intent( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("manage_change_intent", **params) + + def check_patch_contract( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("check_patch_contract", **params) + + def create_review_receipt( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("create_review_receipt", **params) + + def validate_review_claims( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("validate_review_claims", **params) + + def start_controlled_change( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("start_controlled_change", **params) + + def finish_controlled_change( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: + return self._run_dict("finish_controlled_change", **params) + def generate_pr_summary( self: _RunDictService, **params: object, @@ -90,8 +174,16 @@ def check_dead_code(self: _RunDictService, **params: object) -> dict[str, object class CodeCloneMCPService(_QueryServiceMixin, MCPSession): - def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: - super().__init__(history_limit=history_limit) + def __init__( + self, + *, + history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, + ide_governance_channel: bool = False, + 
) -> None: + super().__init__( + history_limit=history_limit, + ide_governance_channel=ide_governance_channel, + ) self._session_cls = MCPSession # Keep a stable seam for tests and monkeypatch-based callers while the # service itself now owns the real MCP session state. @@ -122,6 +214,59 @@ def clear_session_runs(self) -> dict[str, object]: def read_resource(self, uri: str) -> str: return self._session_cls.read_resource(self, uri) + def shutdown_cleanup(self) -> None: + """Best-effort cleanup of workspace intent files owned by this process. + + Called from FastMCP lifespan teardown at process exit. Removes + only files that THIS process created — identified by matching + PID + start_epoch + intent_id. Never raises. Does not write to + stdout/stderr (the pipe may already be closed). + """ + try: + with self._state_lock: + snapshot = dict(self._active_intents) + for intent_id, intent in snapshot.items(): + try: + run = self._runs.get(intent.run_id) + except Exception: + continue + safe_remove_own_intent( + root=run.root, + pid=self._agent_pid, + start_epoch=self._agent_start_epoch, + intent_id=intent_id, + ) + except Exception: + pass + self._shutdown_close_resources() + + def _shutdown_close_resources(self) -> None: + """Best-effort close of passive stores owned by this service.""" + try: + writers = tuple(self._audit_writers.values()) + self._audit_writers.clear() + override = self._audit_writer_override + self._audit_writer_override = None + if override is not None: + writers = (*writers, override) + seen: set[int] = set() + for writer in writers: + writer_id = id(writer) + if writer_id in seen: + continue + seen.add(writer_id) + close = getattr(writer, "close", None) + if callable(close): + close() + except Exception: + pass + try: + from ._workspace_intent_store import clear_workspace_intent_store_cache + + clear_workspace_intent_store_cache() + except Exception: + pass + _EMPTY = inspect.Signature.empty @@ -205,6 +350,62 @@ def _apply_public_method_signatures() 
-> None: _kwonly("max_hotspots", "int", 3), _kwonly("max_suggestions", "int", 3), ), + "get_blast_radius": ( + _kwonly("files", "Sequence[str]"), + _kwonly("run_id", "str | None", None), + _kwonly("depth", "str", "direct"), + _kwonly("include", "Sequence[str] | None", None), + ), + "get_implementation_context": ( + _kwonly("root", "str"), + _kwonly("paths", "Sequence[str] | None", None), + _kwonly("symbols", "Sequence[str] | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("changed_scope", "bool", False), + _kwonly("mode", "str", "implementation"), + _kwonly("include", "Sequence[str] | None", None), + _kwonly("depth", "int", 1), + _kwonly("detail_level", "str", "compact"), + _kwonly("budget", "int", 50), + _kwonly("run_id", "str | None", None), + _kwonly("query", "str | None", None), + ), + "check_patch_contract": ( + _kwonly("mode", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("before_run_id", "str | None", None), + _kwonly("after_run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("strictness", "str", "ci"), + _kwonly("diff_ref", "str | None", None), + _kwonly("changed_files", "Sequence[str] | None", None), + ), + "create_review_receipt": ( + _kwonly("run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("format", "str", "markdown"), + _kwonly("include_blast_radius", "bool", True), + _kwonly("include_patch_contract", "bool", True), + ), + "validate_review_claims": ( + _kwonly("text", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("require_citations", "bool", True), + _kwonly("patch_health_delta", "int | None", None), + ), + "manage_change_intent": ( + _kwonly("action", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("scope", "dict[str, object] | None", None), + _kwonly("intent", "str | None", None), + _kwonly("expected_effects", "Sequence[str] | None", None), + _kwonly("diff_ref", "str | None", None), + 
_kwonly("changed_files", "Sequence[str] | None", None), + _kwonly("root", "str | None", None), + _kwonly("ttl_seconds", "int | None", None), + _kwonly("lease_seconds", "int | None", None), + ), "get_remediation": ( _kwonly("finding_id", "str"), _kwonly("run_id", "str | None", None), @@ -250,6 +451,74 @@ def _apply_public_method_signatures() -> None: _kwonly("run_id", "str | None", None), _kwonly("note", "str | None", None), ), + "start_controlled_change": ( + _kwonly("root", "str"), + _kwonly("scope", "dict[str, object]"), + _kwonly("intent", "str"), + _kwonly("expected_effects", "Sequence[str] | None", None), + _kwonly("on_conflict", "str | None", None), + _kwonly("strictness", "str", "ci"), + _kwonly("ttl_seconds", "int | None", None), + _kwonly("blast_radius_depth", "str", "auto"), + _kwonly("dirty_scope_policy", "str", "block"), + ), + "get_relevant_memory": ( + _kwonly("root", "str"), + _kwonly("scope", "Sequence[str] | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("symbols", "Sequence[str] | None", None), + _kwonly("max_records", "int", 20), + _kwonly("include_stale", "bool", False), + _kwonly("include_drafts", "bool", False), + ), + "query_engineering_memory": ( + _kwonly("root", "str"), + _kwonly("mode", "str"), + _kwonly("record_id", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("symbol", "str | None", None), + _kwonly("query", "str | None", None), + _kwonly("scope", "Sequence[str] | None", None), + _kwonly("filters", "Mapping[str, object] | None", None), + _kwonly("max_results", "int", 20), + _kwonly("include_stale", "bool", False), + _kwonly("include_drafts", "bool", False), + ), + "manage_engineering_memory": ( + _kwonly("root", "str"), + _kwonly("action", "str"), + _kwonly("record_type", "str | None", None), + _kwonly("statement", "str | None", None), + _kwonly("subject_path", "str | None", None), + _kwonly("text", "str | None", None), + _kwonly("intent_id", "str | None", None), + _kwonly("run_id", "str | 
None", None), + _kwonly("record_id", "str | None", None), + _kwonly("decision", "str | None", None), + _kwonly("ide_governance_key", "str | None", None), + _kwonly("client_name", "str | None", None), + _kwonly("client_version", "str | None", None), + _kwonly("governance_ticket", "str | None", None), + _kwonly("confirmation_nonce", "str | None", None), + _kwonly("proof", "str | None", None), + _kwonly("actor", "str | None", None), + _kwonly("protocol", "int | None", None), + _kwonly("reject_reason", "str | None", None), + ), + "finish_controlled_change": ( + _kwonly("intent_id", "str"), + _kwonly("changed_files", "Sequence[str] | None", None), + _kwonly("diff_ref", "str | None", None), + _kwonly("after_run_id", "str | None", None), + _kwonly("review_text", "str | None", None), + _kwonly("claims_text", "str | None", None), + _kwonly("create_receipt", "bool", True), + _kwonly("auto_clear", "bool", True), + _kwonly("strictness", "str", "ci"), + _kwonly("propose_memory", "bool", False), + _kwonly("detail_level", "str", "summary"), + _kwonly("patch_trail_detail", "str", "summary"), + ), } self_param = inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD) for name, params in signature_specs.items(): diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 3de4ce24..bc17ba71 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -6,14 +6,34 @@ from __future__ import annotations +import os +import time +from collections.abc import Mapping +from pathlib import Path + +from ...audit import AuditEvent, AuditWriter, repo_root_digest +from ...audit.runtime import open_audit_writer_for_root from ...cache.store import resolve_cache_status +from ...memory.ide_governance import IdeGovernanceSessionState +from ...observability import span from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc from . 
import _session_helpers as _helpers +from ._blast_radius import BlastRadiusResult +from ._implementation_context import build_unit_location_inventory +from ._intent import IntentRecord from ._session_baseline import ( resolve_clone_baseline_state, resolve_metrics_baseline_state, ) +from ._session_blast_radius_mixin import _MCPSessionBlastRadiusMixin +from ._session_claim_guard_mixin import _MCPSessionClaimGuardMixin +from ._session_context_mixin import _MCPSessionContextMixin +from ._session_insights_mixin import _MCPSessionInsightsMixin +from ._session_intent_mixin import _MCPSessionIntentMixin +from ._session_memory_mixin import _MCPSessionMemoryMixin +from ._session_patch_contract_mixin import _MCPSessionPatchContractMixin +from ._session_review_receipt_mixin import _MCPSessionReviewReceiptMixin from ._session_shared import ( _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -55,6 +75,9 @@ report, ) from ._session_state_mixin import _MCPSessionStateMixin +from ._session_workflow_mixin import _MCPSessionWorkflowMixin +from ._workspace_drift import build_run_manifest +from ._workspace_hygiene import collect_dirty_snapshot __all__ = [ "DEFAULT_MCP_HISTORY_LIMIT", @@ -75,17 +98,143 @@ ] -class MCPSession(_MCPSessionStateMixin): - def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: +class MCPSession( + _MCPSessionWorkflowMixin, + _MCPSessionClaimGuardMixin, + _MCPSessionReviewReceiptMixin, + _MCPSessionPatchContractMixin, + _MCPSessionIntentMixin, + _MCPSessionMemoryMixin, + _MCPSessionContextMixin, + _MCPSessionBlastRadiusMixin, + _MCPSessionInsightsMixin, + _MCPSessionStateMixin, +): + def __init__( + self, + *, + history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, + audit_writer: AuditWriter | None = None, + ide_governance_channel: bool = False, + ) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) + self._ide_governance = IdeGovernanceSessionState( + channel_enabled=ide_governance_channel + ) self._state_lock = RLock() 
self._review_state: dict[str, OrderedDict[str, str | None]] = {} self._last_gate_results: dict[str, dict[str, object]] = {} self._spread_max_cache: dict[str, int] = {} + self._blast_radius_cache: dict[ + tuple[str, tuple[str, ...], str], + BlastRadiusResult, + ] = {} + self._active_intents: dict[str, IntentRecord] = {} + self._intent_sequence = 0 + self._agent_pid = os.getpid() + self._agent_start_epoch = int(time.time()) + self._agent_label_cache: str | None = None + self._fastmcp: object | None = None + self._audit_writer_override = audit_writer + self._audit_writers: dict[Path, AuditWriter] = {} + + # ------------------------------------------------------------------ + # Agent label: lazy-resolved from MCP clientInfo on first access + # ------------------------------------------------------------------ + + @property + def _agent_label(self) -> str: + if self._agent_label_cache is None: + self._agent_label_cache = self._resolve_agent_label() + return self._agent_label_cache + + @_agent_label.setter + def _agent_label(self, value: str) -> None: + self._agent_label_cache = value + + def _resolve_agent_label(self) -> str: + """Build a human-readable agent label from MCP client metadata. + + Resolution order: + 1. MCP ``clientInfo`` from the protocol ``initialize`` handshake + (available after the first tool call) → ``"name/version"``. + 2. Fallback → ``"pid-"``. 
+ """ + try: + get_context = getattr(self._fastmcp, "get_context", None) + if not callable(get_context): + return f"pid-{self._agent_pid}" + ctx = get_context() + session = getattr(ctx, "session", None) + params = getattr(session, "client_params", None) + info = getattr(params, "clientInfo", None) + name = getattr(info, "name", None) + if not isinstance(name, str) or not name: + return f"pid-{self._agent_pid}" + version = getattr(info, "version", None) + if isinstance(version, str) and version: + return f"{name}/{version}" + return name + except Exception: + pass + return f"pid-{self._agent_pid}" + + # ------------------------------------------------------------------ + # Audit trail: best-effort observer, never controller truth + # ------------------------------------------------------------------ + + def _audit_emit( + self, + *, + root: Path, + event_type: str, + severity: str, + run_id: str | None = None, + intent_id: str | None = None, + report_digest: str | None = None, + status: str | None = None, + payload: Mapping[str, object] | None = None, + ) -> int | None: + try: + writer = self._audit_writer_for_root(root) + return writer.emit( + AuditEvent( + event_type=event_type, + severity="error" + if severity == "error" + else ("warn" if severity == "warn" else "info"), + repo_root_digest=repo_root_digest(root), + agent_pid=self._agent_pid, + agent_start_epoch=self._agent_start_epoch, + agent_label=self._agent_label, + run_id=run_id, + intent_id=intent_id, + report_digest=report_digest, + status=status, + payload=payload, + ) + ) + except Exception: + return None + + def _audit_writer_for_root(self, root: Path) -> AuditWriter: + if self._audit_writer_override is not None: + return self._audit_writer_override + root_path = root.resolve() + cached = self._audit_writers.get(root_path) + if cached is not None: + return cached + writer = self._build_audit_writer(root_path) + self._audit_writers[root_path] = writer + return writer + + def _build_audit_writer(self, 
root: Path) -> AuditWriter: + return open_audit_writer_for_root(root.resolve()) def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) root_path = _helpers._resolve_root(request.root) + run_dirty_snapshot = collect_dirty_snapshot(root_path) analysis_started_at_utc = _current_report_timestamp_utc() changed_paths = self._resolve_request_changed_paths( root_path=root_path, @@ -109,19 +258,37 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ) console = _BufferConsole() - boot = bootstrap( - args=args, + # Stage spans so mcp.analyze_repository carries the same discover/process/ + # analyze timing as cli.analyze (this path bypasses run_analysis_stages, + # spec §6.1). Spans attach to the active operation from the MCP registrar; + # inert when observability is disabled or no operation is open. + with span(name="pipeline.bootstrap"): + boot = bootstrap( + args=args, + root=root_path, + output_paths=OutputPaths(json=_REPORT_DUMMY_PATH), + cache_path=cache_path, + ) + with span(name="pipeline.discover"): + discovery_result = discover(boot=boot, cache=cache) + run_manifest = build_run_manifest( root=root_path, - output_paths=OutputPaths(json=_REPORT_DUMMY_PATH), - cache_path=cache_path, + filepaths=discovery_result.all_file_paths, ) - discovery_result = discover(boot=boot, cache=cache) - processing_result = process(boot=boot, discovery=discovery_result, cache=cache) - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, + with span(name="pipeline.process"): + processing_result = process( + boot=boot, discovery=discovery_result, cache=cache + ) + unit_inventory = build_unit_location_inventory( + root=root_path, + units=processing_result.units, ) + with span(name="pipeline.analyze"): + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) clone_baseline_state = 
resolve_clone_baseline_state( baseline_path=baseline_path, @@ -298,6 +465,11 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: new_func=frozenset(new_func), new_block=frozenset(new_block), metrics_diff=metrics_diff, + manifest=run_manifest, + dirty_snapshot=run_dirty_snapshot, + unit_inventory=unit_inventory, + relationship_facts=processing_result.function_relationship_facts, + module_imports=processing_result.module_deps, ) changed_projection = self._build_changed_projection(provisional_record) summary = self._augment_summary_with_changed( @@ -327,11 +499,48 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: new_func=frozenset(new_func), new_block=frozenset(new_block), metrics_diff=metrics_diff, + manifest=run_manifest, + dirty_snapshot=run_dirty_snapshot, + unit_inventory=unit_inventory, + relationship_facts=processing_result.function_relationship_facts, + module_imports=processing_result.module_deps, ) self._runs.register(record) + self._emit_analysis_completed_audit( + root_path=root_path, + record=record, + summary=summary, + ) self._prune_session_state() return self._summary_payload(record.summary, record=record) + def _emit_analysis_completed_audit( + self, + *, + root_path: Path, + record: MCPRunRecord, + summary: Mapping[str, object], + ) -> None: + try: + from ...audit.analysis_completed import ( + ANALYSIS_SOURCE_MCP, + emit_analysis_completed, + ) + + emit_analysis_completed( + root_path=root_path, + summary=summary, + source=ANALYSIS_SOURCE_MCP, + report_digest=self._report_digest_value(record), + run_id=record.run_id, + agent_pid=self._agent_pid, + agent_start_epoch=self._agent_start_epoch, + agent_label=self._agent_label, + writer=self._audit_writer_for_root(root_path), + ) + except Exception: + return None + def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: if not request.changed_paths and request.git_diff_ref is None: raise MCPServiceContractError( 
diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index fa94dafb..ac266e0d 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -4,990 +4,25 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -from __future__ import annotations - -import platform -import re -import shlex -import sys -import textwrap -import traceback -from pathlib import Path - -from .. import __version__ -from ..contracts import ( - DEFAULT_BASELINE_PATH, - DEFAULT_COVERAGE_MIN, - DEFAULT_HTML_REPORT_PATH, - DEFAULT_JSON_REPORT_PATH, - DEFAULT_MARKDOWN_REPORT_PATH, - DEFAULT_MAX_BASELINE_SIZE_MB, - DEFAULT_MAX_CACHE_SIZE_MB, - DEFAULT_MIN_LOC, - DEFAULT_MIN_STMT, - DEFAULT_PROCESSES, - DEFAULT_SARIF_REPORT_PATH, - DEFAULT_TEXT_REPORT_PATH, - ISSUES_URL, -) -from ..domain.quality import ( - HEALTH_GRADE_A, - HEALTH_GRADE_B, - HEALTH_GRADE_C, - HEALTH_GRADE_D, - HEALTH_GRADE_F, -) - -BANNER_SUBTITLE = "Structural review layer" - -MARKER_CONTRACT_ERROR = "[error]CONTRACT ERROR:[/error]" -MARKER_INTERNAL_ERROR = "[error]INTERNAL ERROR:[/error]" - -REPORT_BLOCK_GROUP_DISPLAY_NAME_ASSERT_PATTERN = "Assert pattern block" - -HELP_VERSION = "Print the CodeClone version and exit." -HELP_ROOT = "Project root directory to scan.\nDefaults to the current directory." -HELP_MIN_LOC = ( - "Minimum Lines of Code (LOC) required for clone analysis.\n" - f"Default: {DEFAULT_MIN_LOC}." -) -HELP_MIN_STMT = ( - "Minimum AST statement count required for clone analysis.\n" - f"Default: {DEFAULT_MIN_STMT}." -) -HELP_PROCESSES = f"Number of parallel worker processes.\nDefault: {DEFAULT_PROCESSES}." -HELP_CHANGED_ONLY = ( - "Limit clone gating and changed-scope summaries to findings that touch\n" - "files from a git diff selection." -) -HELP_DIFF_AGAINST = ( - "Resolve changed files from `git diff --name-only `.\n" - "Use together with --changed-only." 
-) -HELP_PATHS_FROM_GIT_DIFF = ( - "Shorthand for --changed-only using `git diff --name-only `.\n" - "Useful for PR and CI review flows." -) -HELP_CACHE_PATH = ( - "Path to the cache file.\n" - "If FILE is omitted, uses /.cache/codeclone/cache.json." -) -HELP_CACHE_DIR_LEGACY = ( - "Legacy alias for --cache-path.\nPrefer --cache-path in new configurations." -) -HELP_MAX_BASELINE_SIZE_MB = ( - f"Maximum allowed baseline size in MB.\nDefault: {DEFAULT_MAX_BASELINE_SIZE_MB}." -) -HELP_MAX_CACHE_SIZE_MB = ( - f"Maximum cache file size in MB.\nDefault: {DEFAULT_MAX_CACHE_SIZE_MB}." -) -HELP_BASELINE = ( - "Path to the clone baseline.\n" - f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." -) -HELP_UPDATE_BASELINE = ( - "Overwrite the clone baseline with current results.\nDisabled by default." -) -HELP_FAIL_ON_NEW = ( - "Exit with code 3 if NEW clone findings not present in the baseline\nare detected." -) -HELP_FAIL_THRESHOLD = ( - "Exit with code 3 if the total number of function + block clone groups\n" - "exceeds this value.\n" - "Disabled unless set." -) -HELP_FAIL_COMPLEXITY = ( - "Exit with code 3 if any function exceeds the cyclomatic complexity\n" - "threshold.\n" - "If enabled without a value, uses 20." -) -HELP_FAIL_COUPLING = ( - "Exit with code 3 if any class exceeds the coupling threshold.\n" - "If enabled without a value, uses 10." -) -HELP_FAIL_COHESION = ( - "Exit with code 3 if any class exceeds the cohesion threshold.\n" - "If enabled without a value, uses 4." -) -HELP_FAIL_CYCLES = "Exit with code 3 if circular module dependencies are detected." -HELP_FAIL_DEAD_CODE = "Exit with code 3 if high-confidence dead code is detected." -HELP_FAIL_HEALTH = ( - "Exit with code 3 if the overall health score falls below the threshold.\n" - "If enabled without a value, uses 60." -) -HELP_FAIL_ON_NEW_METRICS = ( - "Exit with code 3 if new metrics violations appear relative to the\n" - "metrics baseline." 
-) -HELP_API_SURFACE = ( - "Collect public API surface facts for baseline-aware compatibility review.\n" - "Disabled by default." -) -HELP_COVERAGE = ( - "Join external Cobertura XML line coverage to function spans.\n" - "Pass a `coverage xml` report path." -) -HELP_FAIL_ON_TYPING_REGRESSION = ( - "Exit with code 3 if typing adoption coverage regresses relative to the\n" - "metrics baseline." -) -HELP_FAIL_ON_DOCSTRING_REGRESSION = ( - "Exit with code 3 if public docstring coverage regresses relative to the\n" - "metrics baseline." -) -HELP_FAIL_ON_API_BREAK = ( - "Exit with code 3 if public API removals or signature breaks are detected\n" - "relative to the metrics baseline." -) -HELP_FAIL_ON_UNTESTED_HOTSPOTS = ( - "Exit with code 3 if medium/high-risk functions measured by Coverage Join\n" - "fall below the joined coverage threshold.\nRequires --coverage." -) -HELP_MIN_TYPING_COVERAGE = ( - "Exit with code 3 if parameter typing coverage falls below the threshold.\n" - "Threshold is a whole percent from 0 to 100." -) -HELP_MIN_DOCSTRING_COVERAGE = ( - "Exit with code 3 if public docstring coverage falls below the threshold.\n" - "Threshold is a whole percent from 0 to 100." -) -HELP_COVERAGE_MIN = ( - "Coverage threshold for untested hotspot detection.\n" - "Threshold is a whole percent from 0 to 100.\n" - f"Default: {DEFAULT_COVERAGE_MIN}." -) -HELP_CI = ( - "Enable CI preset.\n" - "Equivalent to: --fail-on-new --no-color --quiet.\n" - "When a trusted metrics baseline is available, CI mode also enables\n" - "metrics regression gating." -) -HELP_UPDATE_METRICS_BASELINE = ( - "Overwrite the metrics baseline with current metrics.\nDisabled by default." -) -HELP_METRICS_BASELINE = ( - "Path to the metrics baseline.\n" - f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." -) -HELP_SKIP_METRICS = "Skip full metrics analysis and run in clone-only mode." -HELP_SKIP_DEAD_CODE = "Skip dead code detection." -HELP_SKIP_DEPENDENCIES = "Skip dependency graph analysis." 
-HELP_HTML = ( - "Generate an HTML report.\n" - f"If FILE is omitted, writes to {DEFAULT_HTML_REPORT_PATH}." -) -HELP_JSON = ( - "Generate the canonical JSON report.\n" - f"If FILE is omitted, writes to {DEFAULT_JSON_REPORT_PATH}." -) -HELP_MD = ( - "Generate a Markdown report.\n" - f"If FILE is omitted, writes to {DEFAULT_MARKDOWN_REPORT_PATH}." -) -HELP_SARIF = ( - "Generate a SARIF 2.1.0 report.\n" - f"If FILE is omitted, writes to {DEFAULT_SARIF_REPORT_PATH}." -) -HELP_TEXT = ( - "Generate a plain-text report.\n" - f"If FILE is omitted, writes to {DEFAULT_TEXT_REPORT_PATH}." -) -HELP_OPEN_HTML_REPORT = ( - "Open the generated HTML report in the default browser.\nRequires --html." -) -HELP_TIMESTAMPED_REPORT_PATHS = ( - "Append a UTC timestamp to default report filenames.\n" - "Applies only to report flags passed without FILE." -) -HELP_NO_PROGRESS = "Disable progress output.\nRecommended for CI logs." -HELP_PROGRESS = "Force-enable progress output." -HELP_NO_COLOR = "Disable ANSI colors." -HELP_COLOR = "Force-enable ANSI colors." -HELP_QUIET = "Reduce output to warnings, errors, and essential summaries." -HELP_VERBOSE = "Include detailed identifiers for NEW clone findings." -HELP_DEBUG = ( - "Print debug details for internal errors, including traceback and\n" - "environment information." 
-) - -SUMMARY_TITLE = "Summary" -METRICS_TITLE = "Metrics" -CHANGED_SCOPE_TITLE = "Changed Scope" - -CLI_LAYOUT_MAX_WIDTH = 80 - -SUMMARY_LABEL_FILES_FOUND = "Files found" -SUMMARY_LABEL_FILES_ANALYZED = " analyzed" -SUMMARY_LABEL_CACHE_HITS = " from cache" -SUMMARY_LABEL_FILES_SKIPPED = " skipped" -SUMMARY_LABEL_LINES_ANALYZED = "Lines (this run)" -SUMMARY_LABEL_FUNCTIONS_ANALYZED = "Functions (this run)" -SUMMARY_LABEL_METHODS_ANALYZED = "Methods (this run)" -SUMMARY_LABEL_CLASSES_ANALYZED = "Classes (this run)" -SUMMARY_LABEL_FUNCTION = "Function clones" -SUMMARY_LABEL_BLOCK = "Block clones" -SUMMARY_LABEL_SEGMENT = "Segment clones" -SUMMARY_LABEL_SUPPRESSED = " suppressed" -SUMMARY_LABEL_NEW_BASELINE = "New vs baseline" +"""User-facing CLI messages and formatters.""" -SUMMARY_COMPACT = ( - "Summary found={found} analyzed={analyzed}" - " cached={cache_hits} skipped={skipped}" -) -SUMMARY_COMPACT_CLONES = ( - "Clones func={function} block={block} seg={segment}" - " suppressed={suppressed} new={new}" -) -SUMMARY_COMPACT_METRICS = ( - "Metrics cc={cc_avg}/{cc_max} cbo={cbo_avg}/{cbo_max}" - " lcom4={lcom_avg}/{lcom_max} cycles={cycles} dead_code={dead}" - " health={health}({grade}) overloaded_modules={overloaded_modules}" -) -SUMMARY_COMPACT_DEPENDENCIES = ( - "Dependencies avg={avg_depth} p95={p95_depth} max={max_depth}" -) -SUMMARY_COMPACT_SECURITY_SURFACES = ( - "Security items={items} categories={categories}" - " production={production} tests={tests}" -) -SUMMARY_COMPACT_CHANGED_SCOPE = ( - "Changed paths={paths} findings={findings} new={new} known={known}" -) - -WARN_SUMMARY_ACCOUNTING_MISMATCH = ( - "Summary accounting mismatch: " - "files_found != files_analyzed + cache_hits + files_skipped" -) - -STATUS_DISCOVERING = "[bold green]Discovering Python files..." -STATUS_GROUPING = "[bold green]Grouping clones..." 
- -INFO_PROCESSING_CHANGED = "[info]Processing {count} changed files...[/info]" - -WARN_WORKER_FAILED = "[warning]Worker failed: {error}[/warning]" -WARN_BATCH_ITEM_FAILED = "[warning]Failed to process batch item: {error}[/warning]" -WARN_PARALLEL_FALLBACK = ( - "[warning]Parallel processing unavailable, " - "falling back to sequential: {error}[/warning]" -) -WARN_FAILED_FILES_HEADER = "\n[warning]{count} files failed to process:[/warning]" -WARN_CACHE_SAVE_FAILED = "[warning]Failed to save cache: {error}[/warning]" -WARN_HTML_REPORT_OPEN_FAILED = ( - "[warning]Failed to open HTML report in browser: {path} ({error}).[/warning]" -) -WARN_COVERAGE_JOIN_IGNORED = "[warning]Coverage join ignored: {error}[/warning]" - -ERR_INVALID_OUTPUT_EXT = ( - "[error]Invalid {label} output extension: {path} " - "(expected {expected_suffix}).[/error]" -) -ERR_INVALID_OUTPUT_PATH = ( - "[error]Invalid {label} output path: {path} ({error}).[/error]" -) -ERR_ROOT_NOT_FOUND = "[error]Root path does not exist: {path}[/error]" -ERR_INVALID_ROOT_PATH = "[error]Invalid root path: {error}[/error]" -ERR_SCAN_FAILED = "[error]Scan failed: {error}[/error]" -ERR_INVALID_BASELINE_PATH = "[error]Invalid baseline path: {path} ({error}).[/error]" -ERR_BASELINE_WRITE_FAILED = ( - "[error]Failed to write baseline file: {path} ({error}).[/error]" -) -ERR_REPORT_WRITE_FAILED = ( - "[error]Failed to write {label} report: {path} ({error}).[/error]" -) -ERR_OPEN_HTML_REPORT_REQUIRES_HTML = ( - "[error]--open-html-report requires --html.[/error]" -) -ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT = ( - "[error]--timestamped-report-paths requires at least one report output " - "flag.[/error]" -) -ERR_UNREADABLE_SOURCE_IN_GATING = ( - "One or more source files could not be read in CI/gating mode.\n" - "Unreadable source files: {count}." 
-) - -WARN_LEGACY_CACHE = ( - "[warning]Legacy cache file found at: {legacy_path}.[/warning]\n" - "[warning]Cache is now stored per-project at: {new_path}.[/warning]\n" - "[warning]Please delete the legacy cache file and add " - ".cache/ to .gitignore.[/warning]" -) - -ERR_INVALID_BASELINE = ( - "[error]Invalid baseline file.[/error]\n" - "{error}\n" - "Please regenerate the baseline with --update-baseline." -) -ACTION_UPDATE_BASELINE = "Run: codeclone . --update-baseline" -WARN_BASELINE_MISSING = ( - "[warning]Baseline file not found at: [bold]{path}[/bold][/warning]\n" - "[dim]Comparing against an empty baseline. " - "Use --update-baseline to create it.[/dim]\n" - f"[dim]{ACTION_UPDATE_BASELINE}[/dim]" -) -WARN_BASELINE_IGNORED = ( - "[warning]Baseline is not trusted for this run and will be ignored.[/warning]\n" - "[dim]Comparison will proceed against an empty baseline.[/dim]\n" - f"[dim]{ACTION_UPDATE_BASELINE}[/dim]" -) -ERR_BASELINE_CI_REQUIRES_TRUSTED = ( - f"[error]CI requires a trusted baseline.[/error]\n{ACTION_UPDATE_BASELINE}" -) -ERR_BASELINE_GATING_REQUIRES_TRUSTED = ( - "[error]Baseline-aware gates require a trusted baseline.[/error]\n" - f"{ACTION_UPDATE_BASELINE}" -) -SUCCESS_BASELINE_UPDATED = "✔ Baseline updated: {path}" - -FAIL_NEW_TITLE = "[error]FAILED: New code clones detected.[/error]" -FAIL_NEW_SUMMARY_TITLE = "Summary:" -FAIL_NEW_FUNCTION = "- New function clone groups: {count}" -FAIL_NEW_BLOCK = "- New block clone groups: {count}" -FAIL_NEW_REPORT_TITLE = "See detailed report:" -FAIL_NEW_ACCEPT_TITLE = "To accept these clones as technical debt, run:" -FAIL_NEW_ACCEPT_COMMAND = " codeclone . 
--update-baseline" -FAIL_NEW_DETAIL_FUNCTION = "Details (function clone hashes):" -FAIL_NEW_DETAIL_BLOCK = "Details (block clone hashes):" -FAIL_METRICS_TITLE = "[error]FAILED: Metrics quality gate triggered.[/error]" +from __future__ import annotations -WARN_NEW_CLONES_WITHOUT_FAIL = ( - "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n" - "Run with --update-baseline to accept them as technical debt." +from .controller import * # noqa: F403 +from .formatters import * # noqa: F403 +from .help import * # noqa: F403 +from .labels import * # noqa: F403 +from .markers import * # noqa: F403 +from .runtime import * # noqa: F403 +from .styling import ( # re-export private helpers for fmt_* and tests + _HEALTH_GRADE_STYLE as _HEALTH_GRADE_STYLE, ) -TIP_VSCODE_EXTENSION = ( - "\n[dim]Tip:[/dim] VS Code detected. " - "CodeClone has a native extension for triage-first review and hotspot " - "navigation.\n" - "[dim]{url}[/dim]" +from .styling import ( + _RICH_MARKUP_TAG_RE as _RICH_MARKUP_TAG_RE, ) -NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION = ( - "\n[dim]Note:[/dim] Dead-code reachability was refined in 2.0.1 for " - "common Python frameworks.\n" - "[dim]Fewer dead-code findings after upgrading from 2.0.0 are expected: " - "this usually means reduced false positives, not weaker detection.[/dim]" +from .styling import ( + _v as _v, ) -NOTE_DEAD_CODE_REACHABILITY_2_0_2_MIGRATION = ( - "\n[dim]Note:[/dim] Dead-code reachability was refined again in 2.0.2.\n" - "[dim]Fewer dead-code findings after upgrading from 2.0.1 are expected: " - "framework hooks, public exports, and guarded dynamic dispatch now produce " - "fewer false positives, not weaker detection.[/dim]" +from .styling import ( + _vn as _vn, ) -NOTE_DEAD_CODE_REACHABILITY_MIGRATION = NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION - -_RICH_MARKUP_TAG_RE = re.compile(r"\[/?[a-zA-Z][a-zA-Z0-9_ .#:-]*]") - - -def version_output(version: str) -> str: - return f"CodeClone {version}" - - -def 
banner_title(version: str) -> str: - return ( - f" [bold white]CodeClone[/bold white] [dim]v{version}[/dim]" - f" [dim]\u00b7[/dim] [dim]{BANNER_SUBTITLE}[/dim]" - ) - - -def fmt_invalid_output_extension( - *, label: str, path: Path, expected_suffix: str -) -> str: - return ERR_INVALID_OUTPUT_EXT.format( - label=label, path=path, expected_suffix=expected_suffix - ) - - -def fmt_invalid_output_path(*, label: str, path: Path, error: object) -> str: - return ERR_INVALID_OUTPUT_PATH.format(label=label, path=path, error=error) - - -def fmt_invalid_baseline_path(*, path: Path, error: object) -> str: - return ERR_INVALID_BASELINE_PATH.format(path=path, error=error) - - -def fmt_baseline_write_failed(*, path: Path, error: object) -> str: - return ERR_BASELINE_WRITE_FAILED.format(path=path, error=error) - - -def fmt_report_write_failed(*, label: str, path: Path, error: object) -> str: - return ERR_REPORT_WRITE_FAILED.format(label=label, path=path, error=error) - - -def fmt_html_report_open_failed(*, path: Path, error: object) -> str: - return WARN_HTML_REPORT_OPEN_FAILED.format(path=path, error=error) - - -def fmt_coverage_join_ignored(error: object) -> str: - return WARN_COVERAGE_JOIN_IGNORED.format(error=error) - - -def fmt_unreadable_source_in_gating(*, count: int) -> str: - return ERR_UNREADABLE_SOURCE_IN_GATING.format(count=count) - - -def fmt_processing_changed(count: int) -> str: - return INFO_PROCESSING_CHANGED.format(count=count) - - -def fmt_worker_failed(error: object) -> str: - return WARN_WORKER_FAILED.format(error=error) - - -def fmt_batch_item_failed(error: object) -> str: - return WARN_BATCH_ITEM_FAILED.format(error=error) - - -def fmt_parallel_fallback(error: object) -> str: - return WARN_PARALLEL_FALLBACK.format(error=error) - - -def fmt_failed_files_header(count: int) -> str: - return WARN_FAILED_FILES_HEADER.format(count=count) - - -def fmt_cache_save_failed(error: object) -> str: - return WARN_CACHE_SAVE_FAILED.format(error=error) - - -def 
fmt_vscode_extension_tip(*, url: str) -> str: - return TIP_VSCODE_EXTENSION.format(url=url) - - -def fmt_dead_code_reachability_migration_note( - *, - target_version: str = "2.0.1", -) -> str: - if target_version == "2.0.2": - return NOTE_DEAD_CODE_REACHABILITY_2_0_2_MIGRATION - return NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION - - -def fmt_legacy_cache_warning(*, legacy_path: Path, new_path: Path) -> str: - return WARN_LEGACY_CACHE.format(legacy_path=legacy_path, new_path=new_path) - - -def fmt_invalid_baseline(error: object) -> str: - return ERR_INVALID_BASELINE.format(error=error) - - -def fmt_baseline_gating_requires_trusted(*, ci: bool) -> str: - return ( - ERR_BASELINE_CI_REQUIRES_TRUSTED if ci else ERR_BASELINE_GATING_REQUIRES_TRUSTED - ) - - -def fmt_cli_runtime_warning(message: object) -> str: - source = _RICH_MARKUP_TAG_RE.sub("", str(message)).strip() - paragraphs = [ - line.strip() for raw_line in source.splitlines() if (line := raw_line.strip()) - ] - rendered: list[str] = [] - for index, paragraph in enumerate(paragraphs): - label = "Warning" - body = paragraph.rstrip() - lowered = body.lower() - if lowered.startswith("cache "): - label = "Cache" - body = body[6:] - elif lowered.startswith("baseline "): - label = "Baseline" - body = body[9:] - elif lowered.startswith("legacy cache "): - label = "Cache" - - segments = [segment.strip() for segment in body.split("; ") if segment.strip()] - head = segments[0].rstrip(".)") if segments else body.rstrip(".)") - details: list[str] = [] - if " (" in head: - head, extra = head.split(" (", 1) - details.append(extra.rstrip(".)")) - if not details and ": " in head: - head, extra = head.split(": ", 1) - details.append(extra.rstrip(".)")) - details.extend(segment.rstrip(".)") for segment in segments[1:]) - - rendered.append(f" [warning]{label}[/warning] {head}") - for detail in details: - rendered.extend( - [ - f" [dim]{wrapped}[/dim]" - for wrapped in textwrap.wrap( - detail, - width=max(40, CLI_LAYOUT_MAX_WIDTH - 
8), - break_long_words=False, - break_on_hyphens=False, - ) - ] - ) - if index != len(paragraphs) - 1: - rendered.append("") - return "\n".join(rendered) - - -def fmt_path(template: str, path: Path) -> str: - return template.format(path=path) - - -def fmt_summary_compact( - *, found: int, analyzed: int, cache_hits: int, skipped: int -) -> str: - return SUMMARY_COMPACT.format( - found=found, analyzed=analyzed, cache_hits=cache_hits, skipped=skipped - ) - - -def fmt_summary_compact_clones( - *, - function: int, - block: int, - segment: int, - suppressed: int, - fixture_excluded: int, - new: int, -) -> str: - parts = [ - f"Clones func={function}", - f"block={block}", - f"seg={segment}", - f"suppressed={suppressed}", - ] - if fixture_excluded > 0: - parts.append(f"fixtures={fixture_excluded}") - parts.append(f"new={new}") - return " ".join(parts) - - -def fmt_summary_compact_metrics( - *, - cc_avg: float, - cc_max: int, - cbo_avg: float, - cbo_max: int, - lcom_avg: float, - lcom_max: int, - cycles: int, - dead: int, - health: int, - grade: str, - overloaded_modules: int, -) -> str: - return SUMMARY_COMPACT_METRICS.format( - cc_avg=f"{cc_avg:.1f}", - cc_max=cc_max, - cbo_avg=f"{cbo_avg:.1f}", - cbo_max=cbo_max, - lcom_avg=f"{lcom_avg:.1f}", - lcom_max=lcom_max, - cycles=cycles, - dead=dead, - health=health, - grade=grade, - overloaded_modules=overloaded_modules, - ) - - -def fmt_summary_compact_dependencies( - *, - avg_depth: float, - p95_depth: int, - max_depth: int, -) -> str: - return SUMMARY_COMPACT_DEPENDENCIES.format( - avg_depth=f"{avg_depth:.1f}", - p95_depth=p95_depth, - max_depth=max_depth, - ) - - -def fmt_summary_compact_security_surfaces( - *, - items: int, - categories: int, - production: int, - tests: int, -) -> str: - return SUMMARY_COMPACT_SECURITY_SURFACES.format( - items=items, - categories=categories, - production=production, - tests=tests, - ) - - -def fmt_summary_compact_adoption( - *, - param_permille: int, - return_permille: int, - 
docstring_permille: int, - any_annotation_count: int, -) -> str: - return ( - "Adoption" - f" params={_format_permille_pct(param_permille)}" - f" returns={_format_permille_pct(return_permille)}" - f" docstrings={_format_permille_pct(docstring_permille)}" - f" any={any_annotation_count}" - ) - - -def fmt_summary_compact_api_surface( - *, - public_symbols: int, - modules: int, - added: int, - breaking: int, -) -> str: - return ( - "Public API" - f" symbols={public_symbols}" - f" modules={modules}" - f" breaking={breaking}" - f" added={added}" - ) - - -def fmt_summary_compact_coverage_join( - *, - status: str, - overall_permille: int, - coverage_hotspots: int, - scope_gap_hotspots: int, - threshold_percent: int, - source_label: str, -) -> str: - parts = [f"Coverage status={status or 'unknown'}"] - if status == "ok": - parts.extend( - [ - f"overall={_format_permille_pct(overall_permille)}", - f"coverage_hotspots={coverage_hotspots}", - f"threshold={threshold_percent}", - ] - ) - if scope_gap_hotspots > 0: - parts.append(f"scope_gaps={scope_gap_hotspots}") - if source_label: - parts.append(f"source={source_label}") - return " ".join(parts) - - -_HEALTH_GRADE_STYLE: dict[str, str] = { - HEALTH_GRADE_A: "bold green", - HEALTH_GRADE_B: "green", - HEALTH_GRADE_C: "yellow", - HEALTH_GRADE_D: "bold red", - HEALTH_GRADE_F: "bold red", -} - -_L = 13 # label column width (after 2-space indent) - - -def _v(n: int, style: str = "") -> str: - """Format value: dim if zero, styled otherwise.""" - match (n == 0, bool(style)): - case (True, _): - return f"[dim]{n}[/dim]" - case (False, True): - return f"[{style}]{n}[/{style}]" - case _: - return str(n) - - -def _vn(n: int, style: str = "") -> str: - """Format value with comma separator: dim if zero, styled otherwise.""" - match (n == 0, bool(style)): - case (True, _): - return f"[dim]{n:,}[/dim]" - case (False, True): - return f"[{style}]{n:,}[/{style}]" - case _: - return f"{n:,}" - - -def fmt_summary_files(*, found: int, analyzed: 
int, cached: int, skipped: int) -> str: - parts = [ - f"{_v(found, 'bold')} found", - f"{_v(analyzed, 'bold cyan')} analyzed", - f"{_v(cached)} cached", - f"{_v(skipped)} skipped", - ] - val = " \u00b7 ".join(parts) - return f" {'Files':<{_L}}{val}" - - -def fmt_summary_parsed( - *, lines: int, functions: int, methods: int, classes: int -) -> str | None: - if lines == 0 and functions == 0 and methods == 0 and classes == 0: - return None - callable_count = functions + methods - parts = [f"{_vn(lines, 'bold cyan')} lines"] - if callable_count: - parts.append(f"{_v(callable_count, 'bold cyan')} callables") - if classes: - parts.append(f"{_v(classes, 'bold cyan')} classes") - val = " \u00b7 ".join(parts) - return f" {'Parsed':<{_L}}{val}" - - -def fmt_summary_clones( - *, - func: int, - block: int, - segment: int, - suppressed: int, - fixture_excluded: int, - new: int, -) -> str: - clone_parts = [ - f"{_v(func, 'bold yellow')} func", - f"{_v(block, 'bold yellow')} block", - ] - if segment: - clone_parts.append(f"{_v(segment, 'bold yellow')} seg") - main = " \u00b7 ".join(clone_parts) - quals = [ - f"{_v(suppressed, 'yellow')} suppressed", - ] - if fixture_excluded > 0: - quals.append(f"{_v(fixture_excluded, 'yellow')} fixtures") - quals.append(f"{_v(new, 'bold red')} new") - return f" {'Clones':<{_L}}{main} ({', '.join(quals)})" - - -def fmt_metrics_health(total: int, grade: str) -> str: - s = _HEALTH_GRADE_STYLE.get(grade, "bold") - return f" {'Health':<{_L}}[{s}]{total}/100 ({grade})[/{s}]" - - -def fmt_metrics_cc(avg: float, max_val: int, high_risk: int) -> str: - hr = ( - f"[bold red]{high_risk} high-risk[/bold red]" - if high_risk - else "[dim]0 high-risk[/dim]" - ) - return f" {'CC':<{_L}}avg {avg:.1f} \u00b7 max {max_val} \u00b7 {hr}" - - -def fmt_metrics_coupling(avg: float, max_val: int) -> str: - return f" {'Coupling':<{_L}}avg {avg:.1f} \u00b7 max {max_val}" - - -def fmt_metrics_cohesion(avg: float, max_val: int) -> str: - return f" {'Cohesion':<{_L}}avg 
{avg:.1f} \u00b7 max {max_val}" - - -def fmt_metrics_cycles(count: int) -> str: - match count: - case 0: - return f" {'Cycles':<{_L}}[green]\u2714 clean[/green]" - case _: - return f" {'Cycles':<{_L}}[bold red]{count} detected[/bold red]" - - -def fmt_metrics_dependencies( - *, avg_depth: float, p95_depth: int, max_depth: int -) -> str: - return ( - f" {'Dependencies':<{_L}}" - f"avg {avg_depth:.1f} · p95 {p95_depth} · max {max_depth}" - ) - - -def fmt_metrics_security_surfaces( - *, - items: int, - categories: int, - production: int, - tests: int, -) -> str: - return ( - f" {'Security':<{_L}}" - f"{_v(items, 'bold cyan')} surfaces" - f" · {_v(categories, 'bold cyan')} categories" - f" · production {_v(production)}" - f" · tests {_v(tests)}" - ) - - -def fmt_metrics_dead_code(count: int, *, suppressed: int = 0) -> str: - suppressed_suffix = ( - f" [dim]({suppressed} suppressed)[/dim]" if suppressed > 0 else "" - ) - match count: - case 0: - return ( - f" {'Dead code':<{_L}}[green]\u2714 clean[/green]{suppressed_suffix}" - ) - case _: - return ( - f" {'Dead code':<{_L}}[bold red]{count} found[/bold red]" - f"{suppressed_suffix}" - ) - - -def _format_permille_pct(value: int) -> str: - return f"{value / 10.0:.1f}%" - - -def fmt_metrics_adoption( - *, - param_permille: int, - return_permille: int, - docstring_permille: int, - any_annotation_count: int, -) -> str: - parts = [ - f"params {_format_permille_pct(param_permille)}", - f"returns {_format_permille_pct(return_permille)}", - f"docstrings {_format_permille_pct(docstring_permille)}", - f"Any {_v(any_annotation_count)}", - ] - return f" {'Adoption':<{_L}}{' · '.join(parts)}" - - -def fmt_metrics_api_surface( - *, - public_symbols: int, - modules: int, - added: int, - breaking: int, -) -> str: - parts = [ - f"{_v(public_symbols, 'bold cyan')} symbols", - f"{_v(modules, 'bold cyan')} modules", - ] - if breaking > 0 or added > 0: - parts.append( - " / ".join( - [ - f"{_v(breaking, 'bold red')} breaking", - f"{_v(added, 
'bold cyan')} added", - ] - ) - ) - return f" {'Public API':<{_L}}{' · '.join(parts)}" - - -def fmt_metrics_coverage_join( - *, - status: str, - overall_permille: int, - coverage_hotspots: int, - scope_gap_hotspots: int, - threshold_percent: int, - source_label: str, -) -> str: - if status != "ok": - parts = ["join unavailable"] - if source_label: - parts.append(source_label) - return f" {'Coverage':<{_L}}[yellow]{' · '.join(parts)}[/yellow]" - parts = [ - f"{_format_permille_pct(overall_permille)} overall", - f"{_v(coverage_hotspots, 'bold red')} hotspots < {threshold_percent}%", - ] - if scope_gap_hotspots > 0: - parts.append(f"{_v(scope_gap_hotspots, 'bold yellow')} scope gaps") - if source_label: - parts.append(source_label) - return f" {'Coverage':<{_L}}{' · '.join(parts)}" - - -def fmt_metrics_overloaded_modules( - *, - candidates: int, - total: int, - population_status: str, - top_score: float, -) -> str: - parts = [f"{_v(candidates, 'bold magenta')} candidates"] - if top_score > 0: - parts.append(f"max score {top_score:.2f}") - parts.append(f"{_vn(total)} ranked") - summary = " \u00b7 ".join(parts) - note = "report-only" - if population_status and population_status != "ok": - note = f"{note}; {population_status.replace('_', ' ')} population" - return f" {'Overloaded':<{_L}}{summary} [dim]({note})[/dim]" - - -def fmt_changed_scope_paths(*, count: int) -> str: - return f" {'Paths':<{_L}}{_v(count, 'bold cyan')} from git diff" - - -def fmt_changed_scope_findings(*, total: int, new: int, known: int) -> str: - parts = [ - f"{_v(total, 'bold')} total", - f"{_v(new, 'bold cyan')} new", - f"{_v(known)} known", - ] - separator = " \u00b7 " - return f" {'Findings':<{_L}}{separator.join(parts)}" - - -def fmt_changed_scope_compact( - *, - paths: int, - findings: int, - new: int, - known: int, -) -> str: - return SUMMARY_COMPACT_CHANGED_SCOPE.format( - paths=paths, - findings=findings, - new=new, - known=known, - ) - - -def fmt_pipeline_done(elapsed: float) -> str: - 
return f" [dim]Pipeline done in {elapsed:.2f}s[/dim]" - - -def fmt_contract_error(message: str) -> str: - return f"{MARKER_CONTRACT_ERROR}\n{message}" - - -def fmt_internal_error( - error: BaseException, - *, - issues_url: str = ISSUES_URL, - debug: bool = False, -) -> str: - bug_report_url = issues_url.rstrip("/") + "/new?template=bug_report.yml" - error_name = type(error).__name__ - error_text = str(error).strip() or "" - lines = [ - MARKER_INTERNAL_ERROR, - "Unexpected exception.", - f"Reason: {error_name}: {error_text}", - "", - "Next steps:", - "- Re-run with --debug to include a traceback.", - f"- If this is reproducible, open an issue: {bug_report_url}.", - ( - "- Attach: command line, CodeClone version, Python version, " - "and the report file if generated." - ), - ] - if not debug: - return "\n".join(lines) - - traceback_lines = traceback.format_exception( - type(error), error, error.__traceback__ - ) - command_line = shlex.join(sys.argv) - lines.extend( - [ - "", - "DEBUG DETAILS", - f"Platform: {platform.platform()}", - f"Python: {sys.version.split()[0]}", - f"CodeClone: {__version__}", - f"Command: {command_line}", - f"CWD: {Path.cwd()}", - "Traceback:", - "".join(traceback_lines).rstrip(), - ] - ) - return "\n".join(lines) diff --git a/codeclone/ui_messages/controller.py b/codeclone/ui_messages/controller.py new file mode 100644 index 00000000..a6b3bcc9 --- /dev/null +++ b/codeclone/ui_messages/controller.py @@ -0,0 +1,236 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Controller query-mode screen copy.""" + +from __future__ import annotations + +from typing import Final + +# ── workflow flag validation ───────────────────────────────────────── + +ERR_STRICTNESS_PATCH_VERIFY_ONLY: Final = ( + "--strictness is only valid with --patch-verify." +) +ERR_SESSION_STATS_COMBINED: Final = ( + "--session-stats cannot be combined with " + "--audit, --blast-radius, or --patch-verify." +) +ERR_AUDIT_COMBINED: Final = ( + "--audit cannot be combined with --blast-radius or --patch-verify." +) +ERR_BLAST_PATCH_BOTH: Final = "Use --blast-radius or --patch-verify, not both." +ERR_CONTROLLER_NO_BASELINE_UPDATE: Final = ( + "Controller query modes cannot update baselines." +) +ERR_CONTROLLER_NO_CHANGED_SCOPE: Final = ( + "Controller query modes cannot be combined with changed-scope flags." +) +ERR_CONTROLLER_TERMINAL_ONLY: Final = ( + "Controller query modes are terminal-only and cannot be combined " + "with report output flags." +) + +# ── metrics baseline ──────────────────────────────────────────────── + +ERR_METRICS_BASELINE_REQUIRES_ANALYSIS: Final = ( + "Metrics baseline operations require metrics analysis. Remove --skip-metrics." +) +ERR_METRICS_BASELINE_REQUIRED_FOR_GATES: Final = ( + "Metrics baseline file is required for metrics baseline-aware gates. " + "Run codeclone . --update-metrics-baseline first." +) +ERR_METRICS_BASELINE_UPDATE_WITHOUT_METRICS: Final = ( + "Cannot update metrics baseline: metrics were not computed." +) +ERR_METRICS_BASELINE_TYPING_GATES: Final = ( + "Typing/docstring regression gates require a metrics baseline that includes " + "coverage adoption data. Run codeclone . --update-metrics-baseline first." +) +ERR_METRICS_BASELINE_API_GATES: Final = ( + "API break gating requires a metrics baseline with public API surface data. " + "Run codeclone . --api-surface --update-metrics-baseline first." 
+) + +# ── session stats ─────────────────────────────────────────────────── + +SESSION_STATS_READ_FAILED: Final = "failed to read session state: {error}" +SESSION_STATS_TITLE: Final = "Session Stats" +SESSION_STATS_WORKSPACE: Final = "Workspace:" +SESSION_STATS_INTENT_REGISTRY: Final = "Intent registry:" +SESSION_STATS_AUDIT: Final = "Audit trail:" +SESSION_STATS_AUDIT_ENABLED: Final = "enabled" +SESSION_STATS_LATEST_RUN: Final = "Latest run:" +SESSION_STATS_LATEST_RUN_NONE: Final = "none" +SESSION_STATS_LATEST_RUN_SOURCE_DISK: Final = "persisted report (CLI)" +SESSION_STATS_LATEST_RUN_SOURCE_AUDIT_MCP: Final = "MCP session (audit)" +SESSION_STATS_LATEST_RUN_SOURCE_AUDIT_CLI: Final = "CLI run (audit)" +SESSION_STATS_CACHE: Final = "Cache:" +SESSION_STATS_LIVE_AGENTS: Final = "Live agents:" +SESSION_STATS_ACTIVE_INTENTS: Final = "Active edit intents:" +SESSION_STATS_VISIBLE_INTENTS: Final = "Visible intent records:" +SESSION_STATS_STALE: Final = "Stale intents:" +SESSION_STATS_EXPIRED: Final = "Expired intents:" +SESSION_STATS_RECOVERABLE: Final = "Recoverable:" +SESSION_STATS_WORKSPACE_HEALTH: Final = "Workspace health:" +SESSION_STATS_NO_AGENTS: Final = "No live workspace agents found." +SESSION_STATS_REPORT_PRESENT: Final = "report.json present ({files} files)" +SESSION_STATS_RETENTION_FOOTPRINT: Final = "Retention payload footprint" +SESSION_STATS_RETENTION_FOOTPRINT_VERBOSE: Final = ( + "Retention payload footprint: ~{tokens:,} tokens in retention window " + "({encoding}, {calls} tool calls)" +) +SESSION_STATS_TOP_WORKFLOWS: Final = "Top payload workflows" +SESSION_STATS_WORKSPACE_INTENT_RECORDS_TITLE: Final = "Workspace intent records" +AUDIT_NOT_ENABLED: Final = "audit is not enabled." 
+ +# ── audit trail ───────────────────────────────────────────────────── + +AUDIT_TITLE: Final = "Controller Audit Trail" +AUDIT_DATABASE: Final = "Database:" +AUDIT_RETENTION: Final = "Retention:" +AUDIT_OLDEST: Final = "Oldest event:" +AUDIT_LATEST: Final = "Latest event:" +AUDIT_SUMMARY: Final = "Summary:" +AUDIT_VIOLATIONS: Final = "Violations:" +AUDIT_MCP_FOOTPRINT_PANEL: Final = "MCP Payload Footprint" +AUDIT_TOKENS_BY_TYPE: Final = "Tokens by Type" +AUDIT_TOP_WORKFLOWS: Final = "Top Workflows" +AUDIT_TOP_PAYLOADS: Final = "Top Payloads" +AUDIT_PAYLOAD_BUDGET_WARNINGS: Final = "Payload Budget Warnings" +AUDIT_MCP_PAYLOAD_FOOTPRINT_ROW: Final = "MCP payload footprint" +AUDIT_NONE: Final = "none" +AUDIT_COL_WORKFLOW: Final = "Workflow" +AUDIT_COL_TOKENS: Final = "Tokens" +AUDIT_COL_TIME: Final = "Time" +AUDIT_COL_TYPE: Final = "Type" +AUDIT_COL_SEVERITY: Final = "Severity" +AUDIT_COL_INTENT: Final = "Intent" +AUDIT_COL_STATUS: Final = "Status" +AUDIT_COL_RUN: Final = "Run" +AUDIT_COL_AGENT: Final = "Agent" +AUDIT_COL_FIRST: Final = "First" +AUDIT_COL_LAST: Final = "Last" +AUDIT_STAT_TOTAL_TOKENS: Final = "Retention window total" +AUDIT_STAT_TOOL_CALLS: Final = "Retention window calls" +AUDIT_STAT_AVG_TOKENS: Final = "Avg tokens/call" +AUDIT_STAT_P95_TOKENS: Final = "p95 tokens" +AUDIT_STAT_MAX_TOKENS: Final = "Max tokens" +AUDIT_STAT_ENCODING: Final = "Encoding" +AUDIT_BREAKDOWN_COL_CALLS: Final = "Calls" +AUDIT_BREAKDOWN_COL_TOTAL: Final = "Total" +AUDIT_BREAKDOWN_COL_MAX: Final = "Max" +AUDIT_TOP_COL_RANK: Final = "#" +AUDIT_FIELD_EMPTY: Final = "-" +AUDIT_TOKENS_EMPTY: Final = "—" +AUDIT_QUIET_PREFIX: Final = "audit:" +AUDIT_QUIET_TEMPLATE: Final = ( + "{prefix} {total_events} events | " + "intents={intent_events} contracts={contract_events} " + "receipts={receipt_events} violations={violation_events} " + "last={last_relative}" +) +AUDIT_EVENT_TYPE_ALIASES: Final[dict[str, str]] = { + "intent.declared": "decl", + "intent.checked": "check", + 
"intent.expanded": "expand", + "intent.violated": "intent!", + "intent.cleared": "clear", + "intent.renewed": "renew", + "blast_radius.computed": "radius", + "patch_budget.computed": "budget", + "patch_contract.verified": "verify", + "patch_contract.violated": "verify!", + "patch_contract.expired": "expired", + "claim_validation.completed": "claims", + "claim_validation.violated": "claims!", + "review_receipt.created": "receipt", + "baseline_abuse.detected": "baseline!", + "workspace.conflict_detected": "conflict", + "workspace.gc_completed": "gc", +} +AUDIT_BUDGET_WORKFLOW_HEAVY: Final = ( + "Workflow {workflow} totals {total_tokens:,} tokens, above " + "{threshold:,} threshold (heavy)" +) +AUDIT_BUDGET_WORKFLOW_WATCH: Final = ( + "Workflow {workflow} totals {total_tokens:,} tokens, above " + "{threshold:,} threshold (watch)" +) +AUDIT_BUDGET_PAYLOAD_HEAVY: Final = ( + "{event_type} payload {estimated_tokens:,} tokens (heavy)" +) +AUDIT_RELATIVE_NONE: Final = "none" + +# ── session stats ─────────────────────────────────────────────────── + +SESSION_STATS_QUIET_PREFIX: Final = "session-stats:" +SESSION_STATS_QUIET_TEMPLATE: Final = ( + "{prefix} {workspace_health} | live_agents={live_agents} " + "active_intents={active_intents} visible_intents={visible_intents} " + "stale={stale} latest_run={latest_run}" +) +SESSION_STATS_QUIET_HEALTH: Final = "health={health}" +SESSION_STATS_COL_PID: Final = "PID" +SESSION_STATS_COL_AGENT: Final = "Agent" +SESSION_STATS_COL_OWNERSHIP: Final = "Ownership" +SESSION_STATS_COL_STATUS: Final = "Status" +SESSION_STATS_COL_SCOPE: Final = "Scope" +SESSION_STATS_COL_LEASE: Final = "Lease" +SESSION_STATS_COL_FILES: Final = "Files" +SESSION_STATS_COL_WORKFLOW: Final = "Workflow" +SESSION_STATS_COL_TOKENS: Final = "Tokens" +SESSION_STATS_COL_CALLS: Final = "Calls" +SESSION_STATS_AGENT_UNKNOWN: Final = "unknown" +SESSION_STATS_ALLOWED_PREFIX: Final = "allowed:" +SESSION_STATS_LEASE_REMAINING: Final = "lease: {lease} remaining" 
+SESSION_STATS_SCOPE_FILE: Final = "file" +SESSION_STATS_SCOPE_FILES: Final = "files" + +# ── patch verify ──────────────────────────────────────────────────── + +PATCH_VERIFY_LABEL_STRICTNESS: Final = "Strictness:" +PATCH_VERIFY_LABEL_STATUS: Final = "Status:" +PATCH_VERIFY_LABEL_HEALTH: Final = "Health:" +PATCH_VERIFY_LABEL_STRUCTURAL_DELTA: Final = "Structural delta:" +PATCH_VERIFY_LABEL_REGRESSIONS: Final = "Regressions:" +PATCH_VERIFY_LABEL_IMPROVEMENTS: Final = "Improvements:" +PATCH_VERIFY_LABEL_VERDICT: Final = "Verdict:" +PATCH_VERIFY_LABEL_GATE_PREVIEW: Final = "Gate preview:" +PATCH_VERIFY_GATE_EXIT: Final = "(exit {exit_code})" +PATCH_VERIFY_CONTRACT_VIOLATIONS: Final = "Contract violations" +PATCH_VERIFY_VERDICT_REGRESSED: Final = "regressed" +PATCH_VERIFY_VERDICT_STABLE: Final = "stable" +PATCH_VERIFY_ACCEPTED: Final = "Patch contract accepted." +PATCH_VERIFY_VIOLATED: Final = "Patch contract violated." +PATCH_VERIFY_RELAXED_ADVISORY: Final = ( + "Patch contract has advisory violations but relaxed mode exits 0." +) + +# ── blast radius ──────────────────────────────────────────────────── + +BLAST_RADIUS_FILES: Final = "Files:" +BLAST_RADIUS_RISK_LEVEL: Final = "Risk level:" +BLAST_RADIUS_DIRECT_DEPENDENTS: Final = "Direct dependents" +BLAST_RADIUS_CLONE_COHORT: Final = "Clone cohort members" +BLAST_RADIUS_DEPENDENCY_CYCLES: Final = "Dependency cycles" +BLAST_RADIUS_DO_NOT_TOUCH: Final = "Do not touch" +BLAST_RADIUS_REVIEW_CONTEXT: Final = "Review context" +BLAST_RADIUS_GUARDRAILS: Final = "Guardrails:" +BLAST_RADIUS_NONE: Final = "none" +BLAST_RADIUS_MORE: Final = "... and {count} more" +BLAST_RADIUS_REQUIRES_REPORT: Final = ( + "Blast radius requires a canonical report document." 
+) +BLAST_RADIUS_INVALID_SELECTION: Final = ( + "Invalid --blast-radius path selection:\n{rendered}" +) +BLAST_RADIUS_SKIPPED_INVENTORY: Final = ( + "Blast radius skipped files outside analysis inventory: {rendered}" +) +BLAST_RADIUS_REQUIRES_INVENTORY_FILE: Final = ( + "--blast-radius requires at least one file from the analysis inventory." +) diff --git a/codeclone/ui_messages/formatters.py b/codeclone/ui_messages/formatters.py new file mode 100644 index 00000000..a5cb0fa4 --- /dev/null +++ b/codeclone/ui_messages/formatters.py @@ -0,0 +1,700 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI message formatters.""" + +from __future__ import annotations + +import platform +import shlex +import sys +import textwrap +import traceback +from pathlib import Path + +from .. 
import __version__ +from ..contracts import ISSUES_URL +from ..paths.gitignore import ( + GITIGNORE_CODECLONE_CACHE_MESSAGE, + GITIGNORE_CODECLONE_CACHE_SUGGESTED_ENTRY, +) +from .labels import ( + CLI_LAYOUT_MAX_WIDTH, + SUMMARY_COMPACT, + SUMMARY_COMPACT_BLAST_RADIUS, + SUMMARY_COMPACT_CHANGED_SCOPE, + SUMMARY_COMPACT_DEPENDENCIES, + SUMMARY_COMPACT_METRICS, + SUMMARY_COMPACT_PATCH_VERIFY, + SUMMARY_COMPACT_SECURITY_SURFACES, +) +from .markers import BANNER_SUBTITLE, MARKER_CONTRACT_ERROR, MARKER_INTERNAL_ERROR +from .runtime import ( + ERR_BASELINE_CI_REQUIRES_TRUSTED, + ERR_BASELINE_GATING_REQUIRES_TRUSTED, + ERR_BASELINE_WRITE_FAILED, + ERR_INVALID_BASELINE, + ERR_INVALID_BASELINE_PATH, + ERR_INVALID_OUTPUT_EXT, + ERR_INVALID_OUTPUT_PATH, + ERR_REPORT_WRITE_FAILED, + ERR_UNREADABLE_SOURCE_IN_GATING, + INFO_PROCESSING_CHANGED, + NOTE_COHESION_LCOM4_2_1_MIGRATION, + NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION, + NOTE_DEAD_CODE_REACHABILITY_2_0_2_MIGRATION, + TIP_GITIGNORE_CODECLONE_CACHE, + TIP_VSCODE_EXTENSION, + WARN_BATCH_ITEM_FAILED, + WARN_CACHE_SAVE_FAILED, + WARN_COVERAGE_JOIN_IGNORED, + WARN_FAILED_FILES_HEADER, + WARN_HTML_REPORT_OPEN_FAILED, + WARN_LEGACY_CACHE, + WARN_LEGACY_REPO_WORKSPACE, + WARN_PARALLEL_FALLBACK, + WARN_WORKER_FAILED, +) +from .styling import ( + _HEALTH_GRADE_STYLE, + _L, + _RICH_MARKUP_TAG_RE, + _format_permille_pct, + _v, + _vn, +) + + +def version_output(version: str) -> str: + return f"CodeClone {version}" + + +def banner_title(version: str) -> str: + return ( + f" [bold white]CodeClone[/bold white] [dim]v{version}[/dim]" + f" [dim]\u00b7[/dim] [dim]{BANNER_SUBTITLE}[/dim]" + ) + + +def fmt_invalid_output_extension( + *, label: str, path: Path, expected_suffix: str +) -> str: + return ERR_INVALID_OUTPUT_EXT.format( + label=label, path=path, expected_suffix=expected_suffix + ) + + +def fmt_invalid_output_path(*, label: str, path: Path, error: object) -> str: + return ERR_INVALID_OUTPUT_PATH.format(label=label, path=path, 
error=error) + + +def fmt_invalid_baseline_path(*, path: Path, error: object) -> str: + return ERR_INVALID_BASELINE_PATH.format(path=path, error=error) + + +def fmt_baseline_write_failed(*, path: Path, error: object) -> str: + return ERR_BASELINE_WRITE_FAILED.format(path=path, error=error) + + +def fmt_report_write_failed(*, label: str, path: Path, error: object) -> str: + return ERR_REPORT_WRITE_FAILED.format(label=label, path=path, error=error) + + +def fmt_html_report_open_failed(*, path: Path, error: object) -> str: + return WARN_HTML_REPORT_OPEN_FAILED.format(path=path, error=error) + + +def fmt_coverage_join_ignored(error: object) -> str: + return WARN_COVERAGE_JOIN_IGNORED.format(error=error) + + +def fmt_unreadable_source_in_gating(*, count: int) -> str: + return ERR_UNREADABLE_SOURCE_IN_GATING.format(count=count) + + +def fmt_processing_changed(count: int) -> str: + return INFO_PROCESSING_CHANGED.format(count=count) + + +def fmt_worker_failed(error: object) -> str: + return WARN_WORKER_FAILED.format(error=error) + + +def fmt_batch_item_failed(error: object) -> str: + return WARN_BATCH_ITEM_FAILED.format(error=error) + + +def fmt_parallel_fallback(error: object) -> str: + return WARN_PARALLEL_FALLBACK.format(error=error) + + +def fmt_failed_files_header(count: int) -> str: + return WARN_FAILED_FILES_HEADER.format(count=count) + + +def fmt_cache_save_failed(error: object) -> str: + return WARN_CACHE_SAVE_FAILED.format(error=error) + + +def fmt_vscode_extension_tip(*, url: str) -> str: + return TIP_VSCODE_EXTENSION.format(url=url) + + +def fmt_gitignore_codeclone_cache_tip() -> str: + return TIP_GITIGNORE_CODECLONE_CACHE.format( + message=GITIGNORE_CODECLONE_CACHE_MESSAGE, + entry=GITIGNORE_CODECLONE_CACHE_SUGGESTED_ENTRY, + ) + + +def fmt_dead_code_reachability_migration_note( + *, + target_version: str = "2.0.1", +) -> str: + if target_version == "2.0.2": + return NOTE_DEAD_CODE_REACHABILITY_2_0_2_MIGRATION + return 
NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION + + +def fmt_cohesion_lcom4_migration_note( + *, + target_version: str = "2.1.0", +) -> str: + _ = target_version + return NOTE_COHESION_LCOM4_2_1_MIGRATION + + +def fmt_legacy_cache_warning(*, legacy_path: Path, new_path: Path) -> str: + return WARN_LEGACY_CACHE.format(legacy_path=legacy_path, new_path=new_path) + + +def fmt_legacy_repo_workspace_warning(*, legacy_dir: Path, new_dir: Path) -> str: + return WARN_LEGACY_REPO_WORKSPACE.format(legacy_dir=legacy_dir, new_dir=new_dir) + + +def fmt_invalid_baseline(error: object) -> str: + return ERR_INVALID_BASELINE.format(error=error) + + +def fmt_baseline_gating_requires_trusted(*, ci: bool) -> str: + return ( + ERR_BASELINE_CI_REQUIRES_TRUSTED if ci else ERR_BASELINE_GATING_REQUIRES_TRUSTED + ) + + +def fmt_cli_runtime_warning(message: object) -> str: + source = _RICH_MARKUP_TAG_RE.sub("", str(message)).strip() + paragraphs = [ + line.strip() for raw_line in source.splitlines() if (line := raw_line.strip()) + ] + rendered: list[str] = [] + for index, paragraph in enumerate(paragraphs): + label = "Warning" + body = paragraph.rstrip() + lowered = body.lower() + if lowered.startswith("cache "): + label = "Cache" + body = body[6:] + elif lowered.startswith("baseline "): + label = "Baseline" + body = body[9:] + elif lowered.startswith("legacy cache "): + label = "Cache" + + segments = [segment.strip() for segment in body.split("; ") if segment.strip()] + head = segments[0].rstrip(".)") if segments else body.rstrip(".)") + details: list[str] = [] + if " (" in head: + head, extra = head.split(" (", 1) + details.append(extra.rstrip(".)")) + if not details and ": " in head: + head, extra = head.split(": ", 1) + details.append(extra.rstrip(".)")) + details.extend(segment.rstrip(".)") for segment in segments[1:]) + + rendered.append(f" [warning]{label}[/warning] {head}") + for detail in details: + rendered.extend( + [ + f" [dim]{wrapped}[/dim]" + for wrapped in textwrap.wrap( + 
detail, + width=max(40, CLI_LAYOUT_MAX_WIDTH - 8), + break_long_words=False, + break_on_hyphens=False, + ) + ] + ) + if index != len(paragraphs) - 1: + rendered.append("") + return "\n".join(rendered) + + +def fmt_path(template: str, path: Path) -> str: + return template.format(path=path) + + +def fmt_summary_compact( + *, found: int, analyzed: int, cache_hits: int, skipped: int +) -> str: + return SUMMARY_COMPACT.format( + found=found, analyzed=analyzed, cache_hits=cache_hits, skipped=skipped + ) + + +def fmt_summary_compact_clones( + *, + function: int, + block: int, + segment: int, + suppressed: int, + fixture_excluded: int, + new: int, +) -> str: + parts = [ + f"Clones func={function}", + f"block={block}", + f"seg={segment}", + f"suppressed={suppressed}", + ] + if fixture_excluded > 0: + parts.append(f"fixtures={fixture_excluded}") + parts.append(f"new={new}") + return " ".join(parts) + + +def fmt_summary_compact_metrics( + *, + cc_avg: float, + cc_max: int, + cbo_avg: float, + cbo_max: int, + lcom_avg: float, + lcom_max: int, + cycles: int, + dead: int, + health: int, + grade: str, + overloaded_modules: int, +) -> str: + return SUMMARY_COMPACT_METRICS.format( + cc_avg=f"{cc_avg:.1f}", + cc_max=cc_max, + cbo_avg=f"{cbo_avg:.1f}", + cbo_max=cbo_max, + lcom_avg=f"{lcom_avg:.1f}", + lcom_max=lcom_max, + cycles=cycles, + dead=dead, + health=health, + grade=grade, + overloaded_modules=overloaded_modules, + ) + + +def fmt_summary_compact_dependencies( + *, + avg_depth: float, + p95_depth: int, + max_depth: int, +) -> str: + return SUMMARY_COMPACT_DEPENDENCIES.format( + avg_depth=f"{avg_depth:.1f}", + p95_depth=p95_depth, + max_depth=max_depth, + ) + + +def fmt_summary_compact_security_surfaces( + *, + items: int, + categories: int, + production: int, + tests: int, +) -> str: + return SUMMARY_COMPACT_SECURITY_SURFACES.format( + items=items, + categories=categories, + production=production, + tests=tests, + ) + + +def fmt_summary_compact_adoption( + *, + 
param_permille: int, + return_permille: int, + docstring_permille: int, + any_annotation_count: int, +) -> str: + return ( + "Adoption" + f" params={_format_permille_pct(param_permille)}" + f" returns={_format_permille_pct(return_permille)}" + f" docstrings={_format_permille_pct(docstring_permille)}" + f" any={any_annotation_count}" + ) + + +def fmt_summary_compact_api_surface( + *, + public_symbols: int, + modules: int, + added: int, + breaking: int, +) -> str: + return ( + "Public API" + f" symbols={public_symbols}" + f" modules={modules}" + f" breaking={breaking}" + f" added={added}" + ) + + +def fmt_summary_compact_coverage_join( + *, + status: str, + overall_permille: int, + coverage_hotspots: int, + scope_gap_hotspots: int, + threshold_percent: int, + source_label: str, +) -> str: + parts = [f"Coverage status={status or 'unknown'}"] + if status == "ok": + parts.extend( + [ + f"overall={_format_permille_pct(overall_permille)}", + f"coverage_hotspots={coverage_hotspots}", + f"threshold={threshold_percent}", + ] + ) + if scope_gap_hotspots > 0: + parts.append(f"scope_gaps={scope_gap_hotspots}") + if source_label: + parts.append(f"source={source_label}") + return " ".join(parts) + + +def fmt_summary_files(*, found: int, analyzed: int, cached: int, skipped: int) -> str: + parts = [ + f"{_v(found, 'bold')} found", + f"{_v(analyzed, 'bold cyan')} analyzed", + f"{_v(cached)} cached", + f"{_v(skipped)} skipped", + ] + val = " \u00b7 ".join(parts) + return f" {'Files':<{_L}}{val}" + + +def fmt_summary_parsed( + *, lines: int, functions: int, methods: int, classes: int +) -> str | None: + if lines == 0 and functions == 0 and methods == 0 and classes == 0: + return None + callable_count = functions + methods + parts = [f"{_vn(lines, 'bold cyan')} lines"] + if callable_count: + parts.append(f"{_v(callable_count, 'bold cyan')} callables") + if classes: + parts.append(f"{_v(classes, 'bold cyan')} classes") + val = " \u00b7 ".join(parts) + return f" {'Parsed':<{_L}}{val}" + 
+ +def fmt_summary_clones( + *, + func: int, + block: int, + segment: int, + suppressed: int, + fixture_excluded: int, + new: int, +) -> str: + clone_parts = [ + f"{_v(func, 'bold yellow')} func", + f"{_v(block, 'bold yellow')} block", + ] + if segment: + clone_parts.append(f"{_v(segment, 'bold yellow')} seg") + main = " \u00b7 ".join(clone_parts) + quals = [ + f"{_v(suppressed, 'yellow')} suppressed", + ] + if fixture_excluded > 0: + quals.append(f"{_v(fixture_excluded, 'yellow')} fixtures") + quals.append(f"{_v(new, 'bold red')} new") + return f" {'Clones':<{_L}}{main} ({', '.join(quals)})" + + +def fmt_metrics_health(total: int, grade: str) -> str: + s = _HEALTH_GRADE_STYLE.get(grade, "bold") + return f" {'Health':<{_L}}[{s}]{total}/100 ({grade})[/{s}]" + + +def fmt_metrics_cc(avg: float, max_val: int, high_risk: int) -> str: + hr = ( + f"[bold red]{high_risk} high-risk[/bold red]" + if high_risk + else "[dim]0 high-risk[/dim]" + ) + return f" {'CC':<{_L}}avg {avg:.1f} \u00b7 max {max_val} \u00b7 {hr}" + + +def fmt_metrics_coupling(avg: float, max_val: int) -> str: + return f" {'Coupling':<{_L}}avg {avg:.1f} \u00b7 max {max_val}" + + +def fmt_metrics_cohesion(avg: float, max_val: int) -> str: + return f" {'Cohesion':<{_L}}avg {avg:.1f} \u00b7 max {max_val}" + + +def fmt_metrics_cycles(count: int) -> str: + match count: + case 0: + return f" {'Cycles':<{_L}}[green]\u2714 clean[/green]" + case _: + return f" {'Cycles':<{_L}}[bold red]{count} detected[/bold red]" + + +def fmt_metrics_dependencies( + *, avg_depth: float, p95_depth: int, max_depth: int +) -> str: + return ( + f" {'Dependencies':<{_L}}" + f"avg {avg_depth:.1f} · p95 {p95_depth} · max {max_depth}" + ) + + +def fmt_metrics_security_surfaces( + *, + items: int, + categories: int, + production: int, + tests: int, +) -> str: + return ( + f" {'Security':<{_L}}" + f"{_v(items, 'bold cyan')} surfaces" + f" · {_v(categories, 'bold cyan')} categories" + f" · production {_v(production)}" + f" · tests 
{_v(tests)}" + ) + + +def fmt_metrics_dead_code(count: int, *, suppressed: int = 0) -> str: + suppressed_suffix = ( + f" [dim]({suppressed} suppressed)[/dim]" if suppressed > 0 else "" + ) + match count: + case 0: + return ( + f" {'Dead code':<{_L}}[green]\u2714 clean[/green]{suppressed_suffix}" + ) + case _: + return ( + f" {'Dead code':<{_L}}[bold red]{count} found[/bold red]" + f"{suppressed_suffix}" + ) + + +def fmt_metrics_adoption( + *, + param_permille: int, + return_permille: int, + docstring_permille: int, + any_annotation_count: int, +) -> str: + parts = [ + f"params {_format_permille_pct(param_permille)}", + f"returns {_format_permille_pct(return_permille)}", + f"docstrings {_format_permille_pct(docstring_permille)}", + f"Any {_v(any_annotation_count)}", + ] + return f" {'Adoption':<{_L}}{' · '.join(parts)}" + + +def fmt_metrics_api_surface( + *, + public_symbols: int, + modules: int, + added: int, + breaking: int, +) -> str: + parts = [ + f"{_v(public_symbols, 'bold cyan')} symbols", + f"{_v(modules, 'bold cyan')} modules", + ] + if breaking > 0 or added > 0: + parts.append( + " / ".join( + [ + f"{_v(breaking, 'bold red')} breaking", + f"{_v(added, 'bold cyan')} added", + ] + ) + ) + return f" {'Public API':<{_L}}{' · '.join(parts)}" + + +def fmt_metrics_coverage_join( + *, + status: str, + overall_permille: int, + coverage_hotspots: int, + scope_gap_hotspots: int, + threshold_percent: int, + source_label: str, +) -> str: + if status != "ok": + parts = ["join unavailable"] + if source_label: + parts.append(source_label) + return f" {'Coverage':<{_L}}[yellow]{' · '.join(parts)}[/yellow]" + parts = [ + f"{_format_permille_pct(overall_permille)} overall", + f"{_v(coverage_hotspots, 'bold red')} hotspots < {threshold_percent}%", + ] + if scope_gap_hotspots > 0: + parts.append(f"{_v(scope_gap_hotspots, 'bold yellow')} scope gaps") + if source_label: + parts.append(source_label) + return f" {'Coverage':<{_L}}{' · '.join(parts)}" + + +def 
fmt_metrics_overloaded_modules( + *, + candidates: int, + total: int, + population_status: str, + top_score: float, +) -> str: + parts = [f"{_v(candidates, 'bold magenta')} candidates"] + if top_score > 0: + parts.append(f"max score {top_score:.2f}") + parts.append(f"{_vn(total)} ranked") + summary = " \u00b7 ".join(parts) + note = "report-only" + if population_status and population_status != "ok": + note = f"{note}; {population_status.replace('_', ' ')} population" + return f" {'Overloaded':<{_L}}{summary} [dim]({note})[/dim]" + + +def fmt_changed_scope_paths(*, count: int) -> str: + return f" {'Paths':<{_L}}{_v(count, 'bold cyan')} from git diff" + + +def fmt_changed_scope_findings(*, total: int, new: int, known: int) -> str: + parts = [ + f"{_v(total, 'bold')} total", + f"{_v(new, 'bold cyan')} new", + f"{_v(known)} known", + ] + separator = " \u00b7 " + return f" {'Findings':<{_L}}{separator.join(parts)}" + + +def fmt_changed_scope_compact( + *, + paths: int, + findings: int, + new: int, + known: int, +) -> str: + return SUMMARY_COMPACT_CHANGED_SCOPE.format( + paths=paths, + findings=findings, + new=new, + known=known, + ) + + +def fmt_blast_radius_compact( + *, + level: str, + dependents: int, + cohorts: int, + cycles: int, + do_not_touch: int, +) -> str: + return SUMMARY_COMPACT_BLAST_RADIUS.format( + level=level, + dependents=dependents, + cohorts=cohorts, + cycles=cycles, + do_not_touch=do_not_touch, + ) + + +def fmt_patch_verify_compact( + *, + status: str, + health_before: int, + health_after: int, + regressions: int, + gate_status: str, +) -> str: + return SUMMARY_COMPACT_PATCH_VERIFY.format( + status=status, + health_before=health_before, + health_after=health_after, + regressions=regressions, + gate_status=gate_status, + ) + + +def fmt_pipeline_done(elapsed: float) -> str: + return f" [dim]Pipeline done in {elapsed:.2f}s[/dim]" + + +def fmt_contract_error(message: str) -> str: + return f"{MARKER_CONTRACT_ERROR}\n{message}" + + +def fmt_internal_error( 
+ error: BaseException, + *, + issues_url: str = ISSUES_URL, + debug: bool = False, +) -> str: + bug_report_url = issues_url.rstrip("/") + "/new?template=bug_report.yml" + error_name = type(error).__name__ + error_text = str(error).strip() or "" + lines = [ + MARKER_INTERNAL_ERROR, + "Unexpected exception.", + f"Reason: {error_name}: {error_text}", + "", + "Next steps:", + "- Re-run with --debug to include a traceback.", + f"- If this is reproducible, open an issue: {bug_report_url}.", + ( + "- Attach: command line, CodeClone version, Python version, " + "and the report file if generated." + ), + ] + if not debug: + return "\n".join(lines) + + traceback_lines = traceback.format_exception( + type(error), error, error.__traceback__ + ) + command_line = shlex.join(sys.argv) + lines.extend( + [ + "", + "DEBUG DETAILS", + f"Platform: {platform.platform()}", + f"Python: {sys.version.split()[0]}", + f"CodeClone: {__version__}", + f"Command: {command_line}", + f"CWD: {Path.cwd()}", + "Traceback:", + "".join(traceback_lines).rstrip(), + ] + ) + return "\n".join(lines) diff --git a/codeclone/ui_messages/help.py b/codeclone/ui_messages/help.py new file mode 100644 index 00000000..f6391812 --- /dev/null +++ b/codeclone/ui_messages/help.py @@ -0,0 +1,214 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI flag help text for argparse.""" + +from __future__ import annotations + +from pathlib import Path + +from ..contracts import ( + DEFAULT_BASELINE_PATH, + DEFAULT_COVERAGE_MIN, + DEFAULT_HTML_REPORT_PATH, + DEFAULT_JSON_REPORT_PATH, + DEFAULT_MARKDOWN_REPORT_PATH, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_SARIF_REPORT_PATH, + DEFAULT_TEXT_REPORT_PATH, +) + +HELP_VERSION = "Print the CodeClone version and exit." +HELP_ROOT = "Project root directory to scan.\nDefaults to the current directory." +HELP_MIN_LOC = ( + "Minimum Lines of Code (LOC) required for clone analysis.\n" + f"Default: {DEFAULT_MIN_LOC}." +) +HELP_MIN_STMT = ( + "Minimum AST statement count required for clone analysis.\n" + f"Default: {DEFAULT_MIN_STMT}." +) +HELP_PROCESSES = f"Number of parallel worker processes.\nDefault: {DEFAULT_PROCESSES}." +HELP_CHANGED_ONLY = ( + "Limit clone gating and changed-scope summaries to findings that touch\n" + "files from a git diff selection." +) +HELP_DIFF_AGAINST = ( + "Resolve changed files from `git diff --name-only `.\n" + "Use together with --changed-only." +) +HELP_PATHS_FROM_GIT_DIFF = ( + "Shorthand for --changed-only using `git diff --name-only `.\n" + "Useful for PR and CI review flows." +) +HELP_BLAST_RADIUS = ( + "Show structural blast radius for the given files.\n" + "Runs analysis first, then projects dependents, clone cohorts,\n" + "risk signals, and do-not-touch boundaries." +) +HELP_PATCH_VERIFY = ( + "Verify the current patch against the trusted baseline budget.\n" + "Runs analysis, checks baseline-relative regressions and gate status, then exits." +) +HELP_STRICTNESS = ( + "Strictness profile for --patch-verify: ci, strict, or relaxed.\nDefault: ci." 
+) +HELP_SESSION_STATS = ( + "Show workspace session status: active agents, intents, lease health.\n" + "Read-only, does not run analysis." +) +HELP_AUDIT = ( + "Show local Controller audit trail from the configured audit database.\n" + "Read-only, does not run analysis." +) +HELP_AUDIT_JSON = ( + "Output audit payload footprint as JSON.\n" + "Implies --audit. Useful for cross-repository comparison." +) +HELP_CACHE_PATH = ( + "Path to the cache file.\nIf FILE is omitted, uses /.codeclone/cache.json." +) +HELP_CACHE_DIR_LEGACY = ( + "Legacy alias for --cache-path.\nPrefer --cache-path in new configurations." +) +HELP_MAX_BASELINE_SIZE_MB = ( + f"Maximum allowed baseline size in MB.\nDefault: {DEFAULT_MAX_BASELINE_SIZE_MB}." +) +HELP_MAX_CACHE_SIZE_MB = ( + f"Maximum cache file size in MB.\nDefault: {DEFAULT_MAX_CACHE_SIZE_MB}." +) +HELP_BASELINE = ( + "Path to the clone baseline.\n" + f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." +) +HELP_UPDATE_BASELINE = ( + "Overwrite the clone baseline with current results.\nDisabled by default." +) +HELP_FAIL_ON_NEW = ( + "Exit with code 3 if NEW clone findings not present in the baseline\nare detected." +) +HELP_FAIL_THRESHOLD = ( + "Exit with code 3 if the total number of function + block clone groups\n" + "exceeds this value.\n" + "Disabled unless set." +) +HELP_FAIL_COMPLEXITY = ( + "Exit with code 3 if any function exceeds the cyclomatic complexity\n" + "threshold.\n" + "If enabled without a value, uses 20." +) +HELP_FAIL_COUPLING = ( + "Exit with code 3 if any class exceeds the coupling threshold.\n" + "If enabled without a value, uses 10." +) +HELP_FAIL_COHESION = ( + "Exit with code 3 if any class exceeds the cohesion threshold.\n" + "If enabled without a value, uses 4." +) +HELP_FAIL_CYCLES = "Exit with code 3 if circular module dependencies are detected." +HELP_FAIL_DEAD_CODE = "Exit with code 3 if high-confidence dead code is detected." 
+HELP_FAIL_HEALTH = ( + "Exit with code 3 if the overall health score falls below the threshold.\n" + "If enabled without a value, uses 60." +) +HELP_FAIL_ON_NEW_METRICS = ( + "Exit with code 3 if new metrics violations appear relative to the\n" + "metrics baseline." +) +HELP_API_SURFACE = ( + "Collect public API surface facts for baseline-aware compatibility review.\n" + "Disabled by default." +) +HELP_COVERAGE = ( + "Join external Cobertura XML line coverage to function spans.\n" + "Pass a `coverage xml` report path." +) +HELP_FAIL_ON_TYPING_REGRESSION = ( + "Exit with code 3 if typing adoption coverage regresses relative to the\n" + "metrics baseline." +) +HELP_FAIL_ON_DOCSTRING_REGRESSION = ( + "Exit with code 3 if public docstring coverage regresses relative to the\n" + "metrics baseline." +) +HELP_FAIL_ON_API_BREAK = ( + "Exit with code 3 if public API removals or signature breaks are detected\n" + "relative to the metrics baseline." +) +HELP_FAIL_ON_UNTESTED_HOTSPOTS = ( + "Exit with code 3 if medium/high-risk functions measured by Coverage Join\n" + "fall below the joined coverage threshold.\nRequires --coverage." +) +HELP_MIN_TYPING_COVERAGE = ( + "Exit with code 3 if parameter typing coverage falls below the threshold.\n" + "Threshold is a whole percent from 0 to 100." +) +HELP_MIN_DOCSTRING_COVERAGE = ( + "Exit with code 3 if public docstring coverage falls below the threshold.\n" + "Threshold is a whole percent from 0 to 100." +) +HELP_COVERAGE_MIN = ( + "Coverage threshold for untested hotspot detection.\n" + "Threshold is a whole percent from 0 to 100.\n" + f"Default: {DEFAULT_COVERAGE_MIN}." +) +HELP_CI = ( + "Enable CI preset.\n" + "Equivalent to: --fail-on-new --no-color --quiet.\n" + "When a trusted metrics baseline is available, CI mode also enables\n" + "metrics regression gating." +) +HELP_UPDATE_METRICS_BASELINE = ( + "Overwrite the metrics baseline with current metrics.\nDisabled by default." 
+) +HELP_METRICS_BASELINE = ( + "Path to the metrics baseline.\n" + f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." +) +HELP_SKIP_METRICS = "Skip full metrics analysis and run in clone-only mode." +HELP_SKIP_DEAD_CODE = "Skip dead code detection." +HELP_SKIP_DEPENDENCIES = "Skip dependency graph analysis." +HELP_HTML = ( + "Generate an HTML report.\n" + f"If FILE is omitted, writes to {DEFAULT_HTML_REPORT_PATH}." +) +HELP_JSON = ( + "Generate the canonical JSON report.\n" + f"If FILE is omitted, writes to {DEFAULT_JSON_REPORT_PATH}." +) +HELP_MD = ( + "Generate a Markdown report.\n" + f"If FILE is omitted, writes to {DEFAULT_MARKDOWN_REPORT_PATH}." +) +HELP_SARIF = ( + "Generate a SARIF 2.1.0 report.\n" + f"If FILE is omitted, writes to {DEFAULT_SARIF_REPORT_PATH}." +) +HELP_TEXT = ( + "Generate a plain-text report.\n" + f"If FILE is omitted, writes to {DEFAULT_TEXT_REPORT_PATH}." +) +HELP_OPEN_HTML_REPORT = ( + "Open the generated HTML report in the default browser.\nRequires --html." +) +HELP_TIMESTAMPED_REPORT_PATHS = ( + "Append a UTC timestamp to default report filenames.\n" + "Applies only to report flags passed without FILE." +) +HELP_NO_PROGRESS = "Disable progress output.\nRecommended for CI logs." +HELP_PROGRESS = "Force-enable progress output." +HELP_NO_COLOR = "Disable ANSI colors." +HELP_COLOR = "Force-enable ANSI colors." +HELP_QUIET = "Reduce output to warnings, errors, and essential summaries." +HELP_VERBOSE = "Include detailed identifiers for NEW clone findings." +HELP_DEBUG = ( + "Print debug details for internal errors, including traceback and\n" + "environment information." +) diff --git a/codeclone/ui_messages/labels.py b/codeclone/ui_messages/labels.py new file mode 100644 index 00000000..35ff0079 --- /dev/null +++ b/codeclone/ui_messages/labels.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI summary titles, labels, and compact templates.""" + +from __future__ import annotations + +SUMMARY_TITLE = "Summary" +METRICS_TITLE = "Metrics" +CHANGED_SCOPE_TITLE = "Changed Scope" +BLAST_RADIUS_TITLE = "Blast Radius" +PATCH_VERIFY_TITLE = "Patch Verify" + +CLI_LAYOUT_MAX_WIDTH = 80 +CLI_AUDIT_MAX_WIDTH = 120 + +SUMMARY_LABEL_FILES_FOUND = "Files found" +SUMMARY_LABEL_FILES_ANALYZED = " analyzed" +SUMMARY_LABEL_CACHE_HITS = " from cache" +SUMMARY_LABEL_FILES_SKIPPED = " skipped" +SUMMARY_LABEL_LINES_ANALYZED = "Lines (this run)" +SUMMARY_LABEL_FUNCTIONS_ANALYZED = "Functions (this run)" +SUMMARY_LABEL_METHODS_ANALYZED = "Methods (this run)" +SUMMARY_LABEL_CLASSES_ANALYZED = "Classes (this run)" +SUMMARY_LABEL_FUNCTION = "Function clones" +SUMMARY_LABEL_BLOCK = "Block clones" +SUMMARY_LABEL_SEGMENT = "Segment clones" +SUMMARY_LABEL_SUPPRESSED = " suppressed" +SUMMARY_LABEL_NEW_BASELINE = "New vs baseline" + +SUMMARY_COMPACT = ( + "Summary found={found} analyzed={analyzed}" + " cached={cache_hits} skipped={skipped}" +) +SUMMARY_COMPACT_CLONES = ( + "Clones func={function} block={block} seg={segment}" + " suppressed={suppressed} new={new}" +) +SUMMARY_COMPACT_METRICS = ( + "Metrics cc={cc_avg}/{cc_max} cbo={cbo_avg}/{cbo_max}" + " lcom4={lcom_avg}/{lcom_max} cycles={cycles} dead_code={dead}" + " health={health}({grade}) overloaded_modules={overloaded_modules}" +) +SUMMARY_COMPACT_DEPENDENCIES = ( + "Dependencies avg={avg_depth} p95={p95_depth} max={max_depth}" +) +SUMMARY_COMPACT_SECURITY_SURFACES = ( + "Security items={items} categories={categories}" + " production={production} tests={tests}" +) +SUMMARY_COMPACT_CHANGED_SCOPE = ( + "Changed paths={paths} findings={findings} new={new} known={known}" +) +SUMMARY_COMPACT_BLAST_RADIUS = ( + "blast-radius: {level} | 
dependents={dependents} cohorts={cohorts} " + "cycles={cycles} do-not-touch={do_not_touch}" +) +SUMMARY_COMPACT_PATCH_VERIFY = ( + "patch-verify: {status} | health={health_before}->{health_after} " + "regressions={regressions} gates={gate_status}" +) diff --git a/codeclone/ui_messages/markers.py b/codeclone/ui_messages/markers.py new file mode 100644 index 00000000..eaf8425a --- /dev/null +++ b/codeclone/ui_messages/markers.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI markers and display names.""" + +from __future__ import annotations + +BANNER_SUBTITLE = "Structural review layer" + +MARKER_CONTRACT_ERROR = "[error]CONTRACT ERROR:[/error]" +MARKER_INTERNAL_ERROR = "[error]INTERNAL ERROR:[/error]" + +REPORT_BLOCK_GROUP_DISPLAY_NAME_ASSERT_PATTERN = "Assert pattern block" diff --git a/codeclone/ui_messages/runtime.py b/codeclone/ui_messages/runtime.py new file mode 100644 index 00000000..0a6ac665 --- /dev/null +++ b/codeclone/ui_messages/runtime.py @@ -0,0 +1,147 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CLI runtime status, warning, error, and gate messages.""" + +from __future__ import annotations + +WARN_SUMMARY_ACCOUNTING_MISMATCH = ( + "Summary accounting mismatch: " + "files_found != files_analyzed + cache_hits + files_skipped" +) + +STATUS_DISCOVERING = "[bold green]Discovering Python files..." +STATUS_GROUPING = "[bold green]Grouping clones..." 
+ +INFO_PROCESSING_CHANGED = "[info]Processing {count} changed files...[/info]" + +WARN_WORKER_FAILED = "[warning]Worker failed: {error}[/warning]" +WARN_BATCH_ITEM_FAILED = "[warning]Failed to process batch item: {error}[/warning]" +WARN_PARALLEL_FALLBACK = ( + "[warning]Parallel processing unavailable, " + "falling back to sequential: {error}[/warning]" +) +WARN_FAILED_FILES_HEADER = "\n[warning]{count} files failed to process:[/warning]" +WARN_CACHE_SAVE_FAILED = "[warning]Failed to save cache: {error}[/warning]" +WARN_HTML_REPORT_OPEN_FAILED = ( + "[warning]Failed to open HTML report in browser: {path} ({error}).[/warning]" +) +WARN_COVERAGE_JOIN_IGNORED = "[warning]Coverage join ignored: {error}[/warning]" + +ERR_INVALID_OUTPUT_EXT = ( + "[error]Invalid {label} output extension: {path} " + "(expected {expected_suffix}).[/error]" +) +ERR_INVALID_OUTPUT_PATH = ( + "[error]Invalid {label} output path: {path} ({error}).[/error]" +) +ERR_ROOT_NOT_FOUND = "[error]Root path does not exist: {path}[/error]" +ERR_INVALID_ROOT_PATH = "[error]Invalid root path: {error}[/error]" +ERR_SCAN_FAILED = "[error]Scan failed: {error}[/error]" +ERR_INVALID_BASELINE_PATH = "[error]Invalid baseline path: {path} ({error}).[/error]" +ERR_BASELINE_WRITE_FAILED = ( + "[error]Failed to write baseline file: {path} ({error}).[/error]" +) +ERR_REPORT_WRITE_FAILED = ( + "[error]Failed to write {label} report: {path} ({error}).[/error]" +) +ERR_OPEN_HTML_REPORT_REQUIRES_HTML = ( + "[error]--open-html-report requires --html.[/error]" +) +ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT = ( + "[error]--timestamped-report-paths requires at least one report output " + "flag.[/error]" +) +ERR_UNREADABLE_SOURCE_IN_GATING = ( + "One or more source files could not be read in CI/gating mode.\n" + "Unreadable source files: {count}." 
+) + +WARN_LEGACY_CACHE = ( + "[warning]Legacy cache file found at: {legacy_path}.[/warning]\n" + "[warning]Cache is now stored per-project at: {new_path}.[/warning]\n" + "[warning]Please delete the legacy cache file and add " + "`.codeclone/` to .gitignore.[/warning]" +) +WARN_LEGACY_REPO_WORKSPACE = ( + "[warning]Legacy CodeClone workspace (.cache/codeclone/) found at: " + "{legacy_dir}.[/warning]\n" + "[warning]Artifacts now live under: {new_dir}.[/warning]\n" + "[warning]Remove the legacy directory after you no longer need its " + "contents.[/warning]" +) + +ERR_INVALID_BASELINE = ( + "[error]Invalid baseline file.[/error]\n" + "{error}\n" + "Please regenerate the baseline with --update-baseline." +) +ACTION_UPDATE_BASELINE = "Run: codeclone . --update-baseline" +WARN_BASELINE_MISSING = ( + "[warning]Baseline file not found at: [bold]{path}[/bold][/warning]\n" + "[dim]Comparing against an empty baseline. " + "Use --update-baseline to create it.[/dim]\n" + f"[dim]{ACTION_UPDATE_BASELINE}[/dim]" +) +WARN_BASELINE_IGNORED = ( + "[warning]Baseline is not trusted for this run and will be ignored.[/warning]\n" + "[dim]Comparison will proceed against an empty baseline.[/dim]\n" + f"[dim]{ACTION_UPDATE_BASELINE}[/dim]" +) +ERR_BASELINE_CI_REQUIRES_TRUSTED = ( + f"[error]CI requires a trusted baseline.[/error]\n{ACTION_UPDATE_BASELINE}" +) +ERR_BASELINE_GATING_REQUIRES_TRUSTED = ( + "[error]Baseline-aware gates require a trusted baseline.[/error]\n" + f"{ACTION_UPDATE_BASELINE}" +) +SUCCESS_BASELINE_UPDATED = "✔ Baseline updated: {path}" + +FAIL_NEW_TITLE = "[error]FAILED: New code clones detected.[/error]" +FAIL_NEW_SUMMARY_TITLE = "Summary:" +FAIL_NEW_FUNCTION = "- New function clone groups: {count}" +FAIL_NEW_BLOCK = "- New block clone groups: {count}" +FAIL_NEW_REPORT_TITLE = "See detailed report:" +FAIL_NEW_ACCEPT_TITLE = "To accept these clones as technical debt, run:" +FAIL_NEW_ACCEPT_COMMAND = " codeclone . 
--update-baseline" +FAIL_NEW_DETAIL_FUNCTION = "Details (function clone hashes):" +FAIL_NEW_DETAIL_BLOCK = "Details (block clone hashes):" +FAIL_METRICS_TITLE = "[error]FAILED: Metrics quality gate triggered.[/error]" + +WARN_NEW_CLONES_WITHOUT_FAIL = ( + "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n" + "Run with --update-baseline to accept them as technical debt." +) +TIP_VSCODE_EXTENSION = ( + "\n[dim]Tip:[/dim] VS Code detected. " + "CodeClone has a native extension for triage-first review and hotspot " + "navigation.\n" + "[dim]{url}[/dim]" +) +NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION = ( + "\n[dim]Note:[/dim] Dead-code reachability was refined in 2.0.1 for " + "common Python frameworks.\n" + "[dim]Fewer dead-code findings after upgrading from 2.0.0 are expected: " + "this usually means reduced false positives, not weaker detection.[/dim]" +) +NOTE_DEAD_CODE_REACHABILITY_2_0_2_MIGRATION = ( + "\n[dim]Note:[/dim] Dead-code reachability was refined again in 2.0.2.\n" + "[dim]Fewer dead-code findings after upgrading from 2.0.1 are expected: " + "framework hooks, public exports, and guarded dynamic dispatch now produce " + "fewer false positives, not weaker detection.[/dim]" +) +NOTE_COHESION_LCOM4_2_1_MIGRATION = ( + "\n[dim]Note:[/dim] Class cohesion (LCOM4) applicability was refined in " + "2.1.0.\n" + "[dim]Cohesion counts and low-cohesion class totals may change after " + "upgrading from 2.0.2: Protocol interfaces and Pydantic validation hooks " + "are excluded from the LCOM4 graph. 
This reflects tighter applicability " + "rules, not weaker detection.[/dim]" +) +TIP_GITIGNORE_CODECLONE_CACHE = ( + "\n[dim]Tip:[/dim] {message}\n[dim]Suggested entry: `{entry}`[/dim]" +) +NOTE_DEAD_CODE_REACHABILITY_MIGRATION = NOTE_DEAD_CODE_REACHABILITY_2_0_1_MIGRATION diff --git a/codeclone/ui_messages/styling.py b/codeclone/ui_messages/styling.py new file mode 100644 index 00000000..513fa971 --- /dev/null +++ b/codeclone/ui_messages/styling.py @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared Rich formatting helpers for CLI output.""" + +from __future__ import annotations + +import re + +from ..domain.quality import ( + HEALTH_GRADE_A, + HEALTH_GRADE_B, + HEALTH_GRADE_C, + HEALTH_GRADE_D, + HEALTH_GRADE_F, +) + +_RICH_MARKUP_TAG_RE = re.compile(r"\[/?[a-zA-Z][a-zA-Z0-9_ .#:-]*]") +_HEALTH_GRADE_STYLE: dict[str, str] = { + HEALTH_GRADE_A: "bold green", + HEALTH_GRADE_B: "green", + HEALTH_GRADE_C: "yellow", + HEALTH_GRADE_D: "bold red", + HEALTH_GRADE_F: "bold red", +} + +_L = 13 # label column width (after 2-space indent) + + +def _v(n: int, style: str = "") -> str: + """Format value: dim if zero, styled otherwise.""" + match (n == 0, bool(style)): + case (True, _): + return f"[dim]{n}[/dim]" + case (False, True): + return f"[{style}]{n}[/{style}]" + case _: + return str(n) + + +def _vn(n: int, style: str = "") -> str: + """Format value with comma separator: dim if zero, styled otherwise.""" + match (n == 0, bool(style)): + case (True, _): + return f"[dim]{n:,}[/dim]" + case (False, True): + return f"[{style}]{n:,}[/{style}]" + case _: + return f"{n:,}" + + +def _format_permille_pct(value: int) -> str: + return f"{value / 10.0:.1f}%" diff --git a/codeclone/utils/ci.py b/codeclone/utils/ci.py new file mode 
100644 index 00000000..0e3b3612 --- /dev/null +++ b/codeclone/utils/ci.py @@ -0,0 +1,33 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CI environment detection. + +A pure env check with no project dependencies, so any layer (config, memory, +observability) can import it without an upward dependency. Single source — the +memory jobs module re-exports it for its existing callers. +""" + +from __future__ import annotations + +import os +from collections.abc import Mapping + +_CI_ENV_KEYS: tuple[str, ...] = ( + "CI", + "GITHUB_ACTIONS", + "BUILDKITE", + "TF_BUILD", + "TEAMCITY_VERSION", +) + + +def is_ci_environment(environ: Mapping[str, str] | None = None) -> bool: + active = environ if environ is not None else os.environ + return any(active.get(key, "").strip() for key in _CI_ENV_KEYS) + + +__all__ = ["is_ci_environment"] diff --git a/codeclone/utils/file_lock.py b/codeclone/utils/file_lock.py new file mode 100644 index 00000000..a7b248fd --- /dev/null +++ b/codeclone/utils/file_lock.py @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Cross-process advisory file locks shared by memory and MCP surfaces.""" + +from __future__ import annotations + +import os +import sys +import time +from collections.abc import Callable, Iterator +from contextlib import contextmanager +from pathlib import Path +from typing import BinaryIO, Final + +DEFAULT_FILE_LOCK_POLL_SECONDS: Final[float] = 0.05 + + +@contextmanager +def advisory_file_lock( + lock_path: Path, + *, + timeout_seconds: float, + poll_seconds: float = DEFAULT_FILE_LOCK_POLL_SECONDS, + timeout_error: Callable[[Path], Exception], +) -> Iterator[None]: + lock_path.parent.mkdir(parents=True, exist_ok=True) + deadline = time.monotonic() + timeout_seconds + with _open_lock_file(lock_path) as handle: + if handle.seek(0, os.SEEK_END) == 0: + handle.write(b"\0") + handle.flush() + while True: + try: + _acquire_exclusive_lock(handle) + except (OSError, BlockingIOError) as exc: + if time.monotonic() >= deadline: + raise timeout_error(lock_path) from exc + time.sleep(poll_seconds) + continue + try: + yield + finally: + _release_exclusive_lock(handle) + return + + +def _open_lock_file(lock_path: Path) -> BinaryIO: + if sys.platform == "win32": + return lock_path.open("a+b") + flags = os.O_RDWR | os.O_CREAT + nofollow = getattr(os, "O_NOFOLLOW", 0) + if isinstance(nofollow, int): + flags |= nofollow + fd = os.open(lock_path, flags, 0o600) + return os.fdopen(fd, "r+b") + + +def _acquire_exclusive_lock(handle: object) -> None: + fileno = handle.fileno() # type: ignore[attr-defined] + if sys.platform == "win32": + import msvcrt + + msvcrt.locking(fileno, msvcrt.LK_NBLCK, 1) + return + import fcntl + + fcntl.flock(fileno, fcntl.LOCK_EX | fcntl.LOCK_NB) + + +def _release_exclusive_lock(handle: object) -> None: + fileno = handle.fileno() # type: ignore[attr-defined] + if sys.platform == "win32": + import msvcrt + + msvcrt.locking(fileno, msvcrt.LK_UNLCK, 1) + return + import fcntl + + 
fcntl.flock(fileno, fcntl.LOCK_UN) + + +__all__ = ["DEFAULT_FILE_LOCK_POLL_SECONDS", "advisory_file_lock"] diff --git a/codeclone/utils/iterutils.py b/codeclone/utils/iterutils.py new file mode 100644 index 00000000..2fd8d3a3 --- /dev/null +++ b/codeclone/utils/iterutils.py @@ -0,0 +1,32 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Small iterator utilities shared across the package.""" + +from __future__ import annotations + +from collections.abc import Iterable, Iterator +from itertools import islice +from typing import TypeVar + +_T = TypeVar("_T") + + +def chunked(items: Iterable[_T], size: int) -> Iterator[tuple[_T, ...]]: + """Yield successive ``size``-length tuples from ``items``; the final chunk + may be shorter. Empty input yields nothing. + + Python 3.10+ compatible (stdlib ``itertools.batched`` is 3.12+). 
+ """ + if size < 1: + msg = "size must be >= 1" + raise ValueError(msg) + iterator = iter(items) + while chunk := tuple(islice(iterator, size)): + yield chunk + + +__all__ = ["chunked"] diff --git a/codeclone/utils/json_io.py b/codeclone/utils/json_io.py index 69dd93c6..788220ea 100644 --- a/codeclone/utils/json_io.py +++ b/codeclone/utils/json_io.py @@ -8,11 +8,16 @@ import os import tempfile -from json import JSONDecodeError from pathlib import Path import orjson +DEFAULT_MAX_JSON_BYTES = 64 * 1024 * 1024 + + +class BoundedReadError(OSError): + """Raised when a bounded file read exceeds the configured byte cap.""" + def json_text( data: object, @@ -32,21 +37,51 @@ def json_text( return text -def read_json_document(path: Path) -> object: - try: - return orjson.loads(path.read_text("utf-8")) - except JSONDecodeError: - return orjson.loads(path.read_bytes()) +def read_bounded_bytes( + path: Path, + *, + max_bytes: int = DEFAULT_MAX_JSON_BYTES, +) -> bytes: + if max_bytes <= 0: + raise ValueError("max_bytes must be positive") + with path.open("rb") as handle: + payload = handle.read(max_bytes + 1) + if len(payload) > max_bytes: + raise BoundedReadError( + f"File too large ({len(payload)} bytes, max {max_bytes}) at {path}" + ) + return payload -def read_json_object(path: Path) -> dict[str, object]: - payload = read_json_document(path) +def read_json_document( + path: Path, + *, + max_bytes: int = DEFAULT_MAX_JSON_BYTES, +) -> object: + return orjson.loads(read_bounded_bytes(path, max_bytes=max_bytes)) + + +def read_json_object( + path: Path, + *, + max_bytes: int = DEFAULT_MAX_JSON_BYTES, +) -> dict[str, object]: + payload = read_json_document(path, max_bytes=max_bytes) if not isinstance(payload, dict): raise TypeError("JSON payload must be an object") return payload +def _validate_atomic_target(path: Path) -> None: + if path.is_symlink(): + raise OSError(f"Refusing to replace symlink target: {path}") + parent = path.parent + if parent.exists() and 
parent.is_symlink(): + raise OSError(f"Refusing to write through symlink directory: {parent}") + + def write_json_text_atomically(path: Path, text: str) -> None: + _validate_atomic_target(path) fd_num, tmp_name = tempfile.mkstemp( dir=path.parent, suffix=".tmp", @@ -72,6 +107,7 @@ def write_json_document_atomically( trailing_newline: bool = False, ) -> None: path.parent.mkdir(parents=True, exist_ok=True) + _validate_atomic_target(path) write_json_text_atomically( path, json_text( @@ -81,3 +117,15 @@ def write_json_document_atomically( trailing_newline=trailing_newline, ), ) + + +__all__ = [ + "DEFAULT_MAX_JSON_BYTES", + "BoundedReadError", + "json_text", + "read_bounded_bytes", + "read_json_document", + "read_json_object", + "write_json_document_atomically", + "write_json_text_atomically", +] diff --git a/codeclone/utils/repo_paths.py b/codeclone/utils/repo_paths.py new file mode 100644 index 00000000..af14c7d6 --- /dev/null +++ b/codeclone/utils/repo_paths.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Repository-root path containment helpers. + +The helpers here are intentionally small and policy-driven. They are used for +security-sensitive state/artifact paths; general CLI output paths keep their +existing behavior unless a caller opts into these stricter rules. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +class RepoPathError(ValueError): + """Raised when a repository path cannot be resolved safely.""" + + +class PathOutsideRepoError(RepoPathError): + """Raised when a path escapes the repository root.""" + + +@dataclass(frozen=True, slots=True) +class RepoPathPolicy: + allow_absolute: bool = False + allow_external: bool = False + must_exist: bool = False + must_be_file: bool = False + must_be_dir: bool = False + + +def resolve_under_repo_root( + root: Path, + raw: str | Path, + *, + policy: RepoPathPolicy, +) -> Path: + """Resolve ``raw`` relative to ``root`` and enforce containment policy.""" + + root_path = _resolved_root(root) + raw_path = _raw_path(raw) + if raw_path.is_absolute() and not policy.allow_absolute: + raise PathOutsideRepoError("absolute paths require explicit opt-in") + candidate = raw_path if raw_path.is_absolute() else root_path / raw_path + try: + resolved = candidate.expanduser().resolve(strict=policy.must_exist) + except OSError as exc: + raise RepoPathError(f"cannot resolve path {raw_path}: {exc}") from exc + if not policy.allow_external and not _is_relative_to(resolved, root_path): + raise PathOutsideRepoError(f"path escapes repository root: {raw_path}") + _enforce_type_policy(resolved, policy=policy) + return resolved + + +def resolve_repo_relative_path(root: Path, raw: str | Path) -> Path: + """Resolve a repo-contained path, rejecting absolute or external paths.""" + + return resolve_under_repo_root(root, raw, policy=RepoPathPolicy()) + + +def display_repo_path(root: Path, path: Path) -> str: + """Return a stable repo-relative display path when possible.""" + + try: + resolved_path = path.resolve(strict=False) + resolved_root = root.resolve(strict=False) + return resolved_path.relative_to(resolved_root).as_posix() + except (OSError, ValueError): + return str(path) + + +def _raw_path(raw: str | Path) -> Path: + if isinstance(raw, 
Path): + return raw.expanduser() + text = raw.strip() + if not text: + raise RepoPathError("path must not be empty") + return Path(text).expanduser() + + +def _resolved_root(root: Path) -> Path: + try: + resolved = root.expanduser().resolve(strict=True) + except OSError as exc: + raise RepoPathError(f"cannot resolve repository root {root}: {exc}") from exc + if not resolved.is_dir(): + raise RepoPathError(f"repository root is not a directory: {resolved}") + return resolved + + +def _is_relative_to(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + except ValueError: + return False + return True + + +def _enforce_type_policy(path: Path, *, policy: RepoPathPolicy) -> None: + if policy.must_be_file and not path.is_file(): + raise RepoPathError(f"path must be a file: {path}") + if policy.must_be_dir and not path.is_dir(): + raise RepoPathError(f"path must be a directory: {path}") + + +__all__ = [ + "PathOutsideRepoError", + "RepoPathError", + "RepoPathPolicy", + "display_repo_path", + "resolve_repo_relative_path", + "resolve_under_repo_root", +] diff --git a/codeclone/utils/sqlite_store.py b/codeclone/utils/sqlite_store.py new file mode 100644 index 00000000..9492c83c --- /dev/null +++ b/codeclone/utils/sqlite_store.py @@ -0,0 +1,136 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path +from urllib.parse import quote + +_SQLITE_PRAGMAS = ( + "PRAGMA journal_mode=WAL", + "PRAGMA synchronous=NORMAL", + "PRAGMA foreign_keys=OFF", + "PRAGMA busy_timeout=5000", +) + + +def open_sqlite_db( + path: Path, + *, + ensure_schema: Callable[[sqlite3.Connection], None], + foreign_keys: bool = False, + synchronous: str | None = None, +) -> sqlite3.Connection: + """Open a SQLite database with standard pragmas. + + *synchronous* overrides the default ``NORMAL`` level. Pass ``"FULL"`` + for stores where every commit must survive an unclean process exit + (e.g. engineering memory). + """ + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect( + str(path), + isolation_level="DEFERRED", + timeout=5.0, + check_same_thread=False, + ) + try: + pragmas: tuple[str, ...] 
= _SQLITE_PRAGMAS + if foreign_keys: + pragmas = tuple( + "PRAGMA foreign_keys=ON" if stmt.endswith("foreign_keys=OFF") else stmt + for stmt in pragmas + ) + if synchronous is not None: + allowed = ("NORMAL", "FULL", "EXTRA", "OFF") + upper = synchronous.upper() + if upper not in allowed: + msg = f"synchronous must be one of {allowed}, got {synchronous!r}" + raise ValueError(msg) + pragmas = tuple( + f"PRAGMA synchronous={upper}" + if stmt.startswith("PRAGMA synchronous=") + else stmt + for stmt in pragmas + ) + for statement in pragmas: + conn.execute(statement) + ensure_schema(conn) + except Exception: + conn.close() + raise + return conn + + +def open_sqlite_db_readonly( + path: Path, + *, + validate_schema: Callable[[sqlite3.Connection], None], +) -> sqlite3.Connection: + """Open an existing SQLite database without allowing writes or creation.""" + + resolved = path.resolve(strict=True) + uri = f"file:{quote(str(resolved), safe='/')}?mode=ro" + conn = sqlite3.connect( + uri, + uri=True, + ) + try: + conn.execute("PRAGMA query_only=ON") + conn.execute("PRAGMA busy_timeout=5000") + validate_schema(conn) + except Exception: + conn.close() + raise + return conn + + +def get_meta_value( + conn: sqlite3.Connection, + *, + meta_table: str, + key: str, +) -> str | None: + try: + row = conn.execute( + f"SELECT value FROM {meta_table} WHERE key = ?", + (key,), + ).fetchone() + except sqlite3.OperationalError: + return None + if row is None: + return None + value = row[0] + return value if isinstance(value, str) else None + + +def initialize_schema_v1( + conn: sqlite3.Connection, + *, + ddl_statements: Sequence[str], + index_statements: Sequence[str], + meta_table: str, + seed_meta: Mapping[str, str], +) -> None: + for statement in ddl_statements: + conn.execute(statement) + for statement in index_statements: + conn.execute(statement) + conn.executemany( + f"INSERT OR IGNORE INTO {meta_table}(key, value) VALUES (?, ?)", + sorted(seed_meta.items()), + ) + conn.commit() + + 
+__all__ = [ + "get_meta_value", + "initialize_schema_v1", + "open_sqlite_db", + "open_sqlite_db_readonly", +] diff --git a/codeclone/utils/utc_timestamps.py b/codeclone/utils/utc_timestamps.py new file mode 100644 index 00000000..c1a70efe --- /dev/null +++ b/codeclone/utils/utc_timestamps.py @@ -0,0 +1,32 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from datetime import datetime, timezone + + +def age_seconds_since_utc_timestamp(timestamp: str | None) -> int | None: + """Return whole seconds elapsed since an ISO-8601 UTC timestamp.""" + + if timestamp is None: + return None + text = timestamp.strip() + if not text: + return None + if text.endswith("Z"): + text = f"{text[:-1]}+00:00" + try: + created = datetime.fromisoformat(text) + except ValueError: + return None + if created.tzinfo is None: + created = created.replace(tzinfo=timezone.utc) + delta = datetime.now(timezone.utc) - created.astimezone(timezone.utc) + return max(0, int(delta.total_seconds())) + + +__all__ = ["age_seconds_since_utc_timestamp"] diff --git a/codeclone/workspace_intent/__init__.py b/codeclone/workspace_intent/__init__.py new file mode 100644 index 00000000..8ae88012 --- /dev/null +++ b/codeclone/workspace_intent/__init__.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Public read-only workspace intent helpers.""" + +from __future__ import annotations + +from .gate import ( + UnclosedWorkspaceIntent, + WorkspaceEditGateDecision, + WorkspaceIntentRegistryUnavailable, + evaluate_workspace_edit_gate, + has_authorized_workspace_intent, + has_blocking_workspace_intent, + list_unclosed_workspace_intents, + list_unclosed_workspace_intents_for_hook_cleanup, +) + +__all__ = [ + "UnclosedWorkspaceIntent", + "WorkspaceEditGateDecision", + "WorkspaceIntentRegistryUnavailable", + "evaluate_workspace_edit_gate", + "has_authorized_workspace_intent", + "has_blocking_workspace_intent", + "list_unclosed_workspace_intents", + "list_unclosed_workspace_intents_for_hook_cleanup", +] diff --git a/codeclone/workspace_intent/gate.py b/codeclone/workspace_intent/gate.py new file mode 100644 index 00000000..0115c7ac --- /dev/null +++ b/codeclone/workspace_intent/gate.py @@ -0,0 +1,427 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Read-only workspace intent gate for local enforcement hooks. + +The workspace intent registry is the durable coordination source for agent +change-control. MCP writes it; hooks read it. This module intentionally exposes a +small public read API so plugin hooks do not parse registry files or assume a +specific registry backend. Queued intents remain visible but do not authorize +local edit hooks. 
+""" + +from __future__ import annotations + +import os +import sqlite3 +from collections.abc import Iterable, Mapping +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Literal + +from codeclone.config.intent_registry import ( + IntentRegistryConfig, + IntentRegistryConfigError, + resolve_intent_registry_config, +) +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intent_contract import WorkspaceIntentRecord +from codeclone.surfaces.mcp._workspace_intent_lifecycle import ( + WorkspaceIntentStatus, + is_terminal_workspace_intent_status, + utc_now, +) +from codeclone.surfaces.mcp._workspace_intent_models import ( + parse_workspace_document, + parse_workspace_document_json, + record_from_document, +) +from codeclone.surfaces.mcp._workspace_intent_paths import ( + read_payload, + record_sort_key, + registry_files, +) +from codeclone.surfaces.mcp._workspace_intent_schema import ( + IntentRegistrySchemaError, + open_intent_registry_db_readonly, +) + +GateReason = Literal[ + "active_intent", + "no_active_intent", + "queued_intent_not_editable", + "registry_error", +] + + +class WorkspaceIntentRegistryUnavailable(RuntimeError): + """Raised when the hook cannot read the workspace intent registry.""" + + +HOOK_AUTHORIZE_FOREIGN_ENV = "CODECLONE_HOOK_AUTHORIZE_FOREIGN" +_TRUTHY_HOOK_VALUES = frozenset({"1", "true", "yes", "on"}) +_FALSY_HOOK_VALUES = frozenset({"0", "false", "no", "off"}) + + +@dataclass(frozen=True, slots=True) +class UnclosedWorkspaceIntent: + """Non-terminal workspace intent that still needs finish or clear.""" + + intent_id: str + status: str + + +@dataclass(frozen=True, slots=True) +class WorkspaceEditGateDecision: + """Structured read-only decision for local hook enforcement.""" + + allowed: bool + reason: GateReason + intent_id: str | None = None + status: str | None = None + ownership: str | None = None + agent_label: 
str | None = None + registry_backend: str | None = None + registry_path: str | None = None + details: Mapping[str, object] = field(default_factory=dict) + + +def evaluate_workspace_edit_gate(root: Path | str) -> WorkspaceEditGateDecision: + """Return whether repository writes are authorized by a live intent record. + + The function is read-only: it does not lazy-close records, migrate SQLite + schemas, create registry directories, or write marker files. + """ + + root_path = Path(root).resolve() + try: + config = resolve_intent_registry_config(root_path) + except (IntentRegistryConfigError, OSError, ValueError) as exc: + return WorkspaceEditGateDecision( + allowed=False, + reason="registry_error", + registry_backend=None, + registry_path=None, + details={"error": str(exc)}, + ) + + try: + records = _load_registry_records_read_only(root_path, config) + except (OSError, sqlite3.Error, IntentRegistrySchemaError, ValueError) as exc: + return WorkspaceEditGateDecision( + allowed=False, + reason="registry_error", + registry_backend=config.backend, + registry_path=_display_registry_path(root_path, config.storage_path), + details={"error": str(exc)}, + ) + + return _decision_from_records( + records, + registry_backend=config.backend, + registry_path=_display_registry_path(root_path, config.storage_path), + ) + + +def has_authorized_workspace_intent(root: Path | str) -> bool: + """True when a live active registry intent authorizes local hook writes.""" + + return evaluate_workspace_edit_gate(root).allowed + + +def has_blocking_workspace_intent(root: Path | str) -> bool: + """Compatibility boolean for hooks that historically asked for a lock.""" + + return has_authorized_workspace_intent(root) + + +def list_unclosed_workspace_intents( + root: Path | str, +) -> tuple[UnclosedWorkspaceIntent, ...]: + """Return non-terminal workspace intents still present in the registry.""" + + return _list_unclosed_workspace_intents_filtered( + root, + own_pid=0, + own_start_epoch=0, + 
recoverable_agent_label_prefix=None, + include_foreign=True, + ) + + +def list_unclosed_workspace_intents_for_hook_cleanup( + root: Path | str, + *, + own_pid: int | None = None, + own_start_epoch: int | None = None, + recoverable_agent_label_prefix: str | None = "cursor-vscode/", +) -> tuple[UnclosedWorkspaceIntent, ...]: + """Return unclosed intents the local stop hook may ask the user to finish/clear. + + Foreign active/stale intents are excluded; they require coordination, not + ``manage_change_intent(clear)`` from another agent. Recoverable rows are + included only when ``recoverable_agent_label_prefix`` matches ``agent_label``. + """ + + resolved_pid = _resolve_hook_int_env( + own_pid, + env_key="CODECLONE_HOOK_OWN_AGENT_PID", + ) + resolved_epoch = _resolve_hook_int_env( + own_start_epoch, + env_key="CODECLONE_HOOK_OWN_AGENT_START_EPOCH", + ) + return _list_unclosed_workspace_intents_filtered( + root, + own_pid=resolved_pid, + own_start_epoch=resolved_epoch, + recoverable_agent_label_prefix=recoverable_agent_label_prefix, + include_foreign=False, + ) + + +def _resolve_hook_int_env(explicit: int | None, *, env_key: str) -> int: + if explicit is not None: + return explicit + raw_value = os.environ.get(env_key, "").strip() + if raw_value.isdigit(): + return int(raw_value) + return 0 + + +def _include_record_in_hook_cleanup( + record: WorkspaceIntentRecord, + *, + own_pid: int, + own_start_epoch: int, + recoverable_agent_label_prefix: str | None, + include_foreign: bool, + now: datetime, +) -> bool: + ownership = workspace_intents.classify_intent_ownership( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + now=now, + ) + if include_foreign: + return ownership != workspace_intents.IntentOwnership.EXPIRED + if ownership in { + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + workspace_intents.IntentOwnership.FOREIGN_STALE, + workspace_intents.IntentOwnership.EXPIRED, + }: + return False + if ownership in { + 
workspace_intents.IntentOwnership.OWN_ACTIVE, + workspace_intents.IntentOwnership.OWN_STALE, + }: + return True + if ownership == workspace_intents.IntentOwnership.RECOVERABLE: + if recoverable_agent_label_prefix is None: + return False + return record.agent_label.startswith(recoverable_agent_label_prefix) + return False + + +def _list_unclosed_workspace_intents_filtered( + root: Path | str, + *, + own_pid: int, + own_start_epoch: int, + recoverable_agent_label_prefix: str | None, + include_foreign: bool, +) -> tuple[UnclosedWorkspaceIntent, ...]: + root_path = Path(root).resolve() + try: + config = resolve_intent_registry_config(root_path) + except (IntentRegistryConfigError, OSError, ValueError) as exc: + raise WorkspaceIntentRegistryUnavailable(str(exc)) from exc + + try: + records = _load_registry_records_read_only(root_path, config) + except (OSError, sqlite3.Error, IntentRegistrySchemaError, ValueError) as exc: + raise WorkspaceIntentRegistryUnavailable(str(exc)) from exc + + current_time = utc_now() + unclosed = [ + UnclosedWorkspaceIntent(intent_id=record.intent_id, status=record.status) + for record in records + if not is_terminal_workspace_intent_status(record.status) + and _include_record_in_hook_cleanup( + record, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + recoverable_agent_label_prefix=recoverable_agent_label_prefix, + include_foreign=include_foreign, + now=current_time, + ) + ] + return tuple(sorted(unclosed, key=lambda item: item.intent_id)) + + +def _decision_from_records( + records: Iterable[WorkspaceIntentRecord], + *, + registry_backend: str, + registry_path: str, +) -> WorkspaceEditGateDecision: + current_time = utc_now() + queued: WorkspaceIntentRecord | None = None + ignored_count = 0 + for record in sorted(records, key=record_sort_key): + if not is_terminal_workspace_intent_status(record.status): + ownership = workspace_intents.classify_intent_ownership( + record, + own_pid=0, + own_start_epoch=0, + now=current_time, + ) + 
liveness = workspace_intents._pid_liveness(record.agent_pid) + if record.status == WorkspaceIntentStatus.QUEUED.value: + queued = queued or record + continue + if ( + record.status == WorkspaceIntentStatus.ACTIVE.value + and _ownership_authorizes_hook(ownership, liveness=liveness) + ): + return WorkspaceEditGateDecision( + allowed=True, + reason="active_intent", + intent_id=record.intent_id, + status=record.status, + ownership=ownership.value, + agent_label=record.agent_label, + registry_backend=registry_backend, + registry_path=registry_path, + details={ + "run_id": record.run_id[:8], + "lease_seconds": record.lease_seconds, + }, + ) + if record.status != WorkspaceIntentStatus.QUEUED.value: + ignored_count += 1 + if queued is not None: + return WorkspaceEditGateDecision( + allowed=False, + reason="queued_intent_not_editable", + intent_id=queued.intent_id, + status=queued.status, + agent_label=queued.agent_label, + registry_backend=registry_backend, + registry_path=registry_path, + details={"ignored_records": ignored_count}, + ) + return WorkspaceEditGateDecision( + allowed=False, + reason="no_active_intent", + registry_backend=registry_backend, + registry_path=registry_path, + details={"ignored_records": ignored_count}, + ) + + +def _ownership_authorizes_hook( + ownership: workspace_intents.IntentOwnership, + *, + liveness: workspace_intents.PidLiveness, +) -> bool: + if liveness != workspace_intents.PidLiveness.ALIVE: + return False + if ownership == workspace_intents.IntentOwnership.OWN_ACTIVE: + return True + if ownership == workspace_intents.IntentOwnership.FOREIGN_ACTIVE: + return _hook_authorizes_foreign_active() + return False + + +def _hook_authorizes_foreign_active() -> bool: + raw_value = os.environ.get(HOOK_AUTHORIZE_FOREIGN_ENV) + if raw_value is None: + # Current hook inputs do not carry the declaring agent PID/start_epoch, + # so live active MCP intents appear foreign to the hook reader. 
+ # Operators can opt into strict own-only authorization with + # CODECLONE_HOOK_AUTHORIZE_FOREIGN=0 once their hook passes identity. + return True + normalized = raw_value.strip().lower() + if normalized in _FALSY_HOOK_VALUES: + return False + return normalized in _TRUTHY_HOOK_VALUES + + +def _load_registry_records_read_only( + root: Path, + config: IntentRegistryConfig, +) -> tuple[WorkspaceIntentRecord, ...]: + if config.backend == "file": + return _load_file_records(root) + if config.backend == "sqlite": + return _load_sqlite_records(config.storage_path) + raise ValueError(f"Unsupported intent registry backend: {config.backend!r}") + + +def _load_file_records(root: Path) -> tuple[WorkspaceIntentRecord, ...]: + records: list[WorkspaceIntentRecord] = [] + for path in registry_files(root): + payload = read_payload(path) + record = _record_from_payload(payload) + if record is not None: + records.append(record) + return tuple(sorted(records, key=record_sort_key)) + + +def _load_sqlite_records(db_path: Path) -> tuple[WorkspaceIntentRecord, ...]: + if not db_path.is_file(): + return () + conn = open_intent_registry_db_readonly(db_path) + try: + rows = conn.execute( + """ + SELECT payload_json + FROM workspace_intents + ORDER BY declared_at_utc, agent_pid, intent_id + """ + ).fetchall() + finally: + conn.close() + records = [ + record + for record in (_record_from_payload(row[0]) for row in rows) + if record is not None + ] + return tuple(sorted(records, key=record_sort_key)) + + +def _record_from_payload(payload: object) -> WorkspaceIntentRecord | None: + if isinstance(payload, str): + document = parse_workspace_document_json(payload) + elif isinstance(payload, Mapping): + document = parse_workspace_document(payload) + else: + return None + if document is None: + return None + return record_from_document(document) + + +def _display_registry_path(root: Path, registry_path: Path) -> str: + try: + return str(registry_path.relative_to(root)) + except ValueError: + return 
str(registry_path) + + +__all__ = [ + "HOOK_AUTHORIZE_FOREIGN_ENV", + "UnclosedWorkspaceIntent", + "WorkspaceEditGateDecision", + "WorkspaceIntentRegistryUnavailable", + "evaluate_workspace_edit_gate", + "has_authorized_workspace_intent", + "has_blocking_workspace_intent", + "list_unclosed_workspace_intents", + "list_unclosed_workspace_intents_for_hook_cleanup", +] diff --git a/docs/README-pypi.md b/docs/README-pypi.md index 62f01eb9..69b65da6 100644 --- a/docs/README-pypi.md +++ b/docs/README-pypi.md @@ -17,50 +17,18 @@

- A structural review layer for Python — baseline-aware, deterministic, built for CI and AI agents + Structural Change Controller for AI-assisted Python development

PyPI - Tests - Benchmark Python + Tests

-CodeClone adds a control layer between analysis and CI: it isolates structural -regressions from historical debt, so merges are blocked only by what actually -got worse. - -The same analysis pipeline powers CLI reports, CI checks, the MCP server, and -native IDE/agent clients — so humans and AI agents operate on identical, -deterministic facts. - -- Documentation: -- Live sample report: -- Source: -- Issues: - -## Features - -**Control & governance** -- **Baseline governance** — separates accepted **legacy** debt from **new regressions**; CI fails only on what changed -- **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support -- **Reports** — interactive HTML, JSON, Markdown, SARIF, and text from one canonical report - -**Detection & analysis** -- **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones -- **Structural findings** — duplicated branch families, clone guard/exit divergence, and clone-cohort drift -- **Quality metrics** — cyclomatic complexity, coupling (CBO), cohesion (LCOM4), dependency cycles, adaptive depth profile, dead code, health score, and overloaded-module profiling -- **Adoption & API** — type/docstring annotation coverage, public API surface inventory and baseline diff -- **Coverage Join** — fuse external Cobertura XML into the current run to surface coverage hotspots and scope gaps -- **Security Surfaces** — report-only inventory of security-relevant capability boundaries without vulnerability claims - -**Surfaces & integrations** -- **MCP control surface** — triage-first agent and IDE interface over the same canonical pipeline; read-only by contract -- **IDE & agent clients** — VS Code extension, Claude Desktop bundle, and Codex plugin over the same MCP contract - -**Performance** -- **Fast** — incremental caching, parallel processing, warm-run optimization +Deterministic static analysis that combines clone detection, code-quality metrics, +and 
baseline-aware CI gating — structural change controller for AI-assisted +Python development. ## Quick Start @@ -69,96 +37,52 @@ uv tool install codeclone codeclone . # analyze codeclone . --html # HTML report -codeclone . --html --open-html-report -codeclone . --json --md --sarif --text codeclone . --ci # CI mode ``` -Run without installing: - -```bash -uvx codeclone@latest . -``` - -## CI Workflow - -```bash -# 1. Generate and commit the baseline -codeclone . --update-baseline - -# 2. Enforce it in CI -codeclone . --ci -``` - -`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics -baseline is loaded, CI mode also enables `--fail-on-new-metrics`. - -Exit codes: +## Key Capabilities -| Code | Meaning | -|------|-------------------------------------------------------------------------------| -| `0` | Success | -| `2` | Contract error — untrusted baseline, invalid config, unreadable sources in CI | -| `3` | Gating failure — new clones or metric threshold exceeded | -| `5` | Internal error | +- **Clone detection** — function (CFG fingerprint), block, and segment clones +- **Quality metrics** — complexity, coupling, cohesion, dead code, health score +- **Baseline governance** — separates legacy debt from new regressions; CI fails only on what changed +- **Change controller** — intent declaration, blast radius, patch contract, review receipt for AI agents +- **Engineering Memory** — governed records, trajectory passports, and advisory Experiences +- **MCP server** — 33-tool default interface for IDE and agent clients (35 with `--ide-governance-channel`) +- **Platform Observability** — opt-in local diagnostics for CodeClone's own runtime +- **Corpus Analytics** — optional offline intent clustering (`codeclone[analytics]`) +- **Reports** — HTML, JSON, Markdown, SARIF, text from one canonical payload -Contract errors (`2`) take precedence over gating failures (`3`). - -## Reports +## MCP Server ```bash -codeclone . --html -codeclone . --json -codeclone . 
--md -codeclone . --sarif -codeclone . --text +uv tool install "codeclone[mcp]" +codeclone-mcp --transport stdio ``` -All formats are rendered from one canonical report payload. - -Report contract: +Native clients: VS Code extension, Claude Desktop bundle, Codex plugin. -## MCP and Native Clients +Engineering Memory, Corpus Analytics, and runtime diagnostics: ```bash -uv tool install "codeclone[mcp]" - -codeclone-mcp --transport stdio -``` - -The MCP server is read-only by contract: it never mutates source files, -baselines, cache, or repository state. - -| Surface | Link | -|-----------------------|--------------------------------------------------------------------------------------| -| VS Code extension | | -| Claude Desktop bundle | | -| Codex plugin | | - -MCP docs: - -## Configuration - -```toml -[tool.codeclone] -baseline = "codeclone.baseline.json" -min_loc = 10 -min_stmt = 6 -block_min_loc = 20 -block_min_stmt = 8 -fail_on_new = true -fail_cycles = true -fail_dead_code = true -fail_health = 80 +uv tool install "codeclone[analytics]" +codeclone analytics build --root . --sweep --use-recommended +codeclone memory trajectory dashboard --root . +CODECLONE_OBSERVABILITY_ENABLED=1 codeclone . +codeclone observability trace --root . --html /tmp/codeclone-observer.html ``` -Precedence: CLI flags > `pyproject.toml` > built-in defaults. +## Links -Config reference: +- Documentation: +- Engineering Memory: +- Platform Observability: +- Corpus Analytics: +- Source: +- Issues: ## License -- Code: MPL-2.0 (`LICENSE`) -- Documentation and docs-site content: MIT (`LICENSE-MIT`) +- Code: MPL-2.0 +- Documentation: MIT License scope map: diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index bbd2b6ed..00000000 --- a/docs/README.md +++ /dev/null @@ -1,157 +0,0 @@ -# CodeClone Docs - -> Deterministic structural review for Python codebases. -> One canonical analysis across CI, HTML reports, IDEs, and AI agents. 
- -CodeClone is a structural review layer for Python focused on deterministic -analysis, baseline-aware governance, and review surfaces for both humans and -AI-assisted workflows. - -This documentation site has two complementary layers: - -- **Contracts Book** — canonical behavioral contracts derived from code and locked tests -- **Deep Dives** — architecture, CFG semantics, integrations, and operational rationale - -!!! note "Licensing" - CodeClone source code is licensed under MPL-2.0. - - Documentation content under `docs/` and the published docs site are - licensed under MIT. - ---- - -## Start Here - -### New to CodeClone? - -Understand the deterministic review model and governance philosophy. - -- [Contracts and guarantees](book/00-intro.md) -- [Architecture map (components + ownership)](book/01-architecture-map.md) -- [Terminology](book/02-terminology.md) - -### Integrating into CI? - -Set up baseline-aware gating and deterministic review flows. - -- [Exit codes and failure policy](book/03-contracts-exit-codes.md) -- [Metrics mode and quality gates](book/15-metrics-and-quality-gates.md) -- [Baseline contract](book/06-baseline.md) - -### Using IDEs or AI agents? - -Understand the canonical review surfaces and MCP contract. - -- [MCP interface contract](book/20-mcp-interface.md) -- [VS Code extension](book/21-vscode-extension.md) -- [Codex plugin](book/23-codex-plugin.md) - -### Reviewing reports? - -Explore the canonical report model and rendered review surfaces. - -- [Report contract](book/08-report.md) -- [HTML report rendering](book/10-html-render.md) -- [Live sample report](examples/report.md) - ---- - -## Contracts Book - -Contract-first documentation derived from code and locked tests. 
- -The Contracts Book defines: - -- schemas and typed contracts -- baseline and cache semantics -- exit codes and CI behavior -- determinism guarantees -- trust and compatibility rules -- review surface contracts - -### Core Contracts - -- [Exit codes and failure policy](book/03-contracts-exit-codes.md) -- [Config and defaults](book/04-config-and-defaults.md) -- [Core pipeline and invariants](book/05-core-pipeline.md) -- [Baseline contract (schema v2.1)](book/06-baseline.md) -- [Cache contract (schema v2.8)](book/07-cache.md) -- [Report contract (schema v2.11)](book/08-report.md) - -### Interfaces - -- [CLI behavior, modes, and UX](book/09-cli.md) -- [MCP interface contract](book/20-mcp-interface.md) -- [VS Code extension contract](book/21-vscode-extension.md) -- [Claude Desktop bundle contract](book/22-claude-desktop-bundle.md) -- [Codex plugin contract](book/23-codex-plugin.md) -- [HTML report rendering contract](book/10-html-render.md) - -### System Properties - -- [Security model and threat boundaries](book/11-security-model.md) -- [Determinism policy](book/12-determinism.md) -- [Tests as specification](book/13-testing-as-spec.md) -- [Compatibility and versioning rules](book/14-compatibility-and-versioning.md) - -### Quality Contracts - -- [Health Score model and evolution policy](book/15-health-score.md) -- [Metrics mode and quality gates](book/15-metrics-and-quality-gates.md) -- [Dead-code contract and test-boundary policy](book/16-dead-code-contract.md) -- [Suggestions and clone typing contract](book/17-suggestions-and-clone-typing.md) -- [Reproducible Docker benchmarking](book/18-benchmarking.md) -- [Inline suppressions contract](book/19-inline-suppressions.md) - ---- - -## Deep Dives - -Narrative documentation covering architecture, integrations, and operational usage. 
- -- [Architecture narrative](architecture.md) -- [CFG design and semantics](cfg.md) -- [MCP integration for AI agents and clients](mcp.md) -- [VS Code extension usage guide](vscode-extension.md) -- [Claude Desktop bundle usage guide](claude-desktop-bundle.md) -- [Codex plugin usage guide](codex-plugin.md) -- [SARIF integration for IDE/code-scanning use](sarif.md) - -### Operational and legal - -- [Privacy Policy](privacy-policy.md) -- [Terms of Use](terms-of-use.md) -- [Docs publishing and Pages workflow](publishing.md) - ---- - -## Reference Appendices - -- [Status enums and typed contracts](book/appendix/a-status-enums.md) -- [Schema layouts (baseline/cache/report)](book/appendix/b-schema-layouts.md) -- [Error catalog (contract vs internal)](book/appendix/c-error-catalog.md) - ---- - -## Local Preview - -=== "Build the site" - - ```bash title="Validate the docs site" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict - ``` - -=== "Build the site and sample report" - - ```bash title="Generate the live sample report into the built site" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict - uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live - ``` - -!!! note "Generated output" - `site/` is generated output used for local preview and GitHub Pages - publishing. It is not committed to git. - -GitHub Pages publishing is handled by -[`docs.yml`](https://github.com/orenlab/codeclone/blob/main/.github/workflows/docs.yml) -via a custom Actions workflow. diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index 5375db10..00000000 --- a/docs/architecture.md +++ /dev/null @@ -1,296 +0,0 @@ -# CodeClone Architecture - -> Scope note: this file is an architecture narrative/deep-dive. -> Contract-level guarantees (schemas, statuses, exit codes, trust model, determinism) are defined in `docs/book/`. - -This document describes the high-level architecture of **CodeClone**. 
- ---- - -## Pipeline Overview - -CodeClone processes Python projects in the following stages: - -1. **Source scanning** -2. **AST parsing** -3. **AST normalization** -4. **CFG construction** -5. **Fingerprinting** -6. **Segment window extraction** -7. **Clone grouping** -8. **Reporting / CI decision** - ---- - -## 1. Source Scanning - -- Recursively scans `.py` files. -- Uses deterministic sorted traversal. -- Skips paths that resolve outside the root (symlink traversal guard). -- Applies cache-based skipping using file stat signatures. -- Default cache location is project-local: `/.cache/codeclone/cache.json` - (override via `--cache-path`, legacy alias: `--cache-dir`). -- Cache file size guard is configurable via `--max-cache-size-mb` (oversized cache is ignored with warning). -- Cache is best-effort: signature/version/shape mismatches are ignored with warnings, and - invalid entries are skipped deterministically. - ---- - -## 2. AST Parsing - -- Uses Python's built-in `ast` module. -- Supports Python 3.10+ syntax. - ---- - -## 3. AST Normalization - -Normalization removes non-structural noise: - -- variable names → `_VAR_` -- constants → `_CONST_` -- attributes → `_ATTR_` -- symbolic call targets are preserved (to avoid API conflation) -- syntactic sugar (e.g. `x += 1` → `x = x + 1`) -- commutative operand canonicalization (`+`, `*`, `|`, `&`, `^`) on proven constant domains -- local logical equivalence (`not (x in y)` → `x not in y`, `not (x is y)` → `x is not y`) -- docstrings removed -- type annotations removed - -This ensures structural stability across refactors. - ---- - -## 4. CFG Construction - -- Built per-function using `CFGBuilder`. -- Produces deterministic basic blocks. -- Captures structural control flow (`if`, `for`, `while`, `try`, `with`, `match`). -- Models short‑circuit `and`/`or` as micro‑CFG branches. -- Links `try/except` only from statements that may raise. -- Preserves `match case` and `except` handler order structurally. 
-- Models `break` / `continue` as terminating loop transitions. -- Preserves `for/while ... else` semantics. - -📄 See [docs/cfg.md](cfg.md) for full semantics. - ---- - -## 5. Fingerprinting - -Each function CFG is converted into a canonical string form and hashed. - -This fingerprint is used to group structurally identical functions. - ---- - -## 6. Segment Windows - -Large functions are also scanned with **segment windows** (sliding windows over normalized -statements). These are used to detect **internal clones** inside the same function. - -Segment windows are **never** used as a final equivalence signal; they are candidate -generators with strict hash confirmation. - ---- - -## 7. Clone Detection - -Clone groups are detected at three granularities: - -### Function clone groups - -- Grouped by `fingerprint|loc_bucket`. -- Report typing is deterministic (`Type-1`..`Type-4`) in report layer. - -### Block clone groups - -- Repeated structural statement windows across functions. -- Report typing is `Type-4` with explainability facts from core. - -Noise filters applied: - -- minimum LOC / statement thresholds -- no overlapping blocks -- no same-function block clones -- `__init__` excluded from block analysis - ---- - -### Segment clones (internal/report-only) - -- Detected only **inside the same function**. -- Used for internal copy‑paste discovery and report explainability. -- Not included in baseline or CI failure logic. -- Report UX merges overlapping segment windows and suppresses boilerplate‑only groups. -- A segment group is reported only if it has at least **2** unique statement types - or contains a control‑flow statement. - ---- - -### Structural findings (report-only) - -- `duplicated_branches`: repeated branch-body signatures. -- `clone_guard_exit_divergence`: guard/terminal divergence inside one function-clone cohort. -- `clone_cohort_drift`: drift from majority terminal/guard/try/side-effect profile. 
- -These findings are rendered in reports only and do not change baseline diff or CI -gating decisions. - ---- - -## 8. Reporting - -Detected findings can be rendered as: - -- interactive HTML (`--html`), -- canonical JSON (`--json`, schema `2.11`), -- deterministic text projection (`--text`), -- deterministic Markdown projection (`--md`), -- deterministic SARIF projection (`--sarif`). - -Reporting uses a layered model: - -- canonical sections: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics` -- non-canonical view layer: `derived` -- integrity metadata: `integrity` (`canonicalization` + `digest`) - -Provenance is carried through `meta` and includes: - -- runtime/context (`codeclone_version`, `python_version`, `python_tag`, `analysis_mode`, `report_mode`) -- analysis profile (`meta.analysis_profile`) -- analysis thresholds (`meta.analysis_thresholds.design_findings`) -- baseline status block (`meta.baseline.*`) -- cache status block (`meta.cache.*`) -- metrics-baseline status block (`meta.metrics_baseline.*`) -- generation timestamp (`meta.runtime.report_generated_at_utc`) - -Explainability contract (v1): - -- Explainability facts are produced only by Python core/report layer. -- HTML/JS renderer is display-only and must not recalculate metrics or introduce new semantics. -- UI can format, filter, and highlight facts, but cannot invent new hints. - ---- - -## 9. MCP Agent Interface - -CodeClone also exposes an optional MCP layer for AI agents and MCP-capable -clients. - -Current shape: - -- install via the optional `codeclone[mcp]` extra -- launch via `codeclone-mcp` -- transports: - - `stdio` - - `streamable-http` -- semantics: - - read-only - - baseline-aware - - built on the same pipeline/report contracts as the CLI - - bounded in-memory run history - -Operational note: - -- `codeclone/surfaces/mcp/server.py` is only a thin launcher/registration layer. 
-- The optional MCP runtime is imported lazily so the base `codeclone` install - and normal CI paths do not require MCP packages. -- `codeclone/surfaces/mcp/service.py` is the in-process adapter over the existing - pipeline/report contracts. - -The MCP layer is intentionally thin. It does not add a separate analysis engine; -it adapts the existing pipeline into tools/resources such as: - -- analyze repository -- analyze changed paths -- get run summary -- compare runs -- list findings -- inspect one finding -- project remediation payloads -- list hotspots -- generate PR summary -- preview gate outcomes -- keep session-local reviewed markers - -This keeps agent integrations deterministic and aligned with the same canonical -report document used by JSON/HTML/SARIF. - -Security boundaries: - -- Read-only by design — no tool mutates source files, baselines, or repo state. -- `--allow-remote` guard required for non-local transports; default is `stdio`. -- `cache_policy=refresh` rejected to preserve read-only semantics. -- Review markers are session-local in-memory state, never persisted. -- Run history bounded by `--history-limit` to prevent unbounded memory growth. -- `git_diff_ref` validated as a safe single revision expression before any - `git diff` subprocess call. - ---- - -## CI Integration - -Baseline comparison allows CI to fail **only on new clones**, -enabling gradual architectural improvement. - -Baseline files use a stable v2 contract (current schema `2.1`, with compatibility -support for major `1` legacy schema checks where applicable). Compatibility is checked by -`schema_version`, `fingerprint_version`, `python_tag`, and `generator.name`, -not package patch/minor version. -Regeneration is typically required when `fingerprint_version` or `python_tag` changes. -Baseline integrity is tamper-evident via canonical `payload_sha256`, which covers -`clones.functions`, `clones.blocks`, `meta.fingerprint_version`, and `meta.python_tag`. 
-`schema_version` and `generator.name` are compatibility gates and intentionally -excluded from the integrity hash. -`created_at` and `generator.version` are informational metadata and do not affect -integrity validation. - -Baseline validation order is deterministic: - -1. size guard (before JSON parse), -2. JSON parse and root object/type checks, -3. required fields and type checks, -4. compatibility checks (`generator`, `schema_version`, `fingerprint_version`, `python_tag`), -5. integrity checks (`payload_sha256`). - -Baseline loading is strict: schema/type violations, integrity failures, generator mismatch, -or oversized files are treated as untrusted input. -In `--ci` (or explicit `--fail-on-new`), untrusted baseline states fail fast. -Outside gating mode, untrusted baseline is ignored with warning and comparison proceeds -against an empty baseline. -Baseline size guard is configurable via `--max-baseline-size-mb`. - -CLI exit code contract: - -- `0` success -- `2` contract error (invalid arguments/output options, untrusted baseline, or unreadable source files in gating mode) -- `3` gating failure (`--ci` new clones, or `--fail-threshold` exceeded) -- `5` unexpected internal error (reserved) - -`5` is reserved only for unexpected internal exception paths (tool bug), not for -baseline/options contract violations. - -## Python Tag Consistency for Baseline Checks - -Due to inherent AST differences across interpreter builds, baseline compatibility -is pinned to `python_tag` (for example `cp314`). - -This preserves deterministic and reproducible clone detection results while allowing -patch updates within the same interpreter tag. - ---- - -## Design Principles - -- Structural > textual -- Deterministic > precise -- Low-noise > completeness -- CI-first design - ---- - -## Summary - -CodeClone provides **structural code quality analysis** for Python — -clone detection, quality metrics, and baseline-aware CI governance. 
diff --git a/docs/assets/codeclone-pipeline.svg b/docs/assets/codeclone-pipeline.svg index 830505be..f999a26b 100644 --- a/docs/assets/codeclone-pipeline.svg +++ b/docs/assets/codeclone-pipeline.svg @@ -1,7 +1,7 @@ - - CodeClone analysis pipeline - Deterministic CodeClone pipeline from source discovery and cache reuse to canonical report, gates, and - human or agent-facing projections. + + CodeClone pipeline + Deterministic CodeClone pipeline from source discovery and cache reuse to a canonical report that powers + gates, projections, and the Structural Change Controller with Engineering Memory. diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index c23d8d4c..9765085c 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -126,7 +126,16 @@ Tables --------------------------------------------------------- */ +/* Uniform width: the wrapper is block-level and the table fills it, so every + content table shares one width instead of sizing to its own content. 
*/ +.md-typeset__table { + display: block; + width: 100%; +} + .md-typeset table:not([class]) { + display: table; + width: 100%; border-radius: 14px; overflow: hidden; border: 1px solid var(--cc-border-dark); @@ -344,3 +353,23 @@ box-shadow: none !important; border-width: 1px; } + +/* --------------------------------------------------------- + Home / hub goal tables + --------------------------------------------------------- */ + +.md-typeset table:not([class]) thead th { + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.04em; + text-transform: uppercase; + color: var(--md-default-fg-color--light); +} + +[data-md-color-scheme="slate"] .md-typeset table:not([class]) tbody tr:hover { + background: rgba(129, 140, 248, 0.06); +} + +[data-md-color-scheme="default"] .md-typeset table:not([class]) tbody tr:hover { + background: rgba(79, 70, 229, 0.04); +} diff --git a/docs/book/00-intro.md b/docs/book/00-intro.md index 67c1f938..0f07d208 100644 --- a/docs/book/00-intro.md +++ b/docs/book/00-intro.md @@ -1,3 +1,7 @@ + + # 00. 
Intro ## Purpose @@ -83,21 +87,21 @@ Refs: ## Recommended reading paths - CI contract path: - [03-contracts-exit-codes.md](03-contracts-exit-codes.md) → - [06-baseline.md](06-baseline.md) → - [07-cache.md](07-cache.md) → - [08-report.md](08-report.md) → - [09-cli.md](09-cli.md) + [09-exit-codes.md](09-exit-codes.md) → + [07-baseline.md](07-baseline.md) → + [08-cache.md](08-cache.md) → + [05-report.md](05-report.md) → + [11-cli.md](11-cli.md) - Metrics governance path: - [04-config-and-defaults.md](04-config-and-defaults.md) → + [10-config-and-defaults.md](10-config-and-defaults.md) → [15-health-score.md](15-health-score.md) → - [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) → - [16-dead-code-contract.md](16-dead-code-contract.md) → + [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) → + [17-dead-code-contract.md](17-dead-code-contract.md) → [19-inline-suppressions.md](19-inline-suppressions.md) → - [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) + [18-suggestions-and-clone-typing.md](18-suggestions-and-clone-typing.md) - Determinism and compatibility path: - [12-determinism.md](12-determinism.md) → - [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) + [22-determinism.md](22-determinism.md) → + [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) - Benchmarking path: - [12-determinism.md](12-determinism.md) → - [18-benchmarking.md](18-benchmarking.md) + [22-determinism.md](22-determinism.md) → + [20-benchmarking.md](20-benchmarking.md) diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md deleted file mode 100644 index e1c354b5..00000000 --- a/docs/book/01-architecture-map.md +++ /dev/null @@ -1,139 +0,0 @@ -# 01. Architecture Map - -## Purpose - -Document the current module boundaries and ownership in CodeClone `2.0.x`. 
- -## Public surface - -Main ownership layers: - -- CLI entry and UX orchestration -- Config parsing and pyproject resolution -- Core runtime pipeline -- Analysis and clone grouping -- Metrics and findings -- Baseline/cache persistence contracts -- Canonical report document and deterministic projections -- HTML render-only surface -- Read-only MCP surface -- IDE/client surfaces over MCP - -## Data model - -| Layer | Modules | Responsibility | -|-------------------------|-------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------| -| Entry | `codeclone/main.py` | Public CLI entrypoint only | -| CLI surface | `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*` | Parse args, resolve runtime mode, print summaries, write outputs, route exits | -| Config | `codeclone/config/*` | Option specs, parser construction, pyproject loading, CLI > pyproject > defaults merge | -| Core runtime | `codeclone/core/*` | Bootstrap, discovery, worker processing, project metrics, report/gate integration | -| Analysis | `codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*` | Parse source, normalize AST/CFG facts, extract units, prepare deterministic analysis inputs | -| Findings | `codeclone/findings/clones/*`, `codeclone/findings/structural/*` | Clone grouping and structural finding derivation | -| Metrics | `codeclone/metrics/*` | Complexity, coupling, cohesion, dependencies, dead code, health, adoption, coverage join, API surface | -| Contracts/domain | `codeclone/contracts/*`, `codeclone/models.py`, `codeclone/domain/*` | Version constants, enums, typed exceptions, shared models, domain taxonomies | -| Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | -| Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, 
`codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | -| Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | -| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | -| MCP surface | `codeclone/surfaces/mcp/*` | Read-only MCP tools/resources over the same pipeline/report contracts | -| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | - -Refs: - -- `codeclone/main.py:main` -- `codeclone/surfaces/cli/workflow.py:_main_impl` -- `codeclone/core/pipeline.py:analyze` -- `codeclone/report/document/builder.py:build_report_document` -- `codeclone/report/html/assemble.py:build_html_report` -- `codeclone/surfaces/mcp/server.py:build_mcp_server` - -## Contracts - -- Core produces facts; renderers present facts. -- `codeclone/report/document/*` is the canonical report source of truth. -- HTML, Markdown, SARIF, text, and MCP are projections over the same canonical report semantics. -- Baseline and cache are persistence contracts, not analysis truth. -- Cache is optimization-only and fail-open. -- MCP is read-only and must not create a second analysis truth path. -- VS Code, Claude Desktop, and Codex plugin surfaces are clients over MCP, not second analyzers. 
- -Refs: - -- `codeclone/report/document/builder.py:build_report_document` -- `codeclone/report/renderers/text.py:render_text_report_document` -- `codeclone/report/renderers/markdown.py:render_markdown_report_document` -- `codeclone/report/renderers/sarif.py:render_sarif_report_document` -- `codeclone/report/html/assemble.py:build_html_report` -- `codeclone/baseline/clone_baseline.py:Baseline.load` -- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.load` -- `codeclone/cache/store.py:Cache.load` - -## Invariants (MUST) - -- Report serialization is deterministic and schema-versioned. -- UI is render-only and must not invent gating semantics. -- Status enums remain domain-owned in baseline/metrics-baseline/cache/contracts modules. -- `codeclone/main.py` stays thin; orchestration lives in `codeclone/surfaces/cli/*`. - -Refs: - -- `codeclone/report/document/integrity.py:_build_integrity_payload` -- `codeclone/report/document/inventory.py:_build_inventory_payload` -- `codeclone/baseline/trust.py:BaselineStatus` -- `codeclone/baseline/_metrics_baseline_contract.py:MetricsBaselineStatus` -- `codeclone/cache/versioning.py:CacheStatus` -- `codeclone/contracts/__init__.py:ExitCode` - -## Failure modes - -| Condition | Layer | -|--------------------------------------------------|----------------------------------------------------------------| -| Invalid CLI args / invalid output path | CLI surface (`codeclone/config/*`, `codeclone/surfaces/cli/*`) | -| Baseline schema/integrity mismatch | Baseline contract layer | -| Metrics baseline schema/integrity mismatch | Metrics-baseline contract layer | -| Cache corruption/version mismatch | Cache contract layer (fail-open) | -| HTML snippet read failure | HTML render layer fallback snippet | -| MCP invalid request / invalid root / unknown run | MCP surface | - -## Determinism / canonicalization - -- File iteration and grouping order are explicit sorts. -- Canonical report integrity excludes non-canonical `derived` payload. 
-- Baseline and cache hashes/signatures use canonical JSON. - -Refs: - -- `codeclone/scanner/__init__.py:iter_py_files` -- `codeclone/report/document/integrity.py:_build_integrity_payload` -- `codeclone/baseline/trust.py:_compute_payload_sha256` -- `codeclone/cache/integrity.py:canonical_json` - -## Locked by tests - -- `tests/test_architecture.py::test_architecture_layer_violations` -- `tests/test_report.py::test_report_json_compact_v21_contract` -- `tests/test_report_contract_coverage.py::test_report_document_rich_invariants_and_renderers` -- `tests/test_html_report.py::test_html_report_uses_core_block_group_facts` -- `tests/test_cache.py::test_cache_v13_uses_relpaths_when_root_set` -- `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` - -## Non-guarantees - -- Internal file splits may evolve in `2.0.x` if public contracts are preserved. -- Package markers and internal helper placement are not contract by themselves. - -## Chapter map - -| Topic | Primary chapters | -|---------------------------------------|------------------------------------------------------------------------------------------------------------------| -| CLI behavior and failure routing | [03-contracts-exit-codes.md](03-contracts-exit-codes.md), [09-cli.md](09-cli.md) | -| Config precedence and defaults | [04-config-and-defaults.md](04-config-and-defaults.md) | -| Core processing pipeline | [05-core-pipeline.md](05-core-pipeline.md) | -| Clone baseline trust/compat/integrity | [06-baseline.md](06-baseline.md) | -| Cache trust and fail-open behavior | [07-cache.md](07-cache.md) | -| Report schema and provenance | [08-report.md](08-report.md), [10-html-render.md](10-html-render.md) | -| MCP agent surface | [20-mcp-interface.md](20-mcp-interface.md) | -| Health score model | [15-health-score.md](15-health-score.md) | -| Metrics gates and metrics baseline | [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) | -| Dead-code liveness policy | 
[16-dead-code-contract.md](16-dead-code-contract.md) | -| Determinism and versioning policy | [12-determinism.md](12-determinism.md), [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) | diff --git a/docs/book/02-terminology.md b/docs/book/01-terminology.md similarity index 86% rename from docs/book/02-terminology.md rename to docs/book/01-terminology.md index 95f4aabe..677275a5 100644 --- a/docs/book/02-terminology.md +++ b/docs/book/01-terminology.md @@ -1,4 +1,9 @@ -# 02. Terminology + + +# 01. Terminology ## Purpose @@ -44,6 +49,11 @@ Refs: ## Contracts - New/known classification is key-based, not heuristic-based. +- `novelty="known"` is baseline-relative: the finding fingerprint is accepted + by the trusted baseline. It does not prove that the current patch did not + introduce or reintroduce that finding. +- Patch-local regressions require clean before-run to after-run evidence + (`compare_runs` / `check_patch_contract(mode="verify")`). - Baseline trust is status-driven. - Cache trust is status-driven and independent from baseline trust. - Design finding universe is determined by the canonical report builder; MCP and HTML read it, never resynthesize it. diff --git a/docs/book/02-architecture-map.md b/docs/book/02-architecture-map.md new file mode 100644 index 00000000..0316bee6 --- /dev/null +++ b/docs/book/02-architecture-map.md @@ -0,0 +1,159 @@ + + +# 02. Architecture Map + +## Purpose + +Document the current module boundaries and ownership in CodeClone `2.1.x`. 
+ +## Public surface + +Main ownership layers: + +- CLI entry and UX orchestration +- Config parsing and pyproject resolution +- Core runtime pipeline +- Analysis and clone grouping +- Metrics and findings +- Baseline/cache persistence contracts +- Canonical report document and deterministic projections +- HTML render-only surface +- Read-only MCP surface with implementation context, structural change control, + and claim validation +- IDE/client surfaces over MCP + +## Data model + +| Layer | Modules | Responsibility | +|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Entry | `codeclone/main.py` | Public CLI entrypoint only | +| CLI surface | `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*` | Parse args, resolve runtime mode, print summaries, write outputs, route exits | +| Report copy | `codeclone/report/messages/*` | Glossary, suggestions, explainability, overview, security, chrome, text/markdown/sarif projections, gate prefixes | +| Config | `codeclone/config/*` | Option specs, parser construction, pyproject loading, CLI > pyproject > defaults merge | +| Core runtime | `codeclone/core/*` | Bootstrap, discovery, worker processing, project metrics, report/gate integration | +| Analysis | `codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*` | Parse source, normalize AST/CFG facts, extract units, prepare deterministic analysis inputs; includes shared blast-radius graph core (`analysis/blast_radius.py`) | +| Findings | `codeclone/findings/clones/*`, `codeclone/findings/structural/*` | Clone grouping and structural finding derivation | +| Metrics | 
`codeclone/metrics/*` | Complexity, coupling, cohesion, dependencies, dead code, health, adoption, coverage join, API surface | +| Contracts/domain | `codeclone/contracts/*`, `codeclone/models.py`, `codeclone/domain/*` | Version constants, enums, typed exceptions, shared models, domain taxonomies | +| Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | +| Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | +| Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | +| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | +| MCP surface | `codeclone/surfaces/mcp/*`, `codeclone/surfaces/mcp/messages/*` | Read-only MCP tools/resources, run-bound implementation-context projections, change-control projections, Engineering Memory retrieval/governance, dev-only Platform Observability slices, and centralized agent-facing copy | +| Engineering Memory | `codeclone/memory/*`, `codeclone/config/memory*.py` | Local SQLite store, scoped retrieval, semantic sidecar, trajectory + Patch Trail projection, Experience distillation, coalesced rebuild jobs, staleness, governance, and CLI/MCP surfaces over deterministic report/git/doc/audit facts | +| Platform Observability | `codeclone/observability/*` | Opt-in operation/span telemetry, local SQLite store, bounded MCP slicer, and CLI JSON/HTML diagnostics; never analysis truth or a gate input | +| Corpus Analytics | `codeclone/analytics/*`, `codeclone/config/analytics.py` | Optional offline intent corpus clustering (`codeclone[analytics]`); audit/trajectory ingestion, separate analytics embeddings, SQLite + LanceDB under `.codeclone/analytics/`; never report/gate/memory authority | +| Controller insights | 
`codeclone/controller_insights/*` | Shared session-stats and audit-trail payloads for CLI `--session-stats` / `--audit` and IDE-only MCP `get_workspace_session_stats` / `get_controller_audit_trail` | +| Audit trail | `codeclone/audit/*` | Optional controller event and MCP payload footprint recording under `.codeclone/db/` when enabled | +| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*`, `plugins/cursor-codeclone/*`, `plugins/claude-code-codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | + +Refs: + +- `codeclone/main.py:main` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/core/pipeline.py:analyze` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/surfaces/mcp/server.py:build_mcp_server` + +## Contracts + +- Core produces facts; renderers present facts. +- `codeclone/report/document/*` is the canonical report source of truth. +- HTML, Markdown, SARIF, text, and MCP are projections over the same canonical report semantics. +- Baseline and cache are persistence contracts, not analysis truth. +- Cache is optimization-only and fail-open. +- MCP is read-only and must not create a second analysis truth path. Change + control, implementation context, and claim guard are projections over stored + run/report semantics, not new analyzers. Implementation-context manifests and + future relationship adjacency remain in-memory/off-report and cannot change + canonical report identity. +- VS Code, Claude Desktop, Claude Code, Codex, and Cursor surfaces are clients + over MCP, not second analyzers. 
+ +Refs: + +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/renderers/text.py:render_text_report_document` +- `codeclone/report/renderers/markdown.py:render_markdown_report_document` +- `codeclone/report/renderers/sarif.py:render_sarif_report_document` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/baseline/clone_baseline.py:Baseline.load` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.load` +- `codeclone/cache/store.py:Cache.load` + +## Invariants (MUST) + +- Report serialization is deterministic and schema-versioned. +- UI is render-only and must not invent gating semantics. +- Status enums remain domain-owned in baseline/metrics-baseline/cache/contracts modules. +- `codeclone/main.py` stays thin; orchestration lives in `codeclone/surfaces/cli/*`. + +Refs: + +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/report/document/inventory.py:_build_inventory_payload` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/baseline/_metrics_baseline_contract.py:MetricsBaselineStatus` +- `codeclone/cache/versioning.py:CacheStatus` +- `codeclone/contracts/__init__.py:ExitCode` + +## Failure modes + +| Condition | Layer | +|--------------------------------------------------|----------------------------------------------------------------| +| Invalid CLI args / invalid output path | CLI surface (`codeclone/config/*`, `codeclone/surfaces/cli/*`) | +| Baseline schema/integrity mismatch | Baseline contract layer | +| Metrics baseline schema/integrity mismatch | Metrics-baseline contract layer | +| Cache corruption/version mismatch | Cache contract layer (fail-open) | +| HTML snippet read failure | HTML render layer fallback snippet | +| MCP invalid request / invalid root / unknown run | MCP surface | + +## Determinism / canonicalization + +- File iteration and grouping order are explicit sorts. +- Canonical report integrity excludes non-canonical `derived` payload. 
+- Baseline and cache hashes/signatures use canonical JSON. + +Refs: + +- `codeclone/scanner/__init__.py:iter_py_files` +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:canonical_json` + +## Locked by tests + +- `tests/test_architecture.py::test_architecture_layer_violations` +- `tests/test_report.py::test_report_json_compact_v21_contract` +- `tests/test_report_contract_coverage.py::test_report_document_rich_invariants_and_renderers` +- `tests/test_html_report.py::test_html_report_uses_core_block_group_facts` +- `tests/test_cache.py::test_cache_v13_uses_relpaths_when_root_set` +- `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` + +## Non-guarantees + +- Internal file splits may evolve in `2.1.x` if public contracts are preserved. +- Package markers and internal helper placement are not contract by themselves. + +## Chapter map + +| Topic | Primary chapters | +|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| CLI behavior and failure routing | [09-exit-codes.md](09-exit-codes.md), [11-cli.md](11-cli.md) | +| Config precedence and defaults | [10-config-and-defaults.md](10-config-and-defaults.md) | +| Core processing pipeline | [03-core-pipeline.md](03-core-pipeline.md) | +| Clone baseline trust/compat/integrity | [07-baseline.md](07-baseline.md) | +| Cache trust and fail-open behavior | [08-cache.md](08-cache.md) | +| Report schema and provenance | [05-report.md](05-report.md), [06-html-render.md](06-html-render.md) | +| MCP agent surface | [25-mcp-interface/index.md](25-mcp-interface/index.md), [14-claim-guard.md](14-claim-guard.md) | +| Engineering Memory 
evidence layers | [13-engineering-memory/index.md](13-engineering-memory/index.md), [13-engineering-memory/trajectory-quality-and-passport.md](13-engineering-memory/trajectory-quality-and-passport.md), [13-engineering-memory/experience-layer.md](13-engineering-memory/experience-layer.md) | +| Platform runtime diagnostics | [26-platform-observability.md](26-platform-observability.md) | +| Corpus analytics (intent clustering) | [27-corpus-analytics.md](27-corpus-analytics.md) | +| Health score model | [15-health-score.md](15-health-score.md) | +| Metrics gates and metrics baseline | [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) | +| Dead-code liveness policy | [17-dead-code-contract.md](17-dead-code-contract.md) | +| Determinism and versioning policy | [22-determinism.md](22-determinism.md), [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) | diff --git a/docs/book/03-contracts-exit-codes.md b/docs/book/03-contracts-exit-codes.md deleted file mode 100644 index 3a7345cb..00000000 --- a/docs/book/03-contracts-exit-codes.md +++ /dev/null @@ -1,85 +0,0 @@ -# 03. Contracts: Exit Codes - -## Purpose - -Define stable process exit semantics and category boundaries. 
- -## Public surface - -- Exit enum: `codeclone/contracts/__init__.py:ExitCode` -- CLI entry: `codeclone/main.py:main` -- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` -- Error markers/formatters: `codeclone/ui_messages/__init__.py` - -## Data model - -| Exit code | Category | Meaning | -|-----------|----------------|-----------------------------------------------------| -| `0` | success | Run completed without gating failures | -| `2` | contract error | Input or contract violation | -| `3` | gating failure | Analysis succeeded but policy failed | -| `5` | internal error | Unexpected exception escaped top-level CLI handling | - -Refs: - -- `codeclone/contracts/__init__.py:ExitCode` -- `codeclone/config/argparse_builder.py:_ArgumentParser.error` - -## Contracts - -- Contract errors use the `CONTRACT ERROR:` marker. -- Gating failures use the `GATING FAILURE:` marker. -- Internal errors use `INTERNAL ERROR:` and hide traceback unless debug is enabled. -- `main()` lets `SystemExit` from contract/gating paths pass through unchanged. - -Refs: - -- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` -- `codeclone/ui_messages/__init__.py:MARKER_INTERNAL_ERROR` -- `codeclone/ui_messages/__init__.py:fmt_contract_error` -- `codeclone/ui_messages/__init__.py:fmt_gating_failure` -- `codeclone/ui_messages/__init__.py:fmt_internal_error` - -## Invariants (MUST) - -- Only non-`SystemExit` exceptions in `main()` become exit `5`. -- In gating mode, unreadable source files win over clone/metric gate failure and force exit `2`. 
- -Refs: - -- `codeclone/main.py:main` -- `codeclone/surfaces/cli/workflow.py:_main_impl` - -## Failure modes - -| Condition | Marker | Exit | -|--------------------------------------------|------------------|------| -| Invalid output extension/path | `CONTRACT ERROR` | `2` | -| Invalid CLI flag combination | `CONTRACT ERROR` | `2` | -| Untrusted baseline in CI/gating | `CONTRACT ERROR` | `2` | -| Unreadable source in CI/gating | `CONTRACT ERROR` | `2` | -| New clones with `--fail-on-new` | `GATING FAILURE` | `3` | -| Threshold or metrics gate breach | `GATING FAILURE` | `3` | -| Unexpected exception in top-level CLI path | `INTERNAL ERROR` | `5` | - -## Determinism / canonicalization - -- Help epilog strings are generated from static constants. -- Error category markers are static constants. - -Refs: - -- `codeclone/contracts/__init__.py:cli_help_epilog` -- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` - -## Locked by tests - -- `tests/test_cli_unit.py::test_cli_help_text_consistency` -- `tests/test_cli_unit.py::test_cli_internal_error_marker` -- `tests/test_cli_unit.py::test_cli_internal_error_debug_flag_includes_traceback` -- `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` -- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` - -## Non-guarantees - -- Exact message body wording may evolve; marker category and exit code are contract. diff --git a/docs/book/05-core-pipeline.md b/docs/book/03-core-pipeline.md similarity index 95% rename from docs/book/05-core-pipeline.md rename to docs/book/03-core-pipeline.md index 55da44f8..d70ec393 100644 --- a/docs/book/05-core-pipeline.md +++ b/docs/book/03-core-pipeline.md @@ -1,4 +1,8 @@ -# 05. Core Pipeline + + +# 03. 
Core Pipeline ## Purpose diff --git a/docs/cfg.md b/docs/book/04-cfg-semantics.md similarity index 88% rename from docs/cfg.md rename to docs/book/04-cfg-semantics.md index b561e3c4..0184d9f9 100644 --- a/docs/cfg.md +++ b/docs/book/04-cfg-semantics.md @@ -1,7 +1,13 @@ -# Control Flow Graph (CFG) — Design and Semantics + -> Scope note: this file is a CFG deep-dive. -> Contract-level guarantees are documented in `docs/book/` (especially `05-core-pipeline.md` and `12-determinism.md`). +# 04. Control Flow Graph (CFG) — Design and Semantics + +> Contract-level guarantees are in [Core Pipeline](03-core-pipeline.md) and +> [Determinism](22-determinism.md). This document describes the **Control Flow Graph (CFG)** model used by **CodeClone**, its design goals, semantics, and known limitations. @@ -183,15 +189,7 @@ CFG v1 guarantees: This is critical for CI usage and baseline comparison. -## Python Tag Consistency for Baseline Checks - -Due to AST differences between interpreter versions, baseline compatibility is pinned to -the same `python_tag` (for example `cp314`), not full patch version equality. - -This keeps clone detection deterministic while allowing patch updates within the same tag. - -CI gating uses the baseline tag policy, while the test matrix validates runtime -compatibility across Python 3.10-3.14. +Python tag consistency: see [Compatibility and Versioning](24-compatibility-and-versioning.md). --- diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md deleted file mode 100644 index cfa3dcfd..00000000 --- a/docs/book/04-config-and-defaults.md +++ /dev/null @@ -1,284 +0,0 @@ -# 04. Config and Defaults - -## Purpose - -Describe effective runtime configuration and defaults that affect behavior. 
- -## Public surface - -- Option specs/defaults: `codeclone/config/spec.py` -- CLI parser and defaults: `codeclone/config/argparse_builder.py:build_parser` -- Pyproject config loader: `codeclone/config/pyproject_loader.py:load_pyproject_config` -- Config resolver: `codeclone/config/resolver.py:resolve_config` -- Effective cache default path logic: `codeclone/surfaces/cli/runtime.py:_resolve_cache_path` -- Metrics-mode selection logic: `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` -- Debug mode sources: `codeclone/surfaces/cli/console.py:_is_debug_enabled` - -## Data model - -Configuration sources, in precedence order: - -1. CLI flags (`argparse`, explicit options only) -2. `pyproject.toml` section `[tool.codeclone]` -3. Code defaults in parser and runtime - -`CODECLONE_DEBUG=1` affects debug diagnostics only and is not part of analysis -or gating configuration precedence. - -Key defaults: - -- `root="."` -- `--min-loc=10` -- `--min-stmt=6` -- `--processes=4` -- `--baseline=codeclone.baseline.json` -- `--max-baseline-size-mb=5` -- `--max-cache-size-mb=50` -- `--coverage-min=50` -- default cache path (when no cache flag is given): `/.cache/codeclone/cache.json` -- `--metrics-baseline=codeclone.baseline.json` (same default path as `--baseline`) -- bare reporting flags use default report paths: - - `--html` -> `/.cache/codeclone/report.html` - - `--json` -> `/.cache/codeclone/report.json` - - `--md` -> `/.cache/codeclone/report.md` - - `--sarif` -> `/.cache/codeclone/report.sarif` - - `--text` -> `/.cache/codeclone/report.txt` - -Fragment-level admission thresholds (pyproject.toml only, advanced tuning): - -- `block_min_loc=20` — minimum function LOC for block-level sliding window -- `block_min_stmt=8` — minimum function statements for block-level sliding window -- `segment_min_loc=20` — minimum function LOC for segment-level sliding window -- `segment_min_stmt=10` — minimum function statements for segment-level sliding window - -Example project-level 
config: - -```toml title="Minimal [tool.codeclone] configuration" -[tool.codeclone] -min_loc = 10 -min_stmt = 6 -baseline = "codeclone.baseline.json" -skip_metrics = true -quiet = true -``` - -Supported `[tool.codeclone]` keys in the current line: - -`Requires / Implies` lists only runtime-enforced relationships from the current -code path. Use `-` when the key has no special dependency contract. - -Analysis: - -| Key | Type | Default | Meaning | Requires / Implies | -|------------------------|---------------|--------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------| -| `min_loc` | `int` | `10` | Minimum function LOC for clone admission | `-` | -| `min_stmt` | `int` | `6` | Minimum function statement count for clone admission | `-` | -| `block_min_loc` | `int` | `20` | Minimum function LOC for block-window analysis | `-` | -| `block_min_stmt` | `int` | `8` | Minimum function statements for block-window analysis | `-` | -| `segment_min_loc` | `int` | `20` | Minimum function LOC for segment analysis | `-` | -| `segment_min_stmt` | `int` | `10` | Minimum function statements for segment analysis | `-` | -| `processes` | `int` | `4` | Worker process count | `-` | -| `cache_path` | `str \| null` | `/.cache/codeclone/cache.json` | Cache file path | `-` | -| `max_cache_size_mb` | `int` | `50` | Maximum accepted cache size before fail-open ignore | `-` | -| `skip_metrics` | `bool` | `false*` | Skip full metrics mode when allowed | Incompatible with metrics gates/update; auto-enabled in some runs* | -| `skip_dead_code` | `bool` | `false` | Skip dead-code analysis | Forced on by `skip_metrics`; overridden by `fail_dead_code` | -| `skip_dependencies` | `bool` | `false` | Skip dependency analysis | Forced on by `skip_metrics`; overridden by `fail_cycles` | -| `golden_fixture_paths` | 
`list[str]` | `[]` | Exclude clone groups fully contained in matching golden test fixtures from health/gates/active findings; keep them as suppressed report facts | Patterns must resolve under `tests/` or `tests/fixtures/` | - -Baseline and CI: - -| Key | Type | Default | Meaning | Requires / Implies | -|---------------------------|--------|---------------------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------| -| `baseline` | `str` | `codeclone.baseline.json` | Clone baseline path | Default target for `metrics_baseline` when not overridden | -| `max_baseline_size_mb` | `int` | `5` | Maximum accepted baseline size | `-` | -| `update_baseline` | `bool` | `false` | Rewrite unified baseline from current run | In unified mode, auto-enables `update_metrics_baseline` unless `skip_metrics=true` | -| `metrics_baseline` | `str` | `codeclone.baseline.json` | Dedicated metrics-baseline path override | Defaults to `baseline` path when not overridden | -| `update_metrics_baseline` | `bool` | `false` | Rewrite metrics baseline from current run | Requires metrics analysis; may auto-enable `update_baseline` for missing shared baseline | -| `ci` | `bool` | `false` | Enable CI preset behavior | Implies `fail_on_new`, `no_color`, `quiet`; enables `fail_on_new_metrics` when a trusted metrics baseline loads | - -Quality gates and metric collection: - -| Key | Type | Default | Meaning | Requires / Implies | -|--------------------------------|---------------|---------|-------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------| -| `fail_on_new` | `bool` | `false` | Fail when new clone groups appear | Requires a trusted clone baseline | -| `fail_threshold` | `int` | `-1` | Fail when clone count exceeds threshold | `-` | -| 
`fail_complexity` | `int` | `-1` | Fail when max cyclomatic complexity exceeds threshold | Incompatible with `skip_metrics` | -| `fail_coupling` | `int` | `-1` | Fail when max CBO exceeds threshold | Incompatible with `skip_metrics` | -| `fail_cohesion` | `int` | `-1` | Fail when max LCOM4 exceeds threshold | Incompatible with `skip_metrics` | -| `fail_cycles` | `bool` | `false` | Fail when dependency cycles are present | Incompatible with `skip_metrics`; forces dependency analysis | -| `fail_dead_code` | `bool` | `false` | Fail when high-confidence dead code is present | Incompatible with `skip_metrics`; forces dead-code analysis | -| `fail_health` | `int` | `-1` | Fail when health score drops below threshold | Incompatible with `skip_metrics` | -| `fail_on_new_metrics` | `bool` | `false` | Fail on new metric hotspots vs trusted metrics baseline | Requires trusted metrics baseline; incompatible with `skip_metrics`; auto-enabled by `ci` when baseline loads | -| `api_surface` | `bool` | `false` | Collect public API inventory/diff facts | Auto-enabled by `fail_on_api_break` | -| `coverage_xml` | `str \| null` | `null` | Join external Cobertura XML to current-run function spans | Enables Coverage Join | -| `coverage_min` | `int` | `50` | Coverage threshold for joined measured coverage hotspots | Used by Coverage Join; meaningful with `coverage_xml` | -| `min_typing_coverage` | `int` | `-1` | Minimum allowed typing coverage percent | Incompatible with `skip_metrics` | -| `min_docstring_coverage` | `int` | `-1` | Minimum allowed docstring coverage percent | Incompatible with `skip_metrics` | -| `fail_on_typing_regression` | `bool` | `false` | Fail on typing coverage regression vs metrics baseline | Requires trusted metrics baseline with adoption snapshot; incompatible with `skip_metrics` | -| `fail_on_docstring_regression` | `bool` | `false` | Fail on docstring coverage regression vs metrics baseline | Requires trusted metrics baseline with adoption snapshot; 
incompatible with `skip_metrics` | -| `fail_on_api_break` | `bool` | `false` | Fail on public API breaking changes vs metrics baseline | Requires trusted metrics baseline with API surface snapshot; incompatible with `skip_metrics`; implies `api_surface` | -| `fail_on_untested_hotspots` | `bool` | `false` | Fail when medium/high-risk functions measured by Coverage Join fall below threshold | Incompatible with `skip_metrics`; requires successful Coverage Join to fire | - -Report outputs and local UX: - -| Key | Type | Default | Meaning | Requires / Implies | -|---------------|---------------|---------|--------------------------------|----------------------------------------| -| `html_out` | `str \| null` | `null` | HTML report output path | `-` | -| `json_out` | `str \| null` | `null` | JSON report output path | `-` | -| `md_out` | `str \| null` | `null` | Markdown report output path | `-` | -| `sarif_out` | `str \| null` | `null` | SARIF report output path | `-` | -| `text_out` | `str \| null` | `null` | Plain-text report output path | `-` | -| `no_progress` | `bool` | `false` | Disable progress UI | Implied by `quiet` | -| `no_color` | `bool` | `false` | Disable colored CLI output | Enabled by `ci` | -| `quiet` | `bool` | `false` | Use compact CLI output | Implies `no_progress`; enabled by `ci` | -| `verbose` | `bool` | `false` | Enable more verbose CLI output | `-` | -| `debug` | `bool` | `false` | Enable debug diagnostics | Also enabled by `CODECLONE_DEBUG=1` | - -This is the exact accepted `[tool.codeclone]` key set from -`codeclone/config/spec.py` and `codeclone/config/pyproject_loader.py`; unknown -keys are contract errors. - -!!! note "Pyproject keys vs CLI flags" - The tables above list `[tool.codeclone]` keys, not CLI flag spellings. - CLI flags may map to the same internal destination under a different name. - Example: `coverage_xml` in `pyproject.toml` corresponds to CLI - `--coverage FILE`. 
The same pattern applies to report outputs such as - `html_out` ↔ `--html` and `json_out` ↔ `--json`. - -!!! warning "Metrics-mode conflicts are enforced" - Metrics update/gating flags are runtime contracts, not hints. Combinations - such as `skip_metrics=true` together with metrics gating or metrics - baseline update flags are contract errors. - -Notes: - -- `skip_metrics=false*`: parser default is `false`, but runtime may auto-enable - it when no metrics work is requested and no metrics baseline exists. -- Report output keys default to `null`; bare CLI flags still write to the - deterministic `.cache/codeclone/report.*` paths listed above. - -CLI always has precedence when option is explicitly provided, including boolean -overrides via `--foo/--no-foo` (e.g. `--no-skip-metrics`). - -Path values loaded from `pyproject.toml` are normalized relative to resolved -scan root when provided as relative paths. - -`golden_fixture_paths` is different: - -- entries are repo-relative glob patterns, not filesystem paths -- they are not normalized to absolute paths -- they must target `tests/` or `tests/fixtures/` -- a clone group is excluded only when every occurrence matches the configured - golden-fixture scope - -Current-run coverage join config: - -- `coverage_xml` is the `[tool.codeclone]` key; the equivalent CLI flag is - `--coverage FILE`. -- `coverage_xml` may be set in `pyproject.toml`; relative paths resolve from - the scan root like other configured paths. -- `coverage_min` and `fail_on_untested_hotspots` follow the same precedence - rules as CLI flags. -- Coverage join remains current-run only and does not persist to baseline. - -Dependency depth config note: - -- `dependency_max_depth` is an observed metric in reports/baselines, not a - CLI or `pyproject.toml` option. -- Dependency depth now uses an internal adaptive profile based on - `avg_depth`, `p95_depth`, and `max_depth` for the internal module graph. 
-- There is no user-facing knob to tune that model in the current `2.x` release - line. - -Metrics baseline path selection contract: - -- Relative `baseline` / `metrics_baseline` paths coming from defaults or - `pyproject.toml` resolve from the analysis root. -- If `--metrics-baseline` is explicitly set, that path is used. -- If `metrics_baseline` in `pyproject.toml` differs from parser default, that - configured path is used even without explicit CLI flag. -- Otherwise, metrics baseline defaults to the clone baseline path. -- In other words, metrics do **not** live in a separate file by default: - the default unified flow uses the same `codeclone.baseline.json` path for - clone and metrics comparison state. - -Refs: - -- `codeclone/config/spec.py` -- `codeclone/config/argparse_builder.py:build_parser` -- `codeclone/config/pyproject_loader.py:load_pyproject_config` -- `codeclone/config/resolver.py:resolve_config` -- `codeclone/surfaces/cli/workflow.py:_main_impl` -- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` - -## Contracts - -- `--ci` is a preset: enables `fail_on_new`, `no_color`, `quiet`. -- In CI mode, if trusted metrics baseline is loaded, runtime also enables - `fail_on_new_metrics`. -- `--quiet` implies `--no-progress`. -- Negative size limits are contract errors. - -Refs: - -- `codeclone/surfaces/cli/workflow.py:_main_impl` - -## Invariants (MUST) - -- Detection thresholds (`min-loc`, `min-stmt`) affect function-level extraction. -- Fragment thresholds (`block_min_loc/stmt`, `segment_min_loc/stmt`) affect block/segment extraction. -- All six thresholds are part of cache compatibility (`payload.ap`). -- Reporting flags (`--html/--json/--md/--sarif/--text`) affect output only. -- Reporting flags accept optional path values; passing bare flag writes to - deterministic default path under `.cache/codeclone/`. -- `--cache-path` overrides project-local cache default; legacy alias `--cache-dir` maps to same destination. 
-- Metrics baseline update/gating flags require metrics mode; incompatible - combinations with `--skip-metrics` are contract errors. -- Unknown keys or invalid value types in `[tool.codeclone]` are contract errors (exit 2). - -Refs: - -- `codeclone/analysis/units.py:extract_units_and_stats_from_source` -- `codeclone/config/spec.py` -- `codeclone/config/argparse_builder.py:build_parser` -- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` - -## Failure modes - -| Condition | Behavior | -|-------------------------------|--------------------| -| Invalid output extension/path | Contract error (2) | -| Invalid root path | Contract error (2) | -| Negative size limits | Contract error (2) | - -Refs: - -- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` -- `codeclone/surfaces/cli/startup.py:resolve_existing_root_path` -- `codeclone/surfaces/cli/workflow.py:_main_impl` - -## Determinism / canonicalization - -- Parser help text and epilog are deterministic constants. -- Summary metric labels are deterministic constants. - -Refs: - -- `codeclone/contracts/__init__.py:cli_help_epilog` -- `codeclone/ui_messages/__init__.py:SUMMARY_LABEL_FILES_FOUND` - -## Locked by tests - -- `tests/test_cli_unit.py::test_cli_help_text_consistency` -- `tests/test_cli_inprocess.py::test_cli_default_cache_dir_uses_root` -- `tests/test_cli_inprocess.py::test_cli_cache_dir_override_respected` -- `tests/test_cli_inprocess.py::test_cli_negative_size_limits_fail_fast` - -## Non-guarantees - -- CLI help section ordering is stable today but not versioned independently from the CLI contract. - -## See also - -- [09-cli.md](09-cli.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) diff --git a/docs/book/08-report.md b/docs/book/05-report.md similarity index 92% rename from docs/book/08-report.md rename to docs/book/05-report.md index 4fc8e20e..7c29cf5f 100644 --- a/docs/book/08-report.md +++ b/docs/book/05-report.md @@ -1,8 +1,13 @@ -# 08. Report + + +# 05. 
Report ## Purpose -Define the canonical report contract for the current 2.0 release line: report +Define the canonical report contract for the current `2.1` release line: report schema `2.11` plus deterministic text/Markdown/SARIF/HTML projections. ## Public surface @@ -93,7 +98,7 @@ Refs: Refs: - `codeclone/report/document/builder.py:build_report_document` -- `codeclone/report/derived.py:_health_snapshot` +- `codeclone/report/document/derived.py:_health_snapshot` - `codeclone/report/overview.py:materialize_report_overview` - `codeclone/report/suggestions.py:generate_suggestions` diff --git a/docs/book/10-html-render.md b/docs/book/06-html-render.md similarity index 91% rename from docs/book/10-html-render.md rename to docs/book/06-html-render.md index 6b648791..65b44dda 100644 --- a/docs/book/10-html-render.md +++ b/docs/book/06-html-render.md @@ -1,4 +1,8 @@ -# 10. HTML Render + + +# 06. HTML Render ## Purpose @@ -13,6 +17,8 @@ Document HTML rendering as a pure view layer over canonical report data. - Snippet/highlight helpers: `codeclone/report/html/widgets/snippets.py` - Sections/widgets/assets: `codeclone/report/html/sections/*`, `codeclone/report/html/widgets/*`, `codeclone/report/html/assets/*` +- User-facing copy catalog: `codeclone/report/messages/*` (glossary, + suggestions, explainability, overview, security, chrome) ## Data model diff --git a/docs/book/06-baseline.md b/docs/book/07-baseline.md similarity index 87% rename from docs/book/06-baseline.md rename to docs/book/07-baseline.md index d582a779..c1911ec6 100644 --- a/docs/book/06-baseline.md +++ b/docs/book/07-baseline.md @@ -1,4 +1,10 @@ -# 06. Baseline + + +# 07. Baseline ## Purpose @@ -46,6 +52,12 @@ Current runtime policy: - new clone baseline saves write schema `2.1` - runtime accepts `1.0`, `2.0`, and `2.1` +- baseline novelty is **baseline-relative**. 
A `known` finding is accepted by + the trusted baseline, but that alone does not prove the current patch did not + introduce or reintroduce it. +- patch-local regression claims require a clean before-run to after-run + comparison (`compare_runs` / `check_patch_contract(mode="verify")`), not a + single run's baseline novelty. Unified-baseline contract: diff --git a/docs/book/07-cache.md b/docs/book/08-cache.md similarity index 66% rename from docs/book/07-cache.md rename to docs/book/08-cache.md index f38fd33e..91e523d4 100644 --- a/docs/book/07-cache.md +++ b/docs/book/08-cache.md @@ -1,8 +1,12 @@ -# 07. Cache + + +# 08. Cache ## Purpose -Define cache schema `2.8`, integrity verification, stale-entry pruning, and +Define cache schema `2.10`, integrity verification, stale-entry pruning, and fail-open behavior. ## Public surface @@ -17,7 +21,7 @@ fail-open behavior. ## Data model -On-disk schema (`v == "2.8"`): +On-disk schema (`v == "2.10"`): - top-level: `v`, `payload`, `sig` - `payload` keys: `py`, `fp`, `ap`, `files`, optional `sr` @@ -27,14 +31,38 @@ On-disk schema (`v == "2.8"`): - `files` stores compact per-file entries with stat signature, extracted units, optional metrics sections (including runtime reachability evidence and report-only `security_surfaces`), - referenced names/qualnames, and cached source stats + referenced names/qualnames, cached source stats, and optional + **`function_relationship_facts`** - `sr` stores optional segment-report projection payload +### `function_relationship_facts` (per-file cache section) + +Cached under the canonical key `function_relationship_facts` in the typed entry; +wire compact key **`fr`**. 
Each file may store zero or more fact rows keyed by +`source_qualname`, each with a sorted list of relationship records: + +| Field | Meaning | +|---------------------|--------------------------------------------------------| +| `relation_kind` | Deterministic relationship classifier from module walk | +| `resolution_status` | Resolved vs deferred boundary for the target | +| `origin_lane` | Which analysis lane produced the edge | +| `target_qualname` | Callee / related symbol qualname | +| `line` | Source line of the relationship site | +| `expression` | Normalized expression text (bounded) | +| `resolution_rule` | Rule id explaining how the target was resolved | + +Facts are derived during unit extraction (`codeclone/analysis/units.py`) and +persisted on cache save when present. On cache hit, discovery rehydrates them +into the processing pipeline (`codeclone/core/discovery.py`) so warm runs preserve +the same function-relationship evidence as cold runs without recomputing AST +facts. Empty sections are omitted from wire entries. + Refs: - `codeclone/cache/store.py:Cache.load` -- `codeclone/cache/_wire_encode.py:_encode_wire_file_entry` -- `codeclone/cache/_wire_decode.py:_decode_wire_file_entry` +- `codeclone/cache/_wire_encode.py:_encode_function_relationship_facts` +- `codeclone/cache/_wire_decode.py:_decode_optional_wire_function_relationship_facts` +- `codeclone/cache/entries.py:_function_relationship_facts_dict_from_model` ## Contracts @@ -66,6 +94,8 @@ Refs: - Cached public-API symbol payloads preserve declared parameter order. - Cached runtime reachability facts are required for cold/warm dead-code equivalence across supported framework registration patterns. +- Cached `function_relationship_facts` round-trip deterministically through wire + encode/decode and preserve relationship ordering within each source qualname. - Legacy `.cache_secret` is warning-only and never used for trust. 
Refs: @@ -94,7 +124,7 @@ CLI behavior: cache failures do not change exit code; analysis continues without Refs: - `codeclone/cache/versioning.py:CacheStatus` -- `codeclone/surfaces/cli/runtime.py:resolve_cache_status` +- `codeclone/cache/store.py:resolve_cache_status` ## Determinism / canonicalization @@ -116,6 +146,7 @@ Refs: - `tests/test_cache.py::test_cache_signature_validation_ignores_json_whitespace` - `tests/test_cache.py::test_cache_signature_mismatch_warns` - `tests/test_cache.py::test_cache_too_large_warns` +- `tests/test_cache.py::test_cache_roundtrip_preserves_function_relationship_facts` - `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` - `tests/test_cli_inprocess.py::test_cli_cache_analysis_profile_compatibility` - `tests/test_core_branch_coverage.py::test_discover_prunes_deleted_cache_entries` diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md deleted file mode 100644 index 3d9fc9f6..00000000 --- a/docs/book/09-cli.md +++ /dev/null @@ -1,138 +0,0 @@ -# 09. CLI - -## Purpose - -Define observable CLI behavior: argument handling, summaries, output writing, -and exit routing. - -!!! note "Observable surface only" - This chapter covers scripting-visible behavior and user-facing CLI output - categories. Rich styling details may evolve as long as markers, exit - semantics, and deterministic output contracts stay stable. 
- -## Public surface - -- Public entrypoint: `codeclone/main.py:main` -- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` -- Parser: `codeclone/config/argparse_builder.py:build_parser` -- Summary renderer: `codeclone/surfaces/cli/summary.py:_print_summary` -- Output path validation and writes: - `codeclone/surfaces/cli/reports_output.py` -- Message catalog: `codeclone/ui_messages/__init__.py` - -## Data model - -CLI modes: - -- normal mode -- gating mode (`--ci`, `--fail-on-new`, explicit metric gates) -- baseline update mode (`--update-baseline`, `--update-metrics-baseline`) - -Summary metrics include: - -- files found/analyzed/cache hits/skipped -- structural counters for lines/functions/methods/classes -- function/block/segment clone groups -- suppressed clone groups from `golden_fixture_paths` -- dead-code active/suppressed status -- dependency depth profile (`avg_depth`, `p95_depth`, `max_depth`) when metrics are computed -- adoption/API/coverage-join facts when computed -- new vs baseline - -Refs: - -- `codeclone/surfaces/cli/summary.py:_print_summary` -- `codeclone/surfaces/cli/runtime.py:_metrics_flags_requested` -- `codeclone/surfaces/cli/runtime.py:_metrics_computed` -- `codeclone/surfaces/cli/report_meta.py:_build_report_meta` - -## Contracts - -- Help output includes canonical exit-code section and project links. -- Bare report flags write to deterministic default paths under `.cache/codeclone/`. -- `--open-html-report` is layered on top of `--html`; it does not imply HTML output. -- `--timestamped-report-paths` rewrites only default report paths requested via bare flags. -- In interactive VS Code terminals, the CLI may print a one-time extension hint - after summary output. The hint is suppressed in `--quiet`, CI, and non-TTY - contexts, and is tracked per CodeClone version next to the resolved project - cache path. 
-- In interactive non-CI runs, the CLI may print one-time migration notes when a - trusted baseline was produced by a release whose dead-code reachability model - is known to be narrower than the current version, such as `2.0.0` -> `2.0.1` - or `2.0.1` -> `2.0.2`. Notes explain expected dead-code count reductions from - refined reachability evidence and are remembered next to the resolved project - cache path. -- Changed-scope review uses: - - `--changed-only` - - `--diff-against` - - `--paths-from-git-diff` -- Contract errors use `CONTRACT ERROR:`. -- Gating failures use `GATING FAILURE:`. -- Internal errors use `fmt_internal_error` and include traceback only in debug mode. - -Refs: - -- `codeclone/contracts/__init__.py:cli_help_epilog` -- `codeclone/ui_messages/__init__.py:fmt_contract_error` -- `codeclone/ui_messages/__init__.py:fmt_internal_error` -- `codeclone/surfaces/cli/changed_scope.py:_validate_changed_scope_args` - -## Invariants (MUST) - -- Report writes are path-validated and write failures are contract errors. -- `--open-html-report` requires `--html`. -- `--timestamped-report-paths` requires at least one requested report output. -- `--changed-only` requires a diff source. -- Browser-open failure after successful HTML write is warning-only. -- In gating mode, unreadable source files are contract errors with higher priority than clone/metric gate failures. - -Refs: - -- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` -- `codeclone/surfaces/cli/reports_output.py:_validate_report_ui_flags` -- `codeclone/surfaces/cli/workflow.py:_main_impl` - -## Failure modes - -!!! warning "Failure precedence" - Contract failures take precedence over gating failures. In CI and scripted - flows, invalid config or unreadable sources must surface as exit `2` before - any clone or metrics gate can fail with exit `3`. 
- -| Condition | User-facing category | Exit | -|-------------------------------------------------------------------|----------------------|------| -| Invalid CLI flag | contract | `2` | -| Invalid output extension/path | contract | `2` | -| Invalid changed-scope flag combination | contract | `2` | -| Baseline untrusted in CI/gating | contract | `2` | -| Coverage/API regression gate without required baseline capability | contract | `2` | -| Unreadable source in CI/gating | contract | `2` | -| New clones with `--fail-on-new` | gating | `3` | -| Threshold or metrics gate exceeded | gating | `3` | -| Unexpected exception | internal | `5` | - -## Determinism / canonicalization - -- Summary metric ordering is fixed. -- Compact summary mode is fixed-format text. -- Help epilog is generated from static constants. -- Git diff path inputs are normalized to sorted repo-relative paths. - -Refs: - -- `codeclone/surfaces/cli/summary.py:_print_summary` -- `codeclone/contracts/__init__.py:cli_help_epilog` -- `codeclone/surfaces/cli/changed_scope.py:_normalize_changed_paths` - -## Locked by tests - -- `tests/test_cli_unit.py::test_cli_help_text_consistency` -- `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` -- `tests/test_cli_inprocess.py::test_cli_summary_format_stable` -- `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` -- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` - -## Non-guarantees - -- Rich styling details are not machine-facing contract. -- Warning phrasing may evolve if category markers and exit semantics stay stable. diff --git a/docs/book/09-exit-codes.md b/docs/book/09-exit-codes.md new file mode 100644 index 00000000..d5fc9195 --- /dev/null +++ b/docs/book/09-exit-codes.md @@ -0,0 +1,106 @@ + + +# 09. Contracts: Exit Codes + +## Purpose + +Define stable process exit semantics and category boundaries. 
+ +## Public surface + +- Exit enum: `codeclone/contracts/__init__.py:ExitCode` +- CLI entry: `codeclone/main.py:main` +- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` +- Error markers/formatters: `codeclone/ui_messages/*` (canonical definitions in + `markers.py` and `formatters.py`; re-exported from `__init__.py`) + +## Data model + +| Exit code | Category | Meaning | +|-----------|----------------|-----------------------------------------------------| +| `0` | success | Run completed without gating failures | +| `2` | contract error | Input or contract violation | +| `3` | gating failure | Analysis succeeded but policy failed | +| `5` | internal error | Unexpected exception escaped top-level CLI handling | + +Refs: + +- `codeclone/contracts/__init__.py:ExitCode` +- `codeclone/config/argparse_builder.py:_ArgumentParser.error` + +## Contracts + +- Contract errors use the `CONTRACT ERROR:` marker. +- Gating failures use the `GATING FAILURE:` marker. +- Internal errors use `INTERNAL ERROR:` and hide traceback unless debug is enabled. +- `main()` lets `SystemExit` from contract/gating paths pass through unchanged. + +Refs: + +- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` +- `codeclone/ui_messages/__init__.py:MARKER_INTERNAL_ERROR` +- `codeclone/ui_messages/__init__.py:fmt_contract_error` +- `codeclone/report/gates/reasons.py:print_gating_failure_block` +- `codeclone/ui_messages/__init__.py:fmt_internal_error` + +## Invariants (MUST) + +- Only non-`SystemExit` exceptions in `main()` become exit `5`. 
+- **Gating mode** is enabled when any of `--ci`, `--fail-on-new`, `--fail-threshold`, + `--fail-complexity`, `--fail-coupling`, `--fail-cohesion`, `--fail-cycles`, + `--fail-dead-code`, `--fail-health`, `--fail-on-new-metrics`, + `--fail-on-typing-regression`, `--fail-on-docstring-regression`, + `--fail-on-api-break`, `--min-typing-coverage`, or `--min-docstring-coverage` + is active (`codeclone/surfaces/cli/runtime.py:gating_mode_enabled`). +- **`--fail-on-untested-hotspots`** is a Coverage Join policy gate (exit `3` when + breached). It requires `--coverage` / `coverage_xml` and is evaluated after + metrics analysis, not via `gating_mode_enabled` unreadable-source precedence. +- In gating mode, unreadable source files produce `CONTRACT ERROR:` and exit `2` + **before** clone/metric gate evaluation (`GATING FAILURE:` is suppressed when + both would apply). + +Refs: + +- `codeclone/main.py:main` +- `codeclone/surfaces/cli/workflow.py:_main_impl` + +## Failure modes + +| Condition | Marker | Exit | +|---------------------------------------------------------------|------------------|------| +| Invalid output extension/path | `CONTRACT ERROR` | `2` | +| Invalid CLI flag combination | `CONTRACT ERROR` | `2` | +| Invalid controller query combination | `CONTRACT ERROR` | `2` | +| `--patch-verify` without trusted baseline | `CONTRACT ERROR` | `2` | +| Untrusted baseline in CI/gating | `CONTRACT ERROR` | `2` | +| Unreadable source in CI/gating | `CONTRACT ERROR` | `2` | +| New clones with `--fail-on-new` | `GATING FAILURE` | `3` | +| Blocking `--patch-verify` violation | `GATING FAILURE` | `3` | +| Threshold or metrics gate breach | `GATING FAILURE` | `3` | +| Untested coverage hotspots with `--fail-on-untested-hotspots` | `GATING FAILURE` | `3` | +| Unexpected exception in top-level CLI path | `INTERNAL ERROR` | `5` | + +## Determinism / canonicalization + +- Help epilog strings are generated from static constants. +- Error category markers are static constants. 
+ +Refs: + +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` + +## Locked by tests + +- `tests/test_cli_unit.py::test_cli_help_text_consistency` +- `tests/test_cli_unit.py::test_cli_internal_error_marker` +- `tests/test_cli_unit.py::test_cli_internal_error_debug_flag_includes_traceback` +- `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` +- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` + +## Non-guarantees + +- Exact message body wording may evolve; marker category and exit code are contract. diff --git a/docs/book/10-config-and-defaults.md b/docs/book/10-config-and-defaults.md new file mode 100644 index 00000000..4c10ff0b --- /dev/null +++ b/docs/book/10-config-and-defaults.md @@ -0,0 +1,598 @@ + + +# 10. Config and Defaults + +## Purpose + +Describe effective runtime configuration and defaults that affect behavior. + +## Public surface + +- Option specs/defaults: `codeclone/config/spec.py` +- CLI parser and defaults: `codeclone/config/argparse_builder.py:build_parser` +- Pyproject config loader: `codeclone/config/pyproject_loader.py:load_pyproject_config` +- Config resolver: `codeclone/config/resolver.py:resolve_config` +- Effective cache default path logic: `codeclone/surfaces/cli/runtime.py:_resolve_cache_path` +- Metrics-mode selection logic: `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` +- Debug mode sources: `codeclone/surfaces/cli/console.py:_is_debug_enabled` + +## Data model + +Configuration sources for the main `codeclone` CLI scan, in precedence order: + +1. CLI flags (`argparse`, explicit options only) +2. `pyproject.toml` section `[tool.codeclone]` +3. 
Code defaults in parser and runtime + +Engineering Memory, the workspace intent registry, MCP session TTL/lease, and +other subsystems listed in [Environment variable overrides](#environment-variable-overrides) +may also read `CODECLONE_*` variables. Those overrides apply only to the +documented fields and do not change main CLI precedence unless noted. + +Key defaults: + +- `root="."` +- `--min-loc=10` +- `--min-stmt=6` +- `--processes=4` +- `--baseline=codeclone.baseline.json` +- `--max-baseline-size-mb=5` +- `--max-cache-size-mb=50` +- `--coverage-min=50` +- default cache path (when no cache flag is given): `<root>/.codeclone/cache.json` +- `--metrics-baseline=codeclone.baseline.json` (same default path as `--baseline`) +- bare reporting flags use default report paths: + - `--html` -> `<root>/.codeclone/report.html` + - `--json` -> `<root>/.codeclone/report.json` + - `--md` -> `<root>/.codeclone/report.md` + - `--sarif` -> `<root>/.codeclone/report.sarif` + - `--text` -> `<root>/.codeclone/report.txt` +- legacy locations (CLI warns, does not migrate automatically): + - home cache: `~/.cache/codeclone/cache.json` when it differs from the project cache path + - repo workspace: non-empty `<root>/.cache/codeclone/` from releases before `2.1.0a1` + +Fragment-level admission thresholds (pyproject.toml only, advanced tuning): + +- `block_min_loc=20` — minimum function LOC for block-level sliding window +- `block_min_stmt=8` — minimum function statements for block-level sliding window +- `segment_min_loc=20` — minimum function LOC for segment-level sliding window +- `segment_min_stmt=10` — minimum function statements for segment-level sliding window + +Example project-level config: + +```toml title="Minimal [tool.codeclone] configuration" +[tool.codeclone] +min_loc = 10 +min_stmt = 6 +baseline = "codeclone.baseline.json" +skip_metrics = true +quiet = true +``` + +Supported `[tool.codeclone]` keys in the current line: + +`Requires / Implies` lists only runtime-enforced relationships from the current +code path. 
Use `-` when the key has no special dependency contract. + +Analysis: + +| Key | Type | Default | Meaning | Requires / Implies | +|------------------------|---------------|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------| +| `min_loc` | `int` | `10` | Minimum function LOC for clone admission | `-` | +| `min_stmt` | `int` | `6` | Minimum function statement count for clone admission | `-` | +| `block_min_loc` | `int` | `20` | Minimum function LOC for block-window analysis | `-` | +| `block_min_stmt` | `int` | `8` | Minimum function statements for block-window analysis | `-` | +| `segment_min_loc` | `int` | `20` | Minimum function LOC for segment analysis | `-` | +| `segment_min_stmt` | `int` | `10` | Minimum function statements for segment analysis | `-` | +| `processes` | `int` | `4` | Worker process count | `-` | +| `cache_path` | `str \| null` | `<root>/.codeclone/cache.json` | Cache file path | `-` | +| `max_cache_size_mb` | `int` | `50` | Maximum accepted cache size before fail-open ignore | `-` | +| `skip_metrics` | `bool` | `false*` | Skip full metrics mode when allowed | Incompatible with metrics gates/update; auto-enabled in some runs* | +| `skip_dead_code` | `bool` | `false` | Skip dead-code analysis | Forced on by `skip_metrics`; overridden by `fail_dead_code` | +| `skip_dependencies` | `bool` | `false` | Skip dependency analysis | Forced on by `skip_metrics`; overridden by `fail_cycles` | +| `golden_fixture_paths` | `list[str]` | `[]` | Exclude clone groups fully contained in matching golden test fixtures from health/gates/active findings; keep them as suppressed report facts | Patterns must resolve under `tests/` or `tests/fixtures/` | + +Baseline and CI: + +| Key | Type | Default | Meaning | Requires / Implies | 
+|---------------------------|--------|---------------------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------| +| `baseline` | `str` | `codeclone.baseline.json` | Clone baseline path | Default target for `metrics_baseline` when not overridden | +| `max_baseline_size_mb` | `int` | `5` | Maximum accepted baseline size | `-` | +| `update_baseline` | `bool` | `false` | Rewrite unified baseline from current run | In unified mode, auto-enables `update_metrics_baseline` unless `skip_metrics=true` | +| `metrics_baseline` | `str` | `codeclone.baseline.json` | Dedicated metrics-baseline path override | Defaults to `baseline` path when not overridden | +| `update_metrics_baseline` | `bool` | `false` | Rewrite metrics baseline from current run | Requires metrics analysis; may auto-enable `update_baseline` for missing shared baseline | +| `ci` | `bool` | `false` | Enable CI preset behavior | Implies `fail_on_new`, `no_color`, `quiet`; enables `fail_on_new_metrics` when a trusted metrics baseline loads | + +Quality gates and metric collection: + +| Key | Type | Default | Meaning | Requires / Implies | +|--------------------------------|---------------|---------|-------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------| +| `fail_on_new` | `bool` | `false` | Fail when new clone groups appear | Requires a trusted clone baseline | +| `fail_threshold` | `int` | `-1` | Fail when clone count exceeds threshold | `-` | +| `fail_complexity` | `int` | `-1` | Fail when max cyclomatic complexity exceeds threshold | Incompatible with `skip_metrics` | +| `fail_coupling` | `int` | `-1` | Fail when max CBO exceeds threshold | Incompatible with `skip_metrics` | +| `fail_cohesion` | `int` | `-1` | Fail when max LCOM4 exceeds threshold | 
Incompatible with `skip_metrics` | +| `fail_cycles` | `bool` | `false` | Fail when dependency cycles are present | Incompatible with `skip_metrics`; forces dependency analysis | +| `fail_dead_code` | `bool` | `false` | Fail when high-confidence dead code is present | Incompatible with `skip_metrics`; forces dead-code analysis | +| `fail_health` | `int` | `-1` | Fail when health score drops below threshold | Incompatible with `skip_metrics` | +| `fail_on_new_metrics` | `bool` | `false` | Fail on new metric hotspots vs trusted metrics baseline | Requires trusted metrics baseline; incompatible with `skip_metrics`; auto-enabled by `ci` when baseline loads | +| `api_surface` | `bool` | `false` | Collect public API inventory/diff facts | Auto-enabled by `fail_on_api_break` | +| `coverage_xml` | `str \| null` | `null` | Join external Cobertura XML to current-run function spans | Enables Coverage Join | +| `coverage_min` | `int` | `50` | Coverage threshold for joined measured coverage hotspots | Used by Coverage Join; meaningful with `coverage_xml` | +| `min_typing_coverage` | `int` | `-1` | Minimum allowed typing coverage percent | Incompatible with `skip_metrics` | +| `min_docstring_coverage` | `int` | `-1` | Minimum allowed docstring coverage percent | Incompatible with `skip_metrics` | +| `fail_on_typing_regression` | `bool` | `false` | Fail on typing coverage regression vs metrics baseline | Requires trusted metrics baseline with adoption snapshot; incompatible with `skip_metrics` | +| `fail_on_docstring_regression` | `bool` | `false` | Fail on docstring coverage regression vs metrics baseline | Requires trusted metrics baseline with adoption snapshot; incompatible with `skip_metrics` | +| `fail_on_api_break` | `bool` | `false` | Fail on public API breaking changes vs metrics baseline | Requires trusted metrics baseline with API surface snapshot; incompatible with `skip_metrics`; implies `api_surface` | +| `fail_on_untested_hotspots` | `bool` | `false` | Fail when 
medium/high-risk functions measured by Coverage Join fall below threshold | Incompatible with `skip_metrics`; requires successful Coverage Join to fire | + +Report outputs and local UX: + +| Key | Type | Default | Meaning | Requires / Implies | +|---------------|---------------|---------|--------------------------------|-----------------------------------------------------------------------------------------| +| `html_out` | `str \| null` | `null` | HTML report output path | `-` | +| `json_out` | `str \| null` | `null` | JSON report output path | `-` | +| `md_out` | `str \| null` | `null` | Markdown report output path | `-` | +| `sarif_out` | `str \| null` | `null` | SARIF report output path | `-` | +| `text_out` | `str \| null` | `null` | Plain-text report output path | `-` | +| `no_progress` | `bool` | `false` | Disable progress UI | Implied by `quiet` | +| `no_color` | `bool` | `false` | Disable colored CLI output | Enabled by `ci` | +| `quiet` | `bool` | `false` | Use compact CLI output | Implies `no_progress`; enabled by `ci` | +| `verbose` | `bool` | `false` | Enable more verbose CLI output | `-` | +| `debug` | `bool` | `false` | Enable debug diagnostics | Also enabled by `CODECLONE_DEBUG`; see [env overrides](#environment-variable-overrides) | + +Controller audit trail: + +| Key | Type | Default | Meaning | Requires / Implies | +|-------------------------|--------|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------| +| `audit_enabled` | `bool` | `false` | Enable the optional local controller audit trail | Required for `--audit` output | +| `audit_path` | `str` | `.codeclone/db/audit.sqlite3` | SQLite audit database path, relative to the analysis root; stored under `db/` to separate controller state from report/cache artifacts | Used only when `audit_enabled=true` | +| 
`audit_payloads` | `str` | `compact` | Audit payload mode: `off`, `compact`, or `full`. Compact omits large fields but keeps `intent_description` on `intent.declared`; row `summary` always stores a short essence | Used only when `audit_enabled=true` | +| `audit_retention_days` | `int` | `30` | Retention window for audit rows | Used only when `audit_enabled=true` | +| `audit_token_estimator` | `str` | `chars_approx` | Audit payload token estimator: default `chars_approx`, or explicit `tiktoken` opt-in with `codeclone[token-bench]` | Used only when `audit_enabled=true` | + +Workspace intent registry: + +| Key | Type | Default | Meaning | Requires / Implies | +|----------------------------------|-------|---------------------------------|-------------------------------------------------------------------------------------|-------------------------------------------------| +| `intent_registry_backend` | `str` | `file` | Workspace intent storage backend: `file` or `sqlite` | MCP workspace coordination | +| `intent_registry_path` | `str` | `.codeclone/db/intents.sqlite3` | SQLite registry database path, relative to the analysis root | Used only when `intent_registry_backend=sqlite` | +| `intent_registry_retention_days` | `int` | `14` | Retention window for closed SQLite intent rows; any positive value (no edition cap) | Used only when `intent_registry_backend=sqlite` | + +Retention is configurable to any positive number of days; there is no edition +cap. Managed/hosted retention (central storage, backup, compliance) is a roadmap +Team/Enterprise option — see [Plans and Retention](../plans-and-retention.md). + +### Engineering Memory (nested tables) + +Keys under `[tool.codeclone.memory]` and `[tool.codeclone.memory.semantic]` are +**not** part of the root `[tool.codeclone]` table above. They are validated by +`codeclone/config/memory.py` / `SemanticConfig` and documented in +[Engineering Memory](13-engineering-memory/index.md). 
+ +Trajectory / projection keys (defaults from `codeclone/config/memory_defaults.py`): + +| Key | Default | Meaning | +|----------------------------------------------|------------------------------------------------|---------------------------------------------------------------------| +| `backend` | `sqlite` | Memory store backend (`sqlite` or `postgres`) | +| `db_path` | `.codeclone/memory/engineering_memory.sqlite3` | SQLite path under repo root | +| `mcp_sync_policy` | `bootstrap_if_missing` | MCP auto-bootstrap when store missing (`off`, `refresh_when_stale`) | +| `active_retention_days` | `-1` | Active record retention (`-1` = no expiry) | +| `stale_retention_days` | `180` | Stale record retention before vacuum | +| `draft_retention_days` | `14` | Draft candidate retention | +| `rejected_retention_days` | `30` | Rejected draft retention | +| `archived_retention_days` | `365` | Archived record retention | +| `receipt_retention_days` | `90` | Patch Trail receipt retention | +| `max_records` | `10000` | Hard cap on active memory rows | +| `max_candidates` | `1000` | Draft candidate cap | +| `max_evidence_per_record` | `20` | Evidence links per record | +| `max_statement_chars` | `1000` | Hard statement length cap | +| `max_blast_radius_cache_entries` | `500` | Blast-radius cache size | +| `git_hotspot_period_days` | `90` | Git hotspot lookback window | +| `git_hotspot_min_changes` | `5` | Minimum commits to qualify as hotspot | +| `trajectories_enabled` | `true` | Gate trajectory rebuild and retrieval | +| `trajectory_retention_days` | `365` | Retention hint for vacuum | +| `projection_rebuild_policy` | `off` | `enqueue_when_stale` enqueues worker on accepted finish | +| `projection_rebuild_running_timeout_seconds` | `1800` | Stale running job timeout | +| `projection_rebuild_spawn_worker` | `true` | Spawn worker on enqueue | +| `projection_rebuild_coalesce_window_seconds` | `60` | Batch sub-threshold rebuilds within window (`0` = immediate spawn) | +| 
`projection_rebuild_coalesce_min_delta` | `25` | Active-record delta that bypasses coalesce window | +| `trajectory_export_enabled` | `false` | Gate CLI JSONL export | +| `trajectory_export_include_payloads` | `false` | Include step payloads in export rows | +| `trajectory_export_max_record_bytes` | `65536` | Per export row cap | +| `trajectory_export_max_file_bytes` | `10485760` | Output file cap | + +| Semantic field | Default | Meaning | +|-------------------------------------|---------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `enabled` | `false` | Turn on LanceDB sidecar indexing and search blend | +| `backend` | `lancedb` | Vector backend (only `lancedb` today) | +| `index_path` | `.codeclone/memory/semantic_index.lance` | Sidecar directory | +| `embedding_provider` | `diagnostic` | `diagnostic` (hash vectors, not semantic quality), `fastembed` (local semantic-quality provider), `local_model`, `api` | +| `embedding_model` | `null` (`BAAI/bge-small-en-v1.5` for `fastembed`) | Optional provider model name | +| `embedding_cache_dir` | `.codeclone/memory/fastembed` | Local model cache used by `fastembed` | +| `allow_model_download` | `false` | Permit `fastembed` to download a missing model instead of requiring a pre-populated cache | +| `dimension` | `256` (`384` for `fastembed`) | Vector size; must match the provider model | +| `max_results` | `20` | Cap for vector `k` and merged search ranking | +| `index_audit` | `true` | Index bounded audit `summary` rows when audit DB exists | +| `embed_max_documents_per_batch` | `64` | Max documents per embedding batch | +| `embed_max_padded_tokens_per_batch` | `8192` | Max padded tokens per embedding batch | +| `projection_token_estimator` | `chars_approx` | Token estimate for semantic projection (`chars_approx` or `tiktoken`) | + +Semantic-quality local search requires both the LanceDB sidecar and 
FastEmbed: +install `codeclone[semantic-local]` (or combine `semantic-lancedb` + +`semantic-fastembed`), set `embedding_provider = "fastembed"`, then run +`codeclone memory semantic rebuild` after enabling. `semantic-lancedb` alone can +build the sidecar with the diagnostic hash provider, which is deterministic but +not semantic-quality recall. + +This is the exact accepted `[tool.codeclone]` key set from +`codeclone/config/spec.py` and `codeclone/config/pyproject_loader.py`; unknown +keys are contract errors. + +!!! note "Pyproject keys vs CLI flags" + The tables above list `[tool.codeclone]` keys, not CLI flag spellings. + CLI flags may map to the same internal destination under a different name. + Example: `coverage_xml` in `pyproject.toml` corresponds to CLI + `--coverage FILE`. The same pattern applies to report outputs such as + `html_out` ↔ `--html` and `json_out` ↔ `--json`. + + CLI-only flags (no `[tool.codeclone]` key; authoritative spelling in + `tests/fixtures/contract_snapshots/cli_help.txt`): + + | CLI flag | Group | Meaning | + |----------------------------------|---------------|------------------------------------------------------------------------------------| + | `--changed-only` | Analysis | Limit clone gating/summaries to git-selected files | + | `--diff-against GIT_REF` | Analysis | Resolve changed files from `git diff --name-only GIT_REF`; requires `--changed-only` | + | `--paths-from-git-diff GIT_REF` | Analysis | Shorthand for `--changed-only` + git diff selection | + | `--blast-radius FILE [FILE ...]` | Analysis | Render structural blast radius for given files after analysis | + | `--patch-verify` | Analysis | Verify current patch against trusted clone baseline-relative budget | + | `--strictness LEVEL` | Analysis | `ci`, `strict`, or `relaxed`; valid only with `--patch-verify` (default: `ci`) | + | `--session-stats` | Analysis | Show workspace session status; read-only | + | `--audit` | Analysis | Show local Controller audit trail; requires 
`audit_enabled=true` | + | `--audit-json` | Analysis | JSON audit footprint; uses audit collector; requires `audit_enabled=true` | + | `--cache-dir [FILE]` | Analysis | Legacy alias for `--cache-path` | + | `--timestamped-report-paths` | Reporting | Append UTC timestamp to default report filenames | + | `--open-html-report` | Output and UI | Open generated HTML in browser; requires `--html` | + | `--progress` | Output and UI | Force-enable progress output | + | `--color` | Output and UI | Force-enable ANSI colors | + + Canonical help text, defaults, and exit-code epilog are locked by + `tests/test_cli_help_snapshot.py` and `tests/test_cli_unit.py::test_cli_help_text_consistency`. + + #### Controller and workspace query combinations + + Enforced by `codeclone/surfaces/cli/workflow.py:_validate_controller_query_flags`: + + | Combination | Result | + |-------------|--------| + | `--blast-radius` + `--patch-verify` | contract error | + | `--session-stats` + explicit `--audit` | contract error | + | `--session-stats` + `--blast-radius` or `--patch-verify` | contract error | + | explicit `--audit` + `--blast-radius` or `--patch-verify` | contract error | + | any controller query + `--changed-only`, `--diff-against`, or `--paths-from-git-diff` | contract error | + | any controller query + report output flags | contract error | + | any controller query + `--update-baseline` / `--update-metrics-baseline` | contract error | + | `--strictness` without `--patch-verify` (when `--strictness` is explicit on argv) | contract error | + + Notes: + + - `--patch-verify` cannot scope via `--diff-against`; use changed-scope flags for + git-selected review, or `--patch-verify` alone for baseline-relative terminal check. + - `--audit-json` selects JSON audit output but does **not** set the `--audit` flag + for combination validation. `--session-stats` blocks only explicit `--audit`. 
+ - Pre-analysis queries (`--session-stats`, `--audit`, `--audit-json`) exit before + analysis; only the first matching mode runs per invocation. + - `--audit` and `--audit-json` both require `audit_enabled=true` in effective config. + +!!! warning "Metrics-mode conflicts are enforced" + Metrics update/gating flags are runtime contracts, not hints. Combinations + such as `skip_metrics=true` together with metrics gating or metrics + baseline update flags are contract errors. + + Notes: + + - `skip_metrics=false*`: parser default is `false`, but runtime may auto-enable + it when no metrics work is requested and no metrics baseline exists. + - Report output keys default to `null`; bare CLI flags still write to the + deterministic `.codeclone/report.*` paths listed above. + + CLI always has precedence when option is explicitly provided, including boolean + overrides via `--foo/--no-foo` (e.g. `--no-skip-metrics`). + + Path values loaded from `pyproject.toml` are normalized relative to resolved + scan root when provided as relative paths. + + `golden_fixture_paths` is different: + + - entries are repo-relative glob patterns, not filesystem paths + - they are not normalized to absolute paths + - they must target `tests/` or `tests/fixtures/` + - a clone group is excluded only when every occurrence matches the configured + golden-fixture scope + + Current-run coverage join config: + + - `coverage_xml` is the `[tool.codeclone]` key; the equivalent CLI flag is + `--coverage FILE`. + - `coverage_xml` may be set in `pyproject.toml`; relative paths resolve from + the scan root like other configured paths. + - `coverage_min` and `fail_on_untested_hotspots` follow the same precedence + rules as CLI flags. + - Coverage join remains current-run only and does not persist to baseline. + + Dependency depth config note: + + - `dependency_max_depth` is an observed metric in reports/baselines, not a + CLI or `pyproject.toml` option. 
+ - Dependency depth now uses an internal adaptive profile based on + `avg_depth`, `p95_depth`, and `max_depth` for the internal module graph. + - There is no user-facing knob to tune that model in the current `2.x` release + line. + + Metrics baseline path selection contract: + + - Relative `baseline` / `metrics_baseline` paths coming from defaults or + `pyproject.toml` resolve from the analysis root. + - If `--metrics-baseline` is explicitly set, that path is used. + - If `metrics_baseline` in `pyproject.toml` differs from parser default, that + configured path is used even without explicit CLI flag. + - Otherwise, metrics baseline defaults to the clone baseline path. + - In other words, metrics do **not** live in a separate file by default: + the default unified flow uses the same `codeclone.baseline.json` path for + clone and metrics comparison state. + + Refs: + + - `codeclone/config/spec.py` + - `codeclone/config/argparse_builder.py:build_parser` + - `codeclone/config/pyproject_loader.py:load_pyproject_config` + - `codeclone/config/resolver.py:resolve_config` + - `codeclone/surfaces/cli/workflow.py:_main_impl` + - `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` + +## Environment variable overrides + +Single home for all `CODECLONE_*` environment variables. Other chapters link here +instead of duplicating tables. + +**Truthy values** (where noted): `1`, `true`, `yes`, `on` (case-insensitive). +**Falsy values** (where noted): `0`, `false`, `no`, `off`. 
+ +### Precedence by subsystem + +| Subsystem | Resolver | Precedence when an env var is set | +|----------------------------------|--------------------------------------------------|--------------------------------------------------------------------------------------------| +| Main CLI scan | `resolve_config` | CLI > pyproject > defaults; only `CODECLONE_DEBUG` applies from env | +| Engineering Memory | `resolve_memory_config` | Documented env > `[tool.codeclone.memory]` / `[tool.codeclone.memory.semantic]` > defaults | +| Workspace intent registry | `resolve_intent_registry_config` | Documented env > `[tool.codeclone]` registry keys > defaults | +| MCP workspace intent TTL / lease | `resolved_ttl_seconds`, `resolved_lease_seconds` | Explicit MCP tool parameter > env > built-in default | +| Finish hygiene strict mode | `_strict_finish_enabled` | Env only (no pyproject key) | +| Platform Observability | `resolve_observability_config` | Env only; disabled by default, no pyproject table | +| Corpus Analytics | `resolve_analytics_config` | `[tool.codeclone.analytics]` > built-in defaults; no env overrides in Slice 1 | +| Cursor / IDE hooks | hook helpers | Env > repo config file (where noted) > built-in default | + +There is no generic `CODECLONE_MEMORY__*` nested env convention. Each variable +name is flat and listed below. + +### Diagnostics + +| Variable | Values | Effect | +|-------------------|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `CODECLONE_DEBUG` | exactly `1` enables | Turns on CLI debug diagnostics (`codeclone/surfaces/cli/console.py`). Other truthy strings such as `true` do **not** enable debug. Independent of analysis, gating, and `[tool.codeclone] debug`. | + +### Platform Observability + +Platform Observability is environment-only and disabled by default. 
It has no +`[tool.codeclone.observability]` table. See +[Platform Observability](26-platform-observability.md) for the data and trust +contracts. + +### Corpus Analytics + +Optional intent corpus clustering uses `[tool.codeclone.analytics]`. Install +`codeclone[analytics]` before running `codeclone analytics …`. Paths resolve +under the repository root. The historical audit source is inherited from the +top-level `[tool.codeclone].audit_path`; it is not duplicated in the analytics +table. `embedding_provider` currently accepts only `fastembed`, and +`default_cluster_selection_method` accepts `eom` or `leaf`. Full key list: +[Corpus Analytics](27-corpus-analytics.md#configuration). + +Profile control-plane keys (`default_profile_id`, `profile_paths`, and the +non-profile `sweep_*` axes) live in the same nested table and are documented in +[Corpus Analytics — Configuration](27-corpus-analytics.md#configuration). Profile +manifests are validated with the same schema as bundled profiles. Configured +paths must resolve to readable files inside the repository. Profile sweeps use +the manifest grid; the sweep keys configure only non-profile sweeps. + +Refs: + +- `codeclone/config/analytics.py:resolve_analytics_config` +- `codeclone/config/pyproject_loader.py:load_pyproject_config` + +### Platform Observability (environment) + +| Variable | Values | Effect | +|-------------------------------------------------|----------------|---------------------------------------------------------------------| +| `CODECLONE_OBSERVABILITY_ENABLED` | truthy / falsy | Enable local operation/span instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE` | truthy / falsy | Lift the CI collection guard; does not enable collection by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE` | truthy / falsy | Capture process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST` | truthy / falsy | Persist completed operations; default true when enabled. 
| +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES` | truthy / falsy | Capture bounded size/token estimates; default true. | +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT` | reserved | Rejected; raw payload snapshots are unsupported. | +| `CODECLONE_OBSERVABILITY_CORRELATION_ID` | internal ID | Worker handoff for cross-process correlation; set by CodeClone. | +| `CODECLONE_OBSERVABILITY_PARENT_OPERATION_ID` | internal ID | Worker handoff for the parent operation; set by CodeClone. | + +The internal correlation variables are launcher/worker protocol, not operator +tuning knobs. + +### Engineering Memory + +Overrides `[tool.codeclone.memory]` and `[tool.codeclone.memory.semantic]` for the +listed field only. Paths resolve under the repository root like pyproject paths. + +| Variable | Values | Overrides | Effect | +|--------------------------------------------------|-------------------------------------------------|----------------------------------------|-------------------------------------------------------------------------------------------| +| `CODECLONE_MEMORY_DB_PATH` | repo-relative or absolute path under root | `memory.db_path` | SQLite Engineering Memory store location | +| `CODECLONE_PROJECTION_REBUILD_POLICY` | `off`, `enqueue_when_stale` | `memory.projection_rebuild_policy` | When accepted MCP finish may enqueue async trajectory/semantic/Experience projection jobs | +| `CODECLONE_MEMORY_SEMANTIC_ENABLED` | `true` / `false` | `memory.semantic.enabled` | Turn semantic index sidecar on or off | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_PROVIDER` | `diagnostic`, `fastembed`, `local_model`, `api` | `memory.semantic.embedding_provider` | Embedding backend for semantic rebuild/search | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_MODEL` | model name string | `memory.semantic.embedding_model` | Provider model id (for example FastEmbed model name) | +| `CODECLONE_MEMORY_SEMANTIC_EMBEDDING_CACHE_DIR` | path | `memory.semantic.embedding_cache_dir` | Local ONNX/model 
cache directory for FastEmbed | +| `CODECLONE_MEMORY_SEMANTIC_ALLOW_MODEL_DOWNLOAD` | `true` / `false` | `memory.semantic.allow_model_download` | When `false`, FastEmbed requires a pre-populated cache | +| `CODECLONE_MEMORY_SEMANTIC_INDEX_PATH` | path | `memory.semantic.index_path` | LanceDB semantic sidecar directory | + +Memory keys without a documented env override (for example retention caps, +`projection_rebuild_spawn_worker`, and projection coalesce settings) are +pyproject-only. + +Refs: `codeclone/config/memory.py`, `codeclone/config/memory_defaults.py`. + +### Workspace intent registry + +Overrides `[tool.codeclone]` registry keys. Used by MCP workspace coordination +and local hook gate reads. + +| Variable | Values | Overrides | Effect | +|--------------------------------------------|-----------------------------------------|----------------------------------|------------------------------------------------------------------------| +| `CODECLONE_INTENT_REGISTRY_BACKEND` | `file`, `sqlite` | `intent_registry_backend` | File-per-intent JSON under `.codeclone/intents/` vs SQLite WAL backend | +| `CODECLONE_INTENT_REGISTRY_PATH` | `.sqlite3` / `.db` path under repo root | `intent_registry_path` | SQLite database path when backend is `sqlite` | +| `CODECLONE_INTENT_REGISTRY_RETENTION_DAYS` | integer `>= 1` | `intent_registry_retention_days` | Closed-row retention for SQLite backend purge | + +Refs: `codeclone/config/intent_registry.py`. 
+ +### MCP session and change-control hygiene + +| Variable | Values | Applies when | Effect | +|----------------------------------|----------------|--------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------| +| `CODECLONE_INTENT_TTL_SECONDS` | `60`–`86400` | `start_controlled_change` / workspace registry write when tool `ttl_seconds` omitted | Hard maximum lifetime of a workspace intent record (default `3600`) | +| `CODECLONE_INTENT_LEASE_SECONDS` | `60`–`600` | Workspace registry lease renewal when tool `lease_seconds` omitted | Ownership freshness window renewed by active MCP use (default `300`) | +| `CODECLONE_STRICT_FINISH` | truthy / falsy | `finish_controlled_change` hygiene | When truthy, unattributed out-of-scope dirty may set `finish_block_reason: own_unscoped_dirty` and block finish; default is advisory only | + +Explicit `ttl_seconds` / `lease_seconds` on MCP tools take precedence over the +matching env var. + +Refs: `codeclone/surfaces/mcp/_workspace_intents.py`, +`codeclone/surfaces/mcp/_workspace_hygiene.py`. + +### MCP HTTP authentication + +| Variable | Values | Effect | +|----------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `CODECLONE_MCP_AUTH_TOKEN` | string, minimum 32 characters | **Required** for every `streamable-http` start (loopback or remote). The launcher exits with code `2` when the variable is missing or shorter than 32 characters — there is no unauthenticated HTTP mode. 
Clients send `Authorization: Bearer …`; the server validates with `hmac.compare_digest`. Non-loopback bind additionally requires `--allow-remote`. | + +Refs: `codeclone/surfaces/mcp/auth.py`, `codeclone/surfaces/mcp/server.py`, +[21-security-model.md](21-security-model.md#remote-mcp-transport). + +### Workspace edit gate (hooks) + +Read by `codeclone/workspace_intent/gate.py` for Cursor/IDE pre-edit enforcement. + +| Variable | Values | Default when unset | Effect | +|----------------------------------------|-----------------------|----------------------------------|------------------------------------------------------------------------------------------------| +| `CODECLONE_HOOK_AUTHORIZE_FOREIGN` | truthy / falsy | authorize foreign active intents | When `0`/`false`/`no`/`off`, a live foreign active intent does not authorize local hook writes | +| `CODECLONE_HOOK_OWN_AGENT_PID` | integer PID | hook argument only | Limits stop-hook cleanup to recoverable intents owned by this process | +| `CODECLONE_HOOK_OWN_AGENT_START_EPOCH` | integer epoch seconds | hook argument only | Pairs with own-agent PID for recoverable intent matching | + +### Cursor plugin hooks + +| Variable | Values | Precedence | Effect | +|---------------------------------|------------------|---------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------| +| `CODECLONE_HOOKS_ENFORCE_SCOPE` | `python`, `repo` | env > `.cursor/codeclone-hooks.json` > default `python` | `python`: gate `.py`/`.pyi` edits and matching shell commands. `repo`: gate any path under workspace root including `.git/**` | + +Refs: `plugins/cursor-codeclone/hooks/_hook_io.py`, [integrations/cursor-plugin.md](integrations/cursor-plugin.md). + +### IDE and MCP launcher passthrough + +Set by VS Code, Claude Desktop, Claude Code, Codex, and Cursor launchers — not +usually edited in `pyproject.toml`. 
Launchers forward variables prefixed with +`CODECLONE_` to the child `codeclone-mcp` process. + +| Variable | Values | Effect | +|-----------------------------------|---------------------|-------------------------------------------------------------------------------------------------------------------------------| +| `CODECLONE_WORKSPACE_ROOT` | absolute path | Preferred repository root for launcher workspace discovery and MCP child env when cwd/PWD disagree with the trusted workspace | +| `CODECLONE_MCP_COMMAND` | command path | Claude Desktop bundle: override MCP server executable | +| `CODECLONE_MCP_ARGS_JSON` | JSON string array | Claude Desktop bundle: extra launcher argv (stdio transport locked by IDE clients) | +| `CODECLONE_MCP_SHUTDOWN_GRACE_MS` | positive integer ms | Grace period before SIGTERM when stopping MCP child (default `5000`) | +| `CODECLONE_MCP_KILL_GRACE_MS` | positive integer ms | Grace period before SIGKILL after SIGTERM (default `2000`) | + +Refs: `plugins/codeclone/scripts/launch_mcp.py`, +`extensions/vscode-codeclone/src/support.js`, +`extensions/claude-desktop-codeclone/src/launcher.js`. + +## Contracts + +- `--ci` is a preset: enables `fail_on_new`, `no_color`, `quiet`. +- In CI mode, if trusted metrics baseline is loaded, runtime also enables + `fail_on_new_metrics`. +- `--quiet` implies `--no-progress`. +- Negative size limits are contract errors. + +Refs: + +- `codeclone/surfaces/cli/workflow.py:_main_impl` + +## Invariants (MUST) + +- Detection thresholds (`min-loc`, `min-stmt`) affect function-level extraction. +- Fragment thresholds (`block_min_loc/stmt`, `segment_min_loc/stmt`) affect block/segment extraction. +- All six thresholds are part of cache compatibility (`payload.ap`). +- Reporting flags (`--html/--json/--md/--sarif/--text`) affect output only. +- Reporting flags accept optional path values; passing bare flag writes to + deterministic default path under `.codeclone/`. 
+- `--cache-path` overrides project-local cache default; legacy alias `--cache-dir` maps to same destination. +- Metrics baseline update/gating flags require metrics mode; incompatible + combinations with `--skip-metrics` are contract errors. +- Unknown keys or invalid value types in `[tool.codeclone]` are contract errors (exit 2). + +Refs: + +- `codeclone/analysis/units.py:extract_units_and_stats_from_source` +- `codeclone/config/spec.py` +- `codeclone/config/argparse_builder.py:build_parser` +- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` + +## Failure modes + +| Condition | Behavior | +|-------------------------------|--------------------| +| Invalid output extension/path | Contract error (2) | +| Invalid root path | Contract error (2) | +| Negative size limits | Contract error (2) | + +Refs: + +- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` +- `codeclone/surfaces/cli/startup.py:resolve_existing_root_path` +- `codeclone/surfaces/cli/workflow.py:_main_impl` + +## Determinism / canonicalization + +- Parser help text and epilog are deterministic constants. +- Summary metric labels are deterministic constants. + +Refs: + +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:SUMMARY_LABEL_FILES_FOUND` + +## Locked by tests + +- `tests/test_cli_unit.py::test_cli_help_text_consistency` +- `tests/test_cli_inprocess.py::test_cli_default_cache_dir_uses_root` +- `tests/test_cli_inprocess.py::test_cli_cache_dir_override_respected` +- `tests/test_cli_inprocess.py::test_cli_negative_size_limits_fail_fast` + +## Non-guarantees + +- CLI help section ordering is stable today but not versioned independently from the CLI contract. 
+ +## See also + +- [11-cli.md](11-cli.md) +- [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) +- [13-engineering-memory/bootstrap-and-config.md](13-engineering-memory/bootstrap-and-config.md) — memory pyproject + tables (env overrides live here) diff --git a/docs/book/11-cli.md b/docs/book/11-cli.md new file mode 100644 index 00000000..28c7fbf5 --- /dev/null +++ b/docs/book/11-cli.md @@ -0,0 +1,278 @@ + + +# 11. CLI + +## Purpose + +Define observable CLI behavior: argument handling, summaries, output writing, +and exit routing. + +!!! note "Observable surface only" + This chapter covers scripting-visible behavior and user-facing CLI output + categories. Rich styling details may evolve as long as markers, exit + semantics, and deterministic output contracts stay stable. + +## Public surface + +- Public entrypoint: `codeclone/main.py:main` +- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` +- Parser: `codeclone/config/argparse_builder.py:build_parser` +- Summary renderer: `codeclone/surfaces/cli/summary.py:_print_summary` +- Output path validation and writes: + `codeclone/surfaces/cli/reports_output.py` +- Message catalog: `codeclone/ui_messages/*` (`help`, `labels`, `runtime`, + `markers`, `formatters`, `controller`, `styling`; stable names re-exported from + `__init__.py`) + +## Data model + +CLI modes: + +- normal mode +- gating mode (`--ci`, `--fail-on-new`, explicit metric gates) +- baseline update mode (`--update-baseline`, `--update-metrics-baseline`) +- controller query mode (`--blast-radius`, `--patch-verify`) +- workspace query modes (`--session-stats`, `--audit`, `--audit-json`) +- development diagnostics mode (`codeclone observability trace`) + +Summary metrics include: + +- files found/analyzed/cache hits/skipped +- structural counters for lines/functions/methods/classes +- function/block/segment clone groups +- suppressed clone groups from `golden_fixture_paths` +- dead-code active/suppressed status +- dependency 
depth profile (`avg_depth`, `p95_depth`, `max_depth`) when metrics are computed +- adoption/API/coverage-join facts when computed +- new vs baseline + +Refs: + +- `codeclone/surfaces/cli/summary.py:_print_summary` +- `codeclone/surfaces/cli/runtime.py:_metrics_flags_requested` +- `codeclone/surfaces/cli/runtime.py:_metrics_computed` +- `codeclone/surfaces/cli/report_meta.py:_build_report_meta` + +## Contracts + +- Help output includes canonical exit-code section and project links. +- Bare report flags write to deterministic default paths under `.codeclone/`. +- `--open-html-report` is layered on top of `--html`; it does not imply HTML output. +- `--timestamped-report-paths` rewrites only default report paths requested via bare flags. +- In interactive VS Code terminals, the CLI may print a one-time extension hint + after summary output. The hint is suppressed in `--quiet`, CI, and non-TTY + contexts, and is tracked per CodeClone version next to the resolved project + cache path. +- In interactive non-CI runs, the CLI may print one-time migration notes when a + trusted baseline was produced by a release whose dead-code reachability model + is known to be narrower than the current version, such as `2.0.0` -> `2.0.1` + or `2.0.1` -> `2.0.2`. Notes explain expected dead-code count reductions from + refined reachability evidence and are remembered next to the resolved project + cache path. +- The same tips mechanism also covers cohesion (LCOM4) applicability changes, + such as `2.0.2` -> `2.1.0`: when a trusted baseline was generated by + `2.0.2`, the CLI may print a one-time note that cohesion counts can change + because Protocol interfaces and Pydantic validation hooks are excluded from + the LCOM4 graph. +- After a normal interactive analysis run, the CLI may print a workspace + hygiene tip when the repository root `.gitignore` does not cover + `.codeclone/` (or the broader `.cache/` tree). 
The tip is advisory + only, suppressed in `--quiet`, CI, and non-TTY contexts, and repeats on + each eligible run until `.gitignore` covers the cache path. CodeClone never + edits `.gitignore` automatically. +- Changed-scope review uses: + - `--changed-only` + - `--diff-against` + - `--paths-from-git-diff` +- Controller query mode is terminal-only: + - `--blast-radius FILE [FILE...]` builds the canonical report in memory and + renders the same blast-radius projection used by MCP. + - `--patch-verify` compares the current run against the trusted clone + baseline for baseline-relative regressions, previews gate status, and + exits `3` for blocking violations in `ci` or `strict` mode. Cannot combine + with changed-scope flags; patch-local before-run to after-run regression + claims require MCP change-control verify. +- Session query mode is terminal-only: + - `--session-stats` shows workspace session status: active agents, intents, + and lease health. Read-only, does not run analysis. +- Audit query mode is terminal-only: + - `--audit` shows the local Controller audit trail from the configured audit + database. Read-only, does not run analysis. Requires `audit_enabled=true` + in effective configuration (`[tool.codeclone]` or resolved defaults). + - `--audit-json` outputs audit payload footprint as JSON. Uses the same audit + collector as `--audit` but does not set the `--audit` flag for combination + validation. Requires `audit_enabled=true` in effective configuration. +- Engineering Memory commands (`codeclone memory`) are terminal-only and + read-only with respect to source files, baselines, and analysis cache: + - `init [--refresh] [--dry-run] [--from-report PATH] [--no-docs] [--no-tests]` + — create or refresh the local SQLite memory store from canonical report + + git + docs + tests (omit docs/tests with the `--no-*` flags; seed from an + existing report with `--from-report`). 
+ - `status`, `for-path`, `search`, `stale`, `vacuum`, `coverage` — query + modes mirroring MCP `query_engineering_memory` (`vacuum` purges per + retention config; no `--dry-run`). + - `semantic status|rebuild|search` — optional LanceDB sidecar (requires + `[tool.codeclone.memory.semantic] enabled = true`, extra + `codeclone[semantic-lancedb]`, and a successful rebuild — MCP agents use + `manage_engineering_memory(action=rebuild_semantic_index)`). + For semantic-quality local recall, install `codeclone[semantic-local]` + and configure `embedding_provider = "fastembed"`; `semantic-lancedb` + alone can still run the deterministic diagnostic provider. + - `review-candidates`, `approve`, `reject`, `archive` — human governance + for draft records (CLI and VS Code Memory; not MCP agent tools). Direct CLI + `approve` / `reject` / `archive` require `--i-know-what-im-doing` unless + routed through the IDE governance channel. Use `--by NAME` (default + `human`) to record the approver; there is no `--verified-by` flag. + - `trajectory status|rebuild|list|search|show|agents|anomalies|dashboard|export` + — audit-derived narratives, quality passports, analytics, and local + Patch Trail export (requires audit + rebuild). + - `jobs status|enqueue|run-once|list` — projection rebuild queue (semantic + + trajectory + Experience projections). + - `search` accepts `--match any|all` for FTS token matching (default `any`) + and `--semantic` to blend vector proximity when the index is available. + - Requires a prior normal analysis run or cached report for `init`. + - Full contract: [Engineering Memory](13-engineering-memory/index.md). +- Platform Observability commands are terminal-only, read-only diagnostics of + CodeClone's own runtime: + - `codeclone observability trace --root .` prints JSON. + - `--last`, `--operation`, and `--correlation` select a bounded trace. + - `--json PATH` and `--html PATH` write machine-readable or self-contained + cockpit views. 
+ - A missing local store is an informational success state. + - Full contract: [Platform Observability](26-platform-observability.md). +- Corpus Analytics commands are terminal-only, offline clustering of historical + intents (requires `codeclone[analytics]`): + - `codeclone analytics snapshot|embed|cluster|build|clusters|cluster-show|outliers` + - `codeclone analytics profiles list|show|validate` + - `build` runs snapshot → embed → cluster. `--use-recommended` requires + `--sweep`. With `--profile`, it renders the profile-batch winner; without + a profile it renders the global heuristic winner. + - `--profile PROFILE_ID` implies sweep; `--profile auto` resolves only from + configured `default_profile_id`. No profile is applied implicitly. + - Single-run overrides: `--pca-dimensions`, `--min-cluster-size`, + `--min-samples`, `--cluster-selection-method`. + - Sweep-axis overrides: `--sweep-pca`, `--sweep-min-cluster-size`, + `--sweep-min-samples`, `--sweep-selection-method`. Any sweep-axis flag + implies `--sweep`. + - `cluster --select-run RUN_ID` appends a selection event. + `--selection-profile none|PROFILE_ID|PROFILE_BATCH_ID` controls scope; + `--selected-by` and `--selection-rationale` preserve governance context. + - Representations: `description` (default) or `description_with_frame`. + - Artifacts live under `.codeclone/analytics/` (SQLite metadata + LanceDB vectors). + - JSON export schema `1.3` and HTML use one interpretation projection: + formally valid runs receive full metrics/previews; invalid or failed runs + remain inspectable as limited diagnostics with invariant codes. + - Sweep output includes every persisted candidate for the generation. + Invalid candidates are unranked and show dominant metrics as unavailable. + `cluster-show` may therefore export a resolved run that is not eligible + for full interpretation. + - Expected capability, schema, ownership, and integrity errors exit `2` + without a traceback. 
Inspection/export commands require only the base + install and open analytics metadata read-only. + - Full contract: + [Corpus Analytics](27-corpus-analytics.md#profile-control-plane-slice-12). +- Controller and workspace query flags are mutually exclusive where enforced: + - `--blast-radius` and `--patch-verify` cannot be combined. + - `--strictness {ci,strict,relaxed}` is valid only with `--patch-verify`. + - `--session-stats` and `--audit` collect payloads from + `codeclone/controller_insights/` (same facts as IDE-only MCP tools when + the server runs with `--ide-governance-channel`). + - `--session-stats` cannot combine with explicit `--audit`, `--blast-radius`, + or `--patch-verify`. `--audit-json` is not treated as `--audit` for this + check (run one pre-analysis query per invocation). + - explicit `--audit` cannot combine with `--blast-radius` or `--patch-verify`. + - controller and workspace query modes cannot combine with changed-scope + flags (`--changed-only`, `--diff-against`, `--paths-from-git-diff`). + - controller and workspace query modes do not write reports, baselines, or + analysis cache data. +- Contract errors use `CONTRACT ERROR:`. +- Gating failures use `GATING FAILURE:`. +- Internal errors use `fmt_internal_error` and include traceback only in debug mode. + +Refs: + +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:fmt_contract_error` +- `codeclone/ui_messages/__init__.py:fmt_internal_error` +- `codeclone/surfaces/cli/changed_scope.py:_validate_changed_scope_args` +- `codeclone/surfaces/cli/workflow.py:_validate_controller_query_flags` + +## Invariants (MUST) + +- Report writes are path-validated and write failures are contract errors. +- `--open-html-report` requires `--html`. +- `--timestamped-report-paths` requires at least one requested report output. +- `--changed-only` requires a diff source. +- `--blast-radius` and `--patch-verify` are mutually exclusive. 
+- `--session-stats` cannot combine with explicit `--audit`, `--blast-radius`, or + `--patch-verify`. +- Explicit `--audit` cannot combine with `--blast-radius` or `--patch-verify`. +- Controller and workspace query modes are incompatible with changed-scope flags + (`--changed-only`, `--diff-against`, `--paths-from-git-diff`). +- Controller and workspace query modes are incompatible with report output flags + and baseline update flags. +- `--patch-verify` requires a trusted clone baseline. +- `--audit` and `--audit-json` require `audit_enabled=true` in effective configuration. +- Browser-open failure after successful HTML write is warning-only. +- In gating mode, unreadable source files are contract errors with higher priority than clone/metric gate failures. + +Refs: + +- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` +- `codeclone/surfaces/cli/reports_output.py:_validate_report_ui_flags` +- `codeclone/surfaces/cli/workflow.py:_main_impl` + +## Failure modes + +!!! warning "Failure precedence" + Contract failures take precedence over gating failures. In CI and scripted + flows, invalid config or unreadable sources must surface as exit `2` before + any clone or metrics gate can fail with exit `3`.
+ + | Condition | User-facing category | Exit | + |-------------------------------------------------------------------|----------------------|------| + | Invalid CLI flag | contract | `2` | + | Invalid output extension/path | contract | `2` | + | Invalid changed-scope flag combination | contract | `2` | + | Invalid controller query flag combination | contract | `2` | + | `--audit` with `audit_enabled=false` | contract | `2` | + | `--patch-verify` without trusted baseline | contract | `2` | + | Baseline untrusted in CI/gating | contract | `2` | + | Coverage/API regression gate without required baseline capability | contract | `2` | + | Unreadable source in CI/gating | contract | `2` | + | New clones with `--fail-on-new` | gating | `3` | + | Blocking `--patch-verify` contract violation | gating | `3` | + | Untested coverage hotspots with `--fail-on-untested-hotspots` | gating | `3` | + | Threshold or metrics gate exceeded | gating | `3` | + | Unexpected exception | internal | `5` | + +## Determinism / canonicalization + +- Summary metric ordering is fixed. +- Compact summary mode is fixed-format text. +- Help epilog is generated from static constants. +- Git diff path inputs are normalized to sorted repo-relative paths. 
+ +Refs: + +- `codeclone/surfaces/cli/summary.py:_print_summary` +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/surfaces/cli/changed_scope.py:_normalize_changed_paths` + +## Locked by tests + +- `tests/test_cli_unit.py::test_cli_help_text_consistency` +- `tests/test_cli_help_snapshot.py::test_cli_help_snapshot` +- `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` +- `tests/test_cli_inprocess.py::test_cli_summary_format_stable` +- `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` +- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` + +## Non-guarantees + +- Rich styling details are not machine-facing contract. +- Warning phrasing may evolve if category markers and exit semantics stay stable. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md deleted file mode 100644 index 5a34a19d..00000000 --- a/docs/book/11-security-model.md +++ /dev/null @@ -1,96 +0,0 @@ -# 11. Security Model - -## Purpose - -Describe implemented protections and explicit security boundaries. - -## Public surface - -- Scanner path validation: `codeclone/scanner/__init__.py:iter_py_files` -- File read and parser limits: `codeclone/core/worker.py:process_file`, - `codeclone/analysis/parser.py:_parse_limits` -- Baseline/cache validation: `codeclone/baseline/*`, `codeclone/cache/*` -- HTML escaping: `codeclone/report/html/primitives/escape.py`, - `codeclone/report/html/assemble.py` -- MCP read-only enforcement: `codeclone/surfaces/mcp/*` - -## Data model - -Security-relevant input classes: - -- filesystem paths (root/source/baseline/cache/report) -- untrusted JSON files (baseline/cache) -- untrusted source snippets and metadata rendered into HTML -- MCP request parameters (`root`, filters, diff refs, cache policy) - -## Contracts - -- CodeClone parses source text; it does not execute repository Python code. 
-- Sensitive root directories are blocked by scanner policy. -- Symlink traversal outside the root is skipped. -- HTML escapes text and attribute contexts before embedding. -- MCP is read-only by design: - no tool mutates source files, baselines, cache, or report artifacts. -- `--allow-remote` is required for non-local transports. -- `cache_policy=refresh` is rejected by MCP. -- Review markers are session-local in-memory state only. -- `git_diff_ref` is validated as a safe single revision expression before any `git diff` subprocess call. - -Refs: - -- `codeclone/analysis/parser.py:_parse_with_limits` -- `codeclone/scanner/__init__.py:SENSITIVE_DIRS` -- `codeclone/scanner/__init__.py:iter_py_files` -- `codeclone/report/html/primitives/escape.py:_escape_html` - -## Invariants (MUST) - -- Baseline and cache integrity checks use constant-time comparison. -- Size guards are enforced before parsing baseline/cache JSON. -- Cache failures degrade safely; baseline trust failures follow the explicit trust model. - -Refs: - -- `codeclone/baseline/clone_baseline.py:Baseline.verify_integrity` -- `codeclone/cache/store.py:Cache.load` -- `codeclone/surfaces/cli/workflow.py:_main_impl` - -## Failure modes - -| Condition | Security behavior | -|------------------------------------------|--------------------| -| Symlink points outside root | File skipped | -| Root under sensitive dirs | Validation error | -| Oversized baseline | Baseline rejected | -| Oversized cache | Cache ignored | -| HTML-injected payload in metadata/source | Escaped output | -| `--allow-remote` not passed for HTTP | Transport rejected | -| `cache_policy=refresh` requested in MCP | Policy rejected | -| `git_diff_ref` fails validation | Parameter rejected | - -## Determinism / canonicalization - -- Canonical JSON hashing for baseline/cache prevents formatting-only drift. -- Security failures map to explicit statuses rather than silent mutation. 
- -Refs: - -- `codeclone/baseline/trust.py:_compute_payload_sha256` -- `codeclone/cache/integrity.py:canonical_json` -- `codeclone/baseline/trust.py:BaselineStatus` -- `codeclone/cache/versioning.py:CacheStatus` - -## Locked by tests - -- `tests/test_security.py::test_scanner_path_traversal` -- `tests/test_scanner_extra.py::test_iter_py_files_symlink_loop_does_not_traverse` -- `tests/test_security.py::test_html_report_escapes_user_content` -- `tests/test_html_report.py::test_html_report_escapes_script_breakout_payload` -- `tests/test_cache.py::test_cache_too_large_warns` -- `tests/test_mcp_service.py::test_mcp_service_rejects_refresh_cache_policy_in_read_only_mode` -- `tests/test_mcp_server.py::test_mcp_server_main_rejects_non_loopback_host_without_opt_in` - -## Non-guarantees - -- Baseline/cache integrity is tamper-evident at file-content level; it is not cryptographic attestation against a - privileged attacker. diff --git a/docs/book/12-structural-change-controller/blast-radius-and-receipt.md b/docs/book/12-structural-change-controller/blast-radius-and-receipt.md new file mode 100644 index 00000000..2f631b67 --- /dev/null +++ b/docs/book/12-structural-change-controller/blast-radius-and-receipt.md @@ -0,0 +1,45 @@ +## Blast Radius Payload + +Core blast-radius graph traversal lives in `codeclone/analysis/blast_radius.py` +(consuming canonical report `Mapping` facts). MCP (`get_blast_radius`, +`start`/`finish` summaries) and CLI (`--blast-radius`) are presentation +adapters over that core — non-MCP surfaces must not import +`codeclone/surfaces/mcp/_blast_radius.py`. + +`get_blast_radius` separates hard edit guardrails from review context: + +- `do_not_touch`: actionable negative context such as baseline/cache state, + generated CodeClone state, or explicit forbidden paths. +- `review_context`: report-only facts such as security boundary inventory, + overloaded-module candidates, known baseline debt, and golden fixture + surfaces. 
+ +Long context sections are bounded and include summaries with `total`, `shown`, +and `truncated`. + +## Review Receipt Payload + +`create_review_receipt` returns `format="markdown"` by default and can return a +structured JSON receipt with `format="json"`. The receipt is a composition of +stored MCP state; it does not run analysis and does not mutate source files, +baselines, cache, reports, or repository state. + +The receipt includes: + +- report provenance: digest, schema version, baseline trust state, run id, root +- verification profile: profile classification, reason, applicable/not-applicable + checks, limitations +- scope: optional change intent, declared files, changed files, unexpected files +- blast radius summary: level, direct dependent count, clone cohort count, + do-not-touch count +- reviewed evidence: session-local reviewed finding markers and notes +- patch contract: accepted, violated, or not checked from stored gate, + structural delta, intent, and baseline-abuse signals +- human decision points: bounded list of clone divergence, scope expansion, and + known-baseline-debt prompts +- claims not made: explicit reminders that Security Surfaces are boundary + inventory, report-only signals are not gates, and known baseline debt is not + new relative to the baseline + +Receipt verdicts are `clean`, `incomplete`, or `needs_attention`. They summarize +receipt completeness only; they are not CI gates. diff --git a/docs/book/12-structural-change-controller/cli-controller-queries.md b/docs/book/12-structural-change-controller/cli-controller-queries.md new file mode 100644 index 00000000..57df76bc --- /dev/null +++ b/docs/book/12-structural-change-controller/cli-controller-queries.md @@ -0,0 +1,65 @@ +## CLI Controller Queries + +The CLI exposes read-only terminal projections for humans: + +```bash +codeclone . --blast-radius codeclone/analysis/parser.py +codeclone . --patch-verify +codeclone . --patch-verify --strictness relaxed +codeclone . 
--session-stats +codeclone . --audit +codeclone . --audit-json +``` + +For git-scoped clone review (not patch-verify), use changed-scope flags instead: + +```bash +codeclone . --changed-only --diff-against HEAD~1 +``` + +`--blast-radius` runs normal analysis, builds the canonical report in memory, +and renders the same dependent/context split as `get_blast_radius`. + +`--patch-verify` is a baseline-relative terminal check: it uses the trusted +clone baseline as the accepted comparison snapshot and checks baseline-relative +new clone regressions plus the selected gate profile. It is not the same as MCP +patch-local verification, which compares a clean before-run to an after-run. +`ci` is the default; `strict` applies tighter controller budgets; `relaxed` +reports violations but exits `0`. + +Controller query modes cannot combine with changed-scope flags +(`--changed-only`, `--diff-against`, `--paths-from-git-diff`). Combining +`--patch-verify` with `--diff-against` is a contract error — pick one workflow. + +`--session-stats` shows workspace session status: active agents, intents, and +lease health. Read-only, does not run analysis. Collection is implemented in +`codeclone/controller_insights/session_stats.py` (CLI and IDE-only MCP tools +consume the same payload). + +`--audit` and `--audit-json` show the local Controller audit trail (JSON footprint +mode for `--audit-json`). Both require `audit_enabled=true` in effective config. +`--audit-json` selects JSON output but does not set the `--audit` flag for +combination validation. 
+ +### Flag combination rules + +Enforced by `codeclone/surfaces/cli/workflow.py:_validate_controller_query_flags`: + +| Combination | Result | +|---------------------------------------------------------------------------|----------------| +| `--blast-radius` + `--patch-verify` | contract error | +| `--session-stats` + explicit `--audit` | contract error | +| `--session-stats` + `--blast-radius` or `--patch-verify` | contract error | +| explicit `--audit` + `--blast-radius` or `--patch-verify` | contract error | +| any controller query + changed-scope flags | contract error | +| any controller query + report output flags | contract error | +| any controller query + baseline update flags | contract error | +| `--strictness` without `--patch-verify` (when `--strictness` is explicit) | contract error | + +`--audit-json` is not treated as `--audit` for the session-stats mutual-exclusion +check. Pre-analysis queries (`--session-stats`, `--audit`, `--audit-json`) exit +before analysis; only one runs per invocation (first match wins). + +CLI controller queries are terminal-only and read-only with respect to source +files, baselines, reports, and analysis cache data. They are incompatible with +report output flags and baseline update flags. diff --git a/docs/book/12-structural-change-controller/finish-controlled-change.md b/docs/book/12-structural-change-controller/finish-controlled-change.md new file mode 100644 index 00000000..a5a74154 --- /dev/null +++ b/docs/book/12-structural-change-controller/finish-controlled-change.md @@ -0,0 +1,105 @@ +## `finish_controlled_change` + +Post-edit workflow tool. It runs a **fixed pipeline** over the same atomic +primitives as the manual path; agents must not skip hygiene, check, or verify +and call `clear` alone. + +### Preconditions + +- Intent is **active** in the current MCP session (not `queued`). +- **Evidence:** exactly one of `changed_files` or `diff_ref` (non-empty). Both + or neither is a contract error. 
+- **`after_run_id`** when the derived `verification_profile` requires it + (Python structural and governance config patches). + +`review_text` is a human note only. **`claims_text`** is the only finish input +passed to Claim Guard (when `claim_validation_recommended` is true). + +### Execution order (do not reorder manually) + +```text +resolve intent + → resolve changed_files | diff_ref (git-expanded) + → finish_hygiene_check (git + start dirty snapshot) + → manage_change_intent(check) # uses files_for_scope_check = evidence only + → check_patch_contract(verify) # before_run_id from intent when omitted + → compute Patch Trail + audit emit patch_trail.computed (when check/verify reached) + → validate_review_claims (optional, if claims_text + recommended) + → create_review_receipt (default true) + → manage_change_intent(clear) # auto_clear when accepted and receipt ok + → elevate status if out-of-scope dirty remains (external_changes) +``` + +Early exits (intent stays active unless noted): + +| Step | Top-level `status` | `reason` (typical) | `intent_cleared` | +|---------------------|------------------------------------------------|-------------------------|-------------------------------------------| +| Queued intent | `unverified` | `intent_not_active` | `false` | +| Hygiene gate | `unverified` | `workspace_hygiene` | `false` | +| Scope check | `expired` / `violated` | digest / scope | `false` | +| Verify not accepted | `unverified` / `violated` | verify-specific | `false` | +| Receipt failure | `accepted` or `accepted_with_external_changes` | — | `false` (verify passed but clear skipped) | +| Success | `accepted` or `accepted_with_external_changes` | verify reason or `null` | `true` when `auto_clear` and receipt ok | + +### Top-level `status` semantics + +| `status` | Meaning for agents | 
+|----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| +| `accepted` | Patch contract passed for declared scope; no out-of-scope dirty paths in the hygiene view | +| `accepted_with_external_changes` | Patch contract passed; **other** git-dirty paths exist outside declared scope — report `external_changes` to the user; intent may still clear | +| `unverified` | Hygiene block, verify failure, missing after-run, `after_run_not_new`, etc. — follow `next_step` | +| `violated` | Scope expansion or structural/gate violations attributable to the patch | +| `expired` | Before-run digest no longer matches intent — re-analyze and `start` again | + +`accepted` / `accepted_with_external_changes` mean the **patch contract** passed +for the declared scope. They do **not** mean “no structural regressions” or +unchanged repository health — read `verification.structural_delta` and +`health_regression_advisory` when present. + +### Hygiene payload `detail_level` + +On `start_controlled_change` / `finish_controlled_change`, hygiene uses +`detail_level` as binary size control: `summary` and `normal` are equivalent +(`counts`, `foreign_dirty_overlaps`, blocking flags). `detail_level="full"` adds +`dirty_attribution`, path classification arrays, and expanded `dirty_snapshot`. +Findings/hotspots tools still honor all three levels. 
+ +### Response payloads agents should read + +| Field | Use | +|---------------------------------|-------------------------------------------------------------------------------------------------------| +| `summary` | Compact dashboard (`scope_status`, `verification_profile`, `receipt`, `intent_cleared`, dirty counts) | +| `scope_check` | Declared vs actual files from check | +| `verification` | Full verify payload including `structural_delta`, `next_step` | +| `workspace_hygiene_after` | Post-finish hygiene; `counts` always; `dirty_attribution` only when `detail_level="full"` | +| `health_regression_advisory` | On accepted verify when `health_delta < 0` — user-facing, not auto-fail | +| `claims` | Claim Guard result when `claims_text` was validated | +| `receipt` / `receipt_error` | Receipt body; `receipt_error` prevents `auto_clear` | +| `propose_memory` / memory hooks | When `propose_memory=true` on accept | +| `patch_trail` | Deterministic scope/verify forensics for this finish (see below); not authorization | +| `projection_rebuild` | Optional job enqueue on accept when projection policy is not `off` (non-CI) | + +### Patch Trail on finish + +Patch Trail is computed when scope `check` reaches `violated` (**before** +verify) or when check is `clean` / `expanded` and verify runs — including +failed verify (`unverified` / `violated` top-level status). Hygiene blocks and +`expired` intents do **not** emit Patch Trail. + +Normative diagram and fields: [Patch Trail](patch-trail.md). + +### Post-success hooks (accept only) + +When verify status is `accepted` or `accepted_with_external_changes`: + +- `propose_memory=true` runs finish-side memory proposals and staleness updates. +- `maybe_auto_enqueue_projection_rebuild` may return `projection_rebuild` when + `memory.projection_rebuild_policy` is not `off` and the process is not CI. + +Receipt creation and `auto_clear` still follow the table above; a receipt error +leaves the intent active even when verify passed. 
+ +Refs: + +- `codeclone/surfaces/mcp/_session_workflow_mixin.py:finish_controlled_change` +- `codeclone/memory/jobs/workflow.py:maybe_auto_enqueue_projection_rebuild` diff --git a/docs/book/12-structural-change-controller/finish-hygiene.md b/docs/book/12-structural-change-controller/finish-hygiene.md new file mode 100644 index 00000000..42c1e576 --- /dev/null +++ b/docs/book/12-structural-change-controller/finish-hygiene.md @@ -0,0 +1,99 @@ +## Workspace hygiene and registry consistency + +Three independent contours (do not collapse): + +```text +status = persisted registry lifecycle +ownership = runtime view (PID / TTL / lease) +hygiene = git working tree ∩ declared scope +permission = edit_allowed (with status gate) +``` + +**Lazy intent closure:** agent-facing registry reads (`list_workspace`, +declare/start workspace refresh) close eligible non-terminal intents using a +**lazy-close predicate** (`for_lazy_close=True`). Lease-only staleness with valid +TTL is not closed on read. **Orphaned** (dead PID) intents stay recoverable until +TTL expiry or explicit `gc_workspace` — lazy close does not purge them. + +**Explicit GC:** `gc_workspace` performs cleanup/purge in one atomic transaction +using a broader removal predicate. Lazy close and GC share intent lifecycle +concepts, but **not** an identical close predicate. + +Registry I/O is serialized with cross-process locks; SQLite `gc()` is one +atomic scan→close→purge transaction. + +**Continuing known WIP:** when uncommitted changes already overlap your declared +scope, default `dirty_scope_policy="block"` returns workflow `status: "blocked"`. +Pass `dirty_scope_policy="continue_own_wip"` only to resume known dirty scope +when **no** live foreign dirty overlap exists (`foreign_dirty_overlaps` empty). +Finish must still prove all declared-scope dirty paths via `changed_files` or +`diff_ref`. 
+ +**Start blocking:** when foreign active/stale scope overlap is unresolved +(without `on_conflict="queue"`) or scoped hygiene detects dirty paths in +`allowed_files`, `start_controlled_change` returns workflow `status: "blocked"`, +`edit_allowed: false`, and populated `workspace` / `workspace_hygiene` payloads. +`blocked` is workflow-only — never persisted registry lifecycle status. + +**Finish hygiene gate:** see [finish_controlled_change](finish-controlled-change.md) +for the full pipeline. By default only `missing_evidence` and +`foreign_dirty_overlap` set `blocks_finish`. With +[strict finish mode](../10-config-and-defaults.md#mcp-session-and-change-control-hygiene) +enabled, `own_unscoped_dirty` may also block. Out-of-scope unattributed dirt is +advisory and may elevate the top-level status to `accepted_with_external_changes` +without failing verify. +**Queued** foreign intents do not populate `foreign_dirty_overlaps`. + +Declare **new files** in `allowed_files` at `start`, not only in +`allowed_related`. Finish always attaches `workspace_hygiene_after` (scoped +hygiene + repo-level `workspace_dirty_summary`) on verify paths that reach +hygiene evaluation. + +**List workspace:** `manage_change_intent(action="list_workspace")` attaches +repo-level `workspace_dirty_summary` only (bounded dirty path sample). Scoped +`workspace_hygiene.blocks_edit` applies only to start/finish. When recoverable +intents exist, the response includes `recovery_available` (each entry may show +`run_available: false` after MCP restart) and top-level `recovery_next_step`. + +### Finish hygiene: what blocks vs what informs + +Finish hygiene reconciles **agent evidence with git** and the **start-time dirty +snapshot**. It is not honor-system. 
+ +**Blocking** (`blocks_finish: true`, top-level `reason: workspace_hygiene`, +`user_action_required: true`) happens only for: + +| `finish_block_reason` | Meaning | Agent action | +|-------------------------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------| +| `missing_evidence` | Git is dirty inside declared scope but the path is missing from `changed_files` / `diff_ref` | Add every in-scope dirty path to evidence or revert | +| `foreign_dirty_overlap` | A **live** foreign active/stale intent previously declared the same **in-scope** path | Coordinate (queue/promote/clear foreign intent), stash/commit foreign WIP, or narrow scope | +| `own_unscoped_dirty` | Unattributed out-of-scope dirty when strict finish mode is enabled (see env overrides) | Reconcile out-of-scope dirt, widen scope, or unset strict mode | + +**Non-blocking (advisory)** — surfaced on `workspace_hygiene_after` (path lists in +`dirty_attribution` when `detail_level="full"`), but **do not** set +`finish_block_reason` and **do not** feed `files_for_scope_check`: + +| Field | Meaning | +|----------------------------------------|-----------------------------------------------------------------------------------------| +| `preexisting_unscoped_dirty` | Out-of-scope dirty at `start`, unchanged since — informational | +| `new_unattributed_unscoped_dirty` | Out-of-scope dirty appeared after `start`, not foreign-attributed — peer/context signal | +| `modified_unattributed_unscoped_dirty` | Out-of-scope dirty existed at `start` but content changed — peer/context signal | +| `unknown_unattributed_unscoped_dirty` | No usable start snapshot for comparison — conservative classification only | +| `foreign_attributed_outside_scope` | Out-of-scope dirty owned by foreign active/stale intent — ignored for your finish | +| `dirty_paths_outside_scope` | All out-of-scope dirty 
paths — drives `external_changes` when verify is `accepted` | + +`own_unscoped_dirty` and `unattributed_unscoped_dirty` are **legacy aliases** for +the union of unattributed out-of-scope paths. They are **not** proof that the +current agent owns those edits and, by default, **do not** block finish (only strict finish mode can block on `own_unscoped_dirty`). + +**Recoverable** foreign intents (dead PID) do **not** populate +`foreign_attributed_outside_scope`. **Queued** foreign intents do **not** +populate `foreign_dirty_overlaps`. + +When verify returns plain `accepted` but `dirty_paths_outside_scope` is +non-empty, finish elevates the top-level status to +`accepted_with_external_changes` and attaches: + +```json +"external_changes": {"count": N, "sample": ["path", "..."], "truncated": false} +``` diff --git a/docs/book/12-structural-change-controller/index.md b/docs/book/12-structural-change-controller/index.md new file mode 100644 index 00000000..7f1ee6e7 --- /dev/null +++ b/docs/book/12-structural-change-controller/index.md @@ -0,0 +1,63 @@ +# Structural Change Controller + +Normative contract for MCP/CLI change intent, blast radius, patch verification, +hygiene, receipts, and Patch Trail. Agent recipes live in the +[Change control guide](../../guide/change-control/overview.md).
+ +## Status + +The v2.1 alpha currently includes intent, blast-radius, patch-contract checks, +review receipts, workspace intent visibility, claim guard, and CLI controller +queries: + +| Phase | Status | Surface | +|---------------------------|-------------------|------------------------------------------------------------------------------------| +| Declarative workflow | Live in `2.1.0a1` | MCP `start_controlled_change`, `finish_controlled_change` | +| Intent declaration | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Blast radius | Live in `2.1.0a1` | MCP `get_blast_radius`, CLI `--blast-radius` | +| Patch contract | Live in `2.1.0a1` | MCP `check_patch_contract`, CLI `--patch-verify` | +| Review receipt | Live in `2.1.0a1` | MCP `create_review_receipt` | +| Workspace intent registry | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Lease and recovery | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Claim guard | Live in `2.1.0a1` | MCP `validate_review_claims` | +| Scope-aware verification | Live in `2.1.0a1` | MCP `check_patch_contract` | +| Workspace relations | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Verification profiles | Live in `2.1.0a1` | MCP `check_patch_contract` | +| Intent queue | Live in `2.1.0a1` | MCP `manage_change_intent` | +| Verify ergonomics | Live in `2.1.0a1` | MCP `check_patch_contract` | +| MCP payload token budget | Live in `2.1.0a1` | Audit trail, CLI `--audit`, `--session-stats` | +| Patch Trail | Live in `2.1.0a1` | MCP `finish_controlled_change(patch_trail_detail=…)`; audit `patch_trail.computed` | + +## Contract + +- The canonical report remains the source of truth. +- Intent truth is **session-local** for the active MCP process; the optional + workspace registry (file backend under `.codeclone/intents/` or SQLite + per `[tool.codeclone]`) provides advisory, TTL/lease-bound cross-process + visibility only. 
+- MCP may write ephemeral workspace coordination records through the configured + intent registry backend and optional audit records under + `.codeclone/db/` when enabled. +- MCP must not mutate source files, baselines, reports, or analysis cache data. +- Tools derive responses from existing run/report facts rather than LLM + inference. +- Report-only context is review context, not an edit prohibition. + +!!! note "Claim Guard" + Full pattern catalog: [Claim Guard](../14-claim-guard.md). + +## Chapters + +| Topic | Contract | +|----------------------------------------|---------------------------------------------------------| +| CLI `--blast-radius`, `--patch-verify` | [CLI controller queries](cli-controller-queries.md) | +| Blast radius & review receipt | [Blast radius & receipt](blast-radius-and-receipt.md) | +| Intent registry & queue | [Intent registry & queue](intent-registry-and-queue.md) | +| Verification profiles | [Verification profiles](verification-profiles.md) | +| Patch contract verify | [Patch contract verify](patch-contract-verify.md) | +| Workflow tools | [Workflow tools](workflow-tools.md) | +| `finish_controlled_change` | [finish_controlled_change](finish-controlled-change.md) | +| Finish hygiene | [Finish hygiene](finish-hygiene.md) | +| Patch Trail | [Patch Trail](patch-trail.md) | +| Payload semantics | [Payload semantics](payload-semantics.md) | +| Token budget | [Token budget](token-budget.md) | diff --git a/docs/book/12-structural-change-controller/intent-registry-and-queue.md b/docs/book/12-structural-change-controller/intent-registry-and-queue.md new file mode 100644 index 00000000..85ffb395 --- /dev/null +++ b/docs/book/12-structural-change-controller/intent-registry-and-queue.md @@ -0,0 +1,170 @@ +## Workspace Intent Registry + +`manage_change_intent` also supports workspace actions for multi-agent +coordination: + +- `list_workspace`: list active workspace intent records from all agents for a + repository root. 
Includes `recovery_available` (with `run_available` and + per-candidate `hint`) and `recovery_next_step` when recoverable intents exist. +- `renew`: refresh the active lease before long edits or test runs. +- `gc_workspace`: remove expired, orphaned, or corrupted registry records. +- `recover`: explicitly reclaim a recoverable intent when the caller has the + matching run and report digest in the current MCP session. +- `reset_workspace`: reset the caller's own intent or remove expired/recoverable + registry records. Foreign active and foreign stale intents are rejected + and require coordination. + +Registry records live under `.codeclone/intents/` by default (one JSON +file per intent) and are protected with a SHA-256 integrity digest over +canonical JSON. Repositories may opt into a SQLite backend instead: + +```toml +[tool.codeclone] +intent_registry_backend = "sqlite" +intent_registry_path = ".codeclone/db/intents.sqlite3" +``` + +Environment overrides for registry keys: +[10-config Environment variable overrides](../10-config-and-defaults.md#environment-variable-overrides) +(workspace intent registry table). + +The SQLite backend stores the same signed JSON payloads in WAL mode; integrity +and validation rules are unchanged. Unlike the file backend, SQLite keeps +closed intents (`clean`, `expired`, `orphaned`) for audit and purges them only +after `intent_registry_retention_days` (default `14`, any positive value; no +edition cap). Managed/hosted retention with backup and compliance is a roadmap +Team/Enterprise option; see [Plans and Retention](../../plans-and-retention.md). + +The integrity digest detects accidental corruption, not malicious tampering by a user with write +access. Conflicts are advisory: hard overlap means two agents claimed the same +primary file; soft overlap means primary files overlap related context. + +Each registry record has a TTL and a shorter renewable lease. TTL is the hard +maximum lifetime of the record (default 3600s).
The lease is the ownership +freshness signal (default 300s, max 600s): active MCP interactions auto-renew +it, while detached processes stop renewing and transition through ownership +states. + +??? info "Ownership classification" + + | State | PID alive | Lease valid | Meaning | + |------------------|-----------|-------------|------------------------------------------------------| + | `own_active` | own | yes | This session's active intent | + | `own_stale` | own | no | This session's intent with expired lease | + | `foreign_active` | foreign | yes | Another live process, active lease — coordinate | + | `foreign_stale` | foreign | no | Another live process, expired lease — coordinate | + | `recoverable` | dead | — | Owning process is dead; safe to reclaim | + | `expired` | — | — | TTL exceeded; eligible for garbage collection | + + A foreign active or foreign stale record should be coordinated with the + user; CodeClone does not ask agents to kill the owning process. Only + `recoverable` intents (dead PID) can be reclaimed without user + coordination. + +### Cursor local enforcement (optional) + +The Cursor plugin can install project hooks (`.cursor/hooks.json`) that run a +fail-closed `preToolUse` gate before `Write`, `StrReplace`, `ApplyPatch`, and +`Shell`. The gate calls the read-only API +`codeclone.workspace_intent.evaluate_workspace_edit_gate`, which loads the same +registry backend as MCP (`file` or `sqlite` per `[tool.codeclone]`). It does not +lazy-close records, create registry files, or read plugin-local marker files. 
+ +| Registry signal | Hook behavior | +|-----------------------------------------------------------------------|---------------------------------------------------------------------| +| Live `active` intent (any agent; lease/TTL rules match MCP ownership) | Authorize repository writes and non–read-only shell | +| `queued` only | Deny — queued intents are visible but not editable locally | +| No active intent / registry error | Deny file tools; allow only read-only Git inspection shell commands | + +Hooks require `codeclone` in the Python interpreter referenced by +`.cursor/hooks.json` (typically the project venv). Install: +`plugins/cursor-codeclone/scripts/install-project-hooks.py`. See +[Cursor plugin guide](../../guide/integrations/cursor/install-and-skills.md) and +[Cursor plugin contract](../integrations/cursor-plugin.md). + +## Workspace Relations + +`detect_conflicts` classifies the relationship between a new intent and existing +workspace intents. Beyond edit-overlap detection (hard and soft conflicts), +the classifier distinguishes forbidden-scope relationships: + +| Relation | Meaning | +|---------------------------|-----------------------------------------------------| +| `edit_overlap` | Both agents claim the same files (hard or soft) | +| `foreign_excludes_target` | Foreign `forbidden` matches current `allowed_files` | +| `target_excludes_foreign` | Current `forbidden` matches foreign `allowed_files` | + +Absence of a relation entry means disjoint scope. + +The `declare` response includes a `workspace_relations` field alongside the +existing `concurrent_intents`. `concurrent_intents` continues to contain only +edit overlaps for backward compatibility; `workspace_relations` provides the +full classification including forbidden-scope signals. + +This allows agents to distinguish three cases that were previously +indistinguishable: + +1. No overlap at all (disjoint). +2. 
No edit overlap, but the foreign agent explicitly excludes the current + agent's target files (`foreign_excludes_target`) — a positive coordination + signal. +3. No edit overlap, but the current agent explicitly excludes the foreign + agent's target files (`target_excludes_foreign`). + +## Intent Queue + +When multiple agents target overlapping scope, `manage_change_intent` supports +an advisory queue so a blocked agent can register its intent without failing. + +### Declare with queue + +`manage_change_intent(action="declare", on_conflict="queue")` first attempts a +normal declare. If `detect_conflicts` finds overlapping foreign active intents, +it downgrades the already-registered intent to `queued` instead of returning an +error. + +A queued intent: + +- Is visible in `list_workspace` as a workspace record with `status="queued"`. +- Does **not** own scope — conflict detection skips queued records. +- Does **not** pin the before-run — long waits may cause eviction from bounded + run history. +- Cannot pass `check_patch_contract(mode="verify")` or + `check_patch_contract(mode="budget")` with `edit_allowed=true`. +- Can be cleared via `manage_change_intent(action="clear")`. + +The declare response includes `blocked_by` (list of blocking intents with +`intent_id`, `agent_pid`, `ownership`, `overlapping_files`) and +`queue_position` (deterministic ordering by `declared_at_utc`, then +`intent_id`). + +### Promote + +`manage_change_intent(action="promote", intent_id=...)` transitions a queued +intent to active: + +1. Validates the intent has `status="queued"`. +2. Resolves the before-run — if evicted, returns `status="unverified"` with + `reason="before_run_evicted"` and a `next_step` hint. +3. Re-checks workspace conflicts. If conflicts persist, returns `status="queued"` + with `blocking_count` and `blocked_by` without changing state. +4. On success: sets status to `active`, pins the run, renews the lease, and + updates the workspace record. 
+ +### Queue semantic invariants + +- `queued` is a lifecycle status, not an ownership classification. Ownership + (`own_active`, `foreign_active`, etc.) and status (`active`, `queued`) are + orthogonal. +- Queued intents do not block other agents. `_detect_scope_state` skips records + with `status == "queued"`. +- Queue position is deterministic: sorted by `declared_at_utc`, then + `intent_id` as tiebreaker. + +### Audit events + +| Event | When | +|------------------------|------------------------------| +| `intent.queued` | Declare downgrades to queued | +| `intent.promoted` | Promote succeeds | +| `intent.queue_blocked` | Promote blocked by conflicts | diff --git a/docs/book/12-structural-change-controller/patch-contract-verify.md b/docs/book/12-structural-change-controller/patch-contract-verify.md new file mode 100644 index 00000000..7ef70317 --- /dev/null +++ b/docs/book/12-structural-change-controller/patch-contract-verify.md @@ -0,0 +1,80 @@ +## Scope-Aware Patch Contract Verification + +When a change intent is active, `check_patch_contract(mode="verify")` attributes +regressions and gate changes to the declared scope rather than treating the +entire workspace as one undifferentiated surface. + +### Regression attribution + +Regressions from `compare_runs` are partitioned into two sets: + +- `intent_regressions` — findings whose file paths fall inside the declared + `allowed_files` or `allowed_related`. +- `external_regressions` — findings whose file paths are entirely outside + the declared scope. + +Only `intent_regressions` produce `structural_regressions` contract violations. +External regressions are reported as informational context without failing the +contract. + +Findings with no extractable file paths are conservatively classified as +intent-scope to avoid false-negative accepts. + +Without an active intent, all regressions are treated as intent-scope and +behavior is unchanged from the base contract. 
+ +### Scope matching vs verify attribution + +Scope **check** (`unexpected_files`) uses exact membership in `allowed_files` / +`allowed_related`. Verify regression attribution uses `fnmatchcase` on those +patterns (and treats path-less findings as in-scope). Do not assume identical +matching rules across check and verify — declare literal paths in scope lists. + +### Gate-delta logic + +Gate evaluation uses a two-layer attribution model: + +1. **Gate delta** — only gate *changes* between before-run and after-run are + contract-relevant. A gate that was already failing before the edit is + pre-existing, not a new violation. `gate_worsened` is true only when + `before_gate.would_fail` is false and `after_gate.would_fail` is true. + +2. **Gate attribution** — when `gate_worsened` is true and an intent is active, + the contract checks whether the gate-triggering signals come from intent + scope: intent-scope regressions or intent-scope worsened metric symbols. If + neither exists, the gate failure is external and does not produce a contract + violation. + +### Status values + +| Status | Meaning | +|----------------------------------|--------------------------------------------------------------------------| +| `accepted` | No intent-scope regressions, no gate worsening | +| `accepted_with_external_changes` | Intent scope is clean but external signals exist | +| `violated` | Intent-scope regressions, intent-caused gate failure, or scope violation | +| `unverified` | Missing before or after run | +| `expired` | Report digest mismatch since declaration | + +The `accepted_with_external_changes` status signals that another agent or +concurrent edit introduced regressions outside the current intent scope. The +verify response includes `intent_regressions`, `external_regressions`, +`intent_worsened`, `external_worsened`, `gate_worsened`, and `before_gate` +fields for full attribution visibility. + +??? 
info "Decision table" + + | Intent | Intent regressions | External regressions | Gate worsened | Intent caused gate | Scope check | Status | + |--------|--------------------|-----------------------|---------------|--------------------|-------------|----------------------------------| + | no | any | — | any | any | — | current logic unchanged | + | yes | > 0 | any | any | any | any | `violated` | + | yes | 0 | any | yes | yes | clean | `violated` | + | yes | 0 | any | yes | no | clean | `accepted_with_external_changes` | + | yes | 0 | > 0 | no | — | clean | `accepted_with_external_changes` | + | yes | 0 | 0 | no | — | clean | `accepted` | + | yes | 0 | any | any | any | violated | `violated` (scope violation) | + +### Baseline abuse + +`detect_baseline_abuse` stays workspace-global. Baseline hygiene is a +repository-level signal: if the baseline was updated while any regressions exist +(even external), that is suspicious regardless of whose regressions they are. diff --git a/docs/book/12-structural-change-controller/patch-trail.md b/docs/book/12-structural-change-controller/patch-trail.md new file mode 100644 index 00000000..86716da5 --- /dev/null +++ b/docs/book/12-structural-change-controller/patch-trail.md @@ -0,0 +1,51 @@ +### Patch Trail {#patch-trail} + +Patch Trail is a **bounded, deterministic snapshot** of declared scope, evidence +files, hygiene counts, and verify outcome for one finish cycle. It complements +patch verify — it does **not** authorize edits, expand scope, or override +structural findings. 
+ +```mermaid +flowchart TD + FIN[finish_controlled_change] --> HY[finish_hygiene_check] + HY -->|blocks| STOP1[Early exit — no patch_trail] + HY --> CHK[manage_change_intent check] + CHK -->|expired| STOP2[Early exit — no patch_trail] + CHK -->|violated| PT[compute_patch_trail] + CHK -->|clean or expanded| VER[check_patch_contract verify] + VER --> PT + PT --> AUD[audit patch_trail.computed] + PT --> RES[patch_trail response field] + VER --> RCPT[receipt / clear / projection hook] + AUD --> TRJ[Trajectory rebuild persists memory_trajectory_patch_trails] +``` + +**When emitted:** after scope `check` returns `violated` (**before** verify), or +after `verify` when check is `clean` / `expanded`. Failed verify still returns +`patch_trail` when check/verify stages were reached. Hygiene blocks and expired +intents do **not** emit Patch Trail. + +**Parameters:** + +| Parameter | Default | Meaning | +|----------------------|-----------|------------------------------------------------------------------------| +| `patch_trail_detail` | `summary` | `summary`: counts, statuses, digest, evidence refs; `full`: path lists | + +**Response `patch_trail` (summary):** `schema_version` (`PATCH_TRAIL_SCHEMA_VERSION`, +currently **`1`**), `intent_id`, compact `intent_description`, `scope_check_status`, +`verification_status`, `counts`, `patch_trail_digest`, `evidence` (audit sequence +refs), `retrieval_policy` (`patch_trail_does_not_authorize_edits`, +`patch_trail_does_not_override_findings`). + +**Audit:** `patch_trail.computed` stores a compact event core (`patch_trail_digest`, +counts, verification status) for trajectory projection. Requires `audit_enabled=true`. + +**Persistence:** manual or job-driven trajectory rebuild projects Patch Trail into +`memory_trajectory_patch_trails` and bumps trajectory projection to +`trajectory-v2` or later (digest includes `patch_trail_digest`). The active +`trajectory-v3` projection also carries deterministic quality scoring and agent +subjects. 
Scoped retrieval surfaces `patch_trail_summary` / full `patch_trail` — see +[Engineering Memory — Trajectory memory](../13-engineering-memory/trajectory-and-patch-trail.md). + +Refs: `codeclone/memory/trajectory/patch_trail.py`, `codeclone/audit/events.py`, +`codeclone/surfaces/mcp/_session_workflow_mixin.py:_finish_patch_trail`. diff --git a/docs/book/12-structural-change-controller/payload-semantics.md b/docs/book/12-structural-change-controller/payload-semantics.md new file mode 100644 index 00000000..e544c3ad --- /dev/null +++ b/docs/book/12-structural-change-controller/payload-semantics.md @@ -0,0 +1,139 @@ +## Change-control payload semantics + +This section supplements the workflow descriptions above. It does not repeat tool +lists or atomic step sequences. + +### Scope path matching + +Declare **repo-relative file paths** in `allowed_files` and `allowed_related`. +Glob patterns such as `docs/**` are **not** valid scope entries for scope +`check` — each changed path must appear literally in the declared lists. + +| Mechanism | Matching rule | +|---------------------------------------|----------------------------------------------------------------------------| +| Scope `check` (`unexpected_files`) | Exact membership in `allowed_files` or `allowed_related` | +| Start/finish hygiene (in-scope dirty) | Exact path **or** directory prefix (`docs/book` covers `docs/book/foo.md`) | +| Verify regression attribution | `fnmatchcase` on declared patterns (may differ from scope check) | +| `forbidden` | `fnmatchcase` on declared patterns | + +List every path you create, modify, or delete in finish evidence +(`changed_files` or `diff_ref`). + +### `structural_delta.health_delta` vs receipt `health.delta` + +Verify compares the intent's **before-run** to the explicit **after-run** via +`compare_runs`. 
`structural_delta` mirrors that comparison: + +```json +"before": {"run_id": "14d82d39", "health": 90}, +"after": {"run_id": "74cb3c0e", "health": 88}, +"structural_delta": { +"verdict": "regressed", +"health_delta": -2, +"regressions": ["...new finding ids..."] +} +``` + +| Field | Source | Meaning | +|----------------------------------|----------------------------------------------------|------------------------------------------------------| +| `verification.before` / `.after` | Intent before-run vs `after_run_id` | Run refs used for patch contract | +| `structural_delta.health_delta` | `health_after - health_before` from `compare_runs` | **Patch delta** between those two stored runs | +| `receipt.health.delta` | After-run summary vs trusted baseline | **Repository drift** signal in the receipt narrative | + +Patch deltas are run-relative, not baseline-novelty-relative. A finding absent +from the clean before-run and present in the after-run is a patch regression +even when its fingerprint is `novelty="known"` against the trusted baseline. + +If `before.run_id == after.run_id` for `python_structural` or +`governance_config` profiles, verify returns `status: "unverified"` with +`reason: "after_run_not_new"` — run a fresh post-edit analysis and pass the new +`after_run_id`. For documentation-only patches the identical-run case is not +structurally gated the same way. + +Negative `health_delta` sets `structural_delta.verdict` to `"regressed"` (or +`"mixed"` when improvements coexist). It does **not** by itself set +`verification.status` to `"violated"` — blocking comes from intent-scoped +finding regressions, gate worsening attributable to the patch, scope +violations, or baseline-abuse signals. Agents should still surface +`health_delta < 0` in review text. Accepted verify may include +`health_regression_advisory`. 
Claim Guard warns and violates regression-free +claims when `patch_health_delta < 0` (passed automatically by +`finish_controlled_change`; explicit on atomic `validate_review_claims`). + +### Multi-agent hygiene (who blocks whom) + +Hygiene reads the **shared git working tree**, not per-agent sandboxes. + +| Actor | Trigger | Start | Finish | +|------------------------------------------------------------------------------------|----------------------------------------|----------------------------------------------------------------------------------------------------------|------------------------------------------------------------------| +| **Foreign active/stale** intent on overlapping scope | `concurrent_intents` | `status: "blocked"` (coordination) | — | +| **Any** uncommitted dirty file in your `allowed_files` | `workspace_hygiene.blocks_edit` | `edit_allowed: false` (unless `dirty_scope_policy="continue_own_wip"` and no live foreign dirty overlap) | — | +| Dirty in scope **not** listed in `changed_files` / `diff_ref` (git reconciliation) | `unacknowledged_dirty_in_scope` | — | **`finish_block_reason: missing_evidence`** (blocks finish) | +| Dirty **outside** declared scope, already dirty at `start` and unchanged | `preexisting_unscoped_dirty` | — | Advisory only | +| Dirty **outside** declared scope, appeared after `start`, not foreign-attributed | `new_unattributed_unscoped_dirty` | — | Advisory — may appear in `external_changes` | +| Dirty **outside** declared scope, changed after `start`, not foreign-attributed | `modified_unattributed_unscoped_dirty` | — | Advisory — may appear in `external_changes` | +| Dirty **outside** declared scope, no usable start snapshot | `unknown_unattributed_unscoped_dirty` | — | Advisory classification only | +| Foreign dirty **outside** your scope (other agent's paths) | `foreign_attributed_outside_scope` | — | **ignored** — does not block finish | +| **Live** foreign intent previously declared overlapping dirty paths in 
your scope | `foreign_dirty_overlaps` | Contributes to `blocks_edit` at start | **`finish_block_reason: foreign_dirty_overlap`** (blocks finish) | + +Recoverable, expired, terminal, or **queued** foreign records **do not** +populate `foreign_dirty_overlaps`. A queued peer does not block finish for an +active agent. + +**Foreign attribution at finish:** only **`foreign_active`** and +**`foreign_stale`** intents (live owning PID, foreign to this session) may +populate `foreign_attributed_outside_scope`. **`recoverable`** intents (dead +owning PID) do **not** grant foreign attribution — treat their dirty paths like +ordinary workspace dirt unless scope is widened or the changes are reverted. + +**Finish hygiene payload fields** (on `workspace_hygiene` / `workspace_hygiene_after` +when finish is hygiene-gated): + +For hygiene, `detail_level` is effectively binary: `summary` and `normal` return +`counts`, overlap lists, and blocking fields only; pass `detail_level="full"` for +`dirty_attribution`, path classification arrays, and expanded `dirty_snapshot`. 
+ +| Field | Meaning | +|--------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `unacknowledged_dirty_in_scope` | In-scope git dirty missing from finish evidence | +| `preexisting_unscoped_dirty` | Out-of-scope git dirty that existed at `start` and did not change — informational, non-blocking | +| `unattributed_unscoped_dirty` | Union of unattributed out-of-scope paths — **advisory**, not blocking | +| `own_unscoped_dirty` | Legacy alias for `unattributed_unscoped_dirty`; not proof of ownership | +| `new_unattributed_unscoped_dirty` | Out-of-scope dirty path appeared after `start` | +| `modified_unattributed_unscoped_dirty` | Out-of-scope dirty path existed at `start` but changed afterward | +| `unknown_unattributed_unscoped_dirty` | Out-of-scope dirty path cannot be compared with a start snapshot | +| `foreign_attributed_outside_scope` | Out-of-scope git dirty owned by foreign active/stale intent — informational, non-blocking | +| `dirty_attribution` | Per-path attribution (`detail_level="full"` only) | +| `dirty_snapshot` / `dirty_snapshot_status` | Snapshot summary; expanded detail with `detail_level="full"` | +| `files_for_scope_check` | Agent evidence only — paths passed to scope `check` (out-of-scope dirt does not expand scope) | +| `finish_block_reason` | `missing_evidence`, `foreign_dirty_overlap`, or (when strict finish mode is enabled) `own_unscoped_dirty` when `blocks_finish` is true — see [env overrides](../10-config-and-defaults.md#mcp-session-and-change-control-hygiene) | +| `external_changes` | On finish response when verify is `accepted` but out-of-scope dirty remains — top-level status becomes `accepted_with_external_changes` | + +**Typical two-agent overlap on `pkg/a.py`:** + +1. 
Agent A (active intent) edits → working tree dirty on `pkg/a.py`. +2. Agent B calls `start` on the same path → blocked by **coordination** + (`foreign_active`) **and** **hygiene** (`blocks_edit` because the tree is + dirty in scope). B should not edit. +3. Agent A calls `finish` with `changed_files` including `pkg/a.py` → passes + declared-scope dirty acknowledgment. Finish fails on **live** foreign dirty overlap only + (`foreign_active` / `foreign_stale`). **Queued** foreign peers do not + appear in `foreign_dirty_overlaps`. +4. Resolution: coordinate (queue/promote/clear **active** foreign intent), + stash/commit foreign WIP, or narrow scope — not kill foreign PIDs. + +### Start / finish workflow transitions + +Workflow `status` values are **not** persisted registry lifecycle states. + +| Tool response | `edit_allowed` | Agent action | +|-----------------------------------------------|----------------|--------------------------------------------------------------------------------------------------------------------------| +| `start` → `needs_analysis` | `false` | `analyze_repository` → `start` again | +| `start` → `queued` | `false` | Wait → `promote`; re-analyze if `before_run_evicted` | +| `start` → `blocked` | `false` | Follow `next_step` (`message` matches); do not edit unless `continue_own_wip` was requested and returned `active` | +| `start` → `active` | `true` | Edit inside declared scope only; read `budget.gate_preview` as advisory | +| `finish` → `accepted` | — | Intent cleared (if receipt ok); no out-of-scope dirty in hygiene view | +| `finish` → `accepted_with_external_changes` | — | Patch accepted; report `external_changes` — other paths dirty outside declared scope | +| `finish` → `unverified` / `workspace_hygiene` | — | Fix `missing_evidence`, coordinate `foreign_dirty_overlap`, or (under strict finish mode) reconcile `own_unscoped_dirty` | +| `finish` → `violated` | — | Fix regressions or widen scope via new `start` | +| `finish` → `expired` | — | 
Re-analyze → new `start` (digest mismatch) | diff --git a/docs/book/12-structural-change-controller/token-budget.md b/docs/book/12-structural-change-controller/token-budget.md new file mode 100644 index 00000000..b17dee38 --- /dev/null +++ b/docs/book/12-structural-change-controller/token-budget.md @@ -0,0 +1,62 @@ +## MCP Payload Token Budget + +The optional controller audit trail can estimate the token footprint of MCP +payloads returned to the agent. This is a deterministic estimate of how much +context window each tool response consumes, not actual model billing tokens. + +### Setup + +BPE-based token estimation requires two conditions: + +1. Audit trail enabled (`audit_enabled = true` in `pyproject.toml`). +2. The `codeclone[token-bench]` optional extra installed (provides `tiktoken`). + +Without `tiktoken`, the estimator falls back to a character-based approximation +(`ceil(characters / 4)`). Without audit enabled, no estimation runs. + +### How it works + +The estimation runs inside the audit writer's `event_to_row`, not in the MCP +tool call path. The MCP session has zero overhead when audit is disabled or +when `tiktoken` is not installed. + +Each audit event row includes three optional fields: + +- `estimated_tokens` — BPE token count (or character-based approximation). +- `token_encoding` — encoding name (`o200k_base` or `chars_approx`). +- `payload_characters` — character count of the canonical JSON payload. + +The estimation input is the full original payload (what the MCP client +receives), not the compact audit storage form. + +With `audit_payloads=compact`, stored JSON drops large structured fields, but +`intent.declared` keeps a bounded `intent_description`. The SQLite `summary` column +always stores a short essence via `event_summary()`, independent of payload mode. 
+ +### CLI visibility + +The `--audit` Rich TUI renderer shows token columns when data is available: + +``` +Tokens Encoding Event + 412 o200k_base intent.declared + 890 o200k_base blast_radius.computed + 1204 o200k_base patch_contract.verified +``` + +The `--session-stats` command appends a summary line when audit token data +exists: + +``` +MCP payload footprint: ~3,816 tokens (o200k_base, 7 tool calls) +``` + +### Invariants + +- Token estimation never affects controller decisions, gate results, report + digests, or baseline trust. +- Any exception in the estimation path results in `NULL` values, not a failed + audit event write. +- The `codeclone/budget/` module never imports from `codeclone/surfaces/` or + `codeclone/audit/`. Dependency direction: `audit -> budget`, never reverse. +- Base `codeclone` never depends on `tiktoken`. The import is lazy and guarded. diff --git a/docs/book/12-structural-change-controller/verification-profiles.md b/docs/book/12-structural-change-controller/verification-profiles.md new file mode 100644 index 00000000..bb94a284 --- /dev/null +++ b/docs/book/12-structural-change-controller/verification-profiles.md @@ -0,0 +1,50 @@ +## Verification Profiles + +`check_patch_contract(mode="verify")` derives a **verification profile** from +actual changed files. The profile determines which structural checks are +applicable and whether `after_run_id` is required for verification. 
+ +### Profile classification + +The classifier is a pure function with a deterministic priority chain: + +| Priority | Profile | When | `after_run` required | Structural checks | +|----------|-------------------------|-------------------------------------------------------------------------------------------------------|----------------------|-------------------| +| 1 | `state_artifact_change` | CodeClone state artifacts touched (`codeclone.baseline.json`, `.codeclone/**`, `.cache/codeclone/**`) | no (violated) | not applicable | +| 2 | `python_structural` | Any `.py` / `.pyi` touched | yes | all | +| 3 | `governance_config` | Config files only (pyproject.toml, CI…) | yes | not applicable | +| 4 | `documentation_only` | Only docs files (`.md`, `.rst`, …) | no | not applicable | +| 5 | `non_python_patch` | Other files, no Python or docs | no | not applicable | + +A single file from a higher-priority category overrides the entire patch. + +### Fast path + +Documentation-only and non-Python patches can verify without `after_run_id` +when `changed_files` or `diff_ref` evidence is provided. Without any diff +evidence, verify returns `unverified` to preserve backward compatibility. + +### Invariants + +- The profile is derived from `actual_changed_files`, never declared by the + agent. +- Scope and forbidden checks always run before any profile-based fast return. +- Receipts use "not applicable" for skipped structural checks, never "passed". +- Claim guard warns when review text references structural verification but + the profile says structural checks were not applicable. +- Claim guard warns and violates regression-free claims when + `patch_health_delta < 0`. 
+ +### Public surface + +| Artifact | Path | +|-------------------|--------------------------------------------------------| +| Classifier module | `codeclone/surfaces/mcp/_verification_profile.py` | +| Enum | `VerificationProfile` | +| Classifier | `classify_patch(changed_files) → ClassificationResult` | +| Check matrix | `check_matrix(profile) → CheckMatrix` | + +### Locked by tests + +- `tests/test_verification_profile.py` +- `tests/test_mcp_service.py` diff --git a/docs/book/12-structural-change-controller/workflow-tools.md b/docs/book/12-structural-change-controller/workflow-tools.md new file mode 100644 index 00000000..7be3f296 --- /dev/null +++ b/docs/book/12-structural-change-controller/workflow-tools.md @@ -0,0 +1,119 @@ +## Pre-Change Workflow + +1. Call `manage_change_intent(action="list_workspace", root="/abs/repo")` to + see active intents from other agents before analysis. + If it returns `ownership="recoverable"` for a matching run, use + `manage_change_intent(action="recover")` instead of killing another MCP + process or redeclaring blindly. +2. Run `analyze_repository` or `analyze_changed_paths`. +3. Declare scope with `manage_change_intent(action="declare")`. +4. If `concurrent_intents` is non-empty, narrow scope or coordinate before + editing. +5. Inspect the returned `blast_radius_summary`. +6. Optionally call `get_blast_radius` for full dependent/context detail. +7. Call `check_patch_contract(mode="budget")` to inspect the active regression + budget and metric headroom before editing. +8. Run analysis again after editing (produces the after-run). +9. Call `manage_change_intent(action="check", intent_id=..., changed_files=...)` + with the original `intent_id`. Use `diff_ref=...` instead of + `changed_files=...` when the changed set should come from git. The intent + stays bound to the before-run; `verify` compares its `report_digest` against + the before-run, so redeclaring on the after-run would cause an `expired` + mismatch. +10. 
Call `check_patch_contract(mode="verify", before_run_id=..., + after_run_id=..., intent_id=...)`. +11. Call `validate_review_claims` before publishing claim text in the atomic + workflow, or pass `claims_text` to `finish_controlled_change`. +12. Call `create_review_receipt` to collect provenance, scope, blast radius, + reviewed findings, patch status, human decision points, and claims-not-made. +13. Call `manage_change_intent(action="clear")` when the edit is complete. + +`manage_change_intent` can return `clean`, `expanded`, `violated`, or +`expired`. Expiry means the report digest changed since declaration. + +`check_patch_contract` never runs analysis itself. Budget mode reads one stored +run and optional intent. Verify mode compares explicit before/after stored runs, +previews gates, validates scope when intent is available, and reports baseline +abuse signals. Missing before or after runs return `status="unverified"` with +`reason="no_before_run"` or `reason="no_after_run"`. + +Patch verify is run-relative, not baseline-novelty-relative: if a finding is +absent from the clean before-run and present in the after-run, it is a patch +regression even when that finding's fingerprint is `novelty="known"` against the +trusted baseline. + +Budget payloads use `null` for disabled numeric thresholds rather than sentinel +values. Boolean policy gates are named `forbid_*`, for example +`forbid_dead_code_regression`. + +## Verify Ergonomics + +`check_patch_contract(mode="verify")` includes three ergonomic features that +reduce agent error and wasted context tokens. + +### Auto-resolve before_run_id + +When `intent_id` is provided but `before_run_id` is omitted, verify resolves +the before-run from the intent record's `run_id`. This eliminates the most +common agent error: forgetting to pass `before_run_id`. 
+ +### Next-step hints + +Non-accepted verify responses include a `next_step` field with an actionable +hint matched to the failure reason: + +| Reason | Hint | +|-------------------------------------|------------------------------------------------------------| +| `no_before_run` | Run analysis or pass intent_id to auto-resolve | +| `no_after_run` | Run analysis after editing and pass after_run_id | +| `after_run_not_new` | After-run matches before-run; run fresh post-edit analysis | +| `after_run_required_for_governance` | Governance changes require post-edit analysis | +| `incomparable_runs` | Re-run analysis with the same settings | +| `intent_not_active` | Queued intent must be promoted first | +| `report_digest_mismatch` | Use the original intent_id with the original before-run | +| `state_artifact_mutation` | Remove baseline/cache files from the patch | +| `scope_violation` | Redeclare intent with expanded scope | + +### Claim validation recommended + +The `claim_validation_recommended` boolean in verify responses advises whether +calling `validate_review_claims` is meaningful for the verification profile. +It is `true` for `python_structural` and `governance_config` profiles, `false` +for `documentation_only`, `non_python_patch`, `state_artifact_change`, and +non-accepted outcomes. + +## Workflow consolidation + +The atomic change control workflow requires 7–11 MCP tool calls per edit +cycle. Two **workflow-level tools** aggregate these steps while preserving +the same evidence, state updates, and boundary checks: + +| Tool | Replaces | Calls | +|----------------------------|---------------------------------------------------|------------------| +| `start_controlled_change` | workspace check + declare + blast radius + budget | 1 instead of 4 | +| `finish_controlled_change` | scope check + verify + claims + receipt + clear | 1 instead of 4–6 | + +Workflow tools are orchestration shortcuts. 
They call the same internal +methods as the atomic tools and emit the same semantic audit events. +`analyze_repository` remains a separate explicit call — workflow tools +never run analysis implicitly. + +`finish_controlled_change` keeps human notes and validated claims separate: +`review_text` is a note, while `claims_text` is the only finish parameter passed +to Claim Guard. The response includes a compact `summary` for humans while +retaining full `scope_check`, `verification`, `claims`, `receipt`, and +`workspace_hygiene_after` payloads for agents. + +**Tool tiers:** + +- **Normal workflow:** `analyze_repository`, `start_controlled_change`, + `finish_controlled_change` — every edit cycle. +- **Queue/recovery:** `manage_change_intent` (promote, recover, reset, + renew) — multi-agent coordination, crash recovery. +- **Advanced/diagnostic:** `get_blast_radius`, `check_patch_contract`, + `validate_review_claims`, `create_review_receipt` — deep inspection, + step-by-step debugging. + +The same semantic audit events are preserved regardless of which +approach the agent uses. Atomic tools remain available for backward +compatibility and advanced use cases. 
diff --git a/docs/book/13-engineering-memory/agent-contracts.md b/docs/book/13-engineering-memory/agent-contracts.md new file mode 100644 index 00000000..75b2c523 --- /dev/null +++ b/docs/book/13-engineering-memory/agent-contracts.md @@ -0,0 +1,50 @@ +## Agent playbook + +### When to read memory + +```mermaid +flowchart TD + A[analyze_repository] --> B[start_controlled_change] + B --> C{edit_allowed?} + C -->|no| Z[Stop — queue / blocked / needs_analysis] +C -->|yes|D[get_relevant_memory] +D --> E{contradiction_note\nor stale warnings?} +E -->|yes|F[Surface to user before edit] +E -->|no|G[Edit in declared scope] +G --> H[analyze if profile requires after_run] +H --> I[finish_controlled_change] +I --> J{propose_memory?} +J -->|true + accepted|K[Review memory_candidates\nhuman approve later] +J -->|false|L[Done] + +style D fill: #eff6ff +style G fill: #fef9c3 +``` + +| Moment | Tool | Why | +|----------------------------------|--------------------------------------------------------------------------|-----------------------------------------------| +| After `start`, before first edit | `get_relevant_memory(root=abs, scope=… \| intent_id=…)` | Ranked context for declared scope | +| Need one path deep-dive | `query_engineering_memory(mode=for_path, path=…)` | Targeted lookup | +| Need keyword across store | `query_engineering_memory(mode=search, query=…, filters={match_mode:…})` | FTS discovery | +| Before writing claims in finish | `manage_engineering_memory(action=validate_claims, text=…)` | Catch overclaims vs memory | +| After accepted patch (optional) | `finish(..., propose_memory=true)` | Draft candidates + staleness + coverage delta | + +### When to write memory + +| Situation | Action | Notes | +|----------------------------------|------------------------------------------------------------------------------------------|---------------------------------------| +| Stable observation during edit | `record_candidate` | Draft only; cite scope in statement | +| 
Patch accepted, workflow finish | `propose_memory=true` | Preferred batch proposal | +| Atomic fallback (no finish hook) | `propose_from_receipt` | Same receipt shape as finish | +| System facts changed in repo | `refresh_from_run` or ask human for `memory init --refresh` | Explicit MCP refresh always available | +| Promote draft to trusted fact | **Not agent** — VS Code Memory view or `codeclone memory approve --i-know-what-im-doing` | Required for active/verified | + +### When **not** to use memory + +- To justify touching `do_not_touch` paths +- To expand scope beyond declared intent +- To override CodeClone structural findings +- As a substitute for `analyze_repository` or `get_blast_radius` +- To treat `draft` / `inferred` / `stale` records as established facts + +--- diff --git a/docs/book/13-engineering-memory/bootstrap-and-config.md b/docs/book/13-engineering-memory/bootstrap-and-config.md new file mode 100644 index 00000000..ca2ca352 --- /dev/null +++ b/docs/book/13-engineering-memory/bootstrap-and-config.md @@ -0,0 +1,200 @@ +## Bootstrap: init, MCP sync, and refresh + +The memory store can be created or refreshed through **CLI init**, **MCP auto-sync** +(default), or **explicit MCP refresh**. All paths call the same deterministic +ingest pipeline (`run_memory_init`). + +### CLI init (human / CI) + +```bash +codeclone memory init --root /abs/repo +codeclone memory init --root /abs/repo --refresh # re-ingest + staleness pass +``` + +```mermaid +sequenceDiagram + participant H as Human / CI + participant CLI as codeclone memory init + participant CC as CodeClone analysis + participant DB as SQLite store + H ->> CLI: init [--refresh] + CLI ->> CC: load cached report or analyze + CLI ->> CLI: build ingest batch + Note over CLI: modules, contracts, docs,<br/>tests, risks, git hotspots + CLI ->> DB: upsert records + evidence + CLI ->> DB: rebuild FTS index + opt --refresh + CLI ->> DB: mark drifted records stale + end + CLI ->> H: status summary +``` + +### MCP sync (default agent path) + +Policy key: `mcp_sync_policy` in `[tool.codeclone.memory]` (default +`bootstrap_if_missing`). + +| Policy | Auto behavior on `get_relevant_memory` | Explicit `refresh_from_run` | +|------------------------|---------------------------------------------------|-----------------------------| +| `off` | No auto sync; DB must exist | Always runs ingest | +| `bootstrap_if_missing` | Create store from latest MCP run when DB missing | Always runs ingest | +| `refresh_when_stale` | Re-ingest when stored digest ≠ current run digest | Always runs ingest | + +```mermaid +sequenceDiagram + participant A as Agent + participant M as MCP + participant S as mcp_sync + participant DB as SQLite store + A ->> M: analyze_repository + M -->> A: run_id + A ->> M: start_controlled_change + M -->> A: edit_allowed=true + A ->> M: get_relevant_memory(root, intent_id) + M ->> S: decide + execute (policy) + alt missing DB + bootstrap_if_missing + S ->> DB: init ingest from run report + S -->> M: memory_sync completed + else digest changed + refresh_when_stale + S ->> DB: refresh ingest + staleness + S -->> M: memory_sync completed + else unchanged + S -->> M: skip (no memory_sync field) + end + M ->> DB: ranked scope query + M -->> A: records + optional memory_sync +``` + +**Explicit refresh:** `manage_engineering_memory(action="refresh_from_run", run_id?)` +always ingests from the selected MCP run (defaults to latest). Use after +`analyze_repository` when you need fresh system facts without waiting for policy +triggers. + +**Agent rule:** MCP sync ingests **system records only** — same as CLI init. +Human `approve` is still required for agent drafts. MCP never runs +approve/reject/archive. 
+ +When auto-sync does not run and the DB is missing, memory tools return a contract +error pointing to `refresh_from_run` or CLI init. + +Ingest sources (non-exhaustive): + +| Record type | Typical ingest source | +|----------------------|------------------------------------------------------| +| `module_role` | Report file inventory | +| `contract_note` | `contracts/__init__.py` paths (auto or configured) | +| `document_link` | Configured docs and/or `docs/**/*.md` from inventory | +| `test_anchor` | Test file inventory | +| `risk_note` | Complexity / security surfaces from metrics | +| `public_surface` | MCP / CLI public API inventory | +| `contradiction_note` | Optional MCP tool-count doc vs snapshot | + +Git provenance (Phase 18.6): init attaches `git_commit` evidence when git is +available; optional git hotspot records use +`git_hotspot_period_days` / `git_hotspot_min_changes` from config. + +Refs: `codeclone/memory/ingest/mcp_sync.py`, `codeclone/surfaces/mcp/_session_memory_mixin.py`. + +--- + +## Configuration + +Nested tables in `pyproject.toml` under `[tool.codeclone.memory]`, +`[tool.codeclone.memory.ingest]`, and `[tool.codeclone.memory.semantic]`. +Defaults live in `codeclone/config/memory_defaults.py`; key validation in +`codeclone/config/memory_specs.py` (flat memory keys) and +`codeclone/config/memory.py` (`IngestConfig`, `SemanticConfig`). 
+ +### Retention and capacity + +| Key | Type | Default | Purpose | +|----------------------------------|------|---------|--------------------------------------------------| +| `active_retention_days` | int | `-1` | Active record retention (`-1` = no age purge) | +| `stale_retention_days` | int | `180` | Stale record retention before vacuum | +| `draft_retention_days` | int | `14` | Draft candidate retention | +| `rejected_retention_days` | int | `30` | Rejected draft retention | +| `archived_retention_days` | int | `365` | Archived record retention | +| `receipt_retention_days` | int | `90` | Finish-receipt evidence retention | +| `max_records` | int | `10000` | Hard cap on persisted records | +| `max_candidates` | int | `1000` | Draft inbox capacity | +| `max_evidence_per_record` | int | `20` | Evidence rows per record | +| `max_statement_chars` | int | `1000` | Statement hard limit (target 300, soft warn 500) | +| `max_blast_radius_cache_entries` | int | `500` | Cached blast-radius projections per project | +| `trajectory_retention_days` | int | `365` | Stored trajectory projection retention | + +### Store backend and sync + +| Key | Type | Default | Purpose | +|-------------------|------|------------------------------------------------|---------------------------------------------------------| +| `backend` | str | `sqlite` | Persistence backend | +| `db_path` | str | `.codeclone/memory/engineering_memory.sqlite3` | SQLite path | +| `mcp_sync_policy` | str | `bootstrap_if_missing` | `off` \| `bootstrap_if_missing` \| `refresh_when_stale` | + +### Git hotspots (init ingest) + +| Key | Type | Default | Purpose | +|---------------------------|------|---------|----------------------------------------| +| `git_hotspot_period_days` | int | `90` | Git history window for hotspot records | +| `git_hotspot_min_changes` | int | `5` | Minimum commits to emit a hotspot | + +### Trajectory projection and export + +| Key | Type | Default | Purpose | 
+|--------------------------------------|------|------------|----------------------------------------------| +| `trajectories_enabled` | bool | `true` | Enable trajectory projection from audit core | +| `trajectory_export_enabled` | bool | `false` | Gate CLI `trajectory export` | +| `trajectory_export_include_payloads` | bool | `false` | Include step payloads in JSONL export | +| `trajectory_export_max_record_bytes` | int | `65536` | Per-record export size cap | +| `trajectory_export_max_file_bytes` | int | `10485760` | Export file size cap | + +### Projection rebuild coalesce + +| Key | Type | Default | Purpose | +|----------------------------------------------|------|---------|---------------------------------------------------------| +| `projection_rebuild_policy` | str | `off` | `off` \| `enqueue_when_stale` — finish may enqueue jobs | +| `projection_rebuild_running_timeout_seconds` | int | `1800` | Stale running-job reclaim timeout | +| `projection_rebuild_spawn_worker` | bool | `true` | Spawn detached worker on enqueue | +| `projection_rebuild_coalesce_window_seconds` | int | `60` | Batch sub-threshold rebuilds (`0` = immediate spawn) | +| `projection_rebuild_coalesce_min_delta` | int | `25` | Active-record delta bypassing coalesce window | + +### Ingest paths (`[tool.codeclone.memory.ingest]`) + +| Key | Type | Default | Purpose | +|---------------------------------|----------------|---------|--------------------------------------------------------------------------------| +| `contract_constants_paths` | string list | `[]` | Contract version files; empty uses auto discovery under `codeclone/contracts/` | +| `document_link_paths` | string list | `[]` | Doc paths; empty uses README, AGENTS, CLAUDE, and docs tree | +| `mcp_tool_schema_snapshot_path` | string or null | `null` | MCP tool schema snapshot for contradiction checks | +| `mcp_tool_count_doc_paths` | string list | `[]` | Docs claiming MCP tool counts (requires snapshot path) | + +### Semantic batching 
(`[tool.codeclone.memory.semantic]`) + +| Key | Type | Default | Purpose | +|-------------------------------------|------|------------------------------------------|-------------------------------------------------------| +| `enabled` | bool | `false` | Opt-in semantic sidecar | +| `backend` | str | `lancedb` | Vector backend | +| `index_path` | str | `.codeclone/memory/semantic_index.lance` | LanceDB path | +| `embedding_provider` | str | `diagnostic` | `diagnostic` \| `fastembed` \| `local_model` \| `api` | +| `embedding_model` | str | provider default | e.g. `BAAI/bge-small-en-v1.5` for fastembed | +| `embedding_cache_dir` | str | `.codeclone/memory/fastembed` | Model cache directory | +| `allow_model_download` | bool | `false` | Permit fastembed downloads | +| `dimension` | int | `256` | Diagnostic provider dimension | +| `max_results` | int | `20` | Semantic search cap | +| `index_audit` | bool | `true` | Project audit summaries into index | +| `embed_max_documents_per_batch` | int | `64` | Embedding batch document cap | +| `embed_max_padded_tokens_per_batch` | int | `8192` | Embedding batch token budget | +| `projection_token_estimator` | str | `chars_approx` | `chars_approx` \| `tiktoken` | + +Environment overrides for memory and semantic fields: +[10-config Environment variable overrides](../10-config-and-defaults.md#environment-variable-overrides) +(Engineering Memory table). + +Unknown keys under `[tool.codeclone.memory.semantic]` are contract errors +(Pydantic `extra="forbid"` on `SemanticConfig`). + +Refs: + +- `codeclone/config/memory_specs.py` +- `codeclone/config/memory_defaults.py` +- `codeclone/config/memory.py` + +--- diff --git a/docs/book/13-engineering-memory/cli-surface.md b/docs/book/13-engineering-memory/cli-surface.md new file mode 100644 index 00000000..7f826521 --- /dev/null +++ b/docs/book/13-engineering-memory/cli-surface.md @@ -0,0 +1,75 @@ +## CLI surface + +All commands live under `codeclone memory` and accept `--root` (default `.`). 
+ +| Command | Purpose | +|----------------------------------------------------------------------------------------|-----------------------------------------------------------| +| `init [--refresh] [--dry-run] [--from-report PATH] [--no-docs] [--no-tests]` | Create or refresh the memory store | +| `status` | Schema version, counts, last ingest metadata | +| `for-path PATH [--limit N]` | Records linked to a repo-relative path | +| `search QUERY [--match any\|all] [--semantic] [--active-only] [--limit N]` | FTS search; optional semantic blend | +| `semantic status` | Index availability, provider, row counts | +| `semantic rebuild` | Rebuild LanceDB sidecar from memory + audit + trajectory | +| `semantic probe [--exact-tokens] [--json]` | Projection length stats (index-unit aware for trajectory) | +| `semantic search QUERY [--limit N]` | Search with semantic ranking (index required) | +| `stale [--limit N]` | List stale records and reasons | +| `vacuum` | Retention purge per config (no dry-run flag) | +| `coverage --scope PATH [PATH...]` | Scope coverage metrics | +| `review-candidates [--limit N]` | List draft records awaiting human review | +| `approve RECORD_ID [--by NAME] [--i-know-what-im-doing]` | Promote draft → active | +| `reject RECORD_ID [--by NAME] [--reason TEXT] [--i-know-what-im-doing]` | Reject draft | +| `archive RECORD_ID [--by NAME] [--i-know-what-im-doing]` | Archive record | +| `trajectory status\|rebuild\|list\|search\|show\|agents\|anomalies\|dashboard\|export` | Trajectory projection, passport analytics, and export | +| `jobs status\|enqueue\|run-once\|list` | Trajectory + semantic + Experience projection queue | + +### Init flags + +| Flag | Effect | +|-----------------|---------------------------------------------------------------------| +| `--dry-run` | Build ingest batch without writing the store | +| `--refresh` | Re-ingest and run staleness pass on drifted system records | +| `--from-report` | Load a canonical report JSON path instead of 
cached/latest analysis | +| `--no-docs` | Skip document-link ingest | +| `--no-tests` | Skip test-anchor ingest | + +### Governance (`approve` / `reject` / `archive`) + +Direct CLI governance is **disabled by default**. The preferred path is the +**CodeClone VS Code Memory** view (IDE governance channel over MCP with +`--ide-governance-channel`). + +For explicit human break-glass outside the IDE channel, pass +`--i-know-what-im-doing` on `approve`, `reject`, or `archive`. Attributions use +`--by` (default `human`), not `--verified-by`. + +MCP agents cannot call `approve`/`reject`/`archive` on `manage_engineering_memory`. + +### Trajectory analytics flags + +| Subcommand | Extra flags | +|-------------|-----------------------------------------------------------------------------| +| `list` | `--limit N` | +| `search` | `QUERY`, `--limit N`, `--match any\|all` | +| `agents` | `--include-routine`, `--json` | +| `anomalies` | `--limit N`, `--include-routine`, `--json` | +| `dashboard` | `--limit N`, `--include-routine`, `--json` | +| `export` | `--profile NAME`, `--out PATH`, `--allow-external-out`, `--force`, `--json` | + +`--include-routine` includes routine analysis-only trajectories in aggregates +(default excludes them). Maps to MCP `filters.include_routine` on trajectory +analytics modes. 
+ +### Projection jobs flags + +| Subcommand | Extra flags | +|------------|------------------------------------------------------------------------------| +| `enqueue` | `--force` (enqueue even when policy off or stimulus unchanged), `--no-spawn` | +| `run-once` | `--not-before ISO-8601-UTC` (defer until coalesce window elapses) | +| `list` | `--limit N`, `--json` | + +Refs: + +- `codeclone/surfaces/cli/memory.py` +- `codeclone/surfaces/cli/memory_render.py` + +--- diff --git a/docs/book/13-engineering-memory/experience-layer.md b/docs/book/13-engineering-memory/experience-layer.md new file mode 100644 index 00000000..134de882 --- /dev/null +++ b/docs/book/13-engineering-memory/experience-layer.md @@ -0,0 +1,103 @@ +# Experience Layer + + + +Experiences are the third Engineering Memory knowledge tier: + +1. memory records describe what the project knows; +2. trajectories describe what happened during agent work; +3. Experiences summarize recurring, evidence-linked patterns across + trajectories. + +Experiences are advisory. They do not authorize edits, override findings, or +replace the human-governed memory record lifecycle. + +## Distillation pipeline + +```mermaid +flowchart LR + A["Canonical trajectories<br/>all outcomes"] --> B["Extract subject families,<br/>signals, outcome classes"] + B --> C["Group by PatternKey"] + C --> D["Support and information-value gates"] + D --> E["Active Experiences"] + E --> F["Scoped retrieval"] + E --> G["Optional promotion"] + G --> H["Human-reviewable draft<br/>memory record"] +``` + +The current distillation version is `experience-v1`. Every canonical +trajectory may contribute, including partial, blocked, and incident-bearing +work. Distillation is not limited to verified or successful changes. + +## Pattern identity + +An Experience key contains: + +- `subject_family`: a deterministic directory family derived from touched + paths, with at most eight families per trajectory; +- `signal`: a non-ubiquitous label or a derived signal; +- `outcome_class`: `:`. + +Derived signals include: + +- `verification_incomplete` for partial or blocked work without a verified + finish; +- `incident_present` when a trajectory contains incidents. + +Agent and tool identity are deliberately excluded from `PatternKey`. They are +evidence facets, not pattern identity, so equivalent project behavior can +coalesce across agents. + +## Admission and scoring + +A candidate requires: + +- support from at least five trajectories; +- information value of at least `50`; +- no more than twenty retained evidence trajectory IDs. + +Information value is deterministic: + +- `+60` when evidence spans at least two agent families; +- `+25` for a structural signal; +- capped at `100`. + +A single-agent pattern therefore does not pass the current information-value +threshold by itself. + +The Experience ID and digest exclude timestamps. They include the pattern key, +sorted member trajectory IDs, and the distillation version. This keeps +replace-all rebuilds reproducible. + +## Storage and retrieval + +Distillation replaces the project's Experience projection atomically in +deterministic order. Current records are `active`. The domain model reserves a +`dormant` state, but dormant lifecycle management is not implemented. 
+ +Scoped retrieval: + +- returns active Experiences only; +- exact-matches the requested directory `subject_family`; +- sorts by support descending, information value descending, then ID; +- returns compact evidence counts and agent-family summaries by default; +- adds agent facets and evidence trajectory IDs at full detail. + +The current distiller emits `agent_family` facets. Other facet kinds are +reserved by the domain types but are not currently populated. + +## Promotion boundary + +Promotion is explicit and idempotent. It creates a human-reviewable draft +memory candidate with the Experience statement, subject family, and trajectory +evidence. It obeys the project's draft capacity and does not silently approve +the result. + +Promotion creates drafts only. To approve, reject, or archive records, use the +**VS Code Memory** view (IDE governance channel) or the CLI break-glass path +(`codeclone memory approve|reject|archive --i-know-what-im-doing`). MCP agents +cannot perform governance actions. + +See [Trust and lifecycle](trust-and-lifecycle.md), +[MCP surface](mcp-surface.md), and the +[trajectories and Experiences guide](../../guide/memory/trajectories-and-experiences.md). diff --git a/docs/book/13-engineering-memory/index.md b/docs/book/13-engineering-memory/index.md new file mode 100644 index 00000000..d437f7fb --- /dev/null +++ b/docs/book/13-engineering-memory/index.md @@ -0,0 +1,140 @@ +# Engineering Memory + +## Purpose + +Engineering Memory is a **local, evidence-linked knowledge store** for a Python +repository. It captures structural facts, document links, git provenance, and +governed human/agent notes — then surfaces them to AI agents **before and during** +controlled edits. + +!!! note "Not a second analyzer" + Memory reads from the same canonical report, contracts, docs, tests, and git + facts as CodeClone analysis. It does **not** run a separate LLM inference + path, mutate source files, or override structural findings. + +!!! 
note "Not analysis cache" + The SQLite database under `.codeclone/memory/` is a **governed memory + contract**, separate from analysis cache (`cache.json`) and baselines + (`codeclone.baseline.json`). + +--- + +## Status + +| Phase | Capability | Surface | +|-------|---------------------------------------------------------------|------------------------------------------------------------------------------------------| +| 18.1 | Store, init ingest, CLI `init\|status\|for-path\|search` | CLI | +| 18.2 | Scoped retrieval, ranking | MCP `get_relevant_memory`, `query_engineering_memory` | +| 18.3 | Refresh staleness, scope staleness, retention | CLI `stale`, `vacuum`; finish hook marks scope stale | +| 18.4 | Draft governance, claim validation | MCP `manage_engineering_memory`; CLI `review-candidates\|approve\|reject\|archive` | +| 18.5 | Scope coverage, finish proposals | `finish_controlled_change(propose_memory=true)` | +| 18.6 | FTS search (`match_mode`), git hotspots, schema 1.1, Rich CLI | CLI `--match`; MCP `filters.match_mode` | +| 18.7 | MCP sync from analysis runs | `mcp_sync_policy`; auto bootstrap on `get_relevant_memory`; `refresh_from_run` | +| 20 | Optional semantic retrieval (LanceDB sidecar) | `[tool.codeclone.memory.semantic]`; CLI `memory semantic *`; MCP/CLI search `--semantic` | +| 22 | Audit event core for trajectory replay | `AUDIT_EVENT_CORE_VERSION`; audit `event_core_json` / `workflow_id` | +| 23 | Trajectory projection + SQLite storage | CLI `memory trajectory status\|rebuild\|list\|show\|search` | +| 24 | Scoped trajectory retrieval + memory evidence | MCP `get_relevant_memory.trajectories[]`; `query_engineering_memory(mode=trajectory_*)` | +| 25 | Disabled-by-default local JSONL export profiles | CLI `memory trajectory export --profile ... 
--out ...` | +| 26 | Patch Trail persistence + scoped retrieval | `memory_trajectory_patch_trails`; `patch_trail_summary` on scoped retrieval | +| 28 | Incremental projection jobs | Watermarked trajectory rebuild, semantic hash-skip, coalesced worker | +| Live | Trajectory quality and passport analytics | Quality/complexity contract, anomalies, agents, dashboard | +| Live | Experience Layer | Distillation job, scoped `experiences[]`, `promote_experience` draft bridge | + +Schema version constant: `ENGINEERING_MEMORY_SCHEMA_VERSION` in +`codeclone/contracts/__init__.py` (currently **`1.7`**). + +Semantic index format (separate contract): `SEMANTIC_INDEX_FORMAT_VERSION` +(currently **`2`**) in the same module. The vector sidecar is independent of +the SQLite memory schema version. + +--- + +## Architecture + +```mermaid +graph TB + subgraph Sources["Deterministic sources"] + CR[Canonical Report] + CT[Contracts / docs / tests] + GIT[Git provenance] + RC[Finish receipts / audit] + end + + subgraph MemoryStore["Engineering Memory (SQLite)"] + REC[memory_records] + SUB[memory_subjects] + EV[memory_evidence] + FTS[memory_fts FTS5] + TRAJ[trajectory projection] + EXP[Experience projection] + end + + subgraph Surfaces["Read / write surfaces"] + CLI["codeclone memory *"] + MCP_R["MCP read tools"] + MCP_W["MCP draft writes"] + HUM["Human approve CLI"] + end + + CR -->|init / refresh ingest| MemoryStore + CT -->|init / refresh ingest| MemoryStore + GIT -->|init / refresh ingest| MemoryStore + RC -->|propose_from_receipt / finish hook| MemoryStore + RC --> TRAJ --> EXP + MemoryStore --> CLI + MemoryStore --> MCP_R + MCP_W -->|draft only| MemoryStore + HUM -->|approve / reject / archive| MemoryStore + style MemoryStore stroke: #6366f1, stroke-width: 2px + style MCP_W fill: #fef9c3 + style HUM fill: #dcfce7 +``` + +Module ownership: + +| Module | Role | +|---------------------------------------------------|------------------------------------------------------| +| 
`codeclone/memory/sqlite_store.py` | SQLite persistence, FTS sync, subject dedup | +| `codeclone/memory/ingest/*` | Init/refresh batch builders from report + git + docs | +| `codeclone/memory/retrieval/*` | Scoped ranking and query router | +| `codeclone/memory/semantic/*` | Projections, LanceDB sidecar, rebuild, search hits | +| `codeclone/memory/embedding/*` | Embedding providers (`diagnostic` default) | +| `codeclone/memory/governance.py` | Draft candidates, approve/reject, claim validation | +| `codeclone/memory/staleness.py` | Refresh-time and scope-time staleness | +| `codeclone/memory/jobs/store.py` | Coalesced projection rebuild jobs (schema 1.3+) | +| `codeclone/memory/trajectory/*` | Audit → trajectory projection, Patch Trail, export | +| `codeclone/memory/experience/*` | Deterministic Experience distillation + persistence | +| `codeclone/config/memory*.py` | `[tool.codeclone.memory]` resolution | +| `codeclone/surfaces/cli/memory*.py` | Human CLI + Rich rendering | +| `codeclone/surfaces/mcp/_session_memory_mixin.py` | MCP memory tools + finish hook | + +Refs: + +- `codeclone/memory/ingest/runner.py:run_memory_init` +- `codeclone/memory/retrieval/service.py:query_engineering_memory` +- `codeclone/surfaces/mcp/_session_memory_mixin.py` + +Normative detail: + +- [Trajectory and Patch Trail](trajectory-and-patch-trail.md) +- [Trajectory quality and passport](trajectory-quality-and-passport.md) +- [Experience Layer](experience-layer.md) +- [Projection jobs](projection-jobs.md) +- [Practical trajectory and Experience guide](../../guide/memory/trajectories-and-experiences.md) + +--- + +## Regressions and UX fixes (2.1.0a1) + +These are documentation anchors for shipped fixes — see `CHANGELOG.md` **Fixed** +for the full controller list. 
+ +| Area | Symptom | Fix (code truth) | +|--------------------------------|-----------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| VS Code session/audit webviews | Payload footprint table showed zeros for workflow metrics | Audit footprint JSON uses `calls` and `tokens` in `top_workflows`; the webview maps both legacy and mistaken field names (`workspaceInsightsRenderer.js`). | +| CLI session stats | Import / duplication issues | Collection lives in `codeclone/controller_insights/`; CLI renders only (`surfaces/cli/session_stats.py`). | +| MCP vs CLI insights | Session stats logic must not live only in MCP | IDE-only tools `get_workspace_session_stats` / `get_controller_audit_trail` share the same collectors as `--session-stats` / `--audit`. | +| Patch verify | Identical before/after run accepted | `after_run_not_new` for `python_structural` and `governance_config` profiles. | +| Finish hygiene | Over-blocking on foreign out-of-scope dirt | Unattributed out-of-scope dirt is advisory; blocking reasons are `missing_evidence` and `foreign_dirty_overlap`. | + +--- diff --git a/docs/book/13-engineering-memory/mcp-surface.md b/docs/book/13-engineering-memory/mcp-surface.md new file mode 100644 index 00000000..99ab6a00 --- /dev/null +++ b/docs/book/13-engineering-memory/mcp-surface.md @@ -0,0 +1,146 @@ +## MCP surface + +### Read tools + +#### `get_relevant_memory` + +Ranked, scope-aware context for the **declared edit scope**. 
+ +| Parameter | Purpose | +|-----------------------------------|-----------------------------------------------------------------------------------------------------------------| +| `root` | **Required.** Absolute repository root (same as `analyze_repository`) | +| `scope` | Explicit repo-relative paths | +| `intent_id` | Active intent from `start_controlled_change` (resolves scope) | +| `symbols` | Optional qualname keys for boost | +| `max_records` | Cap (default 20) | +| `include_stale`, `include_drafts` | `include_stale` defaults false; drafts are automatic for scoped retrieval / path / symbol and opt-in for search | +| `detail_level` | `compact` (default) or `full` — compact returns statement previews without payload | + +`trajectories[]` excludes routine `run:*` workflows by default (same semantics +as `include_routine=false` in retrieval). There is **no** `include_routine` +parameter on this tool; use `query_engineering_memory` trajectory modes with +`filters.include_routine` to include routine analysis-only trajectories. + +Unscoped `get_relevant_memory` is **rejected**. Pass `scope`, `intent_id`, or +`symbols`. For project-wide orientation use +`query_engineering_memory(mode=status|search)` — not root scope (`"."`, `""`). + +Project root is never a valid memory scope for `scope`, `path`, or `coverage`. + +`intent_id` or `scope` without `root` fails MCP argument validation (Pydantic). +Always pass the same absolute `root` used for `analyze_repository` and +`start_controlled_change`. + +When auto-sync runs, the response includes a `memory_sync` object (`status`, +`trigger`, `run_id`, `report_digest`, ingest stats). Omitted when sync was skipped +(`status: unchanged`). + +#### `query_engineering_memory` + +Mode router for inspection and search. 
+ +| `mode` | Required inputs | Purpose | +|------------------------|---------------------------------------------|-------------------------------------------------| +| `search` | `query`; optional `semantic=true` | FTS keyword search; optional vector blend | +| `get` | `record_id` | Single record + subjects + evidence | +| `for_path` | `path` | Path-linked records | +| `for_symbol` | `symbol` | Symbol-linked records | +| `stale` | — | Stale inventory | +| `coverage` | `scope` (non-empty, not project root) | Coverage metrics for paths | +| `status` | — | Store status (like CLI `status`) | +| `drafts` | optional `limit` | Draft inbox (compact by default) | +| `trajectory_status` | — | Trajectory projection run metadata | +| `trajectory_search` | `query`; optional `filters.include_routine` | Search stored trajectories | +| `trajectory_get` | `record_id` (trajectory id) | One trajectory + steps (always full) | +| `trajectory_anomalies` | optional `filters.include_routine` | Detected trajectory contract anomalies | +| `trajectory_agents` | optional `filters.include_routine` | Aggregate quality/outcomes by exact agent label | +| `trajectory_dashboard` | optional `filters.include_routine` | Combined status, agent, and anomaly view | + +List modes (`search`, `stale`, `drafts`, scoped `get_relevant_memory`) default +to **compact** payloads: statement preview, `statement_length`, no `payload`. +Use `mode=get` or `detail_level=full` for complete statements and payload. +`trajectory_get` is also always full regardless of requested detail level. 
+ +Scoped retrieval keeps four typed lanes: + +| Lane | Meaning | `compact` | `full` | +|------------------|------------------------------------------------------|---------------------------------------------------------------|--------------------------------------------------------| +| `records[]` | Durable asserted/project memory | Preview; relevance-first bounded `subjects`; count/truncation | Full statement, subjects, record payload | +| `experiences[]` | Advisory patterns distilled from trajectories | Preview; agent-family count, multi-agent flag, dominant facet | Full agent facets and trajectory evidence ids | +| `trajectories[]` | Prior workflow examples/evidence | Bounded preview; no steps or `quality_contract` | Full contract/subjects; use `trajectory_get` for steps | +| `coverage` | Availability of record/trajectory/experience context | Same factual coverage metadata | Same factual coverage metadata | + +`subject_count` and `subjects_truncated=true` mean more linked subjects exist; +they do not downgrade or discard the record. Each compact trajectory retains +its own `patch_trail_summary`. The duplicate top-level `patch_trail_summary` is +full-only. + +**Filters** (`filters` object): + +| Key | Values | Notes | +|---------------|--------------------------|---------------------------------------| +| `types` | list of record types | e.g. `["contract_note", "risk_note"]` | +| `statuses` | list of statuses | e.g. `["active"]` | +| `confidences` | list of confidences | e.g. `["verified"]` | +| `match_mode` | `any` (default) or `all` | **search mode only** — token matching | + +CLI equivalent: `codeclone memory search QUERY --match any|all`. 
+ +### Write tools (draft layer) + +#### `manage_engineering_memory` + +| `action` | Required params | Effect | +|------------------------------|------------------------------------------------|------------------------------------------------------------| +| `refresh_from_run` | optional `run_id` (defaults to latest MCP run) | Force ingest from MCP run report | +| `rebuild_semantic_index` | (none) | Rebuild LanceDB sidecar when `memory.semantic.enabled` | +| `rebuild_trajectories` | (none) | Rebuild trajectory projections from audit event core | +| `enqueue_projection_rebuild` | (none) | Queue trajectory + semantic + Experience projection job | +| `projection_rebuild_status` | (none) | Latest projection job status | +| `run_projection_jobs_once` | (none) | Run one queued projection job inline | +| `record_candidate` | `record_type`, `statement`, **`subject_path`** | Creates **draft** record | +| `promote_experience` | `experience_id` | Convert advisory Experience into human-reviewable draft | +| `validate_claims` | `text` | Memory-layer claim guard (warnings/errors) | +| `propose_from_receipt` | optional `text`, `intent_id` | Draft proposals from finish-like payload (atomic fallback) | + +IDE channel only (VS Code launches MCP with `--ide-governance-channel`): + +| `action` | Purpose | +|---------------------------|---------------------------------------------------------| +| `register_ide_governance` | Bind session HMAC key + client attestation | +| `prepare_governance` | Issue ticket + nonce + `statement_digest` (protocol v2) | +| `commit_governance` | Human confirm with HMAC proof → approve/reject/archive | + +Agent calls to `approve`, `reject`, or `archive` return `governance_mode_unavailable` +with `next_step` pointing to the VS Code Memory view. Humans may also use +`codeclone memory approve|reject|archive --i-know-what-im-doing` (CLI break-glass). 
+ +#### `finish_controlled_change(propose_memory=true)` + +On **accepted** or **accepted_with_external_changes** finish: + +- proposes draft memory candidates from changed scope, claims, review text +- marks scope-linked **active** records stale +- returns `memory_candidates`, `memory_staleness`, `memory_coverage_delta` +- when `memory.projection_rebuild_policy` is not `off` and the environment is + not CI, may enqueue a projection rebuild job (`projection_rebuild` in the + finish payload — trajectory, semantic, and Experience projections) + +This is the preferred post-edit memory update path when using the workflow +tools. + +### Help topic + +`help(topic="engineering_memory")` — compact agent playbook summary. + +Trajectory analytics and Experience semantics are specified in +[Trajectory quality and passport](trajectory-quality-and-passport.md) and +[Experience Layer](experience-layer.md). + +Refs: + +- `codeclone/surfaces/mcp/server.py` +- `codeclone/surfaces/mcp/messages/help_topics.py` +- `codeclone/surfaces/mcp/_session_workflow_mixin.py` (finish hook) + +--- diff --git a/docs/book/13-engineering-memory/projection-jobs.md b/docs/book/13-engineering-memory/projection-jobs.md new file mode 100644 index 00000000..ff0f454b --- /dev/null +++ b/docs/book/13-engineering-memory/projection-jobs.md @@ -0,0 +1,60 @@ +### Projection rebuild jobs (schema 1.3+) + +Trajectory, semantic, and Experience projections can be rebuilt asynchronously +via a coalesced job row in Engineering Memory SQLite +(`memory_projection_jobs`). The worker rebuilds trajectories first, refreshes +the semantic sidecar, then distills Experiences from the resulting trajectory +corpus. 
+Default policy is **`off`**; opt in with: + +```toml +[tool.codeclone.memory] +projection_rebuild_policy = "enqueue_when_stale" # off | enqueue_when_stale +``` + +| Surface | Command / action | +|-------------------|-----------------------------------------------------------------------------------| +| CLI status | `codeclone memory jobs status --root .` | +| CLI enqueue | `codeclone memory jobs enqueue --root . [--force] [--no-spawn]` | +| CLI worker | `codeclone memory jobs run-once --root .` | +| MCP enqueue | `manage_engineering_memory(action=enqueue_projection_rebuild)` | +| MCP status | `manage_engineering_memory(action=projection_rebuild_status)` | +| MCP worker | `manage_engineering_memory(action=run_projection_jobs_once)` | +| MCP auto (finish) | When policy ≠ `off`, accepted `finish_controlled_change` enqueues + spawns worker | + +Jobs never run in CI environments (`CI`, `GITHUB_ACTIONS`, …). Sync rebuild +escape hatches remain: `rebuild_trajectories` / `rebuild_semantic_index`. + +## Queue and worker contract + +```mermaid +flowchart LR + A["Accepted finish"] --> B["Compute projection stimulus"] + B --> C{"Stale?"} + C -- "no" --> D["No enqueue"] + C -- "yes" --> E["Coalesce pending job"] + E --> F["Detached or inline worker"] + F --> G["Trajectory projection"] + G --> H["Semantic sidecar"] + H --> I["Experience distillation"] + I --> J["Persist result / watermark"] +``` + +The stimulus includes repository digest, projection version and enablement, +audit event-core counts/watermarks, and active memory-record counts. Pending +work for the same project is coalesced instead of duplicated. + +The job store claims work with an immediate SQLite transaction and permits one +running job per project. Dead-worker and timeout states are reclaimed as +failed before new work is claimed. Trajectory rebuild is incremental when its +stored projection version and audit watermark are compatible; otherwise it +falls back to a full rebuild. 
Semantic projection may hash-skip unchanged +sources. + +Job states are `pending`, `running`, `done`, `failed`, and `skipped`. +`run-once` returns `nothing_to_do` when the queue is empty. Worker results and +bounded errors remain job metadata; they do not alter canonical analysis. + +Platform Observability can correlate accepted finish, worker spawn, and worker +execution without changing the queue contract. See +[Platform Observability](../26-platform-observability.md). diff --git a/docs/book/13-engineering-memory/scope-and-invariants.md b/docs/book/13-engineering-memory/scope-and-invariants.md new file mode 100644 index 00000000..b80a0b81 --- /dev/null +++ b/docs/book/13-engineering-memory/scope-and-invariants.md @@ -0,0 +1,98 @@ +## Integration with change control + +Memory complements — does not replace — the Structural Change Controller +([12-structural-change-controller/index.md](../12-structural-change-controller/index.md)): + +```mermaid +graph LR + CC[Change Controller] -->|scope, blast, verify| Edit[Scoped edit] + EM[Engineering Memory] -->|context, drafts| Edit + Edit --> CC + CC -->|propose_memory| EM + style CC stroke: #6366f1 + style EM stroke: #059669 +``` + +| Controller fact | Memory fact | +|--------------------------------|-------------------------------------| +| `do_not_touch` — hard boundary | `risk_note` — informational hotspot | +| Patch verify `accepted` | `change_rationale` draft proposal | +| Blast radius dependents | `module_role` inventory link | + +--- + +## Scope and token hygiene + +Engineering Memory stores **short, evidence-linked cards** — not chat transcripts +or project-wide dumps. 
+ +| Rule | Contract | +|----------------------|--------------------------------------------------------------------------------------------------------------------------| +| Root scope forbidden | No `scope=["."]`, `path="."`, empty scope for `coverage`, or repo root as subject | +| Scoped retrieval | `get_relevant_memory` requires `scope`, `intent_id`, or `symbols`; use `status`/`search` for orientation | +| Compact lists | Default `detail_level=compact`: statement preview + `statement_length`; full text via `mode=get` or `detail_level=full` | +| Agent writes | `record_candidate` requires `subject_path`; target ≤300 chars, soft warn >500, hard reject >1000 (`max_statement_chars`) | +| One fact per card | Compress observations before write; store details in receipt/spec/docs | + +--- + +## Invariants (MUST) + +- Memory store path defaults under `.codeclone/memory/` — not baseline or analysis cache. +- Init ingest is deterministic given identical report + git inputs. +- MCP memory tools do not mutate baselines, analysis cache, canonical reports, or source files. Agent-visible writes + create **draft** records only (`record_candidate`, finish `propose_memory`, atomic `propose_from_receipt`). System + actions include `refresh_from_run`, semantic/trajectory/projection rebuild jobs, and finish-side staleness updates. + Human approve/reject/archive: VS Code Memory view **or** + `codeclone memory approve|reject|archive --i-know-what-im-doing` (optional + `--by NAME`; not MCP agent tools). +- Subject rows deduplicated in retrieval payloads (one row per logical subject key). +- FTS rebuilt after init/refresh ingest completes. +- Schema migration is forward-only through `schema_migrate.py`. 
+ +--- + +## Failure modes + +| Condition | Behavior | +|----------------------------|-------------------------------------------------------------| +| DB missing, policy `off` | MCP error: run `refresh_from_run` or CLI init | +| DB missing, default policy | Auto bootstrap on `get_relevant_memory` when MCP run exists | +| No MCP run for sync | Auto sync skipped; DB missing → contract error | +| At `max_candidates` | `record_candidate` raises capacity error | +| At `max_records` | Init upsert skips or rejects per store policy | +| No cached report on init | Init runs analysis or fails with clear message | +| Git unavailable | Init proceeds; git evidence/hotspots skipped | +| Root scope path | `MemoryContractError`: use status/search for orientation | +| Unscoped retrieval | `get_relevant_memory` rejected without scope/intent/symbols | +| Statement too long | `record_candidate` rejected above `max_statement_chars` | + +--- + +## Locked by tests + +- `tests/test_memory_mcp_sync.py` +- `tests/test_memory_store.py` +- `tests/test_memory_search.py` +- `tests/test_memory_retrieval.py` +- `tests/test_memory_staleness.py` +- `tests/test_memory_governance.py` +- `tests/test_memory_cli.py` +- `tests/test_mcp_service.py` (memory tool wiring) +- `tests/test_mcp_server.py` (tool registration) +- `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, + `tests/test_semantic_chunking.py`, `tests/test_semantic_projection_probe.py`, + `tests/test_semantic_embedding.py`, `tests/test_semantic_index_null.py` +- `tests/test_cli_memory_semantic.py`, `tests/test_mcp_memory_semantic.py` +- `tests/test_config_semantic.py`, `tests/test_semantic_determinism_gate.py` +- `tests/test_controller_insights.py` (shared session/audit payloads) + +--- + +## Related docs + +- [MCP Interface](../25-mcp-interface/index.md) — tool catalog +- [Structural Change Controller](../12-structural-change-controller/index.md) — intent workflow +- [Claim Guard](../14-claim-guard.md) — finish claims 
validation +- [CLI](../11-cli.md) — `codeclone memory` commands +- [MCP for AI Agents](../../guide/mcp/README.md) — agent-oriented narrative diff --git a/docs/book/13-engineering-memory/search-fts.md b/docs/book/13-engineering-memory/search-fts.md new file mode 100644 index 00000000..962b3dc4 --- /dev/null +++ b/docs/book/13-engineering-memory/search-fts.md @@ -0,0 +1,20 @@ +## Search semantics (schema 1.1) + +### FTS (always available) + +FTS5 index (`memory_fts`) indexes record statements and metadata. + +| `match_mode` | Behavior | +|-----------------|-----------------------------------------------| +| `any` (default) | Match records containing **any** query token | +| `all` | Match records containing **all** query tokens | + +Document links display as normalized headings, e.g. +`AGENTS.md · §16 · Change routing → AGENTS.md`. + +Refs: + +- `codeclone/memory/search_index.py` +- `codeclone/memory/display.py` + +Semantic retrieval (Phase 20): [search-semantic.md](search-semantic.md). diff --git a/docs/book/13-engineering-memory/search-semantic.md b/docs/book/13-engineering-memory/search-semantic.md new file mode 100644 index 00000000..7d71b2af --- /dev/null +++ b/docs/book/13-engineering-memory/search-semantic.md @@ -0,0 +1,164 @@ + + +# Optional semantic retrieval (Phase 20) + +Semantic search is **opt-in** and **off by default** (`enabled = false` in +`codeclone/config/memory_defaults.py`). It does not replace FTS: keyword search +still runs first; when the index is available, vector proximity **merges extra +candidates** and adjusts ranking (`semantic_proximity * 0.3` in +`codeclone/memory/retrieval/ranking.py`). 
+ +```mermaid +flowchart LR + Q[Query text] --> FTS[FTS5 memory_fts] + Q --> EMB[EmbeddingProvider.embed_query] + DOCS[Index projections] --> DOCEMB[EmbeddingProvider.embed_documents] + EMB --> VEC[LanceDB k-NN] + DOCEMB --> INDEX[LanceDB rebuild] + INDEX --> VEC + FTS --> MERGE[Candidate union] + VEC --> MERGE + MERGE --> RANK[Rank + semantic weight] + RANK --> OUT[search payload + semantic block] +``` + +**Prerequisites (all required for `semantic.used: true`):** + +1. `memory.semantic.enabled = true` in effective config. +2. Optional vector backend installed: `pip install 'codeclone[semantic-lancedb]'`. + For semantic-quality local embeddings, install `codeclone[semantic-local]` + instead (or combine `semantic-lancedb` + `semantic-fastembed`). +3. Index built at `index_path` (default + `.codeclone/memory/semantic_index.lance`) via + `manage_engineering_memory(action="rebuild_semantic_index")` (MCP agents) or + `codeclone memory semantic rebuild` (CLI/CI). + +Minimal local semantic-quality setup: + +```bash +pip install 'codeclone[semantic-local]' +``` + +```toml +[tool.codeclone.memory.semantic] +enabled = true +embedding_provider = "fastembed" +allow_model_download = true # or pre-populate embedding_cache_dir and keep false +``` + +```bash +codeclone memory init --root . +# Agents (MCP): manage_engineering_memory(action=rebuild_semantic_index) +codeclone memory semantic rebuild --root . +codeclone memory semantic search "recover after MCP restart" --root . +codeclone memory search "recover after MCP restart" --semantic --root . +``` + +Use `codeclone[semantic-lancedb]` only when you intentionally want the derived +sidecar with the deterministic diagnostic provider; it is stable, but not +semantic-quality recall. 
+ +**Degraded states (never crash read paths):** + +| Condition | Index behavior | Search `semantic` block | +|--------------------------------|----------------------------------------------------------------|---------------------------------------------------| +| `enabled=false` | `NullSemanticIndex` | `used: false`, `reason: disabled` | +| Enabled, index missing | `UnavailableSemanticIndex` (`not_built`) | FTS only; `used: false` | +| Enabled, LanceDB extra missing | `UnavailableSemanticIndex` (`lancedb_not_installed`) | FTS only; explicit `semantic rebuild` fails clear | +| Provider unavailable | `semantic_reason` set (e.g. FastEmbed extra/model unavailable) | FTS only | + +The index is a **derived, rebuildable sidecar** — not updated on the memory +write hot path. Rebuild is idempotent on projection `text_hash` +(`codeclone/memory/semantic/rebuild.py`). + +#### Embedding providers + +| Provider | Status | Meaning | +|------------------------|-------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `diagnostic` (default) | Always available | `DeterministicHashEmbeddingProvider`: sha256-derived unit vectors. **Stable across runs, not semantic-quality recall.** CLI prints an advisory when `provider=diagnostic`. | +| `fastembed` | Optional: `codeclone[semantic-fastembed]` | Local ONNX embeddings through FastEmbed. Default model is `BAAI/bge-small-en-v1.5` (`384` dimensions). Query text uses a `query:` prefix; indexed records use `passage:`. Model download is disabled unless `allow_model_download=true`, so air-gapped installs can pre-populate `embedding_cache_dir`. 
| +| `local_model` | Raises `MemorySemanticUnavailableError` | Reserved compatibility literal; use `fastembed` for community local semantic search. | +| `api` | Raises `MemorySemanticUnavailableError` | Reserved for remote/API providers. | + +Model id for diagnostic: `diagnostic-hash-v1` +(`codeclone/memory/embedding/__init__.py`). +Model id for FastEmbed: `fastembed:<model_name>`. + +#### What gets indexed + +**Memory record types** (`INDEXED_MEMORY_TYPES` in +`codeclone/memory/semantic/projection.py`): + +`contract_note`, `change_rationale`, `risk_note`, `architecture_decision`, +`contradiction_note`, `protocol_rule`, `human_note`. + +**Not** semantically indexed (served by exact subject / path match instead): +`module_role`, `test_anchor`, `document_link`, `public_surface`, `stale_marker`. + +**Audit incidents** when `index_audit=true` (default) and `audit_enabled=true` +with a readable audit DB — projected from **`controller_events.summary` only** +(never `payload_json`). Event types: + +`intent.declared`, `patch_contract.violated`, `workspace.conflict_detected`, +`baseline_abuse.detected`, `claim_validation.violated`, `review_receipt.created`. + +Empty audit summaries are skipped. + +**Trajectory passports** when trajectories are enabled — projected via +`project_trajectory()` from bounded trajectory fields (summary, outcome, subjects; +see `codeclone/memory/semantic/projection.py`). Long texts become multiple index +units under format **`2`** (below). 
+ +#### Surfaces + +| Surface | Semantic flag | +|----------------------------------------------------------------|--------------------------------------------------------------| +| `query_engineering_memory(mode=search, semantic=true)` | MCP | +| `manage_engineering_memory(action=rebuild_semantic_index)` | MCP (build sidecar) | +| `codeclone memory search --semantic` | CLI | +| `codeclone memory semantic search` | CLI (requires built index) | +| `codeclone memory semantic rebuild` | CLI (build sidecar) | +| `codeclone memory semantic probe [--exact-tokens] [--json]` | CLI — per-lane projection length stats | +| VS Code `codeclone.memory.searchSemantic` (default **`true`**) | Passes MCP `semantic` on IDE search; server opt-in unchanged | +| `get_relevant_memory` | **No** semantic parameter (scoped ranking only) | + +Search responses include a top-level **`semantic`** object: + +| Field | When set | +|-----------------|-------------------------------------------------------| +| `used` | `true` only when index + provider + rebuild succeeded | +| `backend` | e.g. `lancedb` from index status | +| `provider` | Config label (`diagnostic`, …) | +| `model` | Provider `model_id` when used | +| `index_version` | `SEMANTIC_INDEX_FORMAT_VERSION` when used | +| `reason` | Degrade reason when `used` is false | + +`codeclone memory semantic probe` emits per-lane stats under +`lanes.{memory,audit,trajectory}`. Default estimator is cheap planning; pass +`--exact-tokens` to load the FastEmbed tokenizer and measure passage-prefixed +texts that rebuild would embed. With `--exact-tokens`, trajectory uses the same +chunker as rebuild: `lanes.trajectory.chunking` reports +`{source_documents, index_units, multi_chunk_sources}` and `documents` counts +index units (not raw projections). Lane-level `overflow_examples` (up to five) +list index units still above the model window. 
Chunking reserves passage prefix +and model special tokens; rebuild fails closed with +`SemanticChunkingInvariantError` when a chunk cannot be proven to fit. + +Format **`2`** indexes long trajectory projections as multiple chunk rows linked +by `parent_id` (single-chunk trajectories keep the trajectory id as `id`). +Hybrid search oversamples trajectory chunk hits (`k × TRAJECTORY_SEARCH_OVERSAMPLE`, factor 4), +collapses chunk hits to the best score per trajectory, and sets +`matched_chunk_index` / `matched_chunk_count` on the returned trajectory hit. + +When semantic search hits audit rows, `payload.audit_events` lists hydrated incidents +(event type, bounded summary preview, score) alongside memory records. + +Refs: + +- `codeclone/memory/retrieval/service.py:_handle_semantic_search_mode` +- `codeclone/memory/semantic/__init__.py:resolve_semantic_index` +- `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, + `tests/test_semantic_chunking.py`, `tests/test_semantic_projection_probe.py`, + `tests/test_mcp_memory_semantic.py`, `tests/test_cli_memory_semantic.py` + +--- diff --git a/docs/book/13-engineering-memory/staleness-and-anchors.md b/docs/book/13-engineering-memory/staleness-and-anchors.md new file mode 100644 index 00000000..94bcdfa8 --- /dev/null +++ b/docs/book/13-engineering-memory/staleness-and-anchors.md @@ -0,0 +1,58 @@ +## Staleness and anchor durability + +Records with a git anchor (`created_at_commit` + `code_fingerprint`) are judged +by **drift from that anchor**, not by whether the subject appears in the current +analysis inventory. Non-Python subjects (`.md`, `.toml`, `.js`, …) therefore +stay `active` across refresh when their on-disk bytes are unchanged. 
+ +| Anchor vs `HEAD` | Status transition | +|----------------------------|-------------------------------------------------------------| +| Fingerprint matches anchor | `active` (or reactivated from `historical` / drift `stale`) | +| Fingerprint differs | `stale` (`subject_fingerprint_drift`) | +| Subject file absent | `historical` (preserved, queryable) | + +A record is **anchored** only when both `created_at_commit` and `code_fingerprint` +are present at write time. `record_candidate` sets git fields only when the +subject fingerprint resolves (commit without fingerprint is treated as +unanchored). Unanchored records skip anchor drift; system-ingest signals below +still apply. + +Only `draft` records skip refresh drift evaluation. `human`-origin and +human-approved records follow the same anchor table — approval does not exempt +a record from honest content drift. + +```mermaid +flowchart TD + subgraph Anchor["anchor drift (refresh)"] + A1[fingerprint match] + A2[subject_fingerprint_drift] + A3[subject deleted] + end + + subgraph Refresh["init --refresh (system ingest)"] + R1[missing_from_refresh] + R2[evidence_digest_mismatch] + R3[refresh_content_contradiction] + R4[report_digest_shift] + end + + subgraph Scope["accepted finish"] + S1[scope_files_changed] + end + + A1 --> ACT[(status = active)] + A2 --> ST[(status = stale)] + A3 --> HIST[(status = historical)] + Refresh --> ST + Scope --> ST + ST --> RE[Excluded from default retrieval] + HIST --> RET[Included in default retrieval] + RE --> RA[Reactivate when anchor fingerprint matches] + HIST --> RA +``` + +`historical` is a durable resting state — vacuum never auto-deletes it. +Stale records remain for audit but are **excluded** from `get_relevant_memory` +and default search unless explicitly included. 
+ +--- diff --git a/docs/book/13-engineering-memory/trajectory-and-patch-trail.md b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md new file mode 100644 index 00000000..eb7b88bc --- /dev/null +++ b/docs/book/13-engineering-memory/trajectory-and-patch-trail.md @@ -0,0 +1,200 @@ +## Trajectory memory {#trajectory-memory} + +Trajectory memory is a **deterministic process narrative** derived from the audit +event core. It complements governed memory cards: cards hold durable repository +facts; trajectories hold bounded edit-cycle timelines (declare → check → verify → +receipt → optional Patch Trail). + +!!! note "Not authorization" + `trajectories[]` and export JSONL are **read-only forensics**. They do not + expand scope, approve memory records, override structural findings, or + substitute for `finish_controlled_change`. + +!!! note "Projection timing" + Trajectory **rows** are built by `rebuild_trajectories` (CLI or MCP) from + the audit event core — not inline on every finish. Finish **does** compute + Patch Trail and may **enqueue** a projection rebuild job when + `memory.projection_rebuild_policy` is not `off` (skipped in CI). Run + `codeclone memory jobs run-once` or wait for the worker to materialize + trajectories after audit-enabled finishes. + + ```bash + codeclone memory trajectory rebuild --root . + ``` + + MCP agents: `manage_engineering_memory(action=rebuild_trajectories)`. 
+ +### Architecture + +```mermaid +flowchart TB + subgraph Finish["Change controller (MCP finish)"] + FIN[finish_controlled_change] + PT[compute_patch_trail] + AUDE[patch_trail.computed audit event] + FIN --> PT --> AUDE + end + + subgraph Audit["Audit SQLite"] + EC[event_core_json per workflow] + end + + subgraph Project["Trajectory rebuild"] + PRJ[projector trajectory-v3] + PTP[patch_trail_projector] + QLT[quality + anomaly analytics] + SUP[supersede stale rows] + end + + subgraph Store["Engineering Memory SQLite"] + TRJ[memory_trajectories + steps] + PTR[memory_trajectory_patch_trails] + JOB[memory_projection_jobs] + end + + subgraph Read["Read surfaces"] + GR[get_relevant_memory.trajectories] + QEM[query_engineering_memory trajectory_*] + DASH[dashboard + agents + anomalies] + EXP[JSONL export v2] + end + + AUDE --> EC + EC --> PRJ + EC --> PTP + PRJ --> QLT + PRJ --> TRJ + PTP --> PTR + PRJ --> SUP + TRJ --> GR + TRJ --> QEM + QLT --> DASH + PTR --> GR + PTR --> QEM + TRJ --> EXP + PTR --> EXP + JOB -.->|enqueue_when_stale| Project +``` + +Module ownership: + +| Module | Role | +|--------------------------------------------------------|----------------------------------------------------------------| +| `codeclone/audit/events.py` | Bounded `event_core_json`; `patch_trail.computed` compaction | +| `codeclone/memory/trajectory/patch_trail.py` | Finish-time Patch Trail compute (`PATCH_TRAIL_SCHEMA_VERSION`) | +| `codeclone/memory/trajectory/patch_trail_projector.py` | Rebuild Patch Trail from audit event cores | +| `codeclone/memory/trajectory/projector.py` | Deterministic trajectory projection (`trajectory-v3`) | +| `codeclone/memory/trajectory/quality.py` | Contract-quality and separate complexity scoring | +| `codeclone/memory/trajectory/analytics.py` | Dashboard, anomaly, and per-agent aggregates | +| `codeclone/memory/trajectory/store.py` | SQLite persistence, supersede, rebuild orchestration | +| `codeclone/memory/trajectory/retrieval.py` | Scoped ranking 
+ `patch_trail_summary` | +| `codeclone/memory/trajectory/export_context.py` | Export v2 context: precedents, citations, scope paths | +| `codeclone/memory/trajectory/export.py` | Local JSONL export (Phase 25+) | +| `codeclone/memory/jobs/store.py` | Projection job queue + worker claim | +| `codeclone/memory/retrieval/service.py` | MCP/CLI query router | + +### Config (`[tool.codeclone.memory]`) + +| Key | Default | Meaning | +|----------------------------------------------|------------|----------------------------------------------------| +| `trajectories_enabled` | `true` | Gate rebuild/list/search | +| `trajectory_retention_days` | `365` | Retention hint for vacuum tooling | +| `projection_rebuild_policy` | `off` | `off` \| `enqueue_when_stale` — async rebuild jobs | +| `projection_rebuild_running_timeout_seconds` | `1800` | Stale running job recovery | +| `projection_rebuild_spawn_worker` | `true` | Spawn worker subprocess on finish enqueue | +| `trajectory_export_enabled` | `false` | Gate JSONL export | +| `trajectory_export_include_payloads` | `false` | Include compact step text in export rows | +| `trajectory_export_max_record_bytes` | `65536` | Per-row cap | +| `trajectory_export_max_file_bytes` | `10485760` | Output file cap | + +Requires **`audit_enabled=true`** and a readable audit DB for rebuild input. + +### CLI + +```bash +codeclone memory trajectory status --root . +codeclone memory trajectory rebuild --root . +codeclone memory trajectory list --root . --limit 20 +codeclone memory trajectory show TRAJ_ID --root . +codeclone memory trajectory search "recover stale intent" --root . +codeclone memory trajectory export \ + --root . \ + --profile agent-change-control-v1 \ + --out .codeclone/trajectories.jsonl \ + --force +``` + +Export profiles (schema contracts): `agent-change-control-v1`, +`agent-memory-retrieval-v1`, `agent-recovery-v1`, `agent-security-hardening-v1`. +Export row schema version is **`2`** (`TRAJECTORY_EXPORT_SCHEMA_VERSION`). 
Each row +includes: + +| Field | Source | +|---------------------------------|-----------------------------------------------------------------------------------------------------------------------------------| +| `context.memory_precedents` | Active memory records overlapping trajectory/path scope | +| `context.trajectory_precedents` | Prior workflows with path overlap | +| `citations` | Claim-validation event cores + report digests | +| `scope.paths` | Resolved from Patch Trail / declare / check event cores | +| `patch_trail_summary` | When persisted in `memory_trajectory_patch_trails` | +| `projection_version` | `trajectory-v1`, `trajectory-v2`, or active `trajectory-v3`; v2 adds Patch Trail digest and v3 adds quality score + agent subject | + +Rebuild supersedes older projection rows for the same workflow (one canonical +trajectory per `workflow_id` in export). Legacy audit rows without path facts in +frozen event core are supplemented deterministically from stored audit payloads +during projection. Changing profile shape requires a profile version bump. + +### MCP retrieval + +`get_relevant_memory` adds **`trajectories[]`** beside **`records[]`** when path +subjects match (declare `scope_paths`, check `changed_files`, or +`untouched_in_declared`). When a stored Patch Trail exists for a matched +trajectory, each preview includes **`patch_trail_summary`** (counts, digest, +verification status). With `detail_level=full`, the top-ranked trajectory also +surfaces **`patch_trail_summary`** at the response root. Compact retrieval omits +that root duplicate; the summary remains on the trajectory preview. + +`query_engineering_memory(mode=trajectory_get)` returns **`patch_trail`** when +persisted and always uses full detail, including **`quality_contract`**. +List/search previews retain headline quality, complexity, and anomaly counts. 
+ +Trajectory rebuild (`memory trajectory rebuild` / MCP +`manage_engineering_memory(action=rebuild_trajectories)`) synthesizes Patch Trail +from audit event cores (`intent.declared`, `intent.checked`, verify events) and +stores it in **`memory_trajectory_patch_trails`**. Trajectory digest +(`trajectory-v2` and later) incorporates **`patch_trail_digest`** when present. +The active **`trajectory-v3`** digest additionally incorporates the persisted +quality score and records the primary agent as an `agent` subject. + +Scoped ranking adds a small boost when query scope paths intersect +**`untouched_in_declared`** paths from the stored Patch Trail. + +`query_engineering_memory` modes: + +| Mode | Scope | Notes | +|------------------------|---------------|-------------------------------------------------------| +| `trajectory_status` | project | Projection run manifest | +| `trajectory_search` | query text | Requires `query`; excludes `run:*` routine by default | +| `trajectory_get` | trajectory id | `record_id` = trajectory id | +| `trajectory_anomalies` | project | Contract anomalies, optionally including routine runs | +| `trajectory_agents` | project | Outcome and quality aggregates by exact agent label | +| `trajectory_dashboard` | project | Combined status, agent, and anomaly payload | + +Filter: `filters.include_routine=true` on `trajectory_search` includes single-event +`run:*` analysis workflows. + +Evidence kind **`trajectory`** links memory records to trajectories; human approve +still required for agent drafts. + +See [Trajectory labels](trajectory-labels.md) for labels and +[Trajectory quality and passport](trajectory-quality-and-passport.md) for +scoring, anomalies, dashboards, and IDE passport semantics. + +### Enterprise boundary (export) + +Community CodeClone writes **local JSONL only** — no remote API, upload, or +training pipeline. Corporate policy packs, signing, approval workflows, and dataset +registry are out of scope unless explicitly requested. 
+ +Refs: `codeclone/memory/trajectory/rebuild_workflow.py`, +`codeclone/memory/trajectory/export.py`, `tests/test_memory_trajectory_*.py`, +`tests/test_audit_event_core_v2.py`. diff --git a/docs/book/13-engineering-memory/trajectory-labels.md b/docs/book/13-engineering-memory/trajectory-labels.md new file mode 100644 index 00000000..7e5c5f8a --- /dev/null +++ b/docs/book/13-engineering-memory/trajectory-labels.md @@ -0,0 +1,34 @@ +### Trajectory labels and step names + +Each projected trajectory carries a sorted **`labels`** list in +`memory_trajectories.labels_json`. Labels are deterministic tags derived from audit +event cores — not free-form agent text. + +| Label | When set | +|-----------------------------|---------------------------------------------------------------------| +| `change_control_workflow` | Any change-controller event (`intent.*`, `patch_contract.*`, …) | +| `verified_finish` | `patch_contract.verified` with accepted outcome | +| `scope_clean` | `intent.checked` with status `clean` or `expanded` | +| `scope_expanded` | `intent.expanded` present | +| `queue_used` | `intent.queued` present | +| `patch_trail_recorded` | `patch_trail.computed` present | +| `receipt_issued` | `review_receipt.created` present | +| `claim_validated` | `claim_validation.completed` present | +| `analysis_observed` | Standalone `analysis.completed` workflow (no change-control events) | +| `memory_used` | `manage_engineering_memory` tool use in the stream | +| `recovered` | `intent.promoted` (queue recovery) | +| `foreign_conflict_seen` | Workspace conflict | +| `hook_blocked` | Hook surface warn/error | +| `claim_guard_failed` | Claim validation violated | +| `baseline_abuse_detected` | Baseline abuse | +| `external_changes_accepted` | Finish accepted with external changes | + +Routine successful edit cycles should carry **`change_control_workflow`** and +**`verified_finish`** at minimum. 
Empty `labels` indicates a projection bug or a +legacy row that needs `memory trajectory rebuild`. + +Each step in MCP `trajectory_get` includes **`step_label`** — a human-readable name +from `codeclone/memory/trajectory/step_labels.py` (event catalog + status). CLI +`memory trajectory show` prints labels and step labels. + +See also: [Trajectory memory](trajectory-and-patch-trail.md). diff --git a/docs/book/13-engineering-memory/trajectory-quality-and-passport.md b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md new file mode 100644 index 00000000..8edc8dc2 --- /dev/null +++ b/docs/book/13-engineering-memory/trajectory-quality-and-passport.md @@ -0,0 +1,124 @@ +# Trajectory Quality and Passport + + + +Trajectory projection version `3` adds explainable quality, complexity, +anomaly, agent, and dashboard views. These are derived diagnostics over +canonical audit evidence, not analysis findings or edit permissions. + +For the event-to-trajectory projection itself, see +[Trajectory and patch trail](trajectory-and-patch-trail.md). + +## Passport model + +```mermaid +flowchart TD + A["Canonical trajectory"] --> B["Outcome"] + A --> C["Verification"] + A --> D["Scope"] + A --> E["Incidents"] + A --> F["Anomalies"] + A --> G["Receipt"] + B --> H["Quality score = minimum component"] + C --> H + D --> H + E --> H + F --> H + G --> H + A --> I["Complexity score
separate, non-grade"] + H --> J["Trajectory passport"] + I --> J +``` + +The passport keeps quality and complexity separate: + +- quality answers how well the workflow satisfied its contract; +- complexity describes how much declared scope, event activity, and workflow + structure the trajectory contained. + +High complexity is not a defect and does not reduce quality by itself. + +## Quality score + +Quality score version `2` is the minimum of six components: + +| Component | Scoring | +|--------------|---------------------------------------------------------------------------------------------------| +| Outcome | accepted `100`, accepted external `85`, partial `55`, abandoned `40`, blocked `30`, violated `20` | +| Verification | accepted `100`, accepted external `85`, unverified `50`, violated/blocked `0`, not reached `40` | +| Scope | clean `100`, expanded `85`, partial `70`, violated `0` | +| Incidents | `max(0, 100 - 10 × incident_count)` | +| Anomalies | starts at `100`; error costs `12`, warning costs `5` | +| Receipt | change-control trajectory with receipt `100`, without `85`; non-change workflow `100` | + +When patch-trail verification is unavailable, the verification component falls +back to quality tier: verified `100`, corrected `90`, routine `85`, partial +`60`, incident `45`. + +The minimum-component rule makes the limiting evidence visible instead of +averaging a contract failure away. + +## Complexity score + +Complexity is: + +```text +min(100, + min(40, declared_scope_count * 2) + + min(30, event_count * 3) + + min(20, workflow_step_count * 2)) +``` + +Bands are `low < 35`, `moderate 35..69`, and `high >= 70`. 
+ +## Anomalies + +The projection can emit: + +- outcome anomalies: violated, blocked, or abandoned; +- quality incidents and elevated incident count; +- incident labels such as baseline abuse, claim-guard failure, foreign + conflict, hook failure, or recovered state; +- incomplete change cycles or missing intent cleanup; +- scope violations; +- verification gaps. + +Anomalies are deterministic review cues. They are not repository findings. + +## Analytics surfaces + +Agent analytics group by the exact canonical `agent_label`, not an inferred +agent family. The dashboard combines projection status, agent aggregates, +anomalies, and recent trajectories. + +Routine `run:*` workflows and trajectories with quality tier `routine` are +excluded by default. Opt in via CLI `--include-routine` or MCP +`filters.include_routine=true` on `query_engineering_memory` trajectory modes +(`trajectory_search`, `trajectory_dashboard`, etc.) — not a top-level +`get_relevant_memory` MCP parameter. + +Available CLI commands: + +```bash +codeclone memory trajectory status --root . +codeclone memory trajectory rebuild --root . +codeclone memory trajectory list --root . +codeclone memory trajectory search QUERY --root . +codeclone memory trajectory show TRAJECTORY_ID --root . +codeclone memory trajectory agents --root . +codeclone memory trajectory anomalies --root . +codeclone memory trajectory dashboard --root . +codeclone memory trajectory export --root . \ + --profile agent-change-control-v1 \ + --out trajectories.jsonl +``` + +MCP modes are `trajectory_status`, `trajectory_search`, `trajectory_get`, +`trajectory_anomalies`, `trajectory_agents`, and `trajectory_dashboard`. +`trajectory_get` uses `record_id` as the trajectory ID and always returns full +detail. 
+ +The VS Code extension exposes a dashboard, detail view, copyable dashboard +brief, and passport sections for quality, complexity, duration, events, steps, +incidents, evidence, patch trail, contract gates, and score calculations. See +[VS Code integration](../integrations/vs-code-extension.md). diff --git a/docs/book/13-engineering-memory/trust-and-lifecycle.md b/docs/book/13-engineering-memory/trust-and-lifecycle.md new file mode 100644 index 00000000..70795301 --- /dev/null +++ b/docs/book/13-engineering-memory/trust-and-lifecycle.md @@ -0,0 +1,75 @@ +## Trust boundaries + +```mermaid +flowchart LR + subgraph AgentCan["Agent (MCP)"] + R[Read ranked memory] + D[Write draft candidates] + V[Validate claims text] + P[Propose from receipt] + end + + subgraph HumanCI["Human / CI"] + I[memory init / refresh CLI] + A[approve / reject / archive
VS Code or CLI --i-know-what-im-doing] + end + + subgraph McpSync["MCP sync (policy-gated)"] + B[auto bootstrap on get_relevant_memory] + RF[refresh_from_run explicit] + end + +subgraph Never["Never via MCP"] +X1[Expand edit scope] +X2[Override findings] +X3[Mutate baselines / cache / reports] +X4[Promote draft → active without human] +end + +AgentCan --> Store[(Memory DB)] +HumanCI --> Store +McpSync -->|ingest system records|Store +Never -.->|blocked|Store +``` + +| Action | Who | Resulting status | +|-----------------------------------------|---------------------------------------|--------------------------------------------| +| Init / refresh ingest | Human or CI (`codeclone memory init`) | `active` system records | +| Auto bootstrap / refresh from MCP run | MCP when `mcp_sync_policy` allows | `active` system records (same ingest path) | +| `refresh_from_run` | Agent MCP (explicit) | Force ingest from selected MCP run | +| `record_candidate` | Agent MCP | `draft` | +| `finish(propose_memory=true)` on accept | Agent MCP | `draft` proposals + staleness side effects | +| `approve` | Human CLI (`--i-know-what-im-doing`) or VS Code IDE channel | `active` + `verified`/`supported` | +| `reject` | Human CLI (`--i-know-what-im-doing`) or VS Code IDE channel | `rejected` | +| `archive` | Human CLI (`--i-know-what-im-doing`) or VS Code IDE channel | `archived` | +| Refresh detects drift | System on `init --refresh` | `stale` | +| Patch touches linked path | System on accepted finish | `stale` | + +--- + +## Record lifecycle + +```mermaid +stateDiagram-v2 + [*] --> draft: agent record_candidate\nfinish propose_memory + [*] --> active: init ingest\nhuman approve + draft --> active: human approve + draft --> rejected: human reject + active --> stale: refresh drift\nscope files changed + stale --> active: refresh reactivation\nhuman re-approve + active --> archived: human archive + stale --> archived: vacuum retention + rejected --> archived: vacuum retention + draft --> 
archived: vacuum retention +``` + +**Confidence** (`inferred` → `supported` → `verified`) and **origin** +(`system`, `agent`, `human`) are separate axes. Agents must treat `draft` and +`inferred` as non-authoritative. + +Default retrieval excludes `stale`. Keyword `search` excludes `draft` unless +`include_drafts=true`; scoped `get_relevant_memory` and `for_path` / +`for_symbol` include draft agent notes automatically so handoffs are visible. +Draft records remain non-authoritative. + +--- diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md deleted file mode 100644 index 5a1009d5..00000000 --- a/docs/book/13-testing-as-spec.md +++ /dev/null @@ -1,96 +0,0 @@ -# 13. Testing as Specification - -## Purpose - -Map critical contracts to tests that lock behavior. - -## Public surface - -Contract tests are concentrated in: - -- `tests/test_baseline.py` -- `tests/test_cache.py` -- `tests/test_report.py` -- `tests/test_report_contract_coverage.py` -- `tests/test_cli_inprocess.py` -- `tests/test_cli_unit.py` -- `tests/test_coverage_join.py` -- `tests/test_golden_fixtures.py` -- `tests/test_html_report.py` -- `tests/test_mcp_service.py` -- `tests/test_detector_golden.py` -- `tests/test_golden_v2.py` - -## Data model - -Test classes by role: - -- Unit contract tests (schema, validation, canonicalization) -- Integration contract tests (CLI mode behavior, exit-code priority) -- Golden detector snapshot (single canonical python tag) - -## Contracts - -The following matrix is treated as executable contract: - -| Contract | Tests | 
-|--------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | -| Cache v2.8 fail-open + status mapping + API-surface-aware reuse + runtime-reachability/security-surface persistence + API signature order preservation | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag`, `tests/test_cli_inprocess.py::test_cli_public_api_breaking_count_stable_across_warm_cache`, `tests/test_cli_inprocess.py::test_cli_api_surface_ignores_non_api_warm_cache` | -| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | -| Report schema v2.11 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | -| HTML render-only explainability + escaping | `tests/test_html_report.py` | -| Current-run Cobertura coverage join parsing, gating, and projections | `tests/test_coverage_join.py`, `tests/test_pipeline_metrics.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py`, `tests/test_html_report.py` | -| Report-only security surfaces inventory and projections | `tests/test_security_surfaces.py`, `tests/test_pipeline_metrics.py`, `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`, `tests/test_html_report.py`, `tests/test_mcp_service.py`, `tests/test_mcp_server.py` | -| Framework-aware dead-code reachability facts | `tests/test_extractor.py`, `tests/test_pipeline_metrics.py`, `tests/test_cache.py` | -| Golden 
fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` | -| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | - -## Invariants (MUST) - -- Every schema/status contract change requires tests and docs update. -- Golden detector fixture is canonicalized to one Python tag. -- Untrusted baseline behavior must be tested for both normal and gating modes. -- V2 golden fixtures lock dead-code/test-path semantics, metrics/dependency aggregates, - stable per-function structural fact surfaces (`stable_structure` / - `cohort_structural_findings`), and CLI+`pyproject.toml` contract behavior. - -Refs: - -- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` -- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` -- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` -- `tests/test_cli_inprocess.py::test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero` -- `tests/test_cli_inprocess.py::test_cli_legacy_baseline_fail_on_new_fails_fast_exit_2` - -## Failure modes - -| Condition | Expected test signal | -|---------------------------------|-----------------------------------------| -| Baseline payload contract drift | baseline integrity/canonical tests fail | -| Cache schema drift | cache version/parse tests fail | -| Report schema drift | compact layout tests fail | -| Exit priority drift | CI inprocess tests fail | - -## Determinism / canonicalization - -- Determinism tests compare ordering and stable payloads, not runtime-specific timestamps. 
- -## Locked by tests - -- `tests/test_baseline.py::test_baseline_payload_fields_contract_invariant` -- `tests/test_cache.py::test_cache_v13_missing_optional_sections_default_empty` -- `tests/test_report.py::test_report_json_compact_v21_contract` -- `tests/test_coverage_join.py::test_build_coverage_join_maps_cobertura_lines_to_function_spans` -- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` -- `tests/test_html_report.py::test_html_and_json_group_order_consistent` -- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` -- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` -- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` -- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` -- `tests/test_extractor.py::test_collect_dead_candidates_skips_protocol_and_stub_like_symbols` -- `tests/test_metrics_modules.py::test_find_unused_respects_referenced_qualnames` - -## Non-guarantees - -- Test implementation details (fixtures/helper names) can change if contract assertions remain equivalent. diff --git a/docs/book/14-claim-guard.md b/docs/book/14-claim-guard.md new file mode 100644 index 00000000..de7133c6 --- /dev/null +++ b/docs/book/14-claim-guard.md @@ -0,0 +1,180 @@ + + +# 14. Claim Guard + +## Purpose + +Define the `validate_review_claims` MCP tool in the CodeClone `2.1` release +line. + +Claim guard keeps review text disciplined. It validates cited claims against +semantic flags already present in stored MCP runs. It does not perform +free-form NLP, source analysis, or fact checking. 
+ +--- + +## Public surface + +| Artifact | Path | +|----------------|--------------------------------------------------------| +| MCP tool | `validate_review_claims` | +| Service method | `CodeCloneMCPService.validate_review_claims` | +| Session mixin | `codeclone/surfaces/mcp/_session_claim_guard_mixin.py` | +| Pure validator | `codeclone/surfaces/mcp/_claim_guard.py` | + +--- + +## Validation pipeline + +```mermaid +graph LR + T["Review text"] --> E["Extract citations
finding IDs, metric families"] + E --> W["Text window<br/>±80 chars around citation"] + W --> P["Pattern checks
P-1 … P-5"] + P --> V{"Violations?"} + V -->|"yes"| INV["valid: false"] + V -->|"no"| OK["valid: true"] + + style INV fill:#fee2e2 + style OK fill:#f0fdf4 +``` + +The pipeline is fully deterministic: + +1. Resolve the stored run. +2. Index canonical and short finding IDs from the canonical report. +3. Read metric-family gate semantics from the metric registry. +4. Extract citations from the supplied text. +5. Check keyword patterns inside a bounded text window around each citation. + +--- + +## Parameters + +| Parameter | Type | Default | Meaning | +|----------------------|---------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------| +| `text` | `str` | required | Markdown, plain text, or JSON string to validate | +| `run_id` | `str \| None` | latest | Stored MCP run whose report semantics are used | +| `require_citations` | `bool` | `true` | Warn when no known finding IDs or metric family names are cited | +| `patch_health_delta` | `int \| None` | omitted | Optional `health_after - health_before` from verify. When negative, flags regression-free or fully-clean claims even if verify `accepted` | + +!!! info "Text limits" + Text must be non-empty and at most `50,000` characters. + +--- + +## Contract + +The tool is **read-only**. It does not mutate source files, baselines, +reports, analysis cache, review markers, or change intents. + +### Response shape + +| Field | Type | Meaning | +|-----------------------|--------|--------------------------------------| +| `valid` | `bool` | `true` when no violations were found | +| `citations_found` | `int` | Number of recognized citations | +| `violations` | `list` | Deterministic overclaim records | +| `warnings` | `list` | Missing or unknown citations | +| `validated_citations` | `list` | Per-citation validity summary | + +Warnings do not make the response invalid. Only violations set +`valid=false`. 
+ +--- + +## Patterns + +Five deterministic overclaim patterns, each checking keyword proximity +around cited finding IDs or metric family names. An additional +profile-aware warning detects structural claims on non-structural profiles. + +### P-1: Security surface overclaim + +Security Surfaces described as vulnerabilities or exploitability. +Security Surfaces are a **report-only boundary inventory** — they show +where security-relevant capabilities exist, not whether they are +exploitable. + +### P-2: Gate overclaim + +A report-only metric family described as a CI failure or blocking gate. +Not all metric families participate in gating; report-only families are +informational. + +### P-3: Regression overclaim + +A finding with `novelty="known"` described as new relative to the baseline. +Known findings are accepted baseline debt. A patch-local introduction or +reintroduction claim requires before-run to after-run verification evidence; +single-run baseline novelty is insufficient. + +### P-4: Dead code certainty overclaim + +Dead-code certainty claimed despite runtime reachability evidence. When +framework reachability patterns match a dead-code candidate, certainty +claims are invalid. + +### P-5: Fix overclaim + +A finding claimed as fixed or resolved before a post-patch run is +available. Without a comparison run, fix claims cannot be verified. + +### Structural scope warning + +When the verification profile is not `python_structural`, the guard emits a +`structural_checks_not_applicable` warning if the review text contains keywords +suggesting structural checks were performed (e.g. "no regressions", +"all checks passed", "structural verification"). This is a warning, not a +violation — it does not set `valid=false`. 
+ +### Health regression overclaim + +When `patch_health_delta` is negative (from `check_patch_contract` verify or +`finish_controlled_change` → `verification.structural_delta.health_delta`), the +guard emits a `health_regression_overclaim` **warning** and a matching +**violation** if the text contains structural-scope keywords such as +"no regressions", "regression-free", or "all checks passed". Verify may still +return `accepted`; negative health delta is advisory context, not an automatic +verify failure. `finish_controlled_change` also surfaces +`health_regression_advisory` on accepted verify when delta is negative. + +Pass `patch_health_delta` explicitly when using the atomic workflow +(`check_patch_contract` → `validate_review_claims`). `finish_controlled_change` +passes it automatically when `claims_text` is supplied. `review_text` on +`finish_controlled_change` is a human note and is not claim-validated. + +Finish top-level `status: accepted_with_external_changes` still runs Claim Guard +when `claims_text` is provided — external workspace dirt is advisory, not a +verify failure. Do not claim "clean working tree" when `external_changes` is +non-empty unless the user explicitly scoped to ignore peer WIP. + +--- + +## Non-goals + +!!! 
warning "What claim guard is not" + - Not a vulnerability scanner + - Not a CI gate + - Not an LLM fact checker + - Not proof that uncited text is correct + - Not a replacement for `check_patch_contract` + +--- + +## Locked by tests + +- `tests/test_mcp_service.py` +- `tests/test_mcp_server.py` +- `tests/test_mcp_tool_schema_snapshot.py` + +--- + +## See also + +- [25-mcp-interface/index.md](25-mcp-interface/index.md) — full MCP tool and resource contract +- [MCP deep dive](../guide/mcp/README.md) — architecture, workflows, prompt patterns diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md deleted file mode 100644 index 0310cc17..00000000 --- a/docs/book/14-compatibility-and-versioning.md +++ /dev/null @@ -1,115 +0,0 @@ -# 14. Compatibility and Versioning - -## Purpose - -Define when to bump baseline/cache/report/fingerprint versions and how runtime -compatibility is enforced. - -## Public surface - -- Version constants: `codeclone/contracts/__init__.py` -- Clone baseline compatibility: - `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` -- Metrics baseline compatibility: - `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` -- Cache compatibility: `codeclone/cache/store.py:Cache.load` -- Report schema assignment: - `codeclone/report/document/builder.py:build_report_document` -- MCP public surface: `codeclone/surfaces/mcp/server.py`, - `codeclone/surfaces/mcp/service.py` - -## Data model - -Current contract versions: - -- `BASELINE_SCHEMA_VERSION = "2.1"` -- `BASELINE_FINGERPRINT_VERSION = "1"` -- `CACHE_VERSION = "2.8"` -- `REPORT_SCHEMA_VERSION = "2.11"` -- `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` - -Refs: - -- `codeclone/contracts/__init__.py` - -## Contracts - -Version bump rules: - -- bump **baseline schema** only for clone-baseline JSON layout/type changes -- bump **fingerprint version** when clone identity semantics change -- bump **cache schema** for cache 
wire-format or compatibility-semantics changes -- bump **report schema** for canonical report document shape/meaning changes -- bump **metrics-baseline schema** only for standalone metrics-baseline payload changes - -Operational compatibility rules: - -- runtime writes baseline schema `2.1` -- runtime accepts clone baseline `1.0`, `2.0`, and `2.1` -- runtime writes standalone metrics-baseline schema `1.2` -- runtime accepts standalone metrics-baseline `1.1` and `1.2` -- runtime writes cache schema `2.8` -- MCP does not define a separate schema constant; tool/resource semantics are package-versioned public surface - -Baseline regeneration is required when: - -- `fingerprint_version` changes -- `python_tag` changes - -It is not required for package patch/minor updates when compatibility gates still pass. - -## Health model evolution - -CodeClone does not currently define a separate health-model version constant. -Health semantics are package-versioned behavior and must be documented in: - -- this chapter -- [15-health-score.md](15-health-score.md) -- release notes - -A lower score after upgrade may reflect a broader scoring model, not only worse code. - -## Invariants (MUST) - -- Contract changes require code + tests + changelog/docs updates. -- Schema mismatches map to explicit statuses. -- Legacy baselines stay untrusted and require regeneration. 
- -Refs: - -- `codeclone/baseline/trust.py:BaselineStatus` -- `codeclone/baseline/clone_baseline.py:_is_legacy_baseline_payload` - -## Failure modes - -| Change type | User impact | -|------------------------------|----------------------------------------------------------------| -| Baseline schema bump | Older unsupported baselines become untrusted until regenerated | -| Fingerprint bump | Clone IDs change; baseline regeneration required | -| Cache schema bump | Old caches are ignored and rebuilt automatically | -| Report schema bump | Downstream report consumers must update | -| Metrics-baseline schema bump | Dedicated metrics-baseline files must be regenerated | - -## Determinism / canonicalization - -- Version constants are explicit and enforced in code. -- Compatibility decisions are runtime checks, not doc-only expectations. - -Refs: - -- `codeclone/contracts/__init__.py` -- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` -- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` - -## Locked by tests - -- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities` -- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities[schema_major_mismatch]` -- `tests/test_baseline.py::test_baseline_verify_fingerprint_mismatch` -- `tests/test_cache.py::test_cache_v_field_version_mismatch_warns` -- `tests/test_report.py::test_report_json_compact_v21_contract` - -## Non-guarantees - -- Backward compatibility is not guaranteed across incompatible schema/fingerprint bumps. -- Health Score is not mathematically frozen forever; the obligation to document scoring-model changes is. diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index 207d790b..f332b7b6 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -1,4 +1,8 @@ -# Health Score + + +# 15. Health Score ## Purpose @@ -13,7 +17,7 @@ policy for future scoring-model expansion. 
- Canonical report surface: `codeclone/report/document/builder.py:build_report_document` - Health snapshot projections: - `codeclone/report/derived.py:_health_snapshot`, + `codeclone/report/document/derived.py:_health_snapshot`, `codeclone/report/overview.py:_health_snapshot` - CLI / HTML / MCP consumers: `codeclone/surfaces/cli/summary.py`, @@ -92,6 +96,6 @@ If that happens: ## See also -- [08-report.md](08-report.md) -- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [05-report.md](05-report.md) +- [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) +- [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/16-metrics-and-quality-gates.md similarity index 72% rename from docs/book/15-metrics-and-quality-gates.md rename to docs/book/16-metrics-and-quality-gates.md index 48d230f4..ec66a18f 100644 --- a/docs/book/15-metrics-and-quality-gates.md +++ b/docs/book/16-metrics-and-quality-gates.md @@ -1,4 +1,9 @@ -# 15. Metrics and Quality Gates + + +# 16. Metrics and Quality Gates ## Purpose @@ -115,6 +120,43 @@ Refs: - `tests/test_pipeline_metrics.py::test_metric_gate_reasons_collects_all_enabled_reasons` - `tests/test_pipeline_metrics.py::test_metric_gate_reasons_partial_new_metrics_paths` - `tests/test_metrics_baseline.py::test_metrics_baseline_embedded_clone_payload_and_schema_resolution` +- `tests/test_metrics_modules.py::test_compute_lcom4_honors_ignored_methods` +- `tests/test_extractor.py::test_extract_protocol_class_excludes_stub_methods_from_lcom4` +- `tests/test_extractor.py::test_extract_pydantic_cohesion_exclusions` + +## LCOM4 cohesion applicability + +LCOM4 measures connected components over instance-behavior methods in a class cohesion +graph. 
Starting in `2.1.0`, the graph excludes declaration surfaces that do not carry +instance-level behavioral cohesion: + +- **Protocol classes** — when a class inherits from `typing.Protocol` / + `typing_extensions.Protocol` (including module aliases), all of its methods are + excluded from the LCOM4 graph. The whole class is an interface surface, not a + behavior cluster. +- **Pydantic validation/serialization hooks** — methods decorated with Pydantic + validator/serializer hooks resolved from `pydantic` / `pydantic.v1` imports or module + aliases are excluded: `field_validator`, `model_validator`, `root_validator`, + `validator`, `field_serializer`, and `model_serializer`. +- **`computed_field` is not excluded** — it commonly reads `self.*` and participates in + real object cohesion, so it stays in the graph. + +Reporting stays honest: + +- `method_count` on class metrics still reflects all methods on the class. +- Only the LCOM4 graph and component count use the analyzed subset. +- When one or zero analyzed methods remain, LCOM4 collapses to `1` (no penalty). + +Interactive CLI runs may show a one-time migration note when a trusted baseline was +generated by `2.0.2` and the current CodeClone version is `2.1.0` or newer; see +[CLI](11-cli.md) tips. + +Refs: + +- `codeclone/metrics/cohesion.py:compute_lcom4` +- `codeclone/analysis/_module_walk.py:_cohesion_ignored_method_names` +- `codeclone/analysis/class_metrics.py:_class_metrics_for_node` +- `codeclone/surfaces/cli/tips.py:maybe_print_cohesion_lcom4_migration_note` ## Non-guarantees diff --git a/docs/book/16-dead-code-contract.md b/docs/book/17-dead-code-contract.md similarity index 94% rename from docs/book/16-dead-code-contract.md rename to docs/book/17-dead-code-contract.md index b3f2b514..4f93c4b2 100644 --- a/docs/book/16-dead-code-contract.md +++ b/docs/book/17-dead-code-contract.md @@ -1,4 +1,8 @@ -# 16. Dead Code Contract + + +# 17. 
Dead Code Contract ## Purpose @@ -165,8 +169,12 @@ Refs: - `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` - `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[skip_pep562_hooks]` -- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` -- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` +- + +`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` +- +`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` + - `tests/test_extractor.py::test_dead_code_uses_fastapi_route_and_dependency_reachability` - `tests/test_extractor.py::test_dead_code_uses_fastapi_annotated_dependency_reachability` - `tests/test_extractor.py::test_dead_code_uses_fastapi_route_decorator_factory_reachability` @@ -209,6 +217,6 @@ Refs: ## See also -- [05-core-pipeline.md](05-core-pipeline.md) -- [09-cli.md](09-cli.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [03-core-pipeline.md](03-core-pipeline.md) +- [11-cli.md](11-cli.md) +- [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) diff --git a/docs/book/17-suggestions-and-clone-typing.md b/docs/book/18-suggestions-and-clone-typing.md similarity index 93% rename from docs/book/17-suggestions-and-clone-typing.md rename to docs/book/18-suggestions-and-clone-typing.md index b9dc7ad1..bf4fa7eb 100644 --- a/docs/book/17-suggestions-and-clone-typing.md +++ b/docs/book/18-suggestions-and-clone-typing.md @@ -1,4 +1,8 @@ -# 17. Suggestions and Clone Typing + + +# 18. 
Suggestions and Clone Typing ## Purpose diff --git a/docs/book/19-inline-suppressions.md b/docs/book/19-inline-suppressions.md index cb296ad8..0a2e1431 100644 --- a/docs/book/19-inline-suppressions.md +++ b/docs/book/19-inline-suppressions.md @@ -1,3 +1,7 @@ + + # 19. Inline Suppressions ## Purpose @@ -36,12 +40,15 @@ Refs: - same-line single-line declaration - first line of a multiline declaration header - closing header line containing `:` -- Current supported dead-code rule id: `dead-code`. +- Parsed rule ids: `dead-code`, `clone-cohort-drift`, `clone-guard-exit-divergence`. + Only `dead-code` has a runtime effect today. Clone rule ids are reserved: + they parse and bind like other rule ids but do not suppress clone findings. - Rule list supports comma-separated values and deduplicates deterministically. - Suppression applies only to declaration targets (`def`, `async def`, `class`). - Suppression is target-scoped: class-level suppression does not implicitly suppress unrelated methods. -- Dead-code suppression is applied in final liveness filtering by rule id. +- Dead-code suppression is applied in final liveness filtering by rule id + (`codeclone/metrics/dead_code.py:find_unused`). - Suppressed dead-code candidates are reported separately (not as active findings) with deterministic suppression metadata in report metrics. 
@@ -54,13 +61,14 @@ Refs: ## Failure modes -| Condition | Behavior | -|---------------------------------------------------|-------------------------------------| -| malformed `# codeclone: ignore[...]` payload | ignored silently | -| unknown `codeclone[...]` rule id | ignored silently | -| suppression on non-declaration line | ignored silently | -| duplicate rule ids in one directive | deduplicated deterministically | -| suppression rule mismatch (`dead-code` vs others) | does not suppress dead-code finding | +| Condition | Behavior | +|---------------------------------------------------|--------------------------------------| +| malformed `# codeclone: ignore[...]` payload | ignored silently | +| unknown `codeclone[...]` rule id | ignored silently | +| suppression on non-declaration line | ignored silently | +| duplicate rule ids in one directive | deduplicated deterministically | +| non-`dead-code` rule id on a declaration | parsed/bound only; no finding effect | +| suppression rule mismatch (`dead-code` vs others) | does not suppress dead-code finding | ## Determinism / canonicalization @@ -85,11 +93,18 @@ Refs: - `tests/test_suppressions.py::test_extract_suppression_directives_supports_inline_and_leading_forms` - `tests/test_suppressions.py::test_extract_suppression_directives_ignores_invalid_forms[unknown_and_malformed]` - `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[adjacent_leading_only]` -- `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[class_inline_does_not_propagate]` +- + +`tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[class_inline_does_not_propagate]` + - `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[method_target]` - `tests/test_suppressions.py::test_build_suppression_index_deduplicates_rules_stably` -- 
`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` -- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` +- + +`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` +- +`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` + - `tests/test_metrics_modules.py::test_find_unused_applies_inline_dead_code_suppression` - `tests/test_metrics_modules.py::test_find_suppressed_unused_returns_actionable_suppressed_candidates` - `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` @@ -103,5 +118,5 @@ Refs: ## See also -- [16-dead-code-contract.md](16-dead-code-contract.md) -- [08-report.md](08-report.md) +- [17-dead-code-contract.md](17-dead-code-contract.md) +- [05-report.md](05-report.md) diff --git a/docs/book/18-benchmarking.md b/docs/book/20-benchmarking.md similarity index 90% rename from docs/book/18-benchmarking.md rename to docs/book/20-benchmarking.md index 0fdfc081..dc5f791b 100644 --- a/docs/book/18-benchmarking.md +++ b/docs/book/20-benchmarking.md @@ -1,4 +1,8 @@ -# 18. Benchmarking (Docker) + + +# 20. 
Benchmarking (Docker) ## Purpose @@ -105,6 +109,6 @@ Permissions note: ## See also -- [12-determinism.md](12-determinism.md) -- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- [22-determinism.md](22-determinism.md) +- [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) +- [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md deleted file mode 100644 index a54070b6..00000000 --- a/docs/book/20-mcp-interface.md +++ /dev/null @@ -1,164 +0,0 @@ -# 20. MCP Interface - -## Purpose - -Define the current public MCP surface in the CodeClone `2.0` release line. - -The MCP layer is optional, read-only, and built on the same canonical -pipeline/report contracts as the CLI. It does not create a second analysis -engine or a second persistence model. - -!!! note "Read-only integration contract" - MCP surfaces the same canonical report and run state as the CLI and HTML - report. It must not mutate source, baseline, cache, or report artifacts. - -## Public surface - -- package extra: `codeclone[mcp]` -- launcher: `codeclone-mcp` -- server wiring: `codeclone/surfaces/mcp/server.py` -- in-process service/session: `codeclone/surfaces/mcp/service.py`, - `codeclone/surfaces/mcp/session.py` - -## Shape - -Current server characteristics: - -- optional dependency; base `codeclone` install does not require MCP runtime -- transports: - - `stdio` - - `streamable-http` -- run storage: - - in-memory only - - bounded by `--history-limit` - - latest-run pointer is process-local -- roots: - - analysis tools require an absolute repository root - - relative roots such as `.` are rejected -- analysis modes: - - `full` - - `clones_only` -- cache policies: - - `reuse` - - `off` - - `refresh` is rejected by the read-only MCP service contract; use `reuse` - or `off` - -!!! 
warning "Absolute roots and remote exposure" - Analysis tools require an absolute repository root, and HTTP exposure - beyond loopback is intentionally explicit. Keep `stdio` as the default for - local IDE and agent clients. - -## Tools - -Current tool set: `21` tools. - -The MCP surface is intentionally triage-first: analyze first, summarize/triage -second, then drill into one finding or one hotspot family. - -### Analysis and run-level tools - -| Tool | Key parameters | Purpose | -|-------------------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis of one repo root; registers the latest in-memory run. | -| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | -| `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | -| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | -| `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. 
| -| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Evaluate CI/gating decisions against a stored run without mutating process or repo state. | - -### Report and finding projection tools - -| Tool | Key parameters | Purpose | -|-----------------------|------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------| -| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read canonical report sections; `metrics_detail` is the bounded/paginated drill-down path. | -| `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, pagination | Deterministic filtered finding list over canonical stored findings. | -| `get_finding` | `finding_id`, `run_id`, `detail_level` | Return one canonical finding group by short or full id. | -| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return the remediation/explainability packet for one finding. | -| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, pagination | Return one derived hotspot list such as `most_actionable` or `production_hotspots`. | -| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented summary for changed scope; `markdown` is the default human/LLM-facing format. | - -### Focused check tools - -| Tool | Key parameters | Purpose | -|--------------------|-------------------------------------------------------------------------------------------------|-------------------------------------------------------| -| `check_clones` | `run_id` or absolute `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Narrow clone-only query over a compatible stored run. 
| -| `check_complexity` | `run_id` or absolute `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Narrow complexity-hotspot query. | -| `check_coupling` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow coupling-hotspot query. | -| `check_cohesion` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow cohesion-hotspot query. | -| `check_dead_code` | `run_id` or absolute `root`, `path`, `min_severity`, `max_results`, `detail_level` | Narrow dead-code query. | - -### Session-local tools - -| Tool | Key parameters | Purpose | -|--------------------------|--------------------------------|-------------------------------------------------------------------------------------| -| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | -| `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. | -| `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | - -## Resources - -Resources are deterministic read-only projections over stored runs. - -| URI | Purpose | -|---------------------------------------------------|-------------------------------------------------------------| -| `codeclone://latest/summary` | Compact summary for the latest stored run. | -| `codeclone://latest/report.json` | Canonical JSON report for the latest stored run. | -| `codeclone://latest/health` | Health/metrics snapshot for the latest stored run. | -| `codeclone://latest/gates` | Last gate-evaluation result produced in this MCP session. | -| `codeclone://latest/changed` | Changed-files projection for the latest diff-aware run. | -| `codeclone://latest/triage` | Production-first triage payload for the latest stored run. | -| `codeclone://schema` | Canonical report schema-style descriptor. 
| -| `codeclone://runs/{run_id}/summary` | Compact summary for one specific stored run. | -| `codeclone://runs/{run_id}/report.json` | Canonical JSON report for one specific stored run. | -| `codeclone://runs/{run_id}/findings/{finding_id}` | Canonical JSON finding payload for one specific stored run. | - -## Contract rules - -- MCP is read-only with respect to source files, baselines, cache artifacts, - and report artifacts. -- MCP reuses the same canonical report document as CLI/JSON/HTML/SARIF. -- Finding ids, ordering, and summary data are deterministic projections over - the stored run. -- `analyze_changed_paths` requires either explicit `changed_paths` or - `git_diff_ref`. -- `analyze_repository` and `analyze_changed_paths` require an absolute `root`. -- `check_*` tools may resolve against an existing stored run, but if `root` is - provided it must also be absolute. -- `git_diff_ref` is validated before any subprocess call. -- Review markers are session-local in-memory state only. -- Run history is process-local and does not survive restart. -- Missing optional MCP dependency is surfaced explicitly by the launcher. -- `metrics_detail(family="security_surfaces")` exposes a compact, report-only - inventory of exact security-relevant capability surfaces. It does not claim - vulnerabilities or exploitability. 
- -## Security model - -- default transport is local `stdio` -- non-local HTTP exposure requires explicit `--allow-remote` -- server runtime is loaded lazily so base installs and normal CI do not require - MCP packages -- MCP must not mutate repo state or synthesize findings outside canonical - report facts - -## Determinism - -- run identity is derived from canonical report integrity -- summary, hotspots, findings, and remediation payloads are deterministic - projections over stored run state -- MCP must not create MCP-only analysis semantics or MCP-only gate semantics - -## Locked by tests - -- `tests/test_mcp_service.py` -- `tests/test_mcp_server.py` -- `tests/test_mcp_tool_schema_snapshot.py` - -## See also - -- [09-cli.md](09-cli.md) -- [08-report.md](08-report.md) -- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) -- [../mcp.md](../mcp.md) diff --git a/docs/book/21-security-model.md b/docs/book/21-security-model.md new file mode 100644 index 00000000..0c8392e3 --- /dev/null +++ b/docs/book/21-security-model.md @@ -0,0 +1,232 @@ + + +# 21. Security Model + +## Purpose + +Describe implemented protections and explicit security boundaries. 
+ +## Public surface + +- Scanner path validation: `codeclone/scanner/__init__.py:iter_py_files` +- File read and parser limits: `codeclone/core/worker.py:process_file`, + `codeclone/analysis/parser.py:_parse_limits` +- Baseline/cache validation: `codeclone/baseline/*`, `codeclone/cache/*` +- HTML escaping: `codeclone/report/html/primitives/escape.py`, + `codeclone/report/html/assemble.py` +- MCP read-only enforcement: `codeclone/surfaces/mcp/*` +- Repository path containment: `codeclone/utils/repo_paths.py` + +## Data model + +Security-relevant input classes: + +- filesystem paths (root/source/baseline/cache/report) +- untrusted JSON files (baseline/cache) +- untrusted source snippets and metadata rendered into HTML +- MCP request parameters (`root`, filters, diff refs, cache policy) + +## Contracts + +- CodeClone parses source text; it does not execute repository Python code. +- Sensitive root directories are blocked by scanner policy. +- Symlink traversal outside the root is skipped. +- HTML escapes text and attribute contexts before embedding. +- MCP is read-only with respect to source files, baselines, analysis cache + (`cache.json`), and canonical report artifacts. +- Allowed repo-local writes are explicit and isolated: ephemeral controller + coordination (file backend under `.codeclone/intents/` or SQLite under + `.codeclone/db/intents.sqlite3`), optional controller audit + (`.codeclone/db/audit.sqlite3`), Engineering Memory/projection state under + `.codeclone/memory/`, and opt-in Platform Observability + (`.codeclone/db/platform_observability.sqlite3`). +- Platform Observability stores bounded metadata and literal-free SQL + fingerprints, never raw payload bodies, and cannot affect analysis truth, + gates, baselines, memory facts, or edit authorization. +- Session-local review markers and in-memory run history do not survive + process restart. 
+- Five session/coordination tools are marked `destructiveHint` in MCP metadata + (`manage_change_intent`, `start_controlled_change`, + `finish_controlled_change`, `mark_finding_reviewed`, `clear_session_runs`). +- `--allow-remote` is required for non-loopback HTTP bind. It is an explicit + operator opt-in, not a substitute for authentication. For `streamable-http`, + `CODECLONE_MCP_AUTH_TOKEN` is mandatory at server start (see + [Remote MCP transport](#remote-mcp-transport)). stdio transport remains a + local-trust surface on the host. +- MCP accepts cache policies `reuse` and `off`; `refresh` is rejected at + runtime with a contract error. +- `git_diff_ref` is validated as a safe single revision expression before any `git diff` subprocess call. +- MCP `processes` is capped to `min(requested, os.cpu_count() or 4, 64)`. + This is a resource ceiling only; it does not change analysis results. + +## Trust boundaries (explicit) + +These are documented limits, not hidden guarantees. + +### Repository path containment + +`resolve_under_repo_root` in `codeclone/utils/repo_paths.py` is the shared +resolver for audit paths, intent-registry DB paths, memory config paths, MCP +optional artifacts, and cache wire filepath projection. By default paths must +stay under the analysis root after normalization; symlink escapes outside the +root are rejected. + +Refs: + +- `codeclone/utils/repo_paths.py` +- `tests/test_repo_paths.py` + +### MCP optional artifact paths + +`baseline_path`, `metrics_baseline_path`, `cache_path`, and `coverage_xml` on +`analyze_repository` / `analyze_changed_paths` resolve through the same helper. +**Default:** repo-relative only; absolute or out-of-repo paths are rejected. +**Opt-in:** set `allow_external_artifacts=true` on the analysis tool call when +shared monorepo artifacts live outside the scan root (privileged input). + +Parameter details: [25-mcp-interface/index.md](25-mcp-interface/index.md). Tool copy: +`help(topic="trust_boundaries")`. 
+ +Refs: + +- `codeclone/surfaces/mcp/_session_helpers.py:_resolve_optional_path` + +### Cache checksum semantics + +Cache signatures detect corruption and accidental mutation of the canonical +cache payload. They are not adversarial authentication against a privileged +local attacker who can rewrite `.codeclone/cache.json` directly. + +Refs: + +- `codeclone/cache/integrity.py:sign_cache_payload` +- `codeclone/cache/integrity.py:verify_cache_payload_signature` + +### Workspace change intents + +The workspace intent registry coordinates concurrent edits between processes +running as the same local UID on the same host (file backend: +`.codeclone/intents/`; SQLite backend: `.codeclone/db/intents.sqlite3` +when configured). Records are advisory, TTL-bound (default 1 hour, lease 5 +minutes), gitignored, and integrity-checked (SHA-256 over canonical JSON) but not +cryptographically authenticated. A same-UID process with repository write access +can forge or delete intent records; that UID can already modify source files and +baselines directly. Treat intents as coordination hints, not proof of agent +identity. + +The Cursor plugin may enforce `preToolUse` by **reading** this registry through +`codeclone.workspace_intent` (read-only; no lazy-close or writes). The hook gate +authorizes edits only for **own active** or **foreign active** intents (not +stale/queued). That reduces accidental edits without intent; it does not stop a +hostile same-UID process. + +Refs: + +- `codeclone/workspace_intent/gate.py` +- `codeclone/surfaces/mcp/_workspace_intents.py` +- `codeclone/surfaces/mcp/_session_workflow_mixin.py` + +### Remote MCP transport + +Loopback binding is the default. `--allow-remote` removes the loopback-only +transport guard so HTTP MCP can bind on non-local interfaces. + +For **every** `streamable-http` start (loopback or remote), set +`CODECLONE_MCP_AUTH_TOKEN` to a secret of at least 32 characters. 
The launcher +refuses to bind HTTP transport when the variable is missing or too short; there +is no unauthenticated HTTP fallback. Clients must send +`Authorization: Bearer …`; the server validates with `hmac.compare_digest` +(stdlib only). CodeClone does not ship TLS or multi-tenant session management — +use a reverse proxy when exposing beyond loopback. + +Variable semantics and precedence: +[10-config Environment variable overrides](10-config-and-defaults.md#mcp-http-authentication). + +Refs: + +- `codeclone/surfaces/mcp/auth.py` +- `codeclone/surfaces/mcp/server.py` +- `tests/test_mcp_http_auth.py` +- `tests/test_mcp_server.py::test_mcp_server_main_rejects_non_loopback_host_without_opt_in` + +### Platform Observability + +The observer is an optional local diagnostics boundary. Its CLI and MCP readers +open the telemetry store read-only; the instrumentation writer commits one +completed operation and its spans atomically. No network exporter is provided. + +The MCP slicer is bounded and declares that its output is CodeClone-development +telemetry, not repository quality evidence. See +[26-platform-observability.md](26-platform-observability.md). + +Refs: + +- `codeclone/analysis/parser.py:_parse_with_limits` +- `codeclone/scanner/__init__.py:SENSITIVE_DIRS` +- `codeclone/scanner/__init__.py:iter_py_files` +- `codeclone/report/html/primitives/escape.py:_escape_html` + +## Invariants (MUST) + +- Baseline and cache integrity checks use constant-time comparison. +- Size guards are enforced before parsing baseline/cache JSON. +- Cache failures degrade safely; baseline trust failures follow the explicit trust model. 
+ +Refs: + +- `codeclone/baseline/clone_baseline.py:Baseline.verify_integrity` +- `codeclone/cache/store.py:Cache.load` +- `codeclone/surfaces/cli/workflow.py:_main_impl` + +## Failure modes + +| Condition | Security behavior | +|------------------------------------------|--------------------| +| Symlink points outside root | File skipped | +| Root under sensitive dirs | Validation error | +| Oversized baseline | Baseline rejected | +| Oversized cache | Cache ignored | +| HTML-injected payload in metadata/source | Escaped output | +| Non-loopback HTTP bind without `--allow-remote` | Transport rejected | +| Invalid `cache_policy` requested in MCP | Policy rejected | +| `git_diff_ref` fails validation | Parameter rejected | + +## Determinism / canonicalization + +- Canonical JSON hashing for baseline/cache prevents formatting-only drift. +- Security failures map to explicit statuses rather than silent mutation. + +Refs: + +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:canonical_json` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/cache/versioning.py:CacheStatus` + +## Locked by tests + +- `tests/test_security.py::test_scanner_path_traversal` +- `tests/test_scanner_extra.py::test_iter_py_files_symlink_loop_does_not_traverse` +- `tests/test_security.py::test_html_report_escapes_user_content` +- `tests/test_html_report.py::test_html_report_escapes_script_breakout_payload` +- `tests/test_cache.py::test_cache_too_large_warns` +- `tests/test_mcp_service.py::test_mcp_service_rejects_refresh_cache_policy_in_read_only_mode` +- `tests/test_mcp_service.py::test_mcp_service_caps_process_count_from_request_and_config` +- `tests/test_mcp_server.py::test_mcp_server_main_rejects_non_loopback_host_without_opt_in` +- `tests/test_repo_paths.py` +- `tests/test_mcp_http_auth.py` +- `tests/test_security_invariants.py` + +## Non-guarantees + +- Baseline/cache integrity is tamper-evident at file-content level; it is not cryptographic 
attestation against a + privileged attacker. +- Baseline `payload_sha256` and cache signatures protect against accidental corruption and unsynchronized edits; they + do not authenticate files against a hostile same-UID writer. +- Workspace intent files are not signed and must not be treated as proof of which agent declared a change. +- MCP optional artifact paths outside the scan root require explicit + `allow_external_artifacts=true`; default resolution stays under the repo root. +- Remote MCP is authenticated only by the shared `CODECLONE_MCP_AUTH_TOKEN` bearer secret; even with + `--allow-remote` it is not a hardened multi-tenant network service. diff --git a/docs/book/21-vscode-extension.md b/docs/book/21-vscode-extension.md deleted file mode 100644 index 5dd50f35..00000000 --- a/docs/book/21-vscode-extension.md +++ /dev/null @@ -1,191 +0,0 @@ -# 21. VS Code Extension - -## Purpose - -Document the current contract and behavior of the VS Code extension shipped in -`extensions/vscode-codeclone/`. - -This chapter describes the extension as an interface layer over existing -CodeClone contracts. It does not define a second analysis truth model. - -Marketplace: [orenlab.codeclone](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) - -!!! note "No second truth path" - The extension is a guided IDE client over `codeclone-mcp`. It may reshape - review UX, but it must not recompute findings, health, or report truth - independently from MCP and canonical report semantics. - -## Position in the platform - -The VS Code extension is: - -- a native IDE client over `codeclone-mcp` -- read-only with respect to repository state -- baseline-aware and triage-first -- code-centered rather than report-dashboard-centered -- limited in Restricted Mode and fully active only after workspace trust - -The extension exists to make the current CodeClone review workflow easier to -use inside the editor. 
It must not reinterpret report semantics or invent -findings outside canonical report and MCP payloads. - -## Source of truth - -The extension reads from: - -- MCP tool responses -- MCP session-local reviewed state -- canonical report semantics already exposed through MCP - -It must not: - -- run a second analysis engine in the extension layer -- recompute health or finding semantics independently -- mutate source files, baselines, cache, or report artifacts - -## Current surface - -The extension currently exposes three native VS Code views: - -- `Overview` -- `Hotspots` -- `Runs & Session` - -It also provides: - -- one workspace-level status bar item -- command palette entry points for analysis and review -- one onboarding walkthrough -- markdown detail panels for findings, remediation, help topics, setup help, - restricted-mode guidance, and report-only detail for `Security Surfaces` and - `Overloaded Modules` -- lightweight Explorer file decorations for review-relevant files -- editor-local CodeLens and title actions for the active review target - -## Workflow model - -The intended IDE path mirrors CodeClone MCP: - -1. `Analyze Workspace` or `Review Changes` -2. compact overview and priority review -3. review new regressions or production hotspots -4. use `Set Analysis Depth` only when you need a higher-sensitivity follow-up -5. reveal source -6. open canonical finding or remediation only when needed - -This is deliberately different from a lint-list model. The extension should -prefer guided review over broad enumeration. 
- -## Current capabilities - -The extension currently supports: - -- full-workspace analysis -- changed-files analysis against a configured git diff reference -- conservative default analysis with an explicit deeper-review or custom-threshold - follow-up profile -- compact overview of structural health, current run state, baseline drift, and - current-run `Coverage Join` facts when MCP exposes `metrics.coverage_join`, - plus report-only `Security Surfaces` when MCP exposes - `metrics.security_surfaces` -- review queues for new regressions, production hotspots, changed-scope - findings, `Coverage Join` review items, and report-only `Security Surfaces` / - `Overloaded Modules` -- optional Coverage Join input through `codeclone.analysis.coverageXml`, with - workspace-root `coverage.xml` auto-detected when present -- source reveal, peek, canonical finding detail, remediation detail, and - session-local reviewed markers -- bounded MCP help topics inside the IDE, with the optional `coverage` topic on - newer CodeClone/MCP servers -- explicit HTML-report bridge when a local HTML report already exists - -These capabilities must remain clients of MCP and canonical report truth rather -than parallel extension-only logic. - -## State boundaries - -The extension must keep three state classes visibly separate: - -### Repository truth - -Comes from CodeClone analysis through MCP and canonical report semantics. - -### Current run - -Bounded by the active MCP session and the current latest run used by the -extension for a workspace. - -### Reviewed markers - -Session-local workflow markers only. - -Reviewed markers: - -- are in-memory only -- do not update baseline state -- do not rewrite findings -- do not change canonical report truth - -## Trust and runtime model - -!!! warning "Workspace trust still matters" - The extension is intentionally limited in Restricted Mode. Local analysis, - local git access, and local MCP startup remain disabled until the workspace - is trusted. 
- -The extension runs as a workspace extension and requires: - -- local filesystem access -- local git access for changed-files review -- a local `codeclone-mcp` launcher, or an explicitly configured launcher -- CodeClone `2.0.0` or newer - -In `auto` mode, launcher resolution prefers the current workspace virtualenv -before `PATH`. Runtime and version-mismatch messages identify that resolved launcher source. - -Launcher override settings (`codeclone.mcp.command`, `codeclone.mcp.args`) are -machine-scoped. Analysis-depth settings are resource-scoped so they can vary by -workspace or folder. - -For this reason: - -- Restricted Mode support is `limited` -- untrusted workspaces may show setup/onboarding/help surfaces only -- local analysis and local MCP startup remain disabled until trust is granted -- virtual workspaces are unsupported - -## Design rules - -- **Native VS Code first**: tree views, status bar, Quick Pick, CodeLens, and - file decorations before any custom UI. -- **Conservative by default**: the extension starts with repo defaults or - `pyproject`-resolved thresholds and treats lower-threshold analysis as an - explicit exploratory follow-up. -- **Source-first**: findings prefer `Reveal Source` over detail panels; - canonical detail and HTML report bridge are opt-in. -- **Report-only separation**: Overloaded Modules stay visually distinct from - findings, gates, and health. `Security Surfaces` stay visually distinct too - and remain boundary inventory rather than vulnerability claims. -- **Safe HTML bridge**: `Open in HTML Report` verifies the local file exists - and is not older than the current run. -- **Session-local state**: reviewed markers shape review UX but never leak - into repository truth. -- **First-run clarity**: onboarding leads to `Analyze Workspace`, not - transport setup. -- **Restricted Mode honesty**: explain requirements without pretending - analysis is available before trust is granted. 
- -## Relationship to other interfaces - -- CLI remains the scripting and CI surface. -- HTML remains the richest human report surface. -- MCP remains the read-only integration contract for agents and IDE clients. -- The VS Code extension is a guided IDE view over that MCP surface. - -## Non-guarantees - -- Exact view grouping and copy may evolve between extension releases. -- Internal client-side caching and view-model shaping may evolve as long as the - extension remains faithful to MCP and canonical report semantics. -- Explorer decoration styling, review-loop polish, and other non-contract UI - details may evolve without changing the extension contract. diff --git a/docs/book/22-claude-desktop-bundle.md b/docs/book/22-claude-desktop-bundle.md deleted file mode 100644 index bea55018..00000000 --- a/docs/book/22-claude-desktop-bundle.md +++ /dev/null @@ -1,106 +0,0 @@ -# 22. Claude Desktop Bundle - -## Purpose - -Document the current contract and behavior of the Claude Desktop bundle shipped -in `extensions/claude-desktop-codeclone/`. - -This chapter describes the bundle as a local install and launcher layer over -existing CodeClone MCP contracts. It does not define a second analysis truth -model. - -!!! note "Wrapper only" - The Claude Desktop bundle is a thin local launcher surface over - `codeclone-mcp`. Analysis truth, findings, and health semantics remain in - the canonical MCP server. - -## Position in the platform - -The Claude Desktop bundle is: - -- a local `.mcpb` install surface for Claude Desktop -- a small Node wrapper around `codeclone-mcp` -- read-only with respect to repository state -- local-stdio-only by design -- configuration-aware only for launcher resolution - -The bundle exists to make local setup easier, not to reinterpret CodeClone -analysis. - -## Source of truth - -The bundle delegates to the existing `codeclone-mcp` launcher. - -That means newly added canonical MCP surfaces flow through from the resolved -local server version. 
The bundle does not need a second feature switch for -surfaces such as current-run `Coverage Join` facts or the optional `coverage` -help topic. - -It must not: - -- run a second analysis engine -- redefine tools, findings, or health semantics -- mutate source files, baselines, cache, or report artifacts -- turn local Claude Desktop integration into a separate report surface - -## Current surface - -The bundle currently provides: - -- one installable `.mcpb` package -- one local Node launcher wrapper -- the same canonical MCP tool/help surface exposed by the resolved local - `codeclone-mcp` version -- two user settings: - - launcher command - - advanced launcher args as a JSON array -- one build script for deterministic local packaging - -It intentionally does not add bundle-only MCP tools or prompts. - -## Runtime model - -The wrapper: - -1. resolves a local `codeclone-mcp` launcher -2. validates advanced args -3. forces `--transport stdio` -4. launches the child process with `shell: false` -5. proxies stdio until shutdown - -The wrapper may auto-discover a few common global install locations, but it now -prefers: - -- a workspace-local `.venv` -- the active Poetry environment for the current workspace -- user-local install locations and `PATH` -- or an explicit launcher command in bundle settings - -This keeps the launcher closer to the active project Python when possible. - -## Design rules - -- **Canonical MCP first**: the bundle must keep Claude Desktop on the same - documented MCP surface as other clients. -- **Local-only transport**: reject transport and remote-listener overrides. -- **Setup honesty**: fail with a bounded install hint when the launcher is - missing. -- **No hidden runtime dependency games**: the bundle does not pretend to bundle - Python or CodeClone itself. -- **Small and deterministic**: package only the wrapper, manifest, icon, and - documentation needed for local installation. 
- -## Relationship to other interfaces - -- CLI remains the scripting and CI surface. -- MCP remains the read-only agent/client contract. -- Claude Code can still register `codeclone-mcp` directly through `mcp add`. -- The Claude Desktop bundle is the installable local package layer for users - who want a native Claude Desktop setup path. - -## Non-guarantees - -- Bundle presentation inside Claude Desktop may evolve with MCPB client UX. -- Auto-discovery heuristics for common launcher locations may evolve as long as - the explicit launcher setting remains stable. -- The bundle does not guarantee automatic updates or remote install flows. diff --git a/docs/book/12-determinism.md b/docs/book/22-determinism.md similarity index 92% rename from docs/book/12-determinism.md rename to docs/book/22-determinism.md index c67fe45d..fbb3b9d4 100644 --- a/docs/book/12-determinism.md +++ b/docs/book/22-determinism.md @@ -1,4 +1,9 @@ -# 12. Determinism + + +# 22. Determinism ## Purpose diff --git a/docs/book/23-codex-plugin.md b/docs/book/23-codex-plugin.md deleted file mode 100644 index 5e36abd6..00000000 --- a/docs/book/23-codex-plugin.md +++ /dev/null @@ -1,99 +0,0 @@ -# 23. Codex Plugin - -## Purpose - -Document the current contract and behavior of the Codex plugin shipped in -`plugins/codeclone/`. - -This chapter describes the plugin as a local Codex discovery and guidance layer -over existing CodeClone MCP contracts. - -!!! note "Guidance layer only" - The plugin contributes discovery metadata, a local MCP definition, and - review skills. It does not add a second analyzer or Codex-only finding - semantics. 
- -## Position in the platform - -The Codex plugin is: - -- a repo-local Codex plugin under `plugins/` -- backed by `.agents/plugins/marketplace.json` -- read-only with respect to repository state -- a composition of local MCP server metadata plus Codex skill guidance -- a native Codex setup surface, not a second extension model - -## Source of truth - -The plugin delegates analysis to the existing `codeclone-mcp` launcher and -guides usage through a plugin-bundled skill. - -New canonical MCP surfaces flow through from the resolved local server version. -That includes current-run metric families such as `Coverage Join` and the -optional `coverage` help topic when that server supports them. - -It must not: - -- run a second analysis engine -- redefine findings, health, or gates -- mutate source files, baselines, cache, or report artifacts -- drift away from canonical MCP semantics - -## Current surface - -The plugin currently provides: - -- `.codex-plugin/plugin.json` -- `.mcp.json` -- `scripts/launch_mcp` -- `README.md` -- two bundled skills: - - `codeclone-review` - - `codeclone-hotspots` -- a repo-local marketplace entry in `.agents/plugins/marketplace.json` - -## Runtime model - -The plugin surface is additive: - -- `.mcp.json` contributes a local stdio MCP server definition -- `plugins/codeclone/scripts/launch_mcp.py` resolves the local launcher without shell wrapping -- that launcher prefers a workspace `.venv`, then a Poetry env, then `PATH` -- the skills contribute workflow guidance and starter prompts -- `README.md` documents local usage and boundaries inside the repository tree -- Codex remains free to use direct `mcp add` config alongside or instead of the - plugin - -The plugin does not rewrite user config or install CodeClone automatically. - -## Design rules - -- **Codex-native packaging**: use `plugins/` plus `.agents/plugins/marketplace.json` - for discovery. -- **Canonical MCP first**: all analysis still flows through `codeclone-mcp`. 
-- **Skill guidance, not analysis logic**: the skill teaches conservative-first - CodeClone review but does not create new findings. -- **No hidden installation side effects**: the plugin does not patch - `~/.codex/config.toml`. -- **Repo-local clarity**: the plugin is meant to travel with the repository as - a native Codex surface. -- **Launcher honesty**: the plugin assumes `codeclone-mcp` is already - installable in the current workspace or reachable on `PATH`, and prefers the - workspace environment when one is present. -- **Shell-free launch**: the bundled launcher must stay argv-based and - local-stdio-only. - -## Relationship to other interfaces - -- CLI remains the scripting and CI surface. -- MCP remains the cross-client integration contract. -- `codex mcp add` remains a valid manual setup path. -- The Codex plugin is the native Codex discovery and guidance layer for - CodeClone. - -## Non-guarantees - -- Codex plugin UI presentation may evolve independently of the plugin manifest - content. -- Users who already configured `codeclone-mcp` manually may still prefer the - direct MCP path over the bundled plugin MCP definition. diff --git a/docs/book/23-testing-as-spec.md b/docs/book/23-testing-as-spec.md new file mode 100644 index 00000000..e7e55b31 --- /dev/null +++ b/docs/book/23-testing-as-spec.md @@ -0,0 +1,123 @@ + + +# 23. Testing as Specification + +## Purpose + +Map critical contracts to tests that lock behavior. 
+ +## Public surface + +Contract tests are concentrated in: + +- `tests/test_baseline.py` +- `tests/test_cache.py` +- `tests/test_report.py` +- `tests/test_report_contract_coverage.py` +- `tests/test_cli_inprocess.py` +- `tests/test_cli_unit.py` +- `tests/test_coverage_join.py` +- `tests/test_golden_fixtures.py` +- `tests/test_html_report.py` +- `tests/test_mcp_service.py` +- `tests/test_detector_golden.py` +- `tests/test_golden_v2.py` +- `tests/test_memory_*.py`, `tests/test_semantic_*.py`, `tests/test_mcp_memory_management.py` +- `tests/test_memory_trajectory_*.py`, `tests/test_memory_experience_*.py` +- `tests/test_memory_projection_jobs*.py` +- `tests/test_observability_*.py` +- `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` +- `tests/test_architecture.py` + +## Test taxonomy + +Treat tests as specification. Every new behavior belongs in the closest bucket; +public-surface changes need contract tests, not only unit tests. + +| Bucket | Intent | Examples | +|-----------------------------|----------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------| +| **Unit** | Module behavior and edge conditions | `tests/test_cfg.py`, `tests/test_normalize.py`, `tests/test_metrics_modules.py`, `tests/test_suppressions.py` | +| **Contract** | Baseline, cache, report, CLI, MCP public semantics | `tests/test_baseline.py`, `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py` | +| **Golden** | Snapshot sentinels for stable outputs | `tests/test_detector_golden.py`, `tests/test_golden_v2.py` | +| **Determinism / invariant** | Ordering, branch paths, canonical stability | `tests/test_report_branch_invariants.py`, `tests/test_core_branch_coverage.py`, `tests/test_semantic_determinism_gate.py` | +| **Scenario / regression** | Multi-step integration and process 
behavior | `tests/test_cli_inprocess.py`, `tests/test_pipeline_process.py`, `tests/test_cli_smoke.py` | + +Maintainer routing tables and golden-update policy also live in `AGENTS.md` §17 +and §16 (change routing); this chapter is the published contract copy. + +## Contracts + +The following matrix is treated as executable contract: + +| Contract | Tests | +|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | +| Cache v2.10 fail-open + status mapping + API-surface-aware reuse + runtime-reachability/security-surface persistence + function-relationship-fact persistence/aggregation + API signature order preservation | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag`, `tests/test_cli_inprocess.py::test_cli_public_api_breaking_count_stable_across_warm_cache`, `tests/test_cli_inprocess.py::test_cli_api_surface_ignores_non_api_warm_cache` | +| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | +| Report schema v2.11 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | +| HTML render-only explainability + escaping | `tests/test_html_report.py` | +| Current-run Cobertura coverage join parsing, gating, and projections | `tests/test_coverage_join.py`, `tests/test_pipeline_metrics.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py`, 
`tests/test_html_report.py` | +| Report-only security surfaces inventory and projections | `tests/test_security_surfaces.py`, `tests/test_pipeline_metrics.py`, `tests/test_cache.py`, `tests/test_report_contract_coverage.py`, `tests/test_cli_unit.py`, `tests/test_html_report.py`, `tests/test_mcp_service.py`, `tests/test_mcp_server.py` | +| Framework-aware dead-code reachability facts | `tests/test_extractor.py`, `tests/test_pipeline_metrics.py`, `tests/test_cache.py` | +| Golden fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` | +| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | +| Engineering Memory SQLite schema, governance, retrieval | `tests/test_memory_schema.py`, `tests/test_memory_store.py`, `tests/test_memory_governance.py`, `tests/test_memory_retrieval.py`, `tests/test_memory_mcp_sync.py` | +| Semantic index projection, rebuild, LanceDB backend | `tests/test_semantic_projection.py`, `tests/test_semantic_rebuild.py`, `tests/test_semantic_lancedb_backend.py`, `tests/test_semantic_embedding.py` | +| Trajectory projection, quality passport, anomalies, retrieval | `tests/test_memory_trajectory_projector.py`, `tests/test_memory_trajectory_quality.py`, `tests/test_memory_trajectory_anomalies.py`, `tests/test_memory_trajectory_retrieval.py` | +| Experience distillation, evidence diversity, scoped retrieval, promotion | `tests/test_memory_experience_distillation.py`, `tests/test_memory_experience_retrieval.py`, `tests/test_memory_experience_promotion.py` | +| Projection queue coalescing, watermarks, worker lifecycle | `tests/test_memory_projection_jobs.py`, `tests/test_memory_projection_jobs_schema.py`, `tests/test_projection_spawn_guard.py` | +| Platform Observability config, correlation, persistence, query, 
rendering, MCP | `tests/test_observability_config.py`, `tests/test_observability_correlation.py`, `tests/test_observability_store.py`, `tests/test_observability_query.py`, `tests/test_observability_render.py`, `tests/test_observability_mcp_registrar.py` | +| Documentation IA, line budgets, strict site build | `tests/test_docs_ia_contract.py`, `tests/test_docs_build_contract.py` | +| Layer dependency direction | `tests/test_architecture.py` | + +## Invariants (MUST) + +- Every schema/status contract change requires tests and docs update. +- Golden detector fixture is canonicalized to one Python tag. +- Untrusted baseline behavior must be tested for both normal and gating modes. +- V2 golden fixtures lock dead-code/test-path semantics, metrics/dependency aggregates, + stable per-function structural fact surfaces (`stable_structure` / + `cohort_structural_findings`), and CLI+`pyproject.toml` contract behavior. + +Refs: + +- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` +- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` +- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` +- `tests/test_cli_inprocess.py::test_cli_legacy_baseline_normal_mode_ignored_and_exit_zero` +- `tests/test_cli_inprocess.py::test_cli_legacy_baseline_fail_on_new_fails_fast_exit_2` + +## Failure modes + +| Condition | Expected test signal | +|---------------------------------|-----------------------------------------| +| Baseline payload contract drift | baseline integrity/canonical tests fail | +| Cache schema drift | cache version/parse tests fail | +| Report schema drift | compact layout tests fail | +| Exit priority drift | CLI inprocess tests fail | + +## Determinism / canonicalization + +- Determinism tests compare ordering and stable payloads, not runtime-specific timestamps.
+ +## Locked by tests + +- `tests/test_baseline.py::test_baseline_payload_fields_contract_invariant` +- `tests/test_cache.py::test_cache_v13_missing_optional_sections_default_empty` +- `tests/test_report.py::test_report_json_compact_v21_contract` +- `tests/test_coverage_join.py::test_build_coverage_join_maps_cobertura_lines_to_function_spans` +- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source` +- `tests/test_html_report.py::test_html_and_json_group_order_consistent` +- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` +- `tests/test_golden_v2.py::test_golden_v2_analysis_contracts` +- `tests/test_golden_v2.py::test_golden_v2_cli_pyproject_contract` +- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` +- `tests/test_extractor.py::test_collect_dead_candidates_skips_protocol_and_stub_like_symbols` +- `tests/test_metrics_modules.py::test_find_unused_respects_referenced_qualnames` + +## Non-guarantees + +- Test implementation details (fixtures/helper names) can change if contract assertions remain equivalent. diff --git a/docs/book/24-compatibility-and-versioning.md b/docs/book/24-compatibility-and-versioning.md new file mode 100644 index 00000000..6cd88053 --- /dev/null +++ b/docs/book/24-compatibility-and-versioning.md @@ -0,0 +1,202 @@ + + +# 24. Compatibility and Versioning + +## Purpose + +Define when to bump baseline/cache/report/fingerprint versions and how runtime +compatibility is enforced. 
+ +## Public surface + +- Version constants: `codeclone/contracts/__init__.py` (central); + subsystem-local versions in owning modules (audit event core, + implementation-context payload/resolver) +- Clone baseline compatibility: + `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- Metrics baseline compatibility: + `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` +- Cache compatibility: `codeclone/cache/store.py:Cache.load` +- Report schema assignment: + `codeclone/report/document/builder.py:build_report_document` +- MCP public surface: `codeclone/surfaces/mcp/server.py`, + `codeclone/surfaces/mcp/service.py` + +## Data model + +Current contract versions: + +- `BASELINE_SCHEMA_VERSION = "2.1"` +- `BASELINE_FINGERPRINT_VERSION = "1"` +- `CACHE_VERSION = "2.10"` +- `REPORT_SCHEMA_VERSION = "2.11"` +- `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` +- `ENGINEERING_MEMORY_SCHEMA_VERSION = "1.7"` +- `PATCH_TRAIL_SCHEMA_VERSION = "1"` (finish-time Patch Trail JSON; audit + SQLite sidecar) +- `TRAJECTORY_EXPORT_SCHEMA_VERSION = "2"` (JSONL export rows; `codeclone/memory/trajectory/profiles.py`) +- `TRAJECTORY_PROJECTION_VERSION = "trajectory-v3"` (derived trajectory rows) +- `TRAJECTORY_QUALITY_SCORE_VERSION = "2"` (quality contract formula) +- `EXPERIENCE_DISTILLATION_VERSION = "experience-v1"` (derived Experience rows) +- `SEMANTIC_INDEX_FORMAT_VERSION = "2"` (LanceDB sidecar; separate from SQLite memory schema) +- `PLATFORM_OBSERVABILITY_SCHEMA_VERSION = "1.1"` (dev-only telemetry SQLite) +- `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION = "1.2"` (corpus analytics SQLite) +- `CORPUS_EXPORT_SCHEMA_VERSION = "1.3"` (clustering JSON export) +- `CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION = "1"` (profile manifests) +- `CORPUS_CONTROL_PLANE_CONTRACT_VERSION = "1.0"` (profile/selection export) +- `CORPUS_REPRESENTATION_CONTRACT_VERSION = "3"` (intent representation payloads) +- `CORPUS_NORMALIZER_VERSION = "1"` (corpus normalization pipeline) 
+- `CORPUS_EMBEDDING_CONTRACT_VERSION = "2"` (analytics embedding sidecar) +- `CORPUS_AGENT_LABEL_CONTRACT_VERSION = "1"` (agent label payloads) +- `CORPUS_PARTITION_MAP_VERSION = "1"` (partition map artifacts) +- `IDE_GOVERNANCE_PROTOCOL_VERSION = 2` (VS Code Memory HMAC attestation) +- `AUDIT_EVENT_CORE_VERSION = "2"` (`codeclone/audit/events.py`; frozen audit step core) +- `CONTEXT_CONTRACT_VERSION = "1"` (`codeclone/surfaces/mcp/_implementation_context.py`) +- `CALL_RESOLUTION_VERSION = "1"` (`codeclone/surfaces/mcp/_implementation_context.py`) + +Refs: + +- `codeclone/contracts/__init__.py` +- `codeclone/audit/events.py` +- `codeclone/surfaces/mcp/_implementation_context.py` + +## Contracts + +Version bump rules: + +- bump **baseline schema** only for clone-baseline JSON layout/type changes +- bump **fingerprint version** when clone identity semantics change +- bump **cache schema** for cache wire-format or compatibility-semantics changes + — `2.9` adds rebuildable, off-report per-function call/reference facts (`fr` + wire section) as a sibling projection without changing serialized `Unit` rows; + `2.10` aggregates those facts across files onto the analysis result and MCP + run record (still off the canonical report) +- bump **report schema** for canonical report document shape/meaning changes +- bump **metrics-baseline schema** only for standalone metrics-baseline payload changes +- bump **engineering memory schema** for SQLite DDL / governed record-shape changes + (`codeclone/memory/schema_migrate.py`) — **`1.4`** added Patch Trail + persistence, **`1.5`** quality scoring, **`1.6`** Experience tables, and + **`1.7`** the projection-job flush-scheduling column (`flush_claimed_by`) +- bump **patch trail schema** (`PATCH_TRAIL_SCHEMA_VERSION`) when finish-time Patch + Trail JSON shape changes incompatibly +- bump **trajectory export schema** (`TRAJECTORY_EXPORT_SCHEMA_VERSION`) when JSONL + row shape changes incompatibly +- bump **trajectory projection**, 
**quality score**, or **Experience + distillation** versions when their derived identity/formula changes; rebuild + derived rows rather than migrating source evidence +- bump **semantic index format** when LanceDB projection or stored row fields change + incompatibly — forces index rebuild, not SQLite migration + (see [13-engineering-memory/index.md](13-engineering-memory/index.md)) +- bump **Platform Observability schema** only for incompatible telemetry-store + changes; it remains separate from reports, gates, baselines, and memory facts + (see [26-platform-observability.md](26-platform-observability.md)) +- bump **corpus analytics store/export/representation/embedding** versions when + SQLite layout or export semantics change incompatibly; rebuild analytics + artifacts rather than treating them as analysis truth + (see [27-corpus-analytics.md](27-corpus-analytics.md)) + - store `1.2` adds immutable manifest snapshots and profile batches, + batch-run memberships, suitability assessments, and append-only + selection events; writable migration chains `1.0 → 1.1 → 1.2`; + - export `1.3` adds control-plane contract `1.0`, profile context, + profile summary, profile recommendation, and active selection while + preserving Slice 1.1 comparison keys; + - export `1.2` separates formal validity from interpretation and + exposes full-versus-limited projection, bounded preview disclosure, + partition metrics, and nullable all-run sweep comparison facts; + - representation `3` retains raw representation-owned input hashing and + materializes explicit trajectory, Patch Trail, and registry-overlay + presence facts for new snapshots. Registry state remains outside source + identity and existing contract-2 snapshots are not rewritten; + - embedding `2` defines vector digests over canonical little-endian + float32 bytes. Older embedding generations are rejected and must be + regenerated.
+ +Operational compatibility rules: + +- runtime writes baseline schema `2.1` +- runtime accepts clone baseline `1.0`, `2.0`, and `2.1` +- runtime writes standalone metrics-baseline schema `1.2` +- runtime accepts standalone metrics-baseline `1.x` where the baseline minor + version is less than or equal to the runtime minor (currently through `1.2`) +- runtime writes cache schema `2.10` +- MCP does not define a separate schema constant; tool/resource semantics are + package-versioned public surface +- adding or changing an MCP tool is a package-versioned interface change and + requires tests, docs, changelog, and tool-schema snapshot updates; it does not + bump the canonical report schema unless report JSON changes +- implementation-context payload/resolver changes bump their subsystem-local + versions in `codeclone/surfaces/mcp/_implementation_context.py`; the + `context_artifact_digest` and `context_projection_digest` use canonical + sorted-key JSON and bare-hex SHA-256. Off-report manifests and relationship + projections do not bump the report schema. + +Baseline regeneration is required when: + +- `fingerprint_version` changes +- `python_tag` changes + +It is not required for package patch/minor updates when compatibility gates still pass. + +## Health model evolution + +CodeClone does not currently define a separate health-model version constant. +Health semantics are package-versioned behavior and must be documented in: + +- this chapter +- [15-health-score.md](15-health-score.md) +- release notes + +A lower score after upgrade may reflect a broader scoring model, not only worse code. + +## Invariants (MUST) + +- Contract changes require code + tests + changelog/docs updates. +- Schema mismatches map to explicit statuses. +- Legacy baselines stay untrusted and require regeneration. 
+ +Refs: + +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/baseline/clone_baseline.py:_is_legacy_baseline_payload` + +## Failure modes + +| Change type | User impact | +|--------------------------------|------------------------------------------------------------------------------| +| Baseline schema bump | Older unsupported baselines become untrusted until regenerated | +| Fingerprint bump | Clone IDs change; baseline regeneration required | +| Cache schema bump | Old caches are ignored and rebuilt automatically | +| Report schema bump | Downstream report consumers must update | +| Metrics-baseline schema bump | Dedicated metrics-baseline files must be regenerated | +| Engineering Memory schema bump | Older DBs migrate or re-init per `schema_migrate.py` | +| Semantic index format bump | LanceDB sidecar invalidated; run `memory semantic rebuild` | +| Platform Observability bump | Local diagnostic store reader/writer must migrate together | +| Corpus analytics store bump | Writable open migrates supported stores; read-only open rejects stale schema | +| Corpus embedding contract bump | Existing generations must be regenerated before clustering | + +## Determinism / canonicalization + +- Version constants are explicit and enforced in code. +- Compatibility decisions are runtime checks, not doc-only expectations. 
+ +Refs: + +- `codeclone/contracts/__init__.py` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` + +## Locked by tests + +- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities` +- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities[schema_major_mismatch]` +- `tests/test_baseline.py::test_baseline_verify_fingerprint_mismatch` +- `tests/test_cache.py::test_cache_v_field_version_mismatch_warns` +- `tests/test_report.py::test_report_json_compact_v21_contract` + +## Non-guarantees + +- Backward compatibility is not guaranteed across incompatible schema/fingerprint bumps. +- Health Score is not mathematically frozen forever; the obligation to document scoring-model changes is. diff --git a/docs/book/25-mcp-interface/determinism-and-tests.md b/docs/book/25-mcp-interface/determinism-and-tests.md new file mode 100644 index 00000000..25dab987 --- /dev/null +++ b/docs/book/25-mcp-interface/determinism-and-tests.md @@ -0,0 +1,48 @@ + + +# MCP Security, Determinism, and Tests + +Tool inventory and payload contracts: +[MCP interface](index.md). Platform diagnostics: +[Platform Observability tool](tools/platform-observability.md). 
+ +## Security model + +| Property | Guarantee | +|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Default transport | Local `stdio` | +| HTTP auth | `streamable-http` requires `CODECLONE_MCP_AUTH_TOKEN` (≥32 characters) for every start — loopback or remote; no unauthenticated HTTP mode | +| Remote exposure | Explicit `--allow-remote` required for non-loopback bind | +| Lazy loading | Base installs and CI do not require MCP packages | +| Read-only | Never mutates source, baseline, cache, or canonical report artifacts; may write the ephemeral workspace intent registry under `.codeclone/`, optional audit/observability DBs, Engineering Memory **draft** rows, and projection job metadata when enabled | + +--- + +## Determinism + +- Run identity is derived from canonical report integrity digest. +- Summary, hotspots, findings, and remediation payloads are deterministic + projections over stored run state. +- MCP must not create MCP-only analysis semantics or MCP-only gate + semantics. 
+ +--- + +## Locked by tests + +- `tests/test_mcp_service.py` +- `tests/test_mcp_server.py` +- `tests/test_mcp_tool_schema_snapshot.py` +- `tests/test_observability_mcp_registrar.py` +- `tests/test_observability_query.py` + +--- + +## See also + +- [14-claim-guard.md](../14-claim-guard.md) — citation-based review validation +- [12-structural-change-controller/index.md](../12-structural-change-controller/index.md) — change control workflow +- [11-cli.md](../11-cli.md) — CLI reference +- [05-report.md](../05-report.md) — canonical report schema +- [MCP deep dive](../../guide/mcp/README.md) — architecture, client setup, workflows, and prompt patterns +- [Platform Observability](../26-platform-observability.md) — observer storage, privacy, and anti-inference contract diff --git a/docs/book/25-mcp-interface/index.md b/docs/book/25-mcp-interface/index.md new file mode 100644 index 00000000..945d5707 --- /dev/null +++ b/docs/book/25-mcp-interface/index.md @@ -0,0 +1,144 @@ +# MCP Interface + +Agent workflows and setup: [MCP guide](../../guide/mcp/README.md). + +## Purpose + +Define the public MCP surface in the CodeClone **`2.1.0a1`** release line +(structural change controller + Engineering Memory MCP tools are live in this alpha). + +The MCP layer is optional and built on the same canonical pipeline/report +contracts as the CLI. It does not create a second analysis engine. + +!!! note "Integration surface, not a second analyzer" + MCP composes over the canonical report and run state shared by CLI, HTML, + and SARIF. It **never** mutates source files, baselines, analysis cache + (`.codeclone/cache.json`), or canonical report artifacts. It **may** write + ephemeral workspace intent records, Engineering Memory **drafts** (human + approval required), and optional audit evidence when enabled.
+ +--- + +## Public surface + +| Artifact | Path | +|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Package extra | `codeclone[mcp]` | +| Launcher | `codeclone-mcp` | +| Server wiring | `codeclone/surfaces/mcp/server.py` | +| Message catalog | `codeclone/surfaces/mcp/messages/*` (`tools`/`resources` titles, `help_topics`, `params`, `workflow`, `intent`, `errors`, patch-contract/verification copy, …) | +| Service / session | `codeclone/surfaces/mcp/service.py`, `codeclone/surfaces/mcp/session.py` | + +--- + +## Shape + +```mermaid +graph LR + subgraph Server["codeclone-mcp"] + T["Transport
stdio · streamable-http"] + SVC["Service<br/>tool routing, shutdown"] + SESS["Session<br/>runs, intents, markers"] + CTX["Implementation context<br/>bounded · drift-aware"] + end + + T --> SVC --> SESS + SESS -->|" reads "| RP["Canonical Report"] + SESS --> CTX + CTX -->|" binds "| RP + SESS -->|" writes "| WIR["Workspace intents
file or sqlite backend"] + style Server stroke: #6366f1, stroke-width: 2px + style WIR fill: #fef9c3 +``` + +Current server characteristics: + +- **Optional dependency** — base `codeclone` install does not require MCP + runtime packages. +- **Transports** — `stdio` (default), `streamable-http` (Bearer auth required). +- **HTTP flags** — `--json-response` (default on), `--stateless-http` (default + on), `--debug`, `--log-level` (`DEBUG`–`CRITICAL`, default `INFO`). +- **Run storage** — in-memory only, bounded by `--history-limit` (default 4, + max 10). Latest-run pointer is process-local. +- **Roots** — analysis tools require an absolute repository root. Relative + roots such as `.` are rejected. +- **Analysis modes** — `full`, `clones_only`. +- **Cache policies** — `reuse` (default) and `off` only; `refresh` is CLI-only + and rejected by MCP. +- **Workspace intent registry** — `intent_registry_backend` selects `file` + (ephemeral JSON under `.codeclone/intents/`) or `sqlite` (auditable + rows under `.codeclone/db/intents.sqlite3` with closed-row retention; + default 14 days, configurable). See + [Plans and Retention](../../plans-and-retention.md). + +!!! warning "Absolute roots and HTTP transport" + Analysis tools require an absolute repository root. Every + `streamable-http` start requires `CODECLONE_MCP_AUTH_TOKEN` (≥32 characters); + the server exits without it. Non-loopback binding additionally requires + `--allow-remote`. See + [Environment variable overrides](../10-config-and-defaults.md#mcp-http-authentication) + and [Security Model](../21-security-model.md). + +--- + +## Contract rules + +- MCP is **read-only** with respect to source files, baselines, analysis + cache (`cache.json`), and report artifacts. +- MCP reuses the same canonical report document as CLI/JSON/HTML/SARIF. +- Finding IDs, ordering, and summary data are deterministic projections over + the stored run. 
+- `analyze_changed_paths` requires either explicit `changed_paths` or + `git_diff_ref`. +- Analysis tools require an absolute `root`. +- `check_*` tools may resolve against a stored run; if `root` is provided it + must be absolute. +- `git_diff_ref` is validated before any subprocess call. +- Review markers are session-local in-memory state only. +- Change intent, blast-radius cache, and workspace registry state do not + enter canonical report integrity, baseline, or cache artifacts. +- Run history is process-local and does not survive restart. +- `get_implementation_context` reads one existing run and reports live + workspace drift; it never auto-analyzes or authorizes an edit. +- MCP accepts cache policies `reuse` and `off`; `refresh` is rejected at runtime. +- Missing optional MCP dependency is surfaced explicitly by the launcher. +- `metrics_detail(family="security_surfaces")` exposes a compact, report-only + inventory of security-relevant capability surfaces. It does not claim + vulnerabilities or exploitability. +- `validate_review_claims` detects deterministic overclaims. See + [14-claim-guard.md](../14-claim-guard.md) for the full pattern catalog. + +--- + +## Tools + +Current tool set: **33 tools** for agent clients, organized by workflow phase. + +When the MCP server starts with `--ide-governance-channel` (CodeClone VS Code +extension), two additional read-only tools register: +`get_workspace_session_stats` and `get_controller_audit_trail` (**35 tools** +total). They are not listed in generic agent tool catalogs; payloads mirror CLI +`--session-stats` and `--audit` via `codeclone/controller_insights/`. + +```mermaid +graph LR + A["1. Analyze"] --> T["2. Triage"] + T --> D["3. Drill down"] + T --> F["4. Focused checks"] + D --> C["5. Implementation context"] + F --> C + C --> CC["6. Change control"] + CC --> S["7. 
Session"] + style A fill: #dbeafe + style T fill: #dbeafe + style CC fill: #f0fdf4 +``` + +The surface is intentionally triage-first: analyze → summarize/triage → +drill into one finding or one hotspot family. + +Tool families and exact parameters are split under +[Tools](tools/analysis.md), including +[Implementation context](tools/implementation-context.md), +[Help topics](tools/help-and-topics.md), and the +[Platform Observability slicer](tools/platform-observability.md). diff --git a/docs/book/25-mcp-interface/payload-conventions.md b/docs/book/25-mcp-interface/payload-conventions.md new file mode 100644 index 00000000..27f1c7d9 --- /dev/null +++ b/docs/book/25-mcp-interface/payload-conventions.md @@ -0,0 +1,33 @@ + + +# MCP payload conventions + +## Payload conventions + +Short reference for response structure patterns across the tool surface. + +**IDs** — Run IDs are 8-char hex handles. Finding IDs are short prefixed +forms. Tools that take either ID also accept its full canonical form as input. + +**Detail levels** — `summary` (default for lists), `normal` (default for +a single finding), `full` (compatibility payload with URIs). + +**Pagination** — `list_findings` and +`get_report_section(section="metrics_detail")` support `offset` and `limit`. +`list_hotspots` supports `limit` and `max_results` only (no `offset`). + +**Changed-scope filters** — `list_findings`, `list_hotspots`, and +`generate_pr_summary` accept `changed_paths` or `git_diff_ref` for PR +projection. + +**Threshold context** — Empty `check_*` responses include +`threshold_context` showing whether the run is genuinely quiet or simply +below the active threshold. + +**Budget nulls** — `check_patch_contract` uses `null` for disabled numeric +thresholds. Boolean policy gates use `forbid_*` names. + +**Long context** — `do_not_touch`, `review_context`, and similar sections +include `total`, `shown`, and `truncated` summaries.
+ +--- diff --git a/docs/book/25-mcp-interface/resources.md b/docs/book/25-mcp-interface/resources.md new file mode 100644 index 00000000..750cf9d2 --- /dev/null +++ b/docs/book/25-mcp-interface/resources.md @@ -0,0 +1,28 @@ +## Resources + +Resources are deterministic read-only projections over stored runs. They do +not trigger analysis. + +### Fixed resources (7) + +| URI | Content | +|----------------------------------|-------------------------------------------------| +| `codeclone://latest/summary` | Compact summary for the latest stored run | +| `codeclone://latest/report.json` | Canonical JSON report for the latest stored run | +| `codeclone://latest/health` | Health/metrics snapshot | +| `codeclone://latest/gates` | Last gate-evaluation result | +| `codeclone://latest/changed` | Changed-files projection | +| `codeclone://latest/triage` | Production-first triage payload | +| `codeclone://schema` | Canonical report shape descriptor | + +### Run-scoped templates (3) + +| URI template | Content | +|---------------------------------------------------|---------------------------------| +| `codeclone://runs/{run_id}/summary` | Summary for a specific run | +| `codeclone://runs/{run_id}/report.json` | Report for a specific run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | + +`codeclone://latest/*` always resolves to the most recent run. 
+ +--- diff --git a/docs/book/25-mcp-interface/tools/analysis.md b/docs/book/25-mcp-interface/tools/analysis.md new file mode 100644 index 00000000..4f807656 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/analysis.md @@ -0,0 +1,42 @@ +### Analysis and run-level tools + +| Tool | Key parameters | Purpose | +|------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy`, `allow_external_artifacts`, `changed_paths` or `git_diff_ref` | Full deterministic analysis; registers an in-memory run | +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy`, `allow_external_artifacts` | Diff-aware analysis with changed-files projection | +| `get_run_summary` | `run_id` | Cheapest run-level snapshot: health, findings, baseline/cache status | +| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view | +| `get_implementation_context` | `root`, `paths`, `symbols`, `intent_id`, `changed_scope`, `mode`, `include`, `depth`, `detail_level`, `budget`, `run_id` | Bounded, drift-aware structural context from one stored run | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta; returns `incomparable` when roots/settings differ | +| `evaluate_gates` | `run_id`, 
gate flags (`fail_on_new`, `fail_threshold`, `fail_complexity`, `fail_coupling`, `fail_cohesion`, `fail_cycles`, `fail_dead_code`, `fail_health`, `fail_on_new_metrics`, `fail_on_typing_regression`, `fail_on_docstring_regression`, `fail_on_api_break`, `fail_on_untested_hotspots`, `min_typing_coverage`, …), `coverage_min` | Preview CI gating decisions without mutating state — same gate vocabulary as [CLI flags](../../11-cli.md) and [Metrics and quality gates](../../16-metrics-and-quality-gates.md); threshold ints use `-1` to disable | +| `help` | `topic`, `detail` | Bounded workflow/contract guidance — see [Help topics](help-and-topics.md) | + +`allow_external_artifacts` (default `false`): when `true`, optional artifact +path parameters may resolve to absolute or out-of-repo locations. See +[Security Model](../../21-security-model.md). + +Selected analysis and workflow responses may include non-blocking `tips[]` +entries for workspace hygiene (for example when `.codeclone/` is not +covered by the repository root `.gitignore`). The CLI prints the same +advisory after interactive analysis runs (suppressed in `--quiet`, CI, and +non-TTY contexts). Tips are advisory only; MCP and CLI never edit +`.gitignore` automatically. + +## Implementation context + +`get_implementation_context` projects bounded structural, call-graph, contract, +and memory evidence from one stored run. It is read-only and never authorizes +edits. + +Key parameters: + +- `changed_scope` — when `true`, use the bounded live git-dirty set as the + subject; mutually exclusive with explicit `paths` or `symbols`. +- `mode` — `implementation` (default), `impact`, or `contract`. +- `budget` — global evidence cap; safety entries can trigger + `status="safety_context_overflow"`. +- `freshness.status="drifted"` — re-analyze before relying on the projection. + +Full contract (modes, facets, digests, intent pinning, symbol resolution): +[Implementation context](implementation-context.md). 
Quick orientation: +`help(topic=implementation_context)`. diff --git a/docs/book/25-mcp-interface/tools/atomic-change-control.md b/docs/book/25-mcp-interface/tools/atomic-change-control.md new file mode 100644 index 00000000..33896c05 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/atomic-change-control.md @@ -0,0 +1,43 @@ +### Atomic change control tools (advanced / diagnostic) + +| Tool | Key parameters | Purpose | +|-----------------------------|--------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `manage_change_intent` | `action`, `root`, `run_id`, `intent_id`, `scope`, `on_conflict`, `ttl_seconds`, `lease_seconds`, `changed_files` or `diff_ref` | Intent lifecycle: declare, get, check, clear, renew, promote, list_workspace, gc_workspace, recover, reset_workspace. Use for queue/promote/recover operations alongside workflow tools | +| `get_blast_radius` | `run_id`, `files`, `depth`, `include` | Pre-change risk boundary: full transitive graph, custom include filters | +| `get_relevant_memory` | `root`, `scope`, `intent_id`, `symbols`, `max_records`, `include_stale`, `include_drafts`, `detail_level` | Ranked engineering memory for declared edit scope. `trajectories[]` excludes routine `run:*` workflows by default (same semantics as `include_routine=false` on retrieval). 
For explicit routine control use `query_engineering_memory` trajectory modes with `filters.include_routine`. Compact by default: bounded record/trajectory subjects plus typed `records`, `experiences`, `trajectories`, and `coverage` lanes. Auto-bootstraps store when `mcp_sync_policy=bootstrap_if_missing` (default). See [Engineering Memory](../../13-engineering-memory/index.md) | +| `query_engineering_memory` | `root`, `mode`, …, optional `semantic` (search only), `detail_level` | Mode router: search, get, for_path, for_symbol, stale, drafts, coverage, status, trajectory_status, trajectory_search, trajectory_get, trajectory_anomalies, trajectory_agents, trajectory_dashboard. List/search modes default compact; `get`, `trajectory_get`, or `detail_level=full` are explicit drill-down. `filters` supports `types`, `statuses`, `confidences`, and `match_mode` (`any`\|`all`) for search. `semantic=true` blends LanceDB proximity when `[tool.codeclone.memory.semantic] enabled` and index built (default off). See [Engineering Memory](../../13-engineering-memory/index.md) | +| `manage_engineering_memory` | `root`, `action`, … | Agent-side: `refresh_from_run`, `record_candidate`, `promote_experience`, `validate_claims`, `propose_from_receipt`, `rebuild_semantic_index`, `rebuild_trajectories`, `enqueue_projection_rebuild`, `projection_rebuild_status`, `run_projection_jobs_once`. `promote_experience` creates a human-reviewable draft; human approve/reject/archive remains VS Code/CLI only. 
See [Engineering Memory](../../13-engineering-memory/index.md) | +| `check_patch_contract` | `mode`, `run_id`, `before_run_id`, `after_run_id`, `intent_id`, `strictness`, `changed_files` or `diff_ref` | Manual budget query or step-by-step verification | +| `create_review_receipt` | `run_id`, `intent_id`, `format`, `include_blast_radius`, `include_patch_contract` | Manual receipt generation | +| `validate_review_claims` | `text`, `run_id`, `require_citations`, `patch_health_delta` | Standalone citation-based overclaim detection; pass `patch_health_delta` from verify when using the atomic workflow | + +??? info "Blast radius: do_not_touch vs review_context" + `do_not_touch` is limited to actionable negative context: baselines, + generated CodeClone state, explicit forbidden paths. Report-only signals + such as security boundary inventory and overloaded-module candidates are + returned as `review_context` — information, not edit prohibitions. Long + context sections include `total`, `shown`, and `truncated` summaries. + +??? info "Patch contract modes" + **Budget** reads one stored run and optional intent. Shows regression + headroom per quality dimension before editing. Queued intents return + `edit_allowed=false`. **Verify** compares explicit before/after stored + runs, previews gates, validates scope, and reports baseline-abuse + signals. When `intent_id` is provided but `before_run_id` is omitted, + verify auto-resolves the before-run from the intent record. Missing runs + return `status="unverified"`. Identical before/after runs for + `python_structural` / `governance_config` return + `reason="after_run_not_new"`. Non-accepted responses include a + `next_step` hint and `claim_validation_recommended` flag. + + Verify regressions are run-relative, not baseline-novelty-relative: a + finding absent from the clean before-run and present in the after-run is a + patch regression even when its fingerprint is `novelty="known"` against + the trusted baseline. 
+ + When a change intent is active, verify mode attributes regressions and + gate changes to the declared scope. Intent-scope regressions produce + contract violations; external regressions are reported as informational + context. Queued intents are rejected with `reason="intent_not_active"`. + See [Scope-Aware Patch Contract Verification](../../12-structural-change-controller/patch-contract-verify.md) + and [Verify Ergonomics](../../12-structural-change-controller/workflow-tools.md). diff --git a/docs/book/25-mcp-interface/tools/checks.md b/docs/book/25-mcp-interface/tools/checks.md new file mode 100644 index 00000000..1c3e4add --- /dev/null +++ b/docs/book/25-mcp-interface/tools/checks.md @@ -0,0 +1,9 @@ +### Focused check tools + +| Tool | Key parameters | Purpose | +|--------------------|----------------------------------------------------------------------------------------|--------------------------| +| `check_clones` | `run_id` or `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Narrow clone-only query | +| `check_complexity` | `run_id` or `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Complexity hotspot query | +| `check_coupling` | `run_id` or `root`, `path`, `max_results`, `detail_level` | Coupling hotspot query | +| `check_cohesion` | `run_id` or `root`, `path`, `max_results`, `detail_level` | Cohesion hotspot query | +| `check_dead_code` | `run_id` or `root`, `path`, `min_severity`, `max_results`, `detail_level` | Dead code query | diff --git a/docs/book/25-mcp-interface/tools/help-and-topics.md b/docs/book/25-mcp-interface/tools/help-and-topics.md new file mode 100644 index 00000000..fb68b81c --- /dev/null +++ b/docs/book/25-mcp-interface/tools/help-and-topics.md @@ -0,0 +1,73 @@ +# Help topics + +The `help` tool returns bounded workflow and contract guidance without pulling +canonical report payloads. Call `help(topic=…)` after analysis when tool or +profile semantics are unclear. 
+ +--- + +## Parameters + +| Parameter | Default | Values | +|-----------|--------------|-------------------------------------------------------------------------------------------------| +| `topic` | — (required) | One of the 14 topics below | +| `detail` | `compact` | `compact` (summary, key points, recommended tools, anti-patterns) or `normal` (adds `warnings`) | + +`compact` always includes `anti_patterns` when the topic defines them. `normal` +adds `warnings`. Both levels return `summary`, `key_points`, `recommended_tools`, +and `doc_links`. + +--- + +## Topic catalog + +| Topic | Summary focus | Recommended first tools | +|--------------------------|-------------------------------------------------------------|----------------------------------------------------------------| +| `workflow` | Triage-first, budget-aware MCP usage | `analyze_repository`, `get_production_triage`, `list_hotspots` | +| `analysis_profile` | Conservative default thresholds vs exploratory lower limits | `analyze_repository`, `compare_runs` | +| `suppressions` | Declaration-scoped inline ignore policy | `get_finding`, `get_remediation` | +| `baseline` | Trusted comparison snapshot and baseline-relative novelty | `get_run_summary`, `evaluate_gates`, `compare_runs` | +| `coverage` | Cobertura join as current-run signal only | `analyze_repository`, `get_report_section` | +| `latest_runs` | Session-local `latest/*` resource handles | `analyze_repository`, `get_run_summary` | +| `review_state` | Session-local reviewed markers | `mark_finding_reviewed`, `list_hotspots` | +| `changed_scope` | PR/patch-focused changed-files review | `analyze_changed_paths`, `generate_pr_summary` | +| `change_control` | `start` / `finish` edit cycle | `start_controlled_change`, `finish_controlled_change` | +| `trust_boundaries` | Read-only MCP, artifact paths, Security Surfaces inventory | `help`, `get_run_summary` | +| `implementation_context` | Bounded context from one stored run | `get_implementation_context` 
| +| `observability` | Dev-only Platform Observability slicer | `query_platform_observability` | +| `engineering_memory` | Scoped memory retrieval and draft writes | `get_relevant_memory`, `query_engineering_memory` | +| `verification_profiles` | Finish-derived verification profiles and after-run rules | `finish_controlled_change`, `analyze_repository` | + +--- + +## When to call + +| Situation | Topic | +|---------------------------------------|--------------------------| +| First MCP session on a repository | `workflow` | +| Threshold or sensitivity questions | `analysis_profile` | +| Baseline / new-vs-known confusion | `baseline` | +| Before declaring an edit intent | `change_control` | +| Finish blocked on after-run / profile | `verification_profiles` | +| `get_implementation_context` facets | `implementation_context` | +| Memory lanes, drafts, trajectories | `engineering_memory` | +| HTTP auth, artifact paths, read-only | `trust_boundaries` | +| Debugging CodeClone runtime (maintainer) | `observability` | + +--- + +## Maintainer-only: `observability` + +Call `help(topic="observability")` and use `query_platform_observability` **only** +when developing **CodeClone itself** — not when reviewing a user's Python +repository. Requires `CODECLONE_OBSERVABILITY_ENABLED=1` on the producing +process before any store exists. See +[Maintainer workflow](../../../guide/observability/maintainer-workflow.md). 
+ +--- + +## Related + +- Tool parameters: [Analysis tools](analysis.md) +- Implementation context contract: [Implementation context](implementation-context.md) +- Engineering Memory playbook: [Engineering Memory](../../13-engineering-memory/index.md) diff --git a/docs/book/25-mcp-interface/tools/ide-governance.md b/docs/book/25-mcp-interface/tools/ide-governance.md new file mode 100644 index 00000000..184dd3c4 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/ide-governance.md @@ -0,0 +1,16 @@ +### IDE-only tools (`--ide-governance-channel`) + +Registered only when the MCP launcher passes `--ide-governance-channel` (VS Code +extension). Agent MCP clients without that flag do not see these tools in +`list_tools`. + +| Tool | Key parameters | Purpose | +|-------------------------------|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `get_workspace_session_stats` | `root` | Workspace agents, intents, leases — same collector as CLI `--session-stats` | +| `get_controller_audit_trail` | `root`, `limit`, `audit_path` | Audit trail + payload footprint — same collector as CLI `--audit`. `limit` caps recent events (default 50). `audit_path` overrides the audit database location | + +Requires `audit_enabled=true` for meaningful audit rows. Payload footprint +`top_workflows` entries expose workflow metrics as `calls` and `tokens` (see +`codeclone/controller_insights/audit_trail.py`). + +--- diff --git a/docs/book/25-mcp-interface/tools/implementation-context.md b/docs/book/25-mcp-interface/tools/implementation-context.md new file mode 100644 index 00000000..ff065e02 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/implementation-context.md @@ -0,0 +1,118 @@ +# Implementation context + +`get_implementation_context` is a read-only projection over one stored MCP run. 
+It never re-analyzes, never changes `edit_allowed`, and never substitutes for +`start_controlled_change`. For workflow placement, see +[Analysis tools](analysis.md). + +--- + +## Parameters + +| Parameter | Default | Purpose | +|-----------------|------------------|------------------------------------------------------------------------------------------------------| +| `root` | — (required) | Absolute repository root | +| `paths` | `null` | Repo-relative file or directory subjects | +| `symbols` | `null` | `module:symbol` qualnames (colon separator; dot notation rejected) | +| `intent_id` | `null` | Active intent — pins run and adds `change_control` block | +| `changed_scope` | `false` | Use bounded live git-dirty set as subject; **mutually exclusive** with explicit `paths` or `symbols` | +| `mode` | `implementation` | `implementation`, `impact`, or `contract` | +| `include` | `null` | Optional closed facet set | +| `depth` | `1` | Structural traversal depth (`0`–`3`) | +| `detail_level` | `compact` | `compact`, `normal`, or `full` | +| `budget` | `50` | Global evidence-entry cap (`1`–`200`) | +| `run_id` | `null` | Stored run; latest when omitted | + +`changed_scope=true` selects the dirty set explicitly. Otherwise, subject +precedence is: explicit paths/symbols → active intent `allowed_files` → bounded +git-dirty set. A clean tree with no subject returns `no_current_work`, never +whole-repository context.
+ +--- + +## Modes and facets + +| Mode | Orientation | +|------------------|---------------------------------------------------------------------------------------------------------| +| `implementation` | Editing context: module role, imports/importers, callees, public API, blast radius, tests, docs, memory | +| `impact` | Transitive dependency context, baseline-sensitive findings; adds callers | +| `contract` | Truth-map: `definition_sites`, `version_constants`, `contract_tests`, `memory_conflicts` | + +`contract` mode emits path-specific caller facets +(`persistence_path_callers`, `serialization_path_callers`, +`deserialization_path_callers`, `store_api_consumers`) only with a typed +contract-registry, protocol, or Engineering Memory anchor. Without an anchor they +report `status: "not_available"` rather than being guessed from names. + +`call_context` projects callers, callees, references, and `test_callers` from +run-bound relationship facts. Every edge is tagged `relation_kind` × +`resolution_status`. Production and test-origin callers stay in separate lanes; +test edges never make production code live. Unresolved calls use +`target_qualname: null`. `analysis.call_graph_status` is `complete`, `partial`, +or `unavailable`. + +Import, importer, and test-importer roles collapse into +`structural_context.related_modules` with explicit `relations` +(`imports`, `imported_by`, `tested_by`). + +--- + +## Freshness and digests + +```mermaid +flowchart LR + R["Stored MCP run"] --> C["Canonical report facts"] + R --> M["Run manifest"] + C --> P["Bounded context projection"] + M --> F["Live freshness delta"] + F --> P + P --> A["context_artifact_digest"] + P --> E["context_projection_digest"] +``` + +- `context_artifact_digest` binds the canonical run and off-report context artifact. +- `context_projection_digest` binds the normalized request and exact bounded evidence returned. 
+- `analysis.freshness` compares the run manifest with live mtime+size and, when available, the git `DirtySnapshot` delta. +- `freshness.status="drifted"` means you should re-analyze before relying on the projection. + +A missing run returns `needs_analysis`. Invalid facets and paths outside the root +raise a contract error. + +--- + +## Budget and safety overflow + +`budget` is one global evidence-entry cap, not a per-facet cap. Every bounded collection +reports `total`, `shown`, `truncated`, and `omitted`. Intent `do_not_touch` and +review-required entries consume budget first. The effective limit expands up to +the server hard cap so a small requested budget cannot hide safety context. + +If safety entries alone exceed that cap, the response uses +`status="safety_context_overflow"` and reports the omitted count. + +Symbol-only queries that resolve nothing return `status="subject_not_found"` with +actionable `next_steps` and omit empty facet scaffolding. + +--- + +## Intent and memory lanes + +With `intent_id`, the selected active intent pins the source run and adds +`change_control`: + +- `allowed_files` and `allowed_related` from declared scope; +- report-derived `review_context`; +- explicit and built-in `do_not_touch` boundaries; +- guards with `authorization_source="start_controlled_change"`. + +Engineering Memory records, test anchors, doc anchors, trajectories, and +Experiences project into separate bounded lanes. Memory is evidence, not edit +authority.
+ +--- + +## Related + +- Overview and sibling analysis tools: [Analysis tools](analysis.md) +- `help(topic=implementation_context)`: [Help topics](help-and-topics.md) +- Agent guide: [Implementation context](../../../guide/mcp/workflows/analyze-and-triage.md#implementation-context) diff --git a/docs/book/25-mcp-interface/tools/platform-observability.md b/docs/book/25-mcp-interface/tools/platform-observability.md new file mode 100644 index 00000000..a85ee6c4 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/platform-observability.md @@ -0,0 +1,67 @@ +# Platform Observability Tool + + + +`query_platform_observability` projects bounded diagnostics from CodeClone's +local observer store. It is intended **only** for CodeClone maintainers +developing the product — **not** for users evaluating their analyzed +repository. + +!!! warning "Prerequisites" + Observation is **off by default**. Set `CODECLONE_OBSERVABILITY_ENABLED=1` + on the CLI/MCP/worker process **before** reproduction. Without enablement + the tool returns `status=disabled` or `status=no_store` and provides no + repository-quality signal. + + See [Platform Observability](../../26-platform-observability.md) for storage, + privacy, configuration, and trust boundaries. + +## Parameters + +| Parameter | Contract | +|----------------|--------------------------------------------------------------------------| +| `root` | Absolute repository root. | +| `section` | One supported diagnostics section. | +| `detail_level` | `compact`, `normal`, or `full`; `full` currently downgrades to `normal`. | +| `limit` | Row cap, clamped to `1..50`. | +| `window` | `latest` or a correlation ID. | +| `operation_id` | Reserved; reported in `ignored_parameters`. | +| `span_id` | Reserved; reported in `ignored_parameters`. 
| + +Supported sections: + +- `summary` +- `slow_operations` +- `memory_pipeline_cost` +- `db_cost` +- `agent_context` +- `mcp_tool_matrix` +- `correlated_chains` +- `costly_noops` +- `pipeline` + +Each call returns one section only. Compact detail is bounded to five rows; +normal detail is bounded by `limit`. + +## Inert states + +When observability is disabled, the tool returns a disabled status. When no +local store exists, it returns a no-store status. Neither state changes +analysis behavior. + +An invalid section returns the available section names. Reserved parameters +are echoed as ignored instead of changing the projection. + +## Interpretation boundary + +The envelope states that: + +- the audience is CodeClone development; +- the data is not user-facing repository quality evidence; +- it does not affect reports, gates, baselines, memory facts, or edit + authorization; +- reported heuristics are diagnostic hints, not findings. + +This anti-inference boundary is part of the tool contract. See +[Determinism and tests](../determinism-and-tests.md) and the +[diagnostics guide](../../../guide/observability/diagnostics.md). 
diff --git a/docs/book/25-mcp-interface/tools/report-and-findings.md b/docs/book/25-mcp-interface/tools/report-and-findings.md new file mode 100644 index 00000000..df35cb34 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/report-and-findings.md @@ -0,0 +1,10 @@ +### Report and finding projection tools + +| Tool | Key parameters | Purpose | +|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read report sections; `metrics_detail` is paginated | +| `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, `exclude_reviewed`, `max_results`, pagination | Filtered, paginated finding list. `exclude_reviewed=true` omits session-marked reviewed findings. 
`max_results` caps returned items (falls back to `limit`, hard-capped at 200) | +| `get_finding` | `finding_id`, `run_id`, `detail_level` | One canonical finding by short or full ID | +| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation/explainability for one finding | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, `limit`, `max_results` | Priority-ranked hotspot views by kind | +| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented markdown or JSON summary | diff --git a/docs/book/25-mcp-interface/tools/session-and-memory.md b/docs/book/25-mcp-interface/tools/session-and-memory.md new file mode 100644 index 00000000..3a0f5c8a --- /dev/null +++ b/docs/book/25-mcp-interface/tools/session-and-memory.md @@ -0,0 +1,20 @@ +### Session-local tools + +| Tool | Key parameters | Purpose | +|--------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------| +| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) | +| `list_reviewed_findings` | `run_id` | List reviewed markers for a run | +| `clear_session_runs` | — | Reset in-memory runs, session review markers, and workspace intent registry state for the MCP process | + +### Platform observability + +| Tool | Key parameters | Purpose | +|--------------------------------|------------------------------------------------------|----------------------------------------------------------------| +| `query_platform_observability` | `root`, `section`, `window`, `detail_level`, `limit` | Bounded, read-only slices of CodeClone's own runtime telemetry | + +This tool is **development-only**. It reports numeric operation/span, +database-cost, payload, agent-context, and pipeline diagnostics for CodeClone +itself. 
It never contributes repository findings, gates, baselines, memory +facts, or edit authorization, and it does not expose raw SQL or payload bodies. +See the dedicated +[Platform Observability tool contract](platform-observability.md). diff --git a/docs/book/25-mcp-interface/tools/workflow.md b/docs/book/25-mcp-interface/tools/workflow.md new file mode 100644 index 00000000..01f832a5 --- /dev/null +++ b/docs/book/25-mcp-interface/tools/workflow.md @@ -0,0 +1,35 @@ +### Workflow tools (preferred) + +| Tool | Key parameters | Purpose | +|----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `start_controlled_change` | `root`, `scope`, `intent`, `expected_effects`, `on_conflict`, `strictness`, `ttl_seconds`, `blast_radius_depth`, `dirty_scope_policy` | Pre-edit: workspace check + declare + blast radius + budget in one call. Returns `intent_id` for `finish`. `ttl_seconds` overrides intent lifetime (default `3600`, env `CODECLONE_INTENT_TTL_SECONDS` when omitted). `dirty_scope_policy=continue_own_wip` resumes known dirty scope when no foreign overlap. 
Does not run analysis | +| `finish_controlled_change` | `intent_id`, `changed_files` or `diff_ref`, `after_run_id`, `review_text`, `claims_text`, `propose_memory`, `create_receipt`, `auto_clear`, `strictness`, `detail_level`, `patch_trail_detail` | Post-edit pipeline: hygiene gate → scope check → verify → Patch Trail + audit → optional claims → receipt → clear. `after_run_id` required for Python structural / governance config profiles. Hygiene: `detail_level="full"` for per-path attribution; otherwise counts/blocking only. `patch_trail_detail`: `summary` (default) or `full` path lists on `patch_trail`. Top-level `status` may be `accepted_with_external_changes` when verify passes but out-of-scope git dirt remains. Set `propose_memory=true` for draft memory candidates on accept | + +`finish_controlled_change` separates human notes from validated claims: +`review_text` is an optional note, while `claims_text` is the text passed to +Claim Guard. The response includes a compact `summary` plus the full +`scope_check`, `verification`, `claims`, `receipt`, and `workspace_hygiene_after` +payloads. When `create_receipt` fails, verify may still be `accepted` but +`intent_cleared` stays `false`. + +??? info "Start/finish workspace hygiene" + Edit permission requires `start_controlled_change` to return + `status == "active"` **and** `edit_allowed == true`. Workflow + `status: "blocked"` is not persisted registry lifecycle. Start may attach + scoped `workspace_hygiene`; finish runs `finish_hygiene_check` before check/verify. + Hygiene path detail (`dirty_attribution`, classification arrays) requires + `detail_level="full"`; `summary`/`normal` return counts and blocking fields only. + **Blocking finish** (`reason: workspace_hygiene`, `blocks_finish: true`) happens + for `finish_block_reason` `missing_evidence`, `foreign_dirty_overlap`, and + (when strict finish mode is enabled) `own_unscoped_dirty`. 
Out-of-scope + unattributed dirt is **advisory** — it may surface as `external_changes` and + elevate top-level status to `accepted_with_external_changes` without failing + verify. Unchanged preexisting out-of-scope dirty is informational. Foreign + active/stale dirt outside your scope → `foreign_attributed_outside_scope` + (ignored). Recoverable intents do not grant foreign attribution. Queued + foreign intents do not populate `foreign_dirty_overlaps`. `files_for_scope_check` + is agent evidence only. Full pipeline and field reference: + [finish_controlled_change](../../12-structural-change-controller/finish-controlled-change.md). + `manage_change_intent(list_workspace)` returns repo-level + `workspace_dirty_summary` only. Registry lazy close vs `gc_workspace`: see + [Workspace hygiene and registry consistency](../../12-structural-change-controller/finish-hygiene.md). diff --git a/docs/book/26-platform-observability.md b/docs/book/26-platform-observability.md new file mode 100644 index 00000000..9c088974 --- /dev/null +++ b/docs/book/26-platform-observability.md @@ -0,0 +1,171 @@ +# 26. Platform Observability + + + +Platform Observability is a local diagnostics surface for CodeClone development. +It explains the cost and shape of CodeClone's own execution. It does **not** +describe repository quality and must never affect analysis truth, gates, +baselines, cache compatibility, findings, or edit authorization. + +!!! warning "Not for CodeClone end users" + If you use CodeClone to analyze **your** Python project, observer tooling + will not help with clones, health score, CI gates, or MCP review. Use the + normal CLI/MCP workflow instead. Platform Observability is **only** for + people developing **CodeClone itself**. + + Instrumentation is **disabled by default** and requires explicit environment + configuration before any telemetry is collected. See + [Maintainer workflow](../guide/observability/maintainer-workflow.md). 
+ + For practical commands, see the + [observability diagnostics guide](../guide/observability/diagnostics.md). Maintainer + playbook: [Developing CodeClone with Platform Observability](../guide/observability/maintainer-workflow.md). + For the bounded MCP projection, see + [query_platform_observability](25-mcp-interface/tools/platform-observability.md). + +## Trust boundary + +```mermaid +flowchart LR + A["CLI / MCP / projection worker"] --> B["Operation and span instrumentation"] + B --> C["Local SQLite store<br/>.codeclone/db/platform_observability.sqlite3"] + C --> D["CLI JSON / self-contained HTML"] + C --> E["Bounded MCP diagnostics"] + D --> F["Human diagnosis"] + E --> F + B -. " must not influence " .-> G["Analysis, findings, gates,<br/>baseline, cache, permissions"] +``` + +The observer: + +- is disabled by default; +- stores data locally only; +- records metadata, counters, durations, bounded payload sizes, and normalized + literal-free SQL fingerprints; +- never records prompt or MCP payload bodies; +- exposes telemetry hints, not findings or vulnerabilities; +- remains inert when disabled or when no store exists. + +## Enabling instrumentation + +Configuration is environment-only. There is no `[tool.codeclone]` +observability table. + +| Variable | Meaning | +|---------------------------------------------------|-------------------------------------------------------------------------| +| `CODECLONE_OBSERVABILITY_ENABLED=1` | Enable instrumentation. | +| `CODECLONE_OBSERVABILITY_FORCE=1` | Permit observation in CI; it does not enable instrumentation by itself. | +| `CODECLONE_OBSERVABILITY_PROFILE=1` | Capture optional process metrics; requires `codeclone[perf]`. | +| `CODECLONE_OBSERVABILITY_PERSIST=0` | Instrument without persisting completed operations. | +| `CODECLONE_OBSERVABILITY_CAPTURE_PAYLOAD_SIZES=0` | Disable request/response size and token estimates. | +| `CODECLONE_OBSERVABILITY_PAYLOAD_SNAPSHOT=1` | Reserved and rejected: raw payload snapshots are not supported. | + +An explicit `CODECLONE_OBSERVABILITY_ENABLED=1` is sufficient in CI. +`CODECLONE_OBSERVABILITY_FORCE` never enables observation by itself and is +reserved as an explicit CI-gate override. + +Configuration fields for retention and row caps are reserved in the internal +model but are not automatic pruning guarantees in the current release. + +## Data model + +The local schema version is `1.1`. A completed operation and its spans are +written in one transaction.
+ +An operation records stable identifiers, parent/correlation IDs, surface, +operation name, timestamps, duration, status, bounded error classification, +session and root digests, request/response sizes, token estimates, and optional +process metrics (`rss_mb`, `rss_delta_mb`, `peak_rss_mb`, `peak_rss_delta_mb`, +CPU time, thread count, open file descriptors when `codeclone[perf]` is +installed). + +A span records its parent, duration, reason kind, deduplication state, numeric +counters, the same optional process metrics, and at most eight normalized SQL +fingerprints. SQL literals are removed before persistence. + +### Engineering Memory and semantic rebuild spans + +When observability is enabled, `codeclone memory …` commands record a CLI +operation (`cli.memory.{command}` or `cli.memory.semantic.{action}`) and +nested product spans: + +| Span | When | +|------------------------------------------------------|------------------------------------------------------| +| `memory.semantic.rebuild` | Semantic index rebuild (CLI, MCP, projection worker) | +| `memory.semantic.bootstrap` | Provider and LanceDB writer resolution | +| `memory.semantic.source.{memory\|audit\|trajectory}` | Per-source projection scan | +| `memory.semantic.embed` | Changed-row embedding batches | +| `memory.semantic.reconcile` | Stale-id deletion | +| `memory.semantic.search` | CLI semantic search | +| `memory.embedding.model_load` | First FastEmbed ONNX load in-process | +| `memory.embedding.infer` | FastEmbed batch inference | +| `memory.embedding.documents` | Document embedding helper | +| `memory.embedding.query` | Query embedding helper | + +The rebuild span carries counters such as `indexed`, `embedded`, +`skipped_unchanged`, `deleted`, `embedding_dimensions`, `embedding_batch_size`, +and `lane_{source}` tallies. 
+ +Semantic rebuild reasons are classified as: + +- `content_changed` — rows were embedded and/or stale ids pruned +- `manual_rebuild` — full reconcile but index already current (hash-skip only) +- `schema_version_changed` +- `model_changed` +- `first_index` +- `unknown` + +Memory pipeline cost rows include `memory.*` product spans regardless of +whether they ran under a `memory`, `cli`, or `mcp` operation surface. + +## CLI projection + +```bash +codeclone observability trace --root . +codeclone observability trace --root . --last 50 --html /tmp/codeclone-observer.html +codeclone observability trace --root . --operation OPERATION_ID --json /tmp/trace.json +codeclone observability trace --root . --correlation CORRELATION_ID +``` + +Without `--json` or `--html`, the command writes JSON to stdout. A missing +store is an informational empty state and exits successfully. + +The HTML cockpit is self-contained and includes operation chains, a span +waterfall, pipeline and Engineering Memory costs, MCP tool aggregates, database +costs, normalized SQL fingerprints, agent context, and costly no-op signals. +It has no external assets or JavaScript dependency. + +## MCP projection + +`query_platform_observability` returns one bounded section per call: + +- `summary` +- `slow_operations` +- `memory_pipeline_cost` +- `db_cost` +- `agent_context` +- `mcp_tool_matrix` +- `correlated_chains` +- `costly_noops` +- `pipeline` + +`detail_level=compact` returns at most five rows. `normal` honors `limit`, +clamped to `1..50`; `full` currently downgrades to `normal`. `window` accepts +`latest` or a correlation ID. `operation_id` and `span_id` are reserved and +reported as ignored parameters. + +The response explicitly declares a CodeClone-development audience and states +that it is not user-facing quality evidence. See +[MCP determinism and tests](25-mcp-interface/determinism-and-tests.md) for the +bounded-projection contract. 
+ +## Privacy and lifecycle + +The SQLite database is optional local diagnostic state. It is outside the +canonical report, baseline, and analysis cache contracts. Deleting it only +removes diagnostics; it does not alter analysis results. + +There is no network exporter. Automatic retention pruning is not currently +enforced, so operators who enable persistence own local database lifecycle. +See [Security model](21-security-model.md) and +[Plans and retention](../plans-and-retention.md). diff --git a/docs/book/27-corpus-analytics.md b/docs/book/27-corpus-analytics.md new file mode 100644 index 00000000..9870cedd --- /dev/null +++ b/docs/book/27-corpus-analytics.md @@ -0,0 +1,524 @@ +# Corpus Analytics + +Corpus Analytics is an optional, offline analytics lane for clustering +historical change-control intents. It reconstructs an intent corpus from +retained controller evidence, creates immutable-by-contract snapshots, writes +separate analytics embeddings, and runs deterministic PCA + HDBSCAN clustering. +Slice 1.1 adds an interpretation plane over those persisted facts. Slice 1.2 +adds a control plane: versioned profile lenses, finite profile-scoped sweeps, +separate suitability and ranking, immutable batch receipts, and append-only +maintainer selection events. + +It is **derived evidence, not authority**. Corpus Analytics never changes the +canonical structural report, reports/gates/baselines, cache compatibility, +Engineering Memory governance, or edit authorization. + +For a command-oriented walkthrough, see the +[Corpus Analytics guide](../guide/analytics/overview.md). Configuration is +indexed in [Config and Defaults](10-config-and-defaults.md), CLI behavior in +[CLI](11-cli.md), and storage layout in +[Schema Layouts](appendix/b-schema-layouts.md). + +## Trust Boundary + +```mermaid +flowchart LR + A["Audit DB<br/>intent.declared"] --> S["Corpus snapshot<br/>SQLite metadata"] + T["Trajectory projection<br/>outcome and quality"] --> S + P["Patch Trail<br/>scope and verification facts"] --> S + R["Live registry overlay<br/>inspection only"] -.-> S + S --> E["Embedding generation<br/>separate LanceDB sidecar"] + E --> C["L2 normalize<br/>PCA(full)<br/>HDBSCAN(euclidean)"] + C --> D["Persisted assignments<br/>summaries and diagnostics"] + D --> V{"V1-V10<br/>technically valid?"} + V -->|" yes "| Q["Profile suitability<br/>optional lens, soft gates"] + Q --> F["Full interpretation<br/>metrics, previews, provenance"] + V -->|" no "| L["Limited diagnostic<br/>status, validity, safe raw counts"] + F --> J["JSON export 1.3"] + F --> H["Self-contained HTML"] + L --> J + L --> H + S -. " never authorizes or gates " .-> X["Structural report<br/>baseline, gates, memory governance"] +``` + +Source ownership is explicit: + +| Fact | Owner | +|------------------------------------------------|------------------------------------------------------| +| Original description and declaration order | Earliest audit `intent.declared` by `audit_sequence` | +| Declared/changed files and verification facts | Patch Trail | +| Outcome, quality tier, labels, anomalies | Selected current-version trajectory | +| Lease/status and other live coordination state | Optional registry overlay | + +The registry overlay is exported for inspection when present, but it never +changes normalized text, representation identity, or `source_digest`. + +## End-to-End Lifecycle + +```mermaid +sequenceDiagram + participant U as Maintainer + participant CLI as codeclone analytics + participant SQL as Analytics SQLite + participant V as Analytics LanceDB + participant O as Platform Observability + U ->> CLI: snapshot --root . + CLI ->> O: analytics.snapshot span + CLI ->> SQL: snapshot + corpus items + U ->> CLI: embed --snapshot-id SNAPSHOT + CLI ->> O: analytics.embed span + CLI ->> V: float32 vectors + CLI ->> SQL: generation + row keys + digests + U ->> CLI: cluster --snapshot-id ... --embedding-generation-id ... + CLI ->> O: analytics.cluster span + CLI ->> SQL: running run + CLI ->> V: validated vectors + CLI ->> SQL: completed assignments and summaries + opt --profile PROFILE + CLI ->> SQL: manifest snapshot + immutable profile batch + CLI ->> SQL: suitability assessments + profile recommendation + end + opt --select-run RUN + CLI ->> SQL: append-only selection event + end + U ->> CLI: cluster-show or build outputs + CLI ->> O: analytics.report span + CLI -->> U: atomic JSON / HTML +``` + +Every clustering run is inserted as `running`. A successful run atomically +commits assignments, summaries, and `completed`; a processing error rolls those +artifacts back and persists `failed` with an error message.
+ +## Installation + +```bash +uv sync --extra analytics +# or +pip install "codeclone[analytics]" +``` + +Capability tiers: + +| Tier | Packages | Commands | +|-----------|-----------------------------------|----------------------------------------------------------------------------| +| `base` | core only | `snapshot`, `clusters`, `cluster-show`, `outliers`, `cluster --select-run` | +| `embed` | FastEmbed + LanceDB | `embed` | +| `cluster` | scikit-learn + external `hdbscan` | clustering and sweep | +| `full` | all of the above | `build` | + +Missing optional dependencies are contract errors (exit `2`) with an install +hint. Inspection/export commands do not import FastEmbed. + +`umap-learn` remains an optional dependency on supported Python versions, but +Slice 1 does not emit a UMAP visualization. Any later UMAP view must be labeled +visualization-only and must never feed clustering. + +## Configuration + +`[tool.codeclone.analytics]` overrides repository-local defaults. Relative paths +resolve from the repository root; absolute paths are allowed but are represented +as a redacted placeholder in snapshot manifests so user-specific paths do not enter +portable identity.
+ +| Key | Default | Contract | +|------------------------------------|--------------------------------------------------|-------------------------------------------------| +| `db_path` | `.codeclone/analytics/corpus_clustering.sqlite3` | Analytics metadata store | +| `vectors_path` | `.codeclone/analytics/corpus_vectors` | Dedicated LanceDB vectors | +| `embedding_model` | `BAAI/bge-small-en-v1.5` | FastEmbed model id | +| `embedding_dimension` | `384` | Vector width | +| `embedding_provider` | `fastembed` | Only supported provider in Slice 1 | +| `embedding_cache_dir` | memory semantic cache | Shared model artifact cache, not shared vectors | +| `min_correlation_sample_size` | `5` | Correlation denominator guard | +| `cluster_random_seed` | `42` | PCA deterministic seed | +| `default_pca_dimensions` | `64` | Requested PCA width | +| `default_min_cluster_size` | `8` | HDBSCAN default | +| `default_min_samples` | `3` | HDBSCAN default | +| `default_cluster_selection_method` | `eom` | `eom` or `leaf` | +| `default_profile_id` | unset | Used only by explicit `--profile auto` | +| `profile_paths` | `[]` | Additional repo-contained manifest files | +| `sweep_pca_dimensions` | `[32, 64, 128]` | Non-profile sweep PCA axis | +| `sweep_min_cluster_sizes` | `[5, 8, 12, 15]` | Non-profile sweep size axis | +| `sweep_min_samples` | `[1, 3, 5]` | Non-profile sweep sample axis | +| `sweep_selection_methods` | `["eom", "leaf"]` | Non-profile sweep method axis | +| `allow_model_download` | memory semantic setting | Whether FastEmbed may download | + +The historical audit database follows top-level +`[tool.codeclone].audit_path`. This prevents Analytics from silently reading a +different audit source than the controller. 
+ +## Identity And Digests + +Corpus identity has three layers: + +```text +source_record_key = sha256(project_id + "\n" + intent_id) +representation_key = sha256(lane + kind + version + source_record_key) +snapshot_item_id = sha256(snapshot_id + "\n" + representation_key) +``` + +`source_digest` hashes source schema versions, lane, representation contract, +normalizer version, and sorted source/provenance digests. It excludes: + +- snapshot ids and timestamps; +- absolute source paths; +- live registry overlay state. + +Representation contract `3` retains the contract-2 raw-input hashing rules and +materializes explicit provenance presence facts for new snapshots: + +- `provenance.trajectory.selected`; +- `provenance.patch_trail.present`; +- `provenance.registry_overlay.present`. + +For `description_with_frame`, representation identity includes description, +intent kind, declared path families, and typed declared constraints before +normalization. Registry-overlay content and presence remain outside +`source_digest`; existing contract-2 snapshots are immutable and are not +backfilled. + +Cluster membership identity is: + +```text +membership_digest = sha256(sorted(snapshot_item_ids) joined by "\n") +``` + +HDBSCAN numeric labels are not stable identity. Display ids are assigned after +canonical ordering by size descending, actual PCA-space medoid item id, then +membership digest. Noise remains an explicit non-display bucket. + +## Storage And Integrity + +Current analytics store schema is `1.2`. + +- Writable open migrates supported `1.0` stores through `1.1` to `1.2`. +- Read-only open never migrates and rejects a stale schema. +- SQLite relationship triggers reject orphan-producing inserts/updates/deletes. +- Vector row keys and non-null display cluster ids are unique. +- Reporting and inspection open the metadata store read-only. + +SQLite and LanceDB cannot participate in one physical transaction. The +embedding workflow therefore: + +1. 
computes a new generation; +2. stages SQLite metadata; +3. writes LanceDB rows; +4. commits SQLite only after the sidecar write succeeds; +5. rolls back metadata and removes the generation on ordinary failures. + +Before clustering, CodeClone validates the generation contract, exact snapshot +item set, dimensions, row keys, and vector digests. Cross-snapshot runs, +missing sidecar rows, stale embedding contracts, or corrupted float32 payloads +are rejected rather than accepted as completed analytics. + +## Embedding Reproducibility + +Embedding contract `2` stores: + +- provider and provider package version; +- model id, optional revision, optional artifact fingerprint; +- dimensions and embedding contract version; +- cosine similarity manifest and L2 preprocessing contract; +- vector row key and SHA-256 digest over canonical little-endian float32 bytes. + +When model revision/artifact fingerprint is unavailable, +`exact_model_artifact_reproducibility=false`. JSON and HTML then state: + +> Full vector reproducibility is not guaranteed from model id alone. + +Exact reproduction additionally depends on the model artifact, provider and +numeric-library versions, hardware/runtime behavior, and identical normalized +inputs. Old embedding contract generations must be regenerated. + +## Clustering Contract + +The fixed path is: + +```text +float32 embeddings + -> L2 normalization + -> PCA(svd_solver="full", whiten=false, random_state=42) + -> external hdbscan.HDBSCAN(metric="euclidean", core_dist_n_jobs=1) + -> canonical partitions + -> diagnostics +``` + +The run manifest records Python, NumPy, SciPy, scikit-learn, and HDBSCAN +versions plus all fixed algorithm choices. `run_digest` covers snapshot, +embedding generation, effective sample/feature dimensions, effective +parameters, random seed, and the algorithm manifest. + +A sweep discards invalid small-corpus candidates and deduplicates requested +settings that collapse to the same effective parameters. 
A corpus with no valid +candidate fails explicitly instead of producing an empty successful sweep. + +Sweep ranking sets exactly one generation-wide +`recommended_by_heuristic=true`. Maintainer selection is an append-only event: + +```bash +codeclone analytics cluster --root . --select-run RUN_ID \ + --selected-by "$USER" \ + --selection-rationale "Best inspectable partition" +``` + +The legacy `selected_by_maintainer` run field is synchronized only for global +selection and is not authoritative. Recommendation is evidence, not a human +decision. + +## Profile Control Plane (Slice 1.2) + +A profile is a versioned lens over completed clustering facts, not a property +of a run and not a semantic taxonomy. The bundled registry contains stable, +balanced, discovery, and outlier-oriented lenses. Inspect it with: + +```bash +codeclone analytics profiles list --root . +codeclone analytics profiles show --root . \ + --profile-id intent-small-balanced-v1 +codeclone analytics profiles validate --root . +``` + +```mermaid +flowchart LR + M["Validated manifest<br/>schema 1"] --> G["Finite deduplicated grid"] + G --> B["Immutable profile batch<br/>manifest + candidate-space digests"] + B --> R["Completed run memberships"] + R --> V["Technical validity<br/>V1-V10"] + V --> S["Profile suitability<br/>soft gates"] + S --> K["Profile-aware ranking<br/>suitable runs only"] + K --> P["Profile recommendation"] + P --> E["JSON 1.3 / HTML"] + H["Maintainer selection event"] --> E +``` + +Run a profile sweep explicitly: + +```bash +codeclone analytics cluster \ + --root . \ + --snapshot-id SNAPSHOT_ID \ + --embedding-generation-id GENERATION_ID \ + --profile intent-small-discovery-v1 +``` + +`--profile` implies `--sweep`. `--profile auto` uses +`default_profile_id`; when `--profile` is absent, the default profile is never +applied. Profile grids are authoritative for profile sweeps. For ordinary +sweeps, `--sweep-pca`, `--sweep-min-cluster-size`, +`--sweep-min-samples`, and `--sweep-selection-method` replace the matching +configured axes. Single-run flags (`--pca-dimensions`, +`--min-cluster-size`, `--min-samples`, `--cluster-selection-method`) are +mutually exclusive with sweep mode. + +Every profile execution creates a new immutable batch receipt. Candidate +failures are retained as failed runs but do not abort remaining candidates; +the batch becomes `completed_partial` when at least one candidate succeeds. +Technical validity, profile suitability, and maintainer acceptance remain +three separate verdict levels. + +Profile-scoped selection names a batch directly, or resolves the latest batch +for a profile: + +```bash +codeclone analytics cluster --root . --select-run RUN_ID \ + --selection-profile pbatch-0123456789abcdef +``` + +See [Schema Layouts](appendix/b-schema-layouts.md#corpus-analytics-store-12) +for the immutable tables and +[CLI](11-cli.md#public-surface) for the complete flag matrix. + +## Diagnostics + +Each cluster summary includes: + +- size and corpus percentage; +- average membership strength; +- PCA-space medoid; +- representatives and low-strength/far-boundary items; +- nearest cluster ids by PCA centroid distance; +- metadata distributions with numerator and denominator; +- explicit `insufficient_sample` when the denominator is below the configured + guard.
+ +The noise explorer emits only observable text/membership flags: +`short_text`, `long_text`, `multiple_paragraphs`, +`high_conjunction_count`, `template_match`, and +`low_membership_strength`. It does not invent semantic classes. + +## Report Interpretability (Slice 1.1) + +The report layer does not decide whether a cluster is semantically meaningful. +It first evaluates formal persisted-data invariants, then projects only the +facts that those invariants permit. + +```mermaid +flowchart TD + P["Persisted snapshot, run, assignments, summaries"] --> A["assess_partition_validity"] + A -->|" V1-V10 pass "| OK["full_interpretation"] + A -->|" one or more fail "| BAD["limited_diagnostic"] + OK --> M["Partition metrics
dominant ratios and size histogram"] + OK --> I["Cluster interpretation
previews, correlations, numeric summaries"] + OK --> R["Small-cluster provenance completeness"] + BAD --> D["Validity codes + presentation banner
safe diagnostic_facts only"] + M --> X["Shared JSON / HTML projection"] + I --> X + R --> X + D --> X +``` + +Technical validity covers: + +| Invariant | Formal check | +|-----------|--------------------------------------------------------------------------| +| `V1` | assignments exactly cover snapshot items with no duplicate item ids | +| `V2` | assignment labels and unique summaries are fully linked, including noise | +| `V3` | every summary size and membership digest matches its members | +| `V4` | every assignment carries its summary membership digest | +| `V5` | non-noise clusters satisfy effective `min_cluster_size` | +| `V6a` | persisted numeric values used by interpretation are finite or `null` | +| `V7` | run is completed and carries the canonical algorithm manifest | +| `V8` | embedding generation metadata covers the snapshot item set | +| `V9` | representative and boundary ids exist and belong to their cluster | +| `V10` | every decoded persisted JSON field has the expected object shape | + +An invalid run is still inspectable. JSON and HTML expose its invariant codes, +status, presentation banner, and only the raw counts allowed by the safe-output +matrix. They omit `partition_metrics`, cluster interpretation, item previews, +and heuristic score. A missing embedding-generation record is represented as +`embedding_generation: null` with an empty `embedding_items` array. + +Presentation is separate from validity: + +- `maintainer_selected` is explicit persisted provenance, not taxonomy truth; +- `heuristic_recommended` is sweep evidence, not a semantic verdict; +- `candidate_only` is a valid run selected by neither mechanism; +- `technically_invalid` always forces `limited_diagnostic`. + +Slice 1.2 adds `profile_recommended`, `valid_but_profile_rejected`, and the +comparison-level `no_profile_suitable_candidate` banner. Profile rejection +never removes partition metrics or changes a technically valid run to limited +diagnostic mode. 
Labels and descriptions come from the persisted manifest +snapshot linked to the batch, not from the current working tree. + +Full interpretation includes the largest-cluster ratio against the whole corpus +and against assigned non-noise items, a fixed cluster-size histogram, +representative and boundary previews, categorical correlations, numeric +summaries for file counts and description length, and observable +machine-inspectability signals. Small clusters (up to 15 items) also show +provenance completeness. + +Previews are normalized corpus text, truncated to 240 Unicode code points. +They appear only for representatives, boundary items, and noise exploration. +JSON keeps raw strings with ordinary JSON escaping; HTML escapes text at render +time. `content_disclosure` is computed from the previews actually emitted and +lists their scopes. Default exports never attach text previews to +`items[]` entries. + +Export schema `1.2` introduced the interpretation fields: + +- `interpretation_contract_version = "1.0"` and `content_disclosure`; +- `clustering_run.validity` and `clustering_run.presentation`; +- `partition_metrics` in full mode or `diagnostic_facts` in limited mode; +- per-cluster `interpretation` blocks in full mode; +- candidate-local nullable `comparison` facts and top-level + `comparison_summary`. + +Export schema `1.3` preserves those keys and adds: + +- `interpretation_contract_version = "1.1"`; +- `control_plane_contract_version = "1.0"`; +- optional run-level `profile_context` and active `selection`; +- optional sweep-level `profile_summary`; +- candidate-local `profile_suitable` and `is_profile_recommended`. + +Sweep comparison includes every persisted run for the requested snapshot and +embedding generation, including failed or otherwise invalid runs. Only valid +runs receive a score and rank. For invalid runs, dominant ratios and +largest-cluster size are `null` in JSON and `unavailable` in HTML. 
+ +For the wire layout, see +[Schema Layouts](appendix/b-schema-layouts.md#corpus-analytics-json-export-13). +For compatibility rules, see +[Compatibility and Versioning](24-compatibility-and-versioning.md). + +## CLI And Reports + +The approved direct namespace is `codeclone analytics`: + +| Command | Purpose | +|----------------|---------------------------------------------------------------------| +| `snapshot` | Build an intent corpus snapshot | +| `embed` | Generate a separate analytics embedding generation | +| `cluster` | Run one configuration or a bounded sweep | +| `build` | Run snapshot → embed → cluster | +| `clusters` | List runs for a snapshot | +| `cluster-show` | Export a resolved run as full interpretation or limited diagnostics | +| `outliers` | Emit noise assignment ids | +| `profiles` | List, show, or validate profile manifests | + +`build --sweep --use-recommended` renders the global heuristic winner. +`build --profile PROFILE --use-recommended` renders the profile-batch winner, +or the comparison view when no candidate satisfies the lens. Neither action +records a maintainer selection. `--use-recommended` without explicit or +profile-implied sweep is rejected before dependency checks or artifact +creation. 
+ +Output behavior: + +- single-run JSON contains snapshot/generation manifests, validity, + presentation, and either full interpretation or limited diagnostic facts; +- sweep JSON contains every persisted candidate, nullable comparison fields, + and aggregate valid/invalid/recommendation/selection counts; +- sweep HTML without `--use-recommended` is comparison-only; +- detailed full-mode HTML includes dominant ratios, cluster index, escaped + representative/boundary previews, split categorical/numeric metadata, + provenance completeness, and the noise explorer; +- detailed limited-mode HTML includes the technical-invalid banner and safe + diagnostic overview, without cluster interpretation panels; +- JSON and HTML are self-contained and written atomically to explicit output + paths. + +Expected user/config/capability/schema/integrity errors exit with code `2` and +report on stderr without a traceback. + +## Observability + +With `CODECLONE_OBSERVABILITY_ENABLED=1`, the CLI creates one operation named +`cli.analytics.<command>` with nested spans: + +- `analytics.snapshot` +- `analytics.embed` +- `analytics.cluster` +- `analytics.build` +- `analytics.report` when an export is rendered + +Observability is bootstrapped before analytics stores open, so instrumented +SQLite queries are attributed to the active stage. These measurements are +development telemetry only; see +[Platform Observability](26-platform-observability.md). 
+ +## Cross-Links + +- Historical trajectory evidence: + [Trajectory Quality and Passport](13-engineering-memory/trajectory-quality-and-passport.md) +- Runtime configuration: + [Config and Defaults](10-config-and-defaults.md) +- Exit semantics and terminal surfaces: + [CLI](11-cli.md) +- Version bump rules: + [Compatibility and Versioning](24-compatibility-and-versioning.md) +- SQLite/LanceDB layout: + [Schema Layouts](appendix/b-schema-layouts.md) + +## Locked By Tests + +- `tests/test_analytics_foundation.py` +- `tests/test_analytics_trajectory_selection.py` +- `tests/test_analytics_integration.py` +- `tests/test_analytics_integrity.py` +- `tests/test_analytics_reporting.py` +- `tests/test_analytics_cli.py` +- `tests/test_config_analytics.py` +- `tests/test_sqlite_readonly_openers.py` +- `tests/test_architecture.py::test_analytics_package_does_not_import_forbidden_surfaces` diff --git a/docs/book/README.md b/docs/book/README.md index 7ed5a711..7ffdaf6c 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -1,3 +1,9 @@ + + # CodeClone Contracts Book This book is the contract-level documentation for CodeClone v2.x. @@ -11,52 +17,82 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con ## How to read -- Start with **Intro → Architecture map → Terminology**. -- Then read the **contract spine**: Exit codes → Core pipeline → Baseline → Cache → Report. +- Start with **Terminology → Architecture map → Intro**. +- Then read the **pipeline spine**: Core pipeline → CFG → Report → HTML render → Baseline → Cache. +- **Change control** (Structural Change Controller, Engineering Memory, Claim Guard) is the governance layer. - Everything else is supporting detail, invariants, and reference. 
## Table of Contents -- [00-intro.md](00-intro.md) -- [01-architecture-map.md](01-architecture-map.md) -- [02-terminology.md](02-terminology.md) +### Foundations + +- [00-intro.md](00-intro.md) — book charter and goals +- [01-terminology.md](01-terminology.md) — glossary +- [02-architecture-map.md](02-architecture-map.md) — authoritative module table + +### Pipeline and data + +- [03-core-pipeline.md](03-core-pipeline.md) — canonical pipeline contract +- [04-cfg-semantics.md](04-cfg-semantics.md) — CFG design and semantics +- [05-report.md](05-report.md) — report contract (schema v2.11) +- [06-html-render.md](06-html-render.md) — HTML rendering contract +- [07-baseline.md](07-baseline.md) — baseline contract (schema v2.1) +- [08-cache.md](08-cache.md) — cache contract (schema v2.10) -### Contracts spine +### Contracts and config -- [03-contracts-exit-codes.md](03-contracts-exit-codes.md) -- [04-config-and-defaults.md](04-config-and-defaults.md) -- [05-core-pipeline.md](05-core-pipeline.md) -- [06-baseline.md](06-baseline.md) -- [07-cache.md](07-cache.md) -- [08-report.md](08-report.md) +- [09-exit-codes.md](09-exit-codes.md) — exit codes and failure policy +- [10-config-and-defaults.md](10-config-and-defaults.md) — config reference +- [11-cli.md](11-cli.md) — CLI behavior and modes -### Interfaces +### Change control -- [09-cli.md](09-cli.md) -- [20-mcp-interface.md](20-mcp-interface.md) -- [21-vscode-extension.md](21-vscode-extension.md) -- [22-claude-desktop-bundle.md](22-claude-desktop-bundle.md) -- [23-codex-plugin.md](23-codex-plugin.md) -- [10-html-render.md](10-html-render.md) +- [12-structural-change-controller/index.md](12-structural-change-controller/index.md) — overview +- [12-structural-change-controller/finish-controlled-change.md](12-structural-change-controller/finish-controlled-change.md) — + finish pipeline +- [12-structural-change-controller/finish-hygiene.md](12-structural-change-controller/finish-hygiene.md) — hygiene + blocking vs advisory +- 
[12-structural-change-controller/patch-trail.md](12-structural-change-controller/patch-trail.md) — Patch Trail +- [13-engineering-memory/index.md](13-engineering-memory/index.md) — evidence-linked repository memory +- [14-claim-guard.md](14-claim-guard.md) — review claim validation + +### Quality signals + +- [15-health-score.md](15-health-score.md) — health score model +- [16-metrics-and-quality-gates.md](16-metrics-and-quality-gates.md) — metrics mode and gate flags +- [17-dead-code-contract.md](17-dead-code-contract.md) — dead-code detection and test-boundary policy +- [18-suggestions-and-clone-typing.md](18-suggestions-and-clone-typing.md) — suggestions and clone typing +- [19-inline-suppressions.md](19-inline-suppressions.md) — `# codeclone: ignore[...]` +- [20-benchmarking.md](20-benchmarking.md) — reproducible Docker benchmarking ### System properties -- [11-security-model.md](11-security-model.md) -- [12-determinism.md](12-determinism.md) -- [13-testing-as-spec.md](13-testing-as-spec.md) -- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) +- [21-security-model.md](21-security-model.md) — security model and threat boundaries +- [22-determinism.md](22-determinism.md) — determinism policy +- [23-testing-as-spec.md](23-testing-as-spec.md) — tests as specification +- [24-compatibility-and-versioning.md](24-compatibility-and-versioning.md) — compatibility and versioning rules +- [26-platform-observability.md](26-platform-observability.md) — local diagnostics for CodeClone's own runtime +- [27-corpus-analytics.md](27-corpus-analytics.md) — offline intent corpus clustering (optional `[analytics]`) + +### MCP interface + +- [25-mcp-interface/index.md](25-mcp-interface/index.md) — MCP interface contract +- [25-mcp-interface/tools/workflow.md](25-mcp-interface/tools/workflow.md) — workflow tools +- [25-mcp-interface/resources.md](25-mcp-interface/resources.md) — resource URIs +- 
[25-mcp-interface/tools/platform-observability.md](25-mcp-interface/tools/platform-observability.md) — bounded + diagnostics tool -### Quality and recommendations +### Integrations -- [15-health-score.md](15-health-score.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) -- [16-dead-code-contract.md](16-dead-code-contract.md) -- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) -- [18-benchmarking.md](18-benchmarking.md) -- [19-inline-suppressions.md](19-inline-suppressions.md) +- [integrations/vs-code-extension.md](integrations/vs-code-extension.md) — VS Code extension contract +- [integrations/cursor-plugin.md](integrations/cursor-plugin.md) — Cursor plugin contract +- [integrations/claude-code-plugin.md](integrations/claude-code-plugin.md) — Claude Code plugin contract +- [integrations/codex-plugin.md](integrations/codex-plugin.md) — Codex plugin contract +- [integrations/claude-desktop-bundle.md](integrations/claude-desktop-bundle.md) — Claude Desktop bundle contract +- [integrations/sarif.md](integrations/sarif.md) — SARIF projection contract ### Appendix -- [appendix/a-status-enums.md](appendix/a-status-enums.md) -- [appendix/b-schema-layouts.md](appendix/b-schema-layouts.md) -- [appendix/c-error-catalog.md](appendix/c-error-catalog.md) +- [appendix/a-status-enums.md](appendix/a-status-enums.md) — status enums and typed contracts +- [appendix/b-schema-layouts.md](appendix/b-schema-layouts.md) — schema layouts (baseline/cache/report) +- [appendix/c-error-catalog.md](appendix/c-error-catalog.md) — error catalog (contract vs internal) diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 6ce4d6fe..d15701f0 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -1,3 +1,7 @@ + + # Appendix A. 
Status Enums ## Purpose @@ -9,6 +13,12 @@ Centralize machine-readable status sets used across baseline/cache/report/CLI co - Baseline statuses: `codeclone/baseline/trust.py:BaselineStatus` - Cache statuses: `codeclone/cache/versioning.py:CacheStatus` - Exit categories: `codeclone/contracts/__init__.py:ExitCode` +- Intent status: `codeclone/surfaces/mcp/_intent.py:IntentStatus` +- Intent ownership: `codeclone/surfaces/mcp/_workspace_intents.py:IntentOwnership` +- Workspace intent status: `codeclone/surfaces/mcp/_workspace_intents.py:WorkspaceIntentStatus` +- Patch contract: `codeclone/surfaces/mcp/_patch_contract.py:PatchContractStatus` +- Verification profile: `codeclone/surfaces/mcp/_verification_profile.py:VerificationProfile` +- Engineering Memory status: `codeclone/memory/enums.py:MemoryStatus` ## Data model @@ -52,6 +62,81 @@ Defined by `BASELINE_UNTRUSTED_STATUSES`. - `3` gating failure - `5` internal error +### WorkspaceIntentStatus + +- `active` +- `queued` +- `clean` +- `expanded` +- `violated` +- `expired` +- `orphaned` + +Persisted workspace registry records use these lifecycle values. Terminal GC +statuses are `clean`, `expired`, and `orphaned`. Semantics: +[Intent registry & queue](../12-structural-change-controller/intent-registry-and-queue.md). + +### IntentStatus (scope check / session lifecycle) + +- `active` +- `queued` +- `clean` +- `expanded` +- `violated` +- `unverified` +- `expired` + +Used by `manage_change_intent(check)` and session intent records. Finish +top-level `status: "unverified"` is a **response string**, not this enum value. + +### IntentOwnership + +- `own_active` +- `own_stale` +- `foreign_active` +- `foreign_stale` +- `recoverable` +- `expired` + +Semantics: +[Intent registry & queue](../12-structural-change-controller/intent-registry-and-queue.md). 
+ +### PatchContractStatus + +- `accepted` +- `accepted_with_external_changes` +- `violated` +- `unverified` +- `expired` + +Semantics: +[Patch contract verification](../12-structural-change-controller/patch-contract-verify.md). + +### VerificationProfile + +- `state_artifact_change` +- `python_structural` +- `governance_config` +- `documentation_only` +- `non_python_patch` + +Priority-ordered. A single file from a higher-priority category overrides +the entire patch. Semantics are defined in +[Structural Change Controller § Verification Profiles](../12-structural-change-controller/verification-profiles.md). + +### MemoryStatus + +Defined by `codeclone/memory/enums.py:MemoryStatus`. Semantics are defined in +[Engineering Memory § Staleness and anchor durability](../13-engineering-memory/staleness-and-anchors.md). + +- `draft` — unapproved agent candidate +- `active` — trusted or system fact; included in default retrieval +- `historical` — anchor subject absent at `HEAD`; preserved and included in default retrieval +- `stale` — drift or ingest contradiction; excluded from default retrieval +- `superseded` — replaced by a newer record +- `rejected` — human rejected draft +- `archived` — explicitly archived + ## Contracts - Status values are serialized into report metadata. diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 8a0ef55f..83a329f4 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -1,16 +1,25 @@ + + # Appendix B. Schema Layouts ## Purpose Compact structural layouts for baseline/cache/report contracts in the current -2.0 release line. +`2.1` release line. Generator/package version in JSON examples is illustrative; +the actual version is defined in `codeclone/contracts/__init__.py` and +`pyproject.toml`. 
## Baseline schema (`2.1`) ```json { "meta": { - "generator": { "name": "codeclone", "version": "2.0.2" }, + "generator": { + "name": "codeclone", + "version": "2.0.2" + }, "schema_version": "2.1", "fingerprint_version": "1", "python_tag": "cp314", @@ -20,11 +29,19 @@ Compact structural layouts for baseline/cache/report contracts in the current "api_surface_payload_sha256": "..." }, "clones": { - "functions": ["|"], - "blocks": ["|||"] + "functions": [ + "|" + ], + "blocks": [ + "|||" + ] }, - "metrics": { "...": "optional embedded metrics snapshot" }, - "api_surface": { "...": "optional embedded public API snapshot" } + "metrics": { + "...": "optional embedded metrics snapshot" + }, + "api_surface": { + "...": "optional embedded public API snapshot" + } } ``` @@ -61,14 +78,19 @@ Notes: ```json { "meta": { - "generator": { "name": "codeclone", "version": "2.0.2" }, + "generator": { + "name": "codeclone", + "version": "2.0.2" + }, "schema_version": "1.2", "python_tag": "cp314", "created_at": "2026-03-11T00:00:00Z", "payload_sha256": "...", "api_surface_payload_sha256": "..." 
}, - "metrics": { "...": "metrics snapshot" }, + "metrics": { + "...": "metrics snapshot" + }, "api_surface": { "modules": [ { @@ -92,11 +114,11 @@ Notes: } ``` -## Cache schema (`2.8`) +## Cache schema (`2.10`) ```json { - "v": "2.8", + "v": "2.10", "payload": { "py": "cp314", "fp": "1", @@ -111,25 +133,154 @@ Notes: }, "files": { "codeclone/cache/store.py": { - "st": [1730000000000000000, 2048], - "ss": [450, 12, 3, 1], - "u": [[ - "qualname", 1, 2, 2, 1, "fp", "0-19", 1, 0, "low", "raw_hash", - 0, "none", 0, "fallthrough", "none", "none" - ]], - "b": [["qualname", 10, 14, 5, "block_hash"]], - "s": [["qualname", 10, 14, 5, "segment_hash", "segment_sig"]], - "cm": [["qualname", 1, 30, 3, 2, 4, 2, "low", "low"]], - "cc": [["qualname", ["pkg.a", "pkg.b"]]], - "md": [["pkg.a", "pkg.b", "import", 10]], - "dc": [["pkg.a:unused_fn", "unused_fn", 20, 24, "function"]], - "rn": ["used_name"], - "rq": ["pkg.dep:used_name"], - "in": ["pkg.dep"], - "cn": ["ClassName"], - "rr": [["pkg.api:list_items", 20, 24, "function", "fastapi", "registers_handler", "medium", "route decorator", "router.get", "pkg.api:router"]], - "sc": [["process_boundary", "subprocess_run", "pkg.runner", "pkg.runner:run", 10, 10, "callable", "exact_call", "call", "subprocess.run"]], - "sf": [["duplicated_branches", "key", [["stmt_seq", "Expr,Return"]], [["pkg.a:f", 10, 12]]]] + "st": [ + 1730000000000000000, + 2048 + ], + "ss": [ + 450, + 12, + 3, + 1 + ], + "u": [ + [ + "qualname", + 1, + 2, + 2, + 1, + "fp", + "0-19", + 1, + 0, + "low", + "raw_hash", + 0, + "none", + 0, + "fallthrough", + "none", + "none" + ] + ], + "b": [ + [ + "qualname", + 10, + 14, + 5, + "block_hash" + ] + ], + "s": [ + [ + "qualname", + 10, + 14, + 5, + "segment_hash", + "segment_sig" + ] + ], + "cm": [ + [ + "qualname", + 1, + 30, + 3, + 2, + 4, + 2, + "low", + "low" + ] + ], + "cc": [ + [ + "qualname", + [ + "pkg.a", + "pkg.b" + ] + ] + ], + "md": [ + [ + "pkg.a", + "pkg.b", + "import", + 10 + ] + ], + "dc": [ + [ + 
"pkg.a:unused_fn", + "unused_fn", + 20, + 24, + "function" + ] + ], + "rn": [ + "used_name" + ], + "rq": [ + "pkg.dep:used_name" + ], + "in": [ + "pkg.dep" + ], + "cn": [ + "ClassName" + ], + "rr": [ + [ + "pkg.api:list_items", + 20, + 24, + "function", + "fastapi", + "registers_handler", + "medium", + "route decorator", + "router.get", + "pkg.api:router" + ] + ], + "sc": [ + [ + "process_boundary", + "subprocess_run", + "pkg.runner", + "pkg.runner:run", + 10, + 10, + "callable", + "exact_call", + "call", + "subprocess.run" + ] + ], + "sf": [ + [ + "duplicated_branches", + "key", + [ + [ + "stmt_seq", + "Expr,Return" + ] + ], + [ + [ + "pkg.a:f", + 10, + 12 + ] + ] + ] + ] } } }, @@ -150,6 +301,11 @@ Notes: canonicalization must not rewrite callable signature order. - `u` row decoder accepts both legacy 11-column rows and canonical 17-column rows (legacy rows map new structural fields to neutral defaults). +- `fr` (schema **`2.9`+**) stores per-function relationship facts: caller + qualname plus relationship rows (`relation_kind`, `resolution_status`, + `origin_lane`, `target_qualname`, line, expression, `resolution_rule`). + Facts are rebuildable and off the canonical report; schema **`2.10`** adds + cross-file aggregation onto the analysis result and MCP run record. 
## Report schema (`2.11`) @@ -172,9 +328,21 @@ Notes: }, "analysis_thresholds": { "design_findings": { - "complexity": { "metric": "cyclomatic_complexity", "operator": ">", "value": 20 }, - "coupling": { "metric": "cbo", "operator": ">", "value": 10 }, - "cohesion": { "metric": "lcom4", "operator": ">=", "value": 4 } + "complexity": { + "metric": "cyclomatic_complexity", + "operator": ">", + "value": 20 + }, + "coupling": { + "metric": "cbo", + "operator": ">", + "value": 10 + }, + "cohesion": { + "metric": "lcom4", + "operator": ">=", + "value": 4 + } } }, "baseline": { @@ -642,10 +810,301 @@ DESIGN FINDINGS INTEGRITY ``` +## Engineering Memory schema (`1.7`) + +SQLite database at `.codeclone/memory/engineering_memory.sqlite3` (default). +Schema version stored in `memory_meta.schema_version`. + +Core tables: + +| Table | Role | +|--------------------------|-------------------------------------------------------------| +| `memory_records` | Typed statements with status, confidence, origin, payload | +| `memory_subjects` | Path/symbol/module links (`subject_kind`, `subject_key`) | +| `memory_evidence` | Deterministic evidence refs (report, git_commit, doc, …) | +| `memory_fts` | FTS5 search index (schema 1.1+) | +| `memory_revisions` | Governance audit trail | +| `memory_ingestion_runs` | Init/refresh run metadata | +| `memory_projection_jobs` | Coalesced trajectory/semantic/Experience jobs (schema 1.3+); `flush_claimed_by` flush-scheduling slot (schema 1.7+) | + +Trajectory tables (schema **`1.2`**+ trajectory DDL, active projection +**`trajectory-v3`**): + +| Table | Role | +|-------------------------------------|--------------------------------------------------------------------------------| +| `memory_trajectories` | One row per `(project_id, workflow_id, projection_version)` with quality score | +| `memory_trajectory_steps` | Ordered audit steps with frozen `event_core_json` | +| `memory_trajectory_subjects` | Path/module subjects linked to a trajectory | +| 
`memory_trajectory_evidence` | Report/run/audit evidence refs | +| `memory_trajectory_patch_trails` | Patch Trail JSON + digest per trajectory (schema **`1.4`**, Phase 26) | +| `memory_trajectory_projection_runs` | Rebuild run manifest | + +Experience tables (schema **`1.6`**, derived from trajectory evidence): + +| Table | Role | +|------------------------------|--------------------------------------------------------------| +| `memory_experiences` | Advisory distilled patterns (`experience-v1`) | +| `memory_experience_facets` | Agent-family facets today; profile/intent kinds are reserved | +| `memory_experience_evidence` | Contributing trajectory ids and outcomes | + +Patch Trail JSON uses `PATCH_TRAIL_SCHEMA_VERSION` (currently **`1`**) in +`codeclone/contracts/__init__.py`. Trajectory JSONL export rows use +`TRAJECTORY_EXPORT_SCHEMA_VERSION` (**`2`**) in +`codeclone/memory/trajectory/profiles.py` — separate from SQLite schema version. + +Record identity uses stable `identity_key` strings for upsert during refresh. +Migration path: `codeclone/memory/schema_migrate.py`. + +See [Engineering Memory](../13-engineering-memory/index.md) for lifecycle and agent +surfaces. + +## Semantic index sidecar (format `2`) + +Optional LanceDB directory (default `.codeclone/memory/semantic_index.lance`). +Format version constant: `SEMANTIC_INDEX_FORMAT_VERSION` in +`codeclone/contracts/__init__.py` (currently **`2`**). 
+ +Table columns (PyArrow): + +| Column | Type | Notes | +|--------------------|-----------------|----------------------------------------------------| +| `id` | string | Row id; chunk rows use `trajectory:{id}:chunk:NNN` | +| `source` | string | `memory` / `audit` / `trajectory` | +| `parent_id` | string (nullable) | Trajectory id for chunk rows; null for single-row | +| `chunk_index` | int32 (nullable) | Zero-based chunk index | +| `chunk_count` | int32 (nullable) | Total chunks for the parent trajectory | +| `project_id` | string | | +| `subject_path` | string | | +| `kind` | string | | +| `status` | string | | +| `text_hash` | string | Chunk text hash (idempotent upsert key) | +| `embedding_model` | string | | +| `vector` | float32 list | Fixed embedding dimension | + +Trajectory projections longer than the embedding model window are split into +deterministic token-aligned chunks (strategy version +`SEMANTIC_CHUNK_STRATEGY_VERSION` in `codeclone/memory/semantic/chunking.py`). +Single-chunk trajectories keep the trajectory id as `id` with null chunk fields. +Retrieval collapses chunk hits to one score per parent trajectory. + +- **Not** governed by `ENGINEERING_MEMORY_SCHEMA_VERSION` — bumping memory SQLite + schema does not automatically invalidate the vector sidecar. +- **Rebuild** on incompatible format bumps (`codeclone memory semantic rebuild`); + no SQLite migration path for the sidecar. +- Row/projection semantics: [Engineering Memory](../13-engineering-memory/index.md); + bump rules: [24-compatibility-and-versioning.md](../24-compatibility-and-versioning.md). + +## Platform Observability schema (`1.1`) + +Optional local SQLite database at +`.codeclone/db/platform_observability.sqlite3`. It is disposable development +telemetry, not report, baseline, cache, audit, or Engineering Memory truth. 
+ +| Table | Role | +|-----------------------|------------------------------------------------------------------------------------------------------------------------| +| `platform_meta` | Schema version metadata. | +| `platform_operations` | Surface-level operation identity, correlation, duration, status, bounded payload sizes, and optional process metrics. | +| `platform_spans` | Ordered subsystem timing, reason/dedupe metadata, counters, normalized SQL fingerprints, and optional process metrics. | + +Operation and span rows are persisted together in one transaction. Profile +columns are nullable and populated only when profiling is enabled with +`codeclone[perf]`. `db_fingerprints` is additively migrated for older local +stores. + +See [Platform Observability](../26-platform-observability.md) for configuration, +privacy, query, and anti-inference rules. + +## Corpus analytics store (`1.2`) + +Optional SQLite database (default `.codeclone/analytics/corpus_clustering.sqlite3`) +and LanceDB vector directory (default `.codeclone/analytics/corpus_vectors`). +Derived offline analytics — not report, baseline, cache, audit, or Engineering +Memory truth. 
+ +| Artifact | Role | +|-------------------------|-----------------------------------------------------------------------| +| `corpus_snapshots` | Immutable-by-contract snapshot metadata and source digests | +| `corpus_items` | Normalized representation, metadata, and optional registry overlay | +| `embedding_generations` | Provider/model/preprocessing manifest | +| `embedding_items` | Vector row keys, float32 digests, dimensions; no vector blobs | +| `clustering_runs` | Requested/effective parameters, algorithm manifest, lifecycle status | +| `cluster_assignments` | Per-run item label, strength, and membership digest | +| `cluster_summaries` | Canonical display id and persisted diagnostics per cluster/noise | +| `profile_manifest_snapshots` | Immutable canonical manifest values, labels, and descriptions | +| `profile_batches` | One immutable execution receipt per profile sweep | +| `profile_batch_runs` | Ordered effective-parameter membership for each batch | +| `profile_assessments` | Technical-validity-aware suitability facts for batch members | +| `run_selections` | Append-only global or profile-batch maintainer decisions | +| LanceDB sidecar | Separate float32 vectors from Engineering Memory semantic index | + +Store schema version: `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION` in +`codeclone/contracts/__init__.py` (currently **`1.2`**). + +Writable open chains `1.0 → 1.1 → 1.2`; it never skips the intermediate +integrity migration. Read-only open never migrates and rejects stale schema. +SQLite triggers prevent orphan-producing inserts, relationship updates, and +parent deletes; unique indexes protect vector row keys, non-null display +cluster ids, and one effective candidate per profile batch. 
+ +Profile state is an overlay over immutable clustering facts: + +```mermaid +erDiagram + CORPUS_SNAPSHOTS ||--o{ CLUSTERING_RUNS : owns + EMBEDDING_GENERATIONS ||--o{ CLUSTERING_RUNS : supplies + PROFILE_MANIFEST_SNAPSHOTS ||--o{ PROFILE_BATCHES : fixes + PROFILE_BATCHES ||--o{ PROFILE_BATCH_RUNS : contains + CLUSTERING_RUNS ||--o{ PROFILE_BATCH_RUNS : participates + PROFILE_BATCHES ||--o{ PROFILE_ASSESSMENTS : assesses + CLUSTERING_RUNS ||--o{ PROFILE_ASSESSMENTS : receives + PROFILE_BATCHES o|--o{ RUN_SELECTIONS : scopes + CLUSTERING_RUNS ||--o{ RUN_SELECTIONS : selects +``` + +`clustering_runs` has no profile columns. A re-sweep creates a new +`profile_batches` row with the exact manifest and candidate-space digests. +`run_selections` supersedes earlier active heads in the same +`(snapshot_id, embedding_generation_id, profile_batch_id)` scope. A null batch +means global selection. The legacy `selected_by_maintainer` field is a +global-scope mirror only. + +The SQLite transaction and LanceDB sidecar cannot share one physical +transaction. The embedding workflow therefore writes metadata and vectors as +one controlled operation, rolls SQLite back and removes the generation on +ordinary failures, and validates row keys, dimensions, and float32 digests +before clustering. Crash residue is detected as an integrity error rather than +accepted as a completed generation. + +### Corpus analytics JSON export (`1.3`) + +`CORPUS_EXPORT_SCHEMA_VERSION = "1.3"` projects interpretation contract `1.1` +and control-plane contract `1.0` over store schema `1.2`; it does not migrate +the SQLite database. + +```text +export +├── schema_version = "1.3" +├── interpretation_contract_version = "1.1" +├── control_plane_contract_version = "1.0" +├── snapshot +├── embedding_generation | null +├── embedding_items[] +├── clustering_run +│ ├── validity +│ ├── presentation +│ ├── profile_context? # stored batch assessment + manifest snapshot +│ ├── selection? 
# active event for export scope +│ └── partition_metrics | diagnostic_facts +├── clusters[] # full mode only +│ └── interpretation +│ ├── representative_previews[] +│ ├── boundary_previews[] +│ ├── categorical_correlations +│ ├── numeric_summaries +│ ├── provenance_completeness +│ └── machine_inspectability_signals +├── assignments[] # full mode only +├── noise_items[] # full mode only +├── profile_summary? # sweep export, sibling of comparison_summary +└── content_disclosure +``` + +`clustering_run.validity.failed_invariants` is a deterministic ordered subset +of `V1` through `V10`. `presentation.projection_mode` is +`full_interpretation` only when every invariant passes; otherwise it is +`limited_diagnostic`, `partition_metrics` is omitted, `score` is `null`, and +cluster/item interpretation arrays are absent. + +Sweep comparison exports every persisted run for the requested snapshot and +embedding generation. Each candidate has a sibling `comparison` object: + +```json +{ + "score": null, + "rank": null, + "recommended_by_heuristic": false, + "dominant_cluster_ratio": null, + "dominant_assigned_ratio": null, + "largest_cluster_size": null +} +``` + +Only technically valid candidates receive non-null comparison metrics, score, +and rank. Profile-scoped candidates add `profile_suitable` and +`is_profile_recommended`. `comparison_summary` preserves its Slice 1.1 keys; +`profile_summary` is a sibling with batch identity, manifest snapshot metadata, +suitability counts, recommendation rationale, and active selection. + +`content_disclosure` is derived from the final payload. Its preview scopes are +`cluster_representatives`, `cluster_boundaries`, and `noise_items`; the +contract limit is 240 Unicode code points. Default `items[]` entries do not +contain normalized text previews. 
+ +### Corpus representation contract (`3`) + +New intent snapshots persist explicit provenance booleans inside +`corpus_items.metadata_json`: + +```json +{ + "provenance": { + "trajectory": {"selected": false}, + "patch_trail": {"present": false}, + "registry_overlay": {"present": false} + } +} +``` + +Trajectory and Patch Trail identity evidence retains its existing digest +behavior. Registry-overlay content and presence remain advisory and excluded +from source identity. Contract-2 snapshots are immutable and are interpreted +with conservative legacy rules; they are not rewritten. + +See [Corpus Analytics](../27-corpus-analytics.md) for CLI, configuration, and trust +boundaries; +[Profile Control Plane](../27-corpus-analytics.md#profile-control-plane-slice-12) +for batch and selection semantics; and +[Report Interpretability](../27-corpus-analytics.md#report-interpretability-slice-11) +for validity and privacy rules. + +## Subsystem-local version constants + +Not defined in `codeclone/contracts/__init__.py`; bump in the owning module.
+ +| Constant | Value | Owner | +|---|---|---| +| `AUDIT_EVENT_CORE_VERSION` | `2` | `codeclone/audit/events.py` | +| `CONTEXT_CONTRACT_VERSION` | `1` | `codeclone/surfaces/mcp/_implementation_context.py` | +| `CALL_RESOLUTION_VERSION` | `1` | `codeclone/surfaces/mcp/_implementation_context.py` | +| `TRAJECTORY_EXPORT_SCHEMA_VERSION` | `2` | `codeclone/memory/trajectory/profiles.py` | + +Central corpus and governance constants (also in `codeclone/contracts/__init__.py`): + +| Constant | Value | +|---|---| +| `IDE_GOVERNANCE_PROTOCOL_VERSION` | `2` | +| `CORPUS_ANALYTICS_STORE_SCHEMA_VERSION` | `1.2` | +| `CORPUS_EXPORT_SCHEMA_VERSION` | `1.3` | +| `CORPUS_PROFILE_MANIFEST_SCHEMA_VERSION` | `1` | +| `CORPUS_CONTROL_PLANE_CONTRACT_VERSION` | `1.0` | +| `CORPUS_REPRESENTATION_CONTRACT_VERSION` | `3` | +| `CORPUS_NORMALIZER_VERSION` | `1` | +| `CORPUS_EMBEDDING_CONTRACT_VERSION` | `2` | +| `CORPUS_AGENT_LABEL_CONTRACT_VERSION` | `1` | +| `CORPUS_PARTITION_MAP_VERSION` | `1` | + ## Refs - `codeclone/baseline/clone_baseline.py` - `codeclone/cache/store.py` +- `codeclone/memory/schema.py` +- `codeclone/memory/schema_trajectory.py` +- `codeclone/memory/schema_migrate.py` +- `codeclone/memory/semantic/models.py` +- `codeclone/observability/store/schema.py` +- `codeclone/contracts/__init__.py` +- `codeclone/audit/events.py` +- `codeclone/surfaces/mcp/_implementation_context.py` - `codeclone/report/document/builder.py` - `codeclone/report/renderers/text.py` - `codeclone/report/renderers/markdown.py` diff --git a/docs/book/appendix/c-error-catalog.md b/docs/book/appendix/c-error-catalog.md index 4953c6d5..37393c52 100644 --- a/docs/book/appendix/c-error-catalog.md +++ b/docs/book/appendix/c-error-catalog.md @@ -1,3 +1,7 @@ + + # Appendix C. 
Error Catalog ## Purpose diff --git a/docs/book/integrations/claude-code-plugin.md b/docs/book/integrations/claude-code-plugin.md new file mode 100644 index 00000000..1a43e4b4 --- /dev/null +++ b/docs/book/integrations/claude-code-plugin.md @@ -0,0 +1,86 @@ + + +# Claude Code Plugin + +## Distribution contract + +The monorepo source lives under `plugins/claude-code-codeclone/`. +`scripts/sync_integrations.py --target claude-code` publishes it into the +dedicated `orenlab/codeclone-claude-code` storefront. + +The distribution repository contains: + +| Path | Role | +|------------------------------------------------|------------------------------------------------------------------| +| `.claude-plugin/marketplace.json` | Marketplace catalog named `orenlab-codeclone` | +| `plugins/codeclone/.claude-plugin/plugin.json` | Plugin identity and metadata | +| `plugins/codeclone/.mcp.json` | Local stdio MCP definition | +| `plugins/codeclone/skills/` | Review, hotspots, change control, memory, implementation context, platform observability (maintainer-only) | +| `plugins/codeclone/scripts/launch_mcp.py` | Standalone workspace-first launcher | + +## Installation contract + +Public installation is the two-step marketplace flow: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +Local `--plugin-dir` loading is a development path, not the user installation +contract. + +## Runtime model + +```mermaid +flowchart TD + A["Marketplace catalog"] --> B["Installed CodeClone plugin"] + B --> C["Namespaced skills"] + B --> D[".mcp.json"] + D --> E["Workspace-first launcher"] + E --> F["Local codeclone-mcp"] + F --> G["Canonical analysis and change control"] +``` + +The plugin is additive. It provides six skills and the standard agent MCP +surface from the locally resolved `codeclone-mcp` version. It does not install +the Python package, filter tools, or create a second analysis model. 
+ +The MCP configuration uses `${CLAUDE_PLUGIN_ROOT}` because Claude Code copies +installed plugins into a versioned cache. Storefront sync replaces the +monorepo delegate launcher with the full standalone implementation. + +The plugin manifest intentionally omits `version`. For a Git-based marketplace, +Claude Code can identify the installed revision by commit SHA; adding an +explicit version would require the distribution release process to bump it for +every plugin change or risk retaining a stale cache entry. + +## Read-only and state boundaries + +The server must not mutate source files, baselines, analysis cache, or canonical +reports. Controller coordination, audit, and Engineering Memory may write only +their documented bounded local state. + +## Separation from Claude Desktop + +Claude Code and Claude Desktop are different install surfaces: + +- Claude Code installs a marketplace plugin with skills and `.mcp.json`. +- Claude Desktop installs the local `.mcpb` bundle. + +Neither surface owns analysis semantics; both connect to `codeclone-mcp`. + +## Current limits + +- `codeclone[mcp]` must already be available in the workspace environment or on + `PATH`. +- Duplicate manual MCP registration can expose the same server twice; keep one + active setup path. +- Plugin skills are namespaced under the `/codeclone:` prefix (`/codeclone:<skill-name>`). + +## Further reading + +- [Claude Code setup](../../guide/integrations/claude-code/setup.md) +- [MCP usage guide](../../guide/mcp/README.md) +- [MCP interface contract](../25-mcp-interface/index.md) +- [Claude Desktop bundle](claude-desktop-bundle.md) diff --git a/docs/book/integrations/claude-desktop-bundle.md b/docs/book/integrations/claude-desktop-bundle.md new file mode 100644 index 00000000..018fedb4 --- /dev/null +++ b/docs/book/integrations/claude-desktop-bundle.md @@ -0,0 +1,87 @@ + + +# Claude Desktop Bundle + +This contract covers the Claude Desktop `.mcpb` package.
Claude Code uses the +separate [Claude Code plugin](claude-code-plugin.md) and marketplace workflow. + +## Bundle workflow + +1. Build: `cd extensions/claude-desktop-codeclone && node scripts/build-mcpb.mjs` +2. Claude Desktop: **Settings → Extensions → Install Extension** → select `.mcpb` +3. If you want to bypass auto-discovery, set **CodeClone launcher command** in + the bundle settings to an absolute path. + +## Settings + +| Setting | Purpose | +|--------------------------------|-------------------------------------------------------------------------------------------------------------| +| **Workspace root path** | Optional absolute project root; launcher prefers that workspace `.venv` when Claude starts outside the repo | +| **CodeClone launcher command** | Absolute path or bare command for `codeclone-mcp` | +| **Advanced launcher args** | JSON array of extra args (transport is always stdio) | + +## Runtime model + +Node wrapper launches `codeclone-mcp` via local `stdio`. It: + +1. resolves a local `codeclone-mcp` launcher +2. validates advanced args +3. forces `--transport stdio` +4. launches the child process with `shell: false` +5. proxies stdio until shutdown + +The wrapper prefers a workspace-local `.venv`, then a Poetry environment, then +user-local install paths, then `PATH`. + +The bundle does **not** pass `--ide-governance-channel`. Agents see the standard +**33** default MCP tools (35 with `--ide-governance-channel`). VS Code session stats, audit trail webviews, and IDE +Memory +governance (`prepare_governance` / `commit_governance`) require the VS Code +extension launcher. + +Engineering Memory and optional semantic search follow the server contract in +[Engineering Memory](../13-engineering-memory/index.md) (`query_engineering_memory`, +`get_relevant_memory`; semantic off by default in pyproject). + +## Privacy + +Local wrapper only — no telemetry, no cloud sync, no remote listener. +See [Privacy Policy](../../privacy-policy.md). 
+ +## Design rules + +- **Canonical MCP first**: the bundle keeps Claude Desktop on the same + documented MCP surface as other clients. +- **Local-only transport**: reject transport and remote-listener overrides. +- **Setup honesty**: fail with a bounded install hint when the launcher is + missing. +- **No hidden runtime dependency games**: the bundle does not pretend to bundle + Python or CodeClone itself. +- **Small and deterministic**: package only the wrapper, manifest, icon, and + documentation needed for local installation. + +## Non-guarantees + +- Bundle presentation inside Claude Desktop may evolve with MCPB client UX. +- Auto-discovery heuristics for common launcher locations may evolve as long as + the explicit launcher setting remains stable. +- The bundle does not guarantee automatic updates or remote install flows. + +## Current limits + +- expects either a workspace launcher, a user-local/global launcher, or an + explicitly configured absolute launcher path +- local install surface, not a hosted service layer + +## Source of truth + +- CLI remains the scripting and CI surface. +- MCP remains the read-only agent/client contract. +- Claude Code installs the dedicated marketplace plugin; direct `mcp add` + remains a manual fallback. +- The Claude Desktop bundle is the installable local package layer for users + who want a native Claude Desktop setup path. + +For the underlying MCP contract, see +[MCP usage guide](../../guide/mcp/README.md) and +[MCP interface contract](../25-mcp-interface/index.md). 
diff --git a/docs/book/integrations/codex-plugin.md b/docs/book/integrations/codex-plugin.md new file mode 100644 index 00000000..3ae09956 --- /dev/null +++ b/docs/book/integrations/codex-plugin.md @@ -0,0 +1,89 @@ + + +# Codex Plugin + +## What ships in the plugin + +| File | Purpose | +|--------------------------------------------|-----------------------------------------| +| `.codex-plugin/plugin.json` | Plugin metadata, prompts, instructions | +| `.mcp.json` | Workspace-first MCP launcher definition | +| `scripts/launch_mcp` | Shell-free launcher wrapper for Codex | +| `skills/codeclone-review/` | Conservative-first full review skill | +| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | +| `skills/codeclone-change-control/` | Intent-first change workflow skill | +| `skills/codeclone-engineering-memory/` | Engineering memory read/write skill | +| `skills/codeclone-implementation-context/` | Bounded pre-edit context skill | +| `skills/codeclone-platform-observability/` | Maintainer-only observer diagnostics | +| `assets/` | Plugin branding | + +Six skills ship in the plugin (review, hotspots, change-control, +engineering-memory, implementation-context, platform-observability). The last is +**only** for developing CodeClone itself — not for end-user repository review. + +## Runtime model + +Additive — the marketplace install provides a local MCP definition and **six** +skills. New canonical MCP surfaces from the local `codeclone-mcp` version flow +through directly, including Coverage Join facts and the optional `coverage` +help topic when supported. The plugin does not mutate `~/.codex/config.toml` or +install a second server binary. The bundled launcher does not filter MCP tools; +agents receive the full default agent surface from the resolved +`codeclone-mcp` server (no `--ide-governance-channel` — IDE-only session/audit +tools are VS Code only). 
+ +`.agents/plugins/marketplace.json` is the monorepo-local source entry used for +development and packaging into `orenlab/codeclone-codex`; it is not the public +install path. + +Public installation is: + +```bash +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone +``` + +## Read-only contract + +Repository truth stays read-only: MCP must not mutate source files, baselines, +analysis cache, or canonical report artifacts. Change-control and session tools +may write ephemeral coordination state through the configured workspace intent +registry (file or SQLite backend) and optional audit records when enabled. + +## Design rules + +- **Codex-native packaging**: keep source under `plugins/` and publish the + marketplace distribution through `orenlab/codeclone-codex`. +- **Canonical MCP first**: all analysis still flows through `codeclone-mcp`. +- **Skill guidance, not analysis logic**: the skills teach conservative-first + CodeClone review but do not create new findings. +- **No hidden installation side effects**: the plugin does not patch + `~/.codex/config.toml`. +- **Source clarity**: the monorepo copy is the source; the public install + surface is the `orenlab/codeclone-codex` distribution. +- **Launcher honesty**: the plugin assumes `codeclone-mcp` is already + installed in the current workspace or reachable on `PATH`, and prefers the + workspace environment when one is present. +- **Shell-free launch**: the bundled launcher must stay argv-based and + local-stdio-only. + +## Non-guarantees + +- Codex plugin UI presentation may evolve independently of the plugin manifest + content. +- Users who already configured `codeclone-mcp` manually may still prefer the + direct MCP path over the bundled plugin MCP definition. + +## Current limits + +- If you already registered `codeclone-mcp` manually, keep only one setup path + to avoid duplicate MCP surfaces.
+- The bundled `.mcp.json` prefers `.venv`, then a Poetry env, then `PATH`. +- The bundled launcher stays shell-free and local-stdio-only. + +## Further reading + +- [MCP usage guide](../../guide/mcp/README.md) +- [MCP interface contract](../25-mcp-interface/index.md) +- [Engineering Memory](../13-engineering-memory/index.md) +- [Structural Change Controller](../12-structural-change-controller/index.md) diff --git a/docs/book/integrations/cursor-plugin.md b/docs/book/integrations/cursor-plugin.md new file mode 100644 index 00000000..6a49bd8e --- /dev/null +++ b/docs/book/integrations/cursor-plugin.md @@ -0,0 +1,144 @@ + + +# Cursor Plugin + +## Installation contract + +The public source is +`https://github.com/orenlab/codeclone-cursor`. Users install CodeClone from +Cursor's marketplace panel. Team administrators expose the storefront through +**Dashboard → Settings → Plugins → Team Marketplaces → Add Marketplace → +Import from Repo**. + +`~/.cursor/plugins/local` symlinks are development-only and must not be +presented as the normal installation path. + +## Rules + +All three ship under `plugins/cursor-codeclone/rules/`: + +| File | Activation | Role | +|---------------------------|---------------------|------------------------------------------------------------------------------------------| +| `codeclone-workflow.mdc` | `alwaysApply: true` | MCP-only discipline, absolute `root`, tool preferences, memory `root` requirement | +| `change-control-gate.mdc` | `alwaysApply: true` | Hard gate: `start` before edit, `finish` before done, memory before finish when required | +| `codeclone-python.mdc` | `globs: **/*.py` | Python context: analyze before structural edits, blast radius awareness | + +The change-control **skill** expands profiles and queue/promote; the +**change-control-gate** rule is the always-on prohibition layer. 
+ +### Skill contract invariants + +Each skill follows these invariants: + +- **MCP tools only** — no CLI or local report fallbacks +- **Absolute roots** — analysis and memory tools require absolute `root` +- **Source of truth** — report CodeClone findings as-is +- **Conservative first pass** unless the user requests deeper sensitivity +- **Workflow tools preferred** — `start_controlled_change` / + `finish_controlled_change` for edits; atomic verify is advanced/fallback +- **Engineering Memory** — optional semantic search when server index is built; + human approve via VS Code Memory view or CLI `--i-know-what-im-doing` + +Skills are invocable via `/name` in Cursor chat (see each `SKILL.md`). + +## Skills + +Eight skills ship under `plugins/cursor-codeclone/skills/`: + +| Skill | Role | +|------------------------------------|------------------------------------------| +| `codeclone-change-control` | Intent-first edit workflow | +| `codeclone-engineering-memory` | Memory retrieval and draft writes | +| `codeclone-implementation-context` | Bounded structural context from MCP runs | +| `codeclone-hotspots` | Quick hotspot / health snapshot | +| `codeclone-review` | Conservative-first full review | +| `codeclone-platform-observability` | **Maintainer-only** — CodeClone runtime diagnostics (requires observer enable) | +| `blast-radius` | Read-only blast-radius inspection | +| `production-triage` | Baseline-relative production triage | + +Codex and Claude Code plugins ship six shared skills (includes +`codeclone-platform-observability`; no standalone `blast-radius` or +`production-triage`). 
+ +## Hooks + +Documented from `hooks/hooks.json` and installers — **hook Python sources not +edited in doc-only passes.** + +### Why Settings → Hooks can show "Configured Hooks (0)" + +| Source | Path | Shown in Hooks UI | +|-----------------|--------------------------------------|--------------------------------------------| +| Project | `.cursor/hooks.json` | yes | +| User | `~/.cursor/hooks.json` | yes | +| Plugin manifest | `hooks/hooks.json` via `plugin.json` | **no** (may still run when plugin enabled) | + +Plugin manifest commands use `python "${CURSOR_PLUGIN_ROOT}/hooks/run_hook.py" +"<subcommand>"` with subcommands `pre-tool-use-gate`, `post-tool-use`, +`session-cleanup`. + +### Hook events + +- **preToolUse** (`Write|StrReplace|ApplyPatch|Shell`, `failClosed: true`, 5s + timeout) — blocks when the workspace intent registry has no live **active** + intent. Uses `codeclone.workspace_intent` (file or SQLite registry). Scope: + - `python` (default): `.py` / `.pyi` and matching shell + - `repo`: any path under workspace root (including `.git/**`) + - Config: `.cursor/codeclone-hooks.json` or env — see + [Environment variable overrides](../10-config-and-defaults.md#cursor-plugin-hooks) +- **postToolUse** (`Write|StrReplace|ApplyPatch`, 5s) — injects + `additional_context` **only when the edited path is `.py` / `.pyi`** + (`post-tool-use-python-edit.py`). +- **stop** (`loop_limit: 1`, 5s) — optional `followup_message` when the + workspace intent registry still has **own or recoverable Cursor** non-terminal + intents (active, queued, violated, expanded). Foreign active/stale intents + from other agents are ignored — they require coordination, not + `manage_change_intent(clear)` from this session. Transcript JSONL is a + fallback only when registry read fails; it counts `CallMcpTool` workflow + events, not raw substring matches.
+ +Without an authorized intent, only read-only Git inspection shell commands are +allowed; `git apply`, commits, and direct `.git/**` writes are blocked. + +`enforce_scope` (`python` vs `repo`) is configured in `.cursor/codeclone-hooks.json` +or via `CODECLONE_HOOKS_ENFORCE_SCOPE` — see +[Environment variable overrides](../10-config-and-defaults.md#cursor-plugin-hooks). + +## Read-only contract + +MCP must not mutate source, baselines, analysis cache, or canonical reports. +Change-control and session tools may write ephemeral intent state +(`.codeclone/intents/` file backend by default; SQLite optional) and +optional audit rows when `audit_enabled=true`. + +## Design rules + +- **Cursor-native packaging** under `plugins/cursor-codeclone/` +- **Canonical MCP first** — launcher resolves `codeclone-mcp`, no tool filtering +- **Rules + skills** — `change-control-gate` always on; skills carry workflows +- **Hook safety** — `preToolUse` fail-closed; `postToolUse` / `stop` advisory +- **No hidden installs** — plugin does not patch Cursor or install Python packages + +## Non-guarantees + +- Cursor UI for skills/hooks may evolve independently of manifest content. +- Manual symlink installs may omit bundled rules/hooks unless the full plugin dir + is registered. +- Hook behavior follows Cursor's hook API contract. + +## Current limits + +- Duplicate MCP registration (plugin `mcp.json` + manual `codeclone-mcp` entry) + causes confusion — keep one path. +- `mcp.json` runs `python3 ./scripts/launch_mcp.py` relative to the plugin root, + not a bare `codeclone-mcp` JSON command (the launcher resolves the binary). +- Hooks do not call MCP; they read `codeclone.workspace_intent` only. +- VS Code extension features (Memory UI governance, session/audit webviews, + `codeclone.memory.search*` settings) are outside this plugin. 
+ +## Further reading + +- [MCP usage guide](../../guide/mcp/README.md) +- [MCP interface contract](../25-mcp-interface/index.md) +- [Engineering Memory](../13-engineering-memory/index.md) +- [Structural Change Controller](../12-structural-change-controller/index.md) diff --git a/docs/book/integrations/github-action.md b/docs/book/integrations/github-action.md new file mode 100644 index 00000000..8a170627 --- /dev/null +++ b/docs/book/integrations/github-action.md @@ -0,0 +1,27 @@ + + +# GitHub Action + +CodeClone ships a composite GitHub Action for CI and pull-request workflows: +structural analysis, optional SARIF upload, PR summary comments, and +deterministic JSON reports. + +**Authoritative reference:** [`.github/actions/codeclone/README.md`](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) +in the CodeClone repository (inputs, outputs, exit codes, baseline requirements, +and v2 workflow shape). + +Quick start: + +```yaml +- uses: orenlab/codeclone/.github/actions/codeclone@v2 + with: + fail-on-new: "true" +``` + +The action installs `codeclone` from PyPI for remote consumers. When used from +the checked-out CodeClone monorepo (`uses: ./.github/actions/codeclone`), it +installs from the repository under test. + +For CLI flag semantics and exit codes, see [CLI](../11-cli.md) and +[Exit codes](../09-exit-codes.md). For SARIF upload details, see +[SARIF integration](sarif.md). diff --git a/docs/sarif.md b/docs/book/integrations/sarif.md similarity index 72% rename from docs/sarif.md rename to docs/book/integrations/sarif.md index c2ebd561..2ce04a72 100644 --- a/docs/sarif.md +++ b/docs/book/integrations/sarif.md @@ -1,12 +1,6 @@ -# SARIF for IDEs and Code Scanning + -## Purpose - -Explain how CodeClone projects canonical findings into SARIF and what IDEs or -code-scanning tools can rely on. - -SARIF is a deterministic projection layer. The canonical source of truth -remains the report document. 
+# SARIF ## Source files @@ -53,20 +47,6 @@ Current SARIF output includes: Coverage Join may materialize coverage design findings only when the canonical report already contains valid `metrics.families.coverage_join` facts. -## What SARIF is good for here - -SARIF is useful as: - -- an IDE-facing findings stream -- a code-scanning upload format -- another deterministic machine-readable projection over canonical report data - -It is not the source of truth for: - -- report integrity digest -- gating semantics -- baseline compatibility - ## Validation and tests Relevant tests: @@ -80,9 +60,3 @@ Contract-adjacent coverage includes: - reuse of the canonical report document - stable SARIF branch invariants - deterministic artifacts/rules/results ordering - -## See also - -- [08. Report](book/08-report.md) -- [10. HTML Render](book/10-html-render.md) -- [Examples / Sample Report](examples/report.md) diff --git a/docs/book/integrations/vs-code-extension.md b/docs/book/integrations/vs-code-extension.md new file mode 100644 index 00000000..222502de --- /dev/null +++ b/docs/book/integrations/vs-code-extension.md @@ -0,0 +1,155 @@ + + +# VS Code Extension + +## Trust model + +The extension uses a **limited Restricted Mode**: + +- onboarding and setup help remain available in untrusted workspaces +- local analysis and the local MCP server stay disabled until workspace trust + is granted + +The extension is not intended for virtual workspaces. + +That is intentional: CodeClone reads repository contents, local git state, and +the local MCP launcher. + +!!! warning "Workspace trust still matters" + The extension runs as a workspace extension and requires VS Code `1.120.0` + or newer, local filesystem access, local git access for changed-files review, + and a local `codeclone-mcp` launcher or an explicitly configured one. + CodeClone **`2.0.0` or newer** is required for core analysis, triage, and + change-control MCP tools. 
+ + **Engineering Memory** (Memory tree view, search, IDE governance approve/reject, + trajectory dashboard) requires CodeClone **`2.1.0a1` or newer** with + `query_engineering_memory` and governance tools on the resolved launcher. + Older servers that pass the `2.0.0` gate still load the extension but show + Memory features as unavailable until upgraded. + + In `auto` mode, launcher resolution prefers the current workspace virtualenv + before `PATH`. Launcher override settings (`codeclone.mcp.command`, + `codeclone.mcp.args`) are machine-scoped. Analysis-depth settings are + resource-scoped so they can vary by workspace or folder. + +## Settings + +Authoritative definitions: `extensions/vscode-codeclone/package.json` → +`contributes.configuration.properties`. + +### Launcher (machine-scoped) + +| Setting | Default | Notes | +|-------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `codeclone.mcp.command` | `auto` | Workspace venv, then `PATH`. User/remote settings. | +| `codeclone.mcp.args` | `[]` | Extra launcher argv. The extension injects `--ide-governance-channel` for Memory governance and session/audit tools (do not duplicate in args). | + +### Analysis (resource-scoped) + +| Setting | Default | Notes | +|--------------------------------------------|------------|----------------------------------------------------| +| `codeclone.analysis.profile` | `defaults` | `defaults`, `deeperReview`, or `custom`. | +| `codeclone.analysis.cachePolicy` | `reuse` | `reuse` or `off`. | +| `codeclone.analysis.changedDiffRef` | `HEAD` | Git ref for **Review Changes**. | +| `codeclone.analysis.coverageXml` | `""` | Explicit Cobertura path for Coverage Join. | +| `codeclone.analysis.autoDetectCoverageXml` | `true` | Use workspace-root `coverage.xml` when path empty. 
| +| `codeclone.analysis.minLoc` | `10` | Custom thresholds — only when `profile=custom`. | +| `codeclone.analysis.minStmt` | `6` | Same. | +| `codeclone.analysis.blockMinLoc` | `20` | Same. | +| `codeclone.analysis.blockMinStmt` | `8` | Same. | +| `codeclone.analysis.segmentMinLoc` | `20` | Same. | +| `codeclone.analysis.segmentMinStmt` | `10` | Same. | + +### UI (window-scoped) + +| Setting | Default | Notes | +|------------------------------|---------|----------------------------------| +| `codeclone.ui.showStatusBar` | `true` | Workspace-level status bar item. | + +### Engineering Memory search (resource-scoped) + +These map to MCP `query_engineering_memory` parameters from +`extensions/vscode-codeclone/src/memorySearch.js` (`readMemorySearchSettings`). + +| Setting | Default | MCP mapping | Notes | +|----------------------------------------|-----------|-----------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `codeclone.memory.searchSemantic` | `true` | `semantic` on `mode=search` only | Extension **asks** for semantic blend by default. Server still needs `[tool.codeclone.memory.semantic] enabled`, a built sidecar, and a provider. Use `codeclone[semantic-local]` + `embedding_provider="fastembed"` for semantic-quality recall; otherwise FTS-only or diagnostic/degraded results report `semantic.used: false` / provider details. | +| `codeclone.memory.searchIncludeDrafts` | `false` | `include_drafts` (search) | Drafts are still included automatically on `for_path` per memory contract. 
| +| `codeclone.memory.searchIncludeStale` | `false` | `include_stale` (search and `for_path`) | | +| `codeclone.memory.searchMaxResults` | `20` | `max_results` (clamped 5–50) | | +| `codeclone.memory.searchDetailLevel` | `compact` | `detail_level`: `compact` or `full` | `mode=get` always returns full records. Not exposed in **Configure Memory Search** (settings UI only). | + +!!! important "Extension default differs from server default" + `searchSemantic` defaults to **`true` in VS Code** so the IDE requests semantic + blend when the user searches. CodeClone's **repository** default remains + `memory.semantic.enabled = false` until you opt in in `pyproject.toml`, install + the semantic extras, and rebuild the sidecar (MCP + `rebuild_semantic_index` or CLI `memory semantic rebuild`). + + **Configure Memory Search** updates `searchSemantic`, `searchIncludeDrafts`, + `searchIncludeStale`, and `searchMaxResults` at `ConfigurationTarget.WorkspaceFolder`. + `searchDetailLevel` is settings-editor only. Search queries must be 2–200 characters + without control characters (`sanitizeSearchQuery`). + +## State boundaries + +The extension keeps three state classes visibly separate: + +**Repository truth** — comes from CodeClone analysis through MCP and canonical +report semantics. + +**Current run** — bounded by the active MCP session and the current latest run +used by the extension for a workspace. + +**Reviewed markers** — session-local workflow markers only. They are in-memory +only, do not update baseline state, do not rewrite findings, and do not change +canonical report truth. + +## Design rules + +- **Native VS Code first**: tree views, status bar, Quick Pick, CodeLens, and + file decorations before any custom UI. +- **Conservative by default**: the extension starts with the `defaults` + profile (repo defaults or `pyproject`-resolved thresholds) and treats + `deeperReview` or `custom` as explicit exploratory follow-ups. 
+- **Source-first**: findings prefer `Reveal Source` over detail panels; + canonical detail and HTML report bridge are opt-in. +- **Report-only separation**: Overloaded Modules stay visually distinct from + findings, gates, and health. `Security Surfaces` stay visually distinct too + and remain boundary inventory rather than vulnerability claims. +- **Safe HTML bridge**: `Open in HTML Report` verifies the local file exists + and is not older than the current run. +- **Session-local state**: reviewed markers shape review UX but never leak + into repository truth. +- **Trajectory evidence**: dashboard/detail commands render MCP trajectory + status, anomalies, exact agent-label aggregates, quality passports, and + Patch Trail evidence without inventing IDE-local scoring. +- **First-run clarity**: onboarding leads to `Analyze Workspace`, not + transport setup. +- **Restricted Mode honesty**: explain requirements without pretending + analysis is available before trust is granted. + +## Non-guarantees + +- Exact view grouping and copy may evolve between extension releases. +- Internal client-side caching and view-model shaping may evolve as long as the + extension remains faithful to MCP and canonical report semantics. +- Explorer decoration styling, review-loop polish, and other non-contract UI + details may evolve without changing the extension contract. + +## Source of truth + +The extension reads the same canonical analysis semantics already exposed by +CodeClone CLI, canonical report JSON, and CodeClone MCP. + +- CLI remains the scripting and CI surface. +- HTML remains the richest human report surface. +- MCP remains the read-only integration contract for agents and IDE clients. +- The VS Code extension is a guided IDE view over that MCP surface. + +For the underlying interface contract, see +[MCP usage guide](../../guide/mcp/README.md) and +[MCP interface contract](../25-mcp-interface/index.md). 
+Trajectory scoring is defined by +[Trajectory quality and passport](../13-engineering-memory/trajectory-quality-and-passport.md). diff --git a/docs/claude-desktop-bundle.md b/docs/claude-desktop-bundle.md deleted file mode 100644 index 08873630..00000000 --- a/docs/claude-desktop-bundle.md +++ /dev/null @@ -1,71 +0,0 @@ -# Claude Desktop Bundle - -Local `.mcpb` bundle wrapper for `codeclone-mcp` in -`extensions/claude-desktop-codeclone/`. - -Installable package instead of hand-editing client JSON. Same canonical MCP -surface used by CLI, VS Code, Codex, and Claude Code. The manifest includes -pre-loaded instructions that guide Claude toward conservative-first, -production-first structural review. - -Because the bundle is only a launcher wrapper, newly added canonical MCP -surfaces from the local `codeclone-mcp` version flow through directly, -including current-run `Coverage Join` facts and the optional `coverage` help -topic when supported by that server. - -## Install - -The bundle prefers the current workspace launcher first: - -1. `./.venv/bin/codeclone-mcp` -2. the current Poetry environment launcher -3. user-local install paths and `PATH` - -```bash -uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]" -.venv/bin/codeclone-mcp --help -``` - -Global fallback: - -```bash -uv tool install "codeclone[mcp]" -codeclone-mcp --help -``` - -## Bundle workflow - -1. Build: `cd extensions/claude-desktop-codeclone && node scripts/build-mcpb.mjs` -2. Claude Desktop: **Settings → Extensions → Install Extension** → select `.mcpb` -3. If you want to bypass auto-discovery, set **CodeClone launcher command** in - the bundle settings to an absolute path. 
- -## Settings - -| Setting | Purpose | -|--------------------------------|------------------------------------------------------| -| **CodeClone launcher command** | Absolute path or bare command for `codeclone-mcp` | -| **Advanced launcher args** | JSON array of extra args (transport is always stdio) | - -## Runtime model - -Node wrapper launches `codeclone-mcp` via local `stdio`. It prefers a -workspace-local `.venv`, then a Poetry environment, then user-local install -paths, then `PATH`. - -## Privacy - -Local wrapper only — no telemetry, no cloud sync, no remote listener. -See [Privacy Policy](privacy-policy.md). - -## Current limits - -- expects either a workspace launcher, a user-local/global launcher, or an - explicitly configured absolute launcher path -- local install surface, not a hosted service layer - -For the underlying MCP contract, see: - -- [MCP usage guide](mcp.md) -- [MCP interface contract](book/20-mcp-interface.md) diff --git a/docs/codex-plugin.md b/docs/codex-plugin.md deleted file mode 100644 index 01504188..00000000 --- a/docs/codex-plugin.md +++ /dev/null @@ -1,56 +0,0 @@ -# Codex Plugin - -CodeClone ships a native Codex plugin in `plugins/codeclone/`. -Repo-local discovery via `.agents/plugins/marketplace.json`. 
- -## What ships in the plugin - -| File | Purpose | -|------------------------------|----------------------------------------------------| -| `.codex-plugin/plugin.json` | Plugin metadata, prompts, instructions | -| `.mcp.json` | Workspace-first MCP launcher definition | -| `scripts/launch_mcp` | Shell-free launcher wrapper for Codex | -| `skills/codeclone-review/` | Conservative-first full review skill | -| `skills/codeclone-hotspots/` | Quick hotspot discovery skill | -| `assets/` | Plugin branding | - -## Install - -```bash -uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]" -.venv/bin/codeclone-mcp --help -``` - -Global fallback: - -```bash -uv tool install "codeclone[mcp]" -codeclone-mcp --help -``` - -Manual MCP registration without the plugin: - -```bash -codex mcp add codeclone -- codeclone-mcp --transport stdio -``` - -## Runtime model - -Additive — Codex discovers the plugin from `.agents/plugins/marketplace.json`, -gets a local MCP definition and two skills. New canonical MCP surfaces from the -local `codeclone-mcp` version flow through directly, including `Coverage Join` -facts and the optional `coverage` help topic when supported. The plugin does -not mutate `~/.codex/config.toml` or install a second server binary. 
- -## Current limits - -- if you already registered `codeclone-mcp` manually, keep only one setup path - to avoid duplicate MCP surfaces -- the bundled `.mcp.json` prefers `.venv`, then a Poetry env, then `PATH` -- the bundled launcher stays shell-free and local-stdio-only - -For the underlying interface contract, see: - -- [MCP usage guide](mcp.md) -- [MCP interface contract](book/20-mcp-interface.md) diff --git a/docs/examples/report.md b/docs/examples/report.md index 8e48661b..611a5cbf 100644 --- a/docs/examples/report.md +++ b/docs/examples/report.md @@ -1,3 +1,7 @@ + + # Sample Report This page links to a live example report generated from the current `codeclone` @@ -39,7 +43,7 @@ documentation, so the HTML, canonical JSON, and SARIF artifacts stay aligned. Build the docs site, then generate the example report into the built site: ```bash -uv run --with mkdocs --with mkdocs-material mkdocs build --strict +uv run --with zensical==0.0.43 zensical build --clean --strict uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live ``` diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 00000000..ab4f10e9 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,286 @@ + + +# Getting Started + +Install CodeClone, run your first analysis, set up CI gating, and connect +an MCP client — in that order. + +## Install + +=== "uv (recommended)" + + ```bash + uv tool install codeclone + ``` + +=== "pip" + + ```bash + pip install codeclone + ``` + +=== "Run without installing" + + ```bash + uvx codeclone@latest . + ``` + +To use the MCP server (AI agents, IDE extensions), install the `mcp` extra: + +```bash +uv tool install "codeclone[mcp]" +# or +pip install "codeclone[mcp]" +``` + +!!! tip "Install the in-development 2.1 prerelease" + The 2.1 line ships as alpha/beta prereleases. 
A plain install resolves the + latest **stable** release; add a prerelease flag to get 2.1: + + ```bash + uv tool install --prerelease allow "codeclone[mcp]" # uv + pip install --pre "codeclone[mcp]" # pip + ``` + +## First Run + +```bash +codeclone . +``` + +This analyzes the current directory and prints a summary to stdout. +For an HTML report: + +```bash +codeclone . --html --open-html-report +``` + +Other formats — all rendered from one canonical JSON report: + +```bash +codeclone . --json # JSON +codeclone . --md # Markdown +codeclone . --sarif # SARIF (IDE / Code Scanning) +codeclone . --text # plain text +``` + +### Changed-scope review + +Analyze only files changed relative to a branch: + +```bash +codeclone . --changed-only --diff-against main +``` + +Or from a recent commit: + +```bash +codeclone . --paths-from-git-diff HEAD~1 +``` + +## CI Setup + +### 1. Create a baseline + +```bash +codeclone . --update-baseline +``` + +By default this writes `codeclone.baseline.json`, the unified clone and metrics +baseline. Commit it to the repository — it becomes the contract CI enforces. +If you use `--metrics-baseline` to redirect metric state, commit that file too. + +### 2. Run in CI + +```bash +codeclone . --ci +``` + +`--ci` equals `--fail-on-new --no-color --quiet`. When a trusted metrics +baseline is present, CI mode also enables `--fail-on-new-metrics`. + +Baseline governance: new clones and metric regressions fail the build; +accepted legacy debt passes. CI sees only what changed. + +### 3. Quality gates + +Add thresholds for stricter enforcement: + +```bash +codeclone . --fail-complexity 20 --fail-coupling 10 --fail-cohesion 4 +codeclone . --fail-cycles --fail-dead-code --fail-health 60 +codeclone . --fail-on-typing-regression --fail-on-docstring-regression +codeclone . --coverage coverage.xml --fail-on-untested-hotspots +``` + +See [Metrics and quality gates](book/16-metrics-and-quality-gates.md) for the +full gate reference. 
+ +### GitHub Action + +```yaml +- uses: orenlab/codeclone/.github/actions/codeclone@v2 + with: + fail-on-new: "true" + sarif: "true" + pr-comment: "true" +``` + +Runs gating, generates reports, uploads SARIF to Code Scanning, and posts a +PR summary comment. +[Action docs](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) + +### Pre-commit hook + +```yaml +repos: + - repo: local + hooks: + - id: codeclone + name: CodeClone + entry: codeclone + language: system + pass_filenames: false + args: [ ".", "--ci" ] + types: [ python ] +``` + +### Exit codes + +| Code | Meaning | +|------|-----------------------------------------------------| +| `0` | Success | +| `2` | Contract error — untrusted baseline, invalid config | +| `3` | Gating failure — new clones or threshold exceeded | +| `5` | Internal error | + +Contract errors (`2`) take precedence over gating failures (`3`). +See [Exit codes](book/09-exit-codes.md). + +## MCP Setup + +The MCP server exposes **33 tools** for agent clients over the same canonical +pipeline (35 when VS Code starts the server with `--ide-governance-channel` for +session stats and audit insights). + +### Start the server + +```bash +codeclone-mcp --transport stdio # local clients (IDE, agents) +# HTTP: set CODECLONE_MCP_AUTH_TOKEN (≥32 chars) before start — required for streamable-http +codeclone-mcp --transport streamable-http # remote / HTTP clients +``` + +!!! warning + Analysis tools require an **absolute** repository root. + Relative roots like `.` are rejected. + +### Connect a client + +=== "VS Code" + + Install from the + [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone). + The extension connects to `codeclone-mcp` automatically. + + See [VS Code extension guide](guide/integrations/vscode/setup.md). 
+ +=== "Claude Desktop" + + Use the pre-built bundle in + [`extensions/claude-desktop-codeclone/`](https://github.com/orenlab/codeclone/tree/main/extensions/claude-desktop-codeclone). + + See [Claude Desktop guide](guide/integrations/claude-desktop/setup.md). + +=== "Claude Code" + + ```bash + claude plugin marketplace add orenlab/codeclone-claude-code + claude plugin install codeclone@orenlab-codeclone + ``` + + The marketplace repository is + [orenlab/codeclone-claude-code](https://github.com/orenlab/codeclone-claude-code). + + See [Claude Code plugin guide](guide/integrations/claude-code/setup.md). + +=== "Codex" + + ```bash + codex plugin marketplace add orenlab/codeclone-codex + codex plugin add codeclone@orenlab-codeclone + ``` + + The marketplace repository is + [orenlab/codeclone-codex](https://github.com/orenlab/codeclone-codex). + + See [Codex plugin guide](guide/integrations/codex/setup.md). + +=== "Cursor" + + In Cursor, open **Dashboard → Settings → Plugins → Team Marketplaces**, + choose **Add Marketplace → Import from Repo**, and enter: + + ```text + https://github.com/orenlab/codeclone-cursor + ``` + + Then install **CodeClone** from the imported marketplace. + + See [Cursor plugin guide](guide/integrations/cursor/install-and-skills.md). + +=== "Manual registration" + + ```bash + # Codex + codex mcp add codeclone -- codeclone-mcp --transport stdio + + # Any MCP client + codeclone-mcp --transport stdio + ``` + +### Change controller (AI agents) + +When an AI agent edits code, the MCP change controller governs the structural +boundary: + +1. **Declare intent** — scope, files, and purpose +2. **Map blast radius** — reverse imports, clone cohorts, do-not-touch +3. **Check patch contract** — pre-edit budget, post-edit verification +4. **Generate receipt** — auditable artifact +5. **Validate claims** — cross-check review text against report + +See [Structural Change Controller](book/12-structural-change-controller/index.md). 
+ +## Configuration + +CodeClone loads project configuration from `pyproject.toml`: + +```toml +[tool.codeclone] +baseline = "codeclone.baseline.json" +min_loc = 10 +min_stmt = 6 +block_min_loc = 20 +block_min_stmt = 8 +``` + +Precedence: CLI flags > `pyproject.toml` > built-in defaults. + +See [Config and defaults](book/10-config-and-defaults.md). + +## Next Steps + +- [Your first governed edit](start/first-governed-edit.md) — the full declare → edit → verify cycle +- [Architecture narrative](guide/explanation/how-it-works.md) — how the pipeline works +- [Baseline contract](book/07-baseline.md) — trust model and schema +- [MCP interface contract](book/25-mcp-interface/index.md) — tool surface and guarantees +- [Engineering Memory recipes](guide/mcp/workflows/memory-recipes.md) — scoped context and governed drafts +- [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) — workflow evidence and recurring + patterns +- [Platform Observability](guide/observability/diagnostics.md) — diagnose CodeClone's own runtime +- [Report contract](book/05-report.md) — canonical JSON schema diff --git a/docs/guide/README.md b/docs/guide/README.md new file mode 100644 index 00000000..f3f16d67 --- /dev/null +++ b/docs/guide/README.md @@ -0,0 +1,45 @@ + + +# Guide + +Recipes and workflows for humans and AI agents. For normative guarantees (schemas, +enums, payload semantics), use the [Contracts book](../book/README.md). + +!!! abstract "Who is this for?" 
+ - **Developers** — install, CI, first analysis run + - **Agent authors** — MCP workflows, change control, memory recipes + - **IDE and agent users** — VS Code, Cursor, Claude Code, Codex, Claude Desktop setup + +## Start here + +| I want to… | Page | +|-----------------------------------------|------------------------------------------------------------------------| +| Install and run locally | [Getting started](../getting-started.md) | +| Understand the pipeline | [How CodeClone works](explanation/how-it-works.md) | +| Connect an AI agent via MCP | [MCP overview](mcp/README.md) | +| Govern agent edits | [Change control overview](change-control/overview.md) | +| Scope context before edits | [Engineering Memory overview](memory/overview.md) | +| Inspect trajectory history and patterns | [Trajectories and Experiences](memory/trajectories-and-experiences.md) | +| Diagnose CodeClone's own runtime *(maintainer)* | [Platform Observability](observability/diagnostics.md) | +| Cluster historical agent intents *(maintainer)* | [Corpus Analytics](analytics/overview.md) | + +## MCP workflows + +| Task | Recipe | +|----------------------------|---------------------------------------------------------------| +| First analysis pass | [Analyze & triage](mcp/workflows/analyze-and-triage.md) | +| Hotspots and checks | [Drill down & checks](mcp/workflows/drill-down-and-checks.md) | +| Declare → edit → finish | [Change control](mcp/workflows/change-control.md) | +| Memory before/after edits | [Memory recipes](mcp/workflows/memory-recipes.md) | +| Cobertura join & session markers | [Coverage join & session markers](mcp/workflows/session-and-coverage.md) | + +## Integrations + +| Client | Setup guide | Contract | +|----------------|---------------------------------------------------------------|-----------------------------------------------------------| +| VS Code | [Setup](integrations/vscode/setup.md) | [Contract](../book/integrations/vs-code-extension.md) | +| Cursor | [Install & 
skills](integrations/cursor/install-and-skills.md) | [Contract](../book/integrations/cursor-plugin.md) | +| Claude Code | [Install](integrations/claude-code/setup.md) | [Contract](../book/integrations/claude-code-plugin.md) | +| Codex | [Install](integrations/codex/setup.md) | [Contract](../book/integrations/codex-plugin.md) | +| Claude Desktop | [Setup](integrations/claude-desktop/setup.md) | [Contract](../book/integrations/claude-desktop-bundle.md) | +| SARIF export | [Export](integrations/sarif/export.md) | [Contract](../book/integrations/sarif.md) | diff --git a/docs/guide/analytics/overview.md b/docs/guide/analytics/overview.md new file mode 100644 index 00000000..55f8c10b --- /dev/null +++ b/docs/guide/analytics/overview.md @@ -0,0 +1,191 @@ +# Corpus Analytics + +Use Corpus Analytics when you want **offline clustering of historical change-control +intents** — for example to compare agent workflow cohorts, inspect outliers, or +export HTML/JSON summaries for maintainer review. + +## Prerequisites + +1. A repository with audit enabled and historical `intent.declared` events. +2. Engineering Memory trajectory projection (optional but improves selection). +3. Install optional dependencies: + +```bash +uv sync --extra analytics +``` + +## Quick start + +Build snapshot, embeddings, and a recommended clustering run in one step: + +```bash +codeclone analytics build --root . --sweep --use-recommended +``` + +`--use-recommended` requires `--sweep`. It renders the heuristic winner for +inspection; it does **not** set `selected_by_maintainer`. + +Use a versioned profile lens when the review question is more specific: + +```bash +codeclone analytics build \ + --root . \ + --profile intent-small-balanced-v1 \ + --use-recommended \ + --html-out /tmp/profile-report.html \ + --json-out /tmp/profile-report.json +``` + +`--profile` implies a finite sweep. 
`--profile auto` requires +`default_profile_id` in `pyproject.toml`; omitting `--profile` preserves the +ordinary single-run or sweep behavior. + +Write a detailed single-run report to explicit paths: + +```bash +codeclone analytics build \ + --root . \ + --representation description \ + --html-out /tmp/corpus-clusters.html \ + --json-out /tmp/corpus-clusters.json +``` + +Write a sweep comparison without choosing a primary detail view: + +```bash +codeclone analytics build \ + --root . \ + --sweep \ + --html-out /tmp/corpus-sweep.html \ + --json-out /tmp/corpus-sweep.json +``` + +## Reading the reports + +Corpus Analytics separates formal technical validity from human +interpretation: + +```mermaid +flowchart LR + R["Persisted clustering run"] --> V{"V1-V10 pass?"} + V -->|"yes"| F["Full interpretation
metrics, previews, provenance"] + V -->|"no"| L["Limited diagnostic
codes, status, safe counts"] + F --> P{"Profile lens?"} + P -->|"yes"| S["Suitability + profile ranking"] + P -->|"no"| O["Global heuristic comparison"] + S --> O2["JSON 1.3 / HTML"] + O --> O2 + L --> O +``` + +A valid run can still be only a candidate. The banner distinguishes +maintainer-selected, profile-recommended, valid-but-profile-rejected, +heuristically recommended, candidate-only, and technically invalid runs; none +of those labels claims a semantic taxonomy. + +Full reports show dominant-cluster ratios against both the whole corpus and +assigned non-noise items, bounded representative/boundary previews, numeric +summaries, categorical correlations, provenance completeness for small +clusters, and observable noise flags. Sweep comparison includes failed and +invalid runs as limited rows with `unavailable` metrics rather than silently +dropping them. + +Normalized text previews are capped at 240 Unicode code points. JSON keeps raw +strings; HTML escapes them. The export `content_disclosure` block reports +whether previews were actually emitted and in which scopes. See +[Report Interpretability](../../book/27-corpus-analytics.md#report-interpretability-slice-11) +for the invariants and safe-output rules, and +[JSON export schema](../../book/appendix/b-schema-layouts.md#corpus-analytics-json-export-13) +for the wire shape. + +## Step-by-step + +```bash +# 1. Immutable snapshot from audit + trajectory (+ optional registry overlay) +codeclone analytics snapshot --root . + +# 2. Analytics embeddings (separate LanceDB sidecar) +codeclone analytics embed --root . --snapshot-id SNAPSHOT_ID + +# 3. Cluster (add --sweep or --profile for a finite parameter search) +codeclone analytics cluster \ + --root . \ + --snapshot-id SNAPSHOT_ID \ + --embedding-generation-id GENERATION_ID + +# Optional profile registry and profile-scoped sweep +codeclone analytics profiles list --root . +codeclone analytics cluster \ + --root . 
\ + --snapshot-id SNAPSHOT_ID \ + --embedding-generation-id GENERATION_ID \ + --profile intent-small-discovery-v1 + +# 4. Inspect runs +codeclone analytics clusters --root . --snapshot-id SNAPSHOT_ID +codeclone analytics cluster-show \ + --root . --snapshot-id SNAPSHOT_ID --run-id RUN_ID + +# 5. Record an explicit maintainer choice +codeclone analytics cluster --root . --select-run RUN_ID \ + --selected-by "$USER" \ + --selection-rationale "Chosen for maintainer review" +``` + +For a profile-scoped decision, add +`--selection-profile PROFILE_ID_OR_PROFILE_BATCH_ID`. Use `none` for global +scope. + +## Configuration + +Defaults live in `[tool.codeclone.analytics]` inside `pyproject.toml`. See +[Corpus Analytics contract](../../book/27-corpus-analytics.md) for the full table. +The historical audit source follows top-level `[tool.codeclone].audit_path`. + +```toml +[tool.codeclone.analytics] +default_profile_id = "intent-small-balanced-v1" +profile_paths = ["analytics/profiles/team-review.json"] +sweep_pca_dimensions = [32, 64, 128] +sweep_min_cluster_sizes = [5, 8, 12, 15] +sweep_min_samples = [1, 3, 5] +sweep_selection_methods = ["eom", "leaf"] +``` + +Repository-local manifests use the same schema as bundled profiles. Paths must +resolve to files inside the repository. The default profile is consulted only +for explicit `--profile auto`. + +## Reproducibility + +Exports persist snapshot and embedding manifests, vector digests, requested and +effective parameters, fixed PCA/HDBSCAN settings, package versions, and the +random seed. Unless the model revision and artifact fingerprint are known, +CodeClone explicitly reports that full vector reproducibility is not guaranteed +from the model id alone. + +Existing embedding generations created under an incompatible embedding contract +are rejected. Run `embed` again for the same snapshot to create a compatible +generation. 
+ +## Failure behavior + +- Expected input, capability, schema, and artifact-integrity errors exit with + code `2` and no traceback. +- A clustering run is persisted as `running`, then becomes `completed` or + `failed`; failed runs contain no committed assignments or summaries. +- Resolved invalid or failed runs remain exportable in limited diagnostic mode; + they never receive partition metrics, previews, score, or rank. +- A missing embedding-generation record is rendered explicitly as unavailable + metadata rather than fabricated from the run. +- JSON and HTML outputs are written atomically. +- Snapshot, embed, cluster, and report spans are recorded only when + `CODECLONE_OBSERVABILITY_ENABLED=1`. + +## What this is not + +- Not a second analyzer — it does not replace `codeclone` structural reports. +- Not Engineering Memory semantic search — vectors are stored separately. +- Not MCP-visible in Slice 1 — CLI only. + +Contract reference: [27-corpus-analytics.md](../../book/27-corpus-analytics.md). diff --git a/docs/guide/change-control/agent-cycle.md b/docs/guide/change-control/agent-cycle.md new file mode 100644 index 00000000..2005e6e8 --- /dev/null +++ b/docs/guide/change-control/agent-cycle.md @@ -0,0 +1,28 @@ + + +# Agent edit cycle + +Same sequence as [MCP change control workflow](../mcp/workflows/change-control.md). + +**Before first edit:** `start_controlled_change` must return `status: active` and +`edit_allowed: true`. Queued intents require `manage_change_intent(action=promote)`. + +**Evidence:** finish requires exactly one of `changed_files` or `diff_ref` listing +every in-scope dirty path. + +**After-run:** required when verification profile is `python_structural` or +`governance_config`. Pass a **new** `after_run_id` — identical before/after runs +return `after_run_not_new`. + +**Claims:** only `claims_text` goes to Claim Guard; `review_text` is a human note. 
+ +**Memory:** call `get_relevant_memory` after start; optional +`finish(..., propose_memory=true)` for draft candidates (human approve in VS Code +Memory view). + +**Implementation context:** after memory, call `get_implementation_context` with +scoped paths when editing Python — bounded structural, call-graph, and contract +evidence from one stored run. + +Contract tables: [Verification profiles](../../book/12-structural-change-controller/verification-profiles.md), +[finish_controlled_change](../../book/12-structural-change-controller/finish-controlled-change.md). diff --git a/docs/guide/change-control/atomic-debug.md b/docs/guide/change-control/atomic-debug.md new file mode 100644 index 00000000..68d55515 --- /dev/null +++ b/docs/guide/change-control/atomic-debug.md @@ -0,0 +1,24 @@ + + +# Atomic debug path + +For legacy MCP servers or step-by-step debugging: + +``` +manage_change_intent(action="list_workspace") + -> analyze_repository + -> manage_change_intent(action="declare", scope={...}) + -> get_blast_radius(files=[...]) + -> check_patch_contract(mode="budget") + -> [edit within scope] + -> analyze_repository + -> manage_change_intent(action="check", intent_id=..., changed_files=[...]) + -> check_patch_contract(mode="verify", after_run_id=..., intent_id=...) + -> validate_review_claims(text="...", patch_health_delta=...) + -> create_review_receipt + -> manage_change_intent(action="clear") +``` + +Prefer [start/finish workflow](../mcp/workflows/change-control.md) when available. + +Tool params: [Atomic change control tools](../../book/25-mcp-interface/tools/atomic-change-control.md). 
diff --git a/docs/guide/change-control/overview.md b/docs/guide/change-control/overview.md new file mode 100644 index 00000000..933741fb --- /dev/null +++ b/docs/guide/change-control/overview.md @@ -0,0 +1,20 @@ + + +# Change control overview + +CodeClone v2.1 requires agents to **declare scope before editing**, verify the +patch against structural boundaries, and finish with evidence-linked hygiene. + +| Step | Action | +|------|---------------------------------------------------------------------------------| +| 1 | `analyze_repository` (or reuse valid run) | +| 2 | `start_controlled_change` → `edit_allowed=true` | +| 3 | `get_relevant_memory` (requires absolute `root`) | +| 4 | `get_implementation_context` for scoped Python paths (optional but recommended) | +| 5 | Edit inside declared scope only | +| 6 | After-run when profile requires it | +| 7 | `finish_controlled_change` with `changed_files` or `diff_ref` | + +MCP recipe: [Change control workflow](../mcp/workflows/change-control.md). + +Contract: [Structural Change Controller](../../book/12-structural-change-controller/index.md). diff --git a/docs/guide/change-control/queue-and-recovery.md b/docs/guide/change-control/queue-and-recovery.md new file mode 100644 index 00000000..7772d26f --- /dev/null +++ b/docs/guide/change-control/queue-and-recovery.md @@ -0,0 +1,31 @@ + + +# Queue & recovery + +## Multi-agent queue + +``` +start_controlled_change(scope={...}, on_conflict="queue") + -> wait for foreign intent to clear + -> manage_change_intent(action="promote", intent_id=...) + -> edit within scope + -> finish_controlled_change(...) +``` + +## Workspace hygiene (guide summary) + +Four contours: **status** (registry lifecycle), **ownership** (PID/TTL), +**hygiene** (git ∩ scope), **permission** (`edit_allowed`). + +- **`dirty_scope_policy`:** `continue_own_wip` resumes known WIP in scope when no + foreign overlap; finish still needs evidence.
+- **`gc_workspace`:** explicit GC vs lazy close on read — different predicates. +- **Blocking finish:** `missing_evidence`, `foreign_dirty_overlap`, and (when + [strict finish mode](../../book/10-config-and-defaults.md#mcp-session-and-change-control-hygiene) + is enabled) `own_unscoped_dirty`. + +Normative tables: [Finish hygiene](../../book/12-structural-change-controller/finish-hygiene.md), +[payload semantics](../../book/12-structural-change-controller/payload-semantics.md). + +Recovery: `manage_change_intent(action=recover|reset_workspace)` when MCP hints +`recovery_available`. diff --git a/docs/guide/explanation/how-it-works.md b/docs/guide/explanation/how-it-works.md new file mode 100644 index 00000000..342d3fa9 --- /dev/null +++ b/docs/guide/explanation/how-it-works.md @@ -0,0 +1,192 @@ + + +# How CodeClone Works + +> This page is a narrative architecture overview. +> Contract-level guarantees are defined in the +> [Contracts Book](../../book/README.md). + +--- + +## Pipeline Overview + +CodeClone processes Python projects in the following stages: + +1. **Source scanning** +2. **AST parsing** +3. **AST normalization** +4. **CFG construction** +5. **Fingerprinting** +6. **Segment window extraction** +7. **Clone grouping** +8. **Project metrics** (complexity, coupling, health, dead code, …) +9. **Canonical report assembly** +10. **Baseline diff and metric gating** (CI exit decision) + +Full contract: [Core pipeline](../../book/03-core-pipeline.md). + +--- + +## 1. Source Scanning + +- Recursively scans `.py` files. +- Uses deterministic sorted traversal. +- Skips paths that resolve outside the root (symlink traversal guard). +- Applies cache-based skipping using file stat signatures. + +Cache contract: [Cache](../../book/08-cache.md). + +--- + +## 2. AST Parsing + +- Uses Python's built-in `ast` module. +- Supports Python 3.10+ syntax. + +--- + +## 3. 
AST Normalization + +Normalization removes non-structural noise: + +- variable names → `_VAR_` +- constants → `_CONST_` +- attributes → `_ATTR_` +- symbolic call targets are preserved (to avoid API conflation) +- syntactic sugar (e.g. `x += 1` → `x = x + 1`) +- commutative operand canonicalization (`+`, `*`, `|`, `&`, `^`) on proven constant domains +- local logical equivalence (`not (x in y)` → `x not in y`, `not (x is y)` → `x is not y`) +- docstrings removed +- type annotations removed + +This ensures structural stability across refactors. + +--- + +## 4. CFG Construction + +- Built per-function using `CFGBuilder`. +- Produces deterministic basic blocks. +- Captures structural control flow (`if`, `for`, `while`, `try`, `with`, `match`). +- Models short-circuit `and`/`or` as micro-CFG branches. +- Links `try/except` only from statements that may raise. +- Preserves `match case` and `except` handler order structurally. +- Models `break` / `continue` as terminating loop transitions. +- Preserves `for/while ... else` semantics. + +Full semantics: [CFG Semantics](../../book/04-cfg-semantics.md). + +--- + +## 5. Fingerprinting + +Each function CFG is converted into a canonical string form and hashed. +This fingerprint is used to group structurally identical functions. + +--- + +## 6. Segment Windows + +Large functions are also scanned with **segment windows** (sliding windows over +normalized statements). These are used to detect **internal clones** inside the +same function. + +Segment windows are **never** used as a final equivalence signal; they are +candidate generators with strict hash confirmation. + +--- + +## 7. Clone Detection + +Clone groups are detected at three granularities: + +### Function clone groups + +- Grouped by `fingerprint|loc_bucket`. +- Report typing is deterministic (`Type-1`..`Type-4`) in report layer. + +### Block clone groups + +- Repeated structural statement windows across functions. 
+- Report typing is `Type-4` with explainability facts from core. + +Noise filters applied: + +- minimum LOC / statement thresholds +- no overlapping blocks +- no same-function block clones +- `__init__` excluded from block analysis + +### Segment clones (internal/report-only) + +- Detected only **inside the same function**. +- Used for internal copy-paste discovery and report explainability. +- Not included in baseline or CI failure logic. + +### Structural findings (report-only) + +- `duplicated_branches`: repeated branch-body signatures. +- `clone_guard_exit_divergence`: guard/terminal divergence inside one function-clone cohort. +- `clone_cohort_drift`: drift from majority terminal/guard/try/side-effect profile. + +These findings are rendered in reports only and do not change baseline diff or CI +gating decisions. + +--- + +## 8. Reporting + +Detected findings can be rendered as interactive HTML, canonical JSON (schema +`2.11`), deterministic text, Markdown, or SARIF projections. Reporting is +separate from CI gating: report-only structural findings and segment clones do +not change baseline diff or gate evaluation. + +Report contract: [Report](../../book/05-report.md). +HTML rendering: [HTML Render](../../book/06-html-render.md). + +--- + +## 9. CI Gating + +After the canonical report is built, clone baseline diff and configured metric +gates decide exit code `3` when policy fails. Gating mode is active when any +`--fail-*`, `--ci`, or minimum-coverage threshold is set (see +[CLI](../../book/11-cli.md)). Unreadable source in gating mode is a contract +error (exit `2`, marker `CONTRACT ERROR:`) and takes priority over clone/metric +gate failure. + +Exit code contract: [Exit codes](../../book/09-exit-codes.md). + +--- + +## Surfaces + +Every output surface — CLI, HTML, MCP, IDE — is a projection of the same +canonical report. No surface adds a second analysis engine. 
+ +| Surface | Role | Contract | +|----------------|------------------------------------|-----------------------------------------------------------| +| CLI | Scripting and CI | [CLI](../../book/11-cli.md) | +| MCP | Read-only agent/client integration | [MCP interface](../../book/25-mcp-interface/index.md) | +| VS Code | Guided IDE review | [VS Code](../integrations/vscode/setup.md) | +| Claude Desktop | Local `.mcpb` bundle | [Claude Desktop](../integrations/claude-desktop/setup.md) | +| Codex | Marketplace plugin with skills | [Codex](../integrations/codex/setup.md) | +| Cursor | Plugin with skills, rules, hooks | [Cursor](../integrations/cursor/install-and-skills.md) | +| SARIF | IDE code scanning | [SARIF](../integrations/sarif/export.md) | + +--- + +## Design Principles + +- Structural > textual +- Deterministic > precise +- Low-noise > completeness +- CI-first design + +Module map: [Architecture Map](../../book/02-architecture-map.md). diff --git a/docs/guide/integrations/claude-code/setup.md b/docs/guide/integrations/claude-code/setup.md new file mode 100644 index 00000000..7db28391 --- /dev/null +++ b/docs/guide/integrations/claude-code/setup.md @@ -0,0 +1,105 @@ + + +# Claude Code setup + +CodeClone ships a native Claude Code plugin through the public +[orenlab/codeclone-claude-code](https://github.com/orenlab/codeclone-claude-code) +marketplace repository. + +This is distinct from the +[Claude Desktop `.mcpb` bundle](../claude-desktop/setup.md). Claude Code loads +skills and the MCP definition; Claude Desktop installs an extension bundle. 
+ +## Prerequisites + +- Claude Code with plugin support +- Python 3.10+ +- a local `codeclone-mcp` installation + +## Install from the marketplace + +Add the marketplace and install the plugin: + +```bash +claude plugin marketplace add orenlab/codeclone-claude-code +claude plugin install codeclone@orenlab-codeclone +``` + +The equivalent interactive commands are: + +```text +/plugin marketplace add orenlab/codeclone-claude-code +/plugin install codeclone@orenlab-codeclone +``` + +Verify: + +```bash +claude plugin marketplace list +claude plugin list +``` + +## Install the MCP launcher + +Global tool installation: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +Workspace-local installation: + +```bash +uv venv +uv pip install --python .venv/bin/python "codeclone[mcp]" +.venv/bin/codeclone-mcp --help +``` + +The plugin launcher resolves a workspace `.venv`, then the current Poetry +environment, then `codeclone-mcp` from `PATH`. + +## Runtime path + +```mermaid +flowchart LR + A["Claude Code"] --> B["CodeClone plugin"] + B --> C["Plugin skills"] + B --> D["Local stdio launcher"] + D --> E["codeclone-mcp"] + E --> F["Canonical report and controller"] +``` + +The plugin does not bundle Python or a second analyzer. It supplies guidance and +a local MCP definition over the same canonical CodeClone server. 
+ +## Skills + +Claude Code namespaces installed plugin skills: + +| Task | Invocation | +|--------------------|-------------------------------------------| +| Repository review | `/codeclone:codeclone-review` | +| Hotspot snapshot | `/codeclone:codeclone-hotspots` | +| Controlled edit | `/codeclone:codeclone-change-control` | +| Engineering Memory | `/codeclone:codeclone-engineering-memory` | + +## Update or remove + +```bash +claude plugin marketplace update orenlab-codeclone +claude plugin update codeclone@orenlab-codeclone +claude plugin uninstall codeclone@orenlab-codeclone +``` + +## Local development + +Marketplace installation is the public path. For plugin development only: + +```bash +claude --plugin-dir plugins/claude-code-codeclone +claude plugin validate plugins/claude-code-codeclone +``` + +Contract reference: +[Claude Code plugin](../../../book/integrations/claude-code-plugin.md). diff --git a/docs/guide/integrations/claude-desktop/setup.md b/docs/guide/integrations/claude-desktop/setup.md new file mode 100644 index 00000000..5372f9e3 --- /dev/null +++ b/docs/guide/integrations/claude-desktop/setup.md @@ -0,0 +1,100 @@ + + +# Claude Desktop setup + +Local `.mcpb` bundle that launches `codeclone-mcp` over stdio. Same canonical MCP +surface as CLI, VS Code, Codex, and Cursor — no second analyzer or truth path. + +For the terminal agent, use the separate +[Claude Code marketplace plugin](../claude-code/setup.md). The `.mcpb` described +here is only for Claude Desktop. + +## Prerequisites + +- Claude Desktop with extension support +- Node.js (to build the bundle from source) +- Python 3.10+ with `codeclone[mcp]` installed + +## Install the MCP launcher + +The bundle prefers the current workspace launcher first: + +1. `./.venv/bin/codeclone-mcp` +2. the current Poetry environment launcher +3. 
user-local install paths and `PATH` + +Workspace-local setup: + +```bash +uv venv +uv pip install --python .venv/bin/python "codeclone[mcp]" +.venv/bin/codeclone-mcp --help +``` + +Global fallback: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +## Build and install the `.mcpb` bundle + +From the repository: + +```bash +cd extensions/claude-desktop-codeclone +node scripts/build-mcpb.mjs +``` + +In Claude Desktop: **Settings → Extensions → Install Extension** → select the +`.mcpb` from `dist/`. + +To bypass auto-discovery, set **CodeClone launcher command** in extension settings +to an absolute path to `codeclone-mcp`. + +## Configuration + +| Setting | Purpose | +|--------------------------------|-------------------------------------------------------------------------| +| **Workspace root path** | Optional absolute project root; launcher prefers that workspace `.venv` | +| **CodeClone launcher command** | Absolute path or bare command for `codeclone-mcp` | +| **Advanced launcher args** | JSON array of extra args (transport is always stdio) | + +## Read-only vs coordination writes + +The MCP server never mutates repository source, baselines, analysis cache, or +canonical reports. It may write ephemeral coordination state under +`.codeclone/intents/` (file backend) or `.codeclone/db/intents.sqlite3` +(SQLite backend), optional audit records when enabled, and Engineering +Memory **draft** rows through agent tools. Human approve/reject stays in VS Code +Memory or `codeclone memory approve --i-know-what-im-doing` (optional `--by NAME`). + +## First workflow + +```text +1. Use CodeClone to analyze this repository. +2. Declare a change intent before editing (start_controlled_change). +3. Show blast radius for the files you plan to change. +4. Edit within declared scope. +5. Finish the change intent with changed_files evidence. 
+``` + +Recipe pages: [MCP workflows](../../mcp/workflows/change-control.md), +[Change controller](../../../book/12-structural-change-controller/index.md). + +## Privacy + +Local wrapper only — no telemetry, no cloud sync, no remote listener. +See [Privacy Policy](https://orenlab.github.io/codeclone/privacy-policy/). + +## Development smoke + +```bash +cd extensions/claude-desktop-codeclone +npm run check +npm test +npm run pack +``` + +Contract reference: [Claude Desktop bundle](../../../book/integrations/claude-desktop-bundle.md). diff --git a/docs/guide/integrations/codex/setup.md b/docs/guide/integrations/codex/setup.md new file mode 100644 index 00000000..16b96eb4 --- /dev/null +++ b/docs/guide/integrations/codex/setup.md @@ -0,0 +1,84 @@ +# Codex setup + +## Install + +Install the plugin from the Codex marketplace: + +```bash +codex plugin marketplace add orenlab/codeclone-codex +codex plugin add codeclone@orenlab-codeclone +``` + +The first command registers the public marketplace repository. The second +installs the `codeclone` plugin from the marketplace named +`orenlab-codeclone`. + +Verify the configured marketplace and installed plugin: + +```bash +codex plugin marketplace list +codex plugin list +``` + +The plugin manifest version tracks the CodeClone package release line (currently +`2.1.0a1` in this monorepo). It describes the bundled guidance surface, not the +live MCP tool count — tools come from the resolved `codeclone-mcp` server. + +The plugin expects a local `codeclone-mcp` command. 
Install CodeClone with the +MCP extra in the workspace or globally: + +```bash +uv venv +uv pip install --python .venv/bin/python "codeclone[mcp]" +.venv/bin/codeclone-mcp --help +``` + +Global fallback: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +Manual MCP registration without the plugin: + +```bash +codex mcp add codeclone -- codeclone-mcp --transport stdio +``` + +## Skills + +### codeclone-review + +Full structural review: clone triage, changed-scope review, health-oriented +refactor planning. Starts conservative with default thresholds, supports +deeper follow-up with lowered thresholds and run comparison. + +### codeclone-hotspots + +Quick quality snapshot: health check, top risks, single-metric queries. +The cheapest useful path — `analyze_repository` then `get_production_triage`. + +### codeclone-change-control + +Intent-first change workflow for repository edits. Declares scope before +editing, maps blast radius, verifies the patch against the contract, generates +a review receipt, and validates cited review claims. This is the governance +skill — use it whenever the task requires changing files. + +### codeclone-engineering-memory + +Scope-aware Engineering Memory over MCP: `get_relevant_memory` (absolute +`root` required), `query_engineering_memory`, draft `record_candidate`, and +`finish(..., propose_memory=true)`. Complements change control — does not replace +intent declaration or patch verify. Human approve stays in the CodeClone VS Code +**Memory** view (not MCP). + +Optional **semantic search**: off by default in +`[tool.codeclone.memory.semantic]`; when enabled, install +`codeclone[semantic-local]` for local semantic-quality recall (or +`codeclone[semantic-lancedb]` for the diagnostic sidecar only), rebuild the index, then +`query_engineering_memory(mode=search, semantic=true)`. Default provider +`diagnostic` is deterministic, not semantic-quality embeddings; set +`embedding_provider = "fastembed"` for FastEmbed. 
See +[Engineering Memory](../../../book/13-engineering-memory/index.md). diff --git a/docs/guide/integrations/cursor/install-and-skills.md b/docs/guide/integrations/cursor/install-and-skills.md new file mode 100644 index 00000000..2f5bc9ad --- /dev/null +++ b/docs/guide/integrations/cursor/install-and-skills.md @@ -0,0 +1,197 @@ +# Cursor plugin + +## What ships in the plugin + +| Component | Path | Purpose | +|------------------------------------|---------------------------------|----------------------------------------------------------------------------------------------| +| `.cursor-plugin/plugin.json` | Manifest | `skills/`, `rules/`, `agents/`, `hooks/hooks.json`, `mcp.json` | +| `mcp.json` | MCP | `python3` + `./scripts/launch_mcp.py` — resolves `codeclone-mcp` (`.venv` → Poetry → `PATH`) | +| Skills (8) | `skills/*/` | See table below | +| Agent | `agents/structural-reviewer.md` | Invoke id: **`codeclone-structural-reviewer`** | +| Rules (3) | `rules/*.mdc` | See **Rules** | +| Hooks | `hooks/hooks.json` | Dispatches via `hooks/run_hook.py` (plugin manifest; optional project install) | +| `scripts/install-project-hooks.py` | Installer | Writes `.cursor/hooks.json` + `.cursor/codeclone-hooks.json` | +| `assets/` | Branding | Logo and icon | + +### Skills (directory vs chat command) + +Chat commands use the `name:` field in each `SKILL.md` (not always the folder +name on disk): + +| Folder on disk | Chat command (`name`) | Primary MCP flow | +|-------------------------------------|-------------------------------------|----------------------------------------------------------------------------| +| `production-triage/` | `/codeclone-production-triage` | `analyze_repository` → `get_production_triage` | +| `codeclone-hotspots/` | `/codeclone-hotspots` | `analyze_repository` → hotspots / `check_*` | +| `blast-radius/` | `/codeclone-blast-radius` | `analyze_repository` → `get_blast_radius` (read-only) | +| `codeclone-review/` | `/codeclone-review` | Full review 
loop (conservative first) | +| `codeclone-change-control/` | `/codeclone-change-control` | `start_controlled_change` → edit → `finish_controlled_change` | +| `codeclone-engineering-memory/` | `/codeclone-engineering-memory` | `get_relevant_memory`, `query_engineering_memory`, drafts | +| `codeclone-implementation-context/` | `/codeclone-implementation-context` | `get_implementation_context` after `start` | +| `codeclone-platform-observability/` | `/codeclone-platform-observability` | Maintainer-only: `query_platform_observability` (observer enable required) | + +Codex and Claude Code plugins ship six shared skills (includes +platform-observability) but **not** standalone production-triage or blast-radius +skills. + +## Install + +### Install from the Cursor marketplace + +The public storefront is +[orenlab/codeclone-cursor](https://github.com/orenlab/codeclone-cursor). + +If CodeClone is already listed in your marketplace panel, select **CodeClone**, +choose user or project scope, and install it. + +To expose the repository as a team marketplace: + +1. Open **Cursor Dashboard → Settings → Plugins**. +2. Under **Team Marketplaces**, select **Add Marketplace**. +3. Select **Import from Repo** and enter + `https://github.com/orenlab/codeclone-cursor`. +4. Add CodeClone, configure team access, and save. +5. Install CodeClone from Cursor's marketplace panel. + +Install `codeclone[mcp]` separately so the bundled launcher can resolve +`codeclone-mcp`: + +```bash +uv tool install "codeclone[mcp]" +codeclone-mcp --help +``` + +### Local development only + +Use a local symlink only while developing the plugin: + +```bash +ln -sfn /path/to/codeclone/plugins/cursor-codeclone ~/.cursor/plugins/local/codeclone +``` + +Reload Cursor after changing the local source. Do not present this path to +normal users as the installation flow. 
+ +### Project hooks (Hooks UI) + +```bash +uv run python plugins/cursor-codeclone/scripts/install-project-hooks.py +# full-repo gate: +uv run python plugins/cursor-codeclone/scripts/install-project-hooks.py --enforce-scope repo +``` + +Writes: + +- `.cursor/hooks.json` — shown in **Settings → Hooks** +- `.cursor/codeclone-hooks.json` — `enforce_scope` (`python` default, or `repo`) + +Do **not** commit generated files (machine-local Python paths). This monorepo +ignores `/.cursor/` in `.gitignore`. + +!!! note "Marketplace catalogs" + `.agents/plugins/marketplace.json` belongs to Codex. Cursor installs this + plugin from the `orenlab/codeclone-cursor` storefront through Cursor's own + marketplace UI. + +## Skills + +### codeclone-production-triage + +Two MCP calls: `analyze_repository` then `get_production_triage`. Baseline-relative +triage — not patch-local verify. Suggests `codeclone-review` for a deeper session. + +### codeclone-hotspots + +Cheapest ad-hoc snapshot after `analyze_repository`; prefer `list_hotspots` / +`check_*` before broad `list_findings`. Optional `help(topic="coverage")` when +Coverage Join semantics matter. + +### codeclone-blast-radius + +Read-only: `get_blast_radius` after analysis. Does **not** call +`start_controlled_change`. Use `codeclone-change-control` for edits. + +### codeclone-review + +Conservative-first full review; optional deeper pass with explicit user request. +Does not declare intent by itself. + +### codeclone-change-control + +Normal edit cycle uses workflow tools (not legacy-only atomic path): + +`analyze_repository` → `start_controlled_change` → `get_relevant_memory` → edit +in scope → `analyze_repository` (when after-run required) → optional +`record_candidate` → `finish_controlled_change`. + +Queue/recovery: `manage_change_intent` (`promote`, `recover`, …). Atomic +`check_patch_contract` / `create_review_receipt` are advanced/debug only when +workflow tools are unavailable. 
+ +### codeclone-implementation-context + +Bounded structural, call-graph, contract, and change-control evidence from one +stored MCP run. Call after `start_controlled_change` with `intent_id` before +editing scoped Python work. Read-only — does not declare intent. + +### codeclone-engineering-memory + +Scope memory before edits; optional `semantic=true` on `mode=search` when +`[tool.codeclone.memory.semantic]` is enabled, the semantic sidecar is installed, +and semantic index rebuild succeeded (`manage_engineering_memory` +`action=rebuild_semantic_index` or CLI `memory semantic rebuild`). Use +`codeclone[semantic-local]` +plus `embedding_provider = "fastembed"` for local semantic-quality recall; +`codeclone[semantic-lancedb]` alone supports only the deterministic diagnostic +provider. Human +approve/reject: VS Code **Memory** view (preferred) or CLI +`codeclone memory approve|reject|archive --i-know-what-im-doing` (MCP agents +cannot approve). + +Full contract: [Engineering Memory](../../../book/13-engineering-memory/index.md). + +### codeclone-platform-observability + +**Maintainer-only** — not for users reviewing their Python repository. + +Diagnose CodeClone's own runtime (MCP latency, DB cost, memory pipeline) via +`query_platform_observability` after **explicit** observer setup: + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +# restart codeclone-mcp / CLI with this env, reproduce, then query sections +``` + +Without enablement the tool returns `status=disabled` or `no_store`. Never treat +observer metrics as repository quality or edit authorization. + +Playbook: [Maintainer workflow](../../../guide/observability/maintainer-workflow.md). + +## Agent + +### codeclone-structural-reviewer + +Defined in `agents/structural-reviewer.md` with frontmatter `name: +codeclone-structural-reviewer`. Read-only review protocol; does not declare +intent or modify files. 
The structural reviewer agent uses CodeClone MCP tools +exclusively for evidence, does not modify files or declare change intent, and +does not treat report-only signals as CI failures or vulnerability claims. + +## Distribution + +- **Monorepo source:** `plugins/cursor-codeclone/` +- **Marketplace source:** `https://github.com/orenlab/codeclone-cursor` +- **Install:** Cursor marketplace panel; local symlink only for development +- **Standalone releases:** ship full `plugins/codeclone/scripts/launch_mcp.py` body + +## Runtime model + +Additive: local MCP via `launch_mcp.py`, eight skills, three rules (two +`alwaysApply` + one Python glob), optional hooks. The full default agent MCP +surface is passed through — the launcher does **not** +pass `--ide-governance-channel` (VS Code adds +2 IDE-only tools and Memory +governance). New server tools from upgraded `codeclone-mcp` pass through +unfiltered. + +Monorepo: `plugins/cursor-codeclone/scripts/launch_mcp.py` delegates to +`plugins/codeclone/scripts/launch_mcp.py`. Standalone releases must embed the +full launcher body. diff --git a/docs/guide/integrations/sarif/export.md b/docs/guide/integrations/sarif/export.md new file mode 100644 index 00000000..29d0b0f8 --- /dev/null +++ b/docs/guide/integrations/sarif/export.md @@ -0,0 +1,29 @@ +# SARIF export + +## Purpose + +Explain how CodeClone projects canonical findings into SARIF and what IDEs or +code-scanning tools can rely on. + +SARIF is a deterministic projection layer. The canonical source of truth +remains the report document. + +## What SARIF is good for here + +SARIF is useful as: + +- an IDE-facing findings stream +- a code-scanning upload format +- another deterministic machine-readable projection over canonical report data + +It is not the source of truth for: + +- report integrity digest +- gating semantics +- baseline compatibility + +## See also + +- [05. Report](../../../book/05-report.md) +- [06. 
HTML Render](../../../book/06-html-render.md) +- [Examples / Sample Report](../../../examples/report.md) diff --git a/docs/guide/integrations/vscode/setup.md b/docs/guide/integrations/vscode/setup.md new file mode 100644 index 00000000..c8e9426c --- /dev/null +++ b/docs/guide/integrations/vscode/setup.md @@ -0,0 +1,173 @@ +# VS Code setup + +## What it is for + +The extension helps you: + +- analyze the current workspace +- review changed files against a git diff +- start with a conservative first pass and lower thresholds only when you need + a more sensitive follow-up +- focus on new regressions and production hotspots first +- jump directly to source locations +- open canonical finding or remediation detail only when needed +- inspect current-run `Coverage Join` facts without inventing extension-local interpretations +- inspect report-only `Security Surfaces` as security-relevant boundary inventory +- inspect report-only Overloaded Module candidates without treating them like findings + +It does not create a second truth model and it does not mutate the repository. + +## Install requirements + +Install from the VS Code Marketplace: **`orenlab.codeclone`** (publisher +**orenlab**), or sideload a `.vsix` built from `extensions/vscode-codeclone`. + +The extension needs a local `codeclone-mcp` launcher and VS Code `1.120.0` or newer +(`engines.vscode` in `package.json`). + +Minimum supported CodeClone version: **`2.0.0`** (core analysis and change control). + +Engineering Memory features (Memory tree, search, governance, trajectory views) +require **`2.1.0a1` or newer** on the resolved `codeclone-mcp` launcher. + +In `auto` mode, the extension checks the current workspace virtualenv before falling back +to `PATH`. Runtime and version-mismatch messages identify that resolved launcher source. 
+ +Recommended install: + +```bash +uv tool install "codeclone[mcp]" +``` + +If you want the launcher inside the current environment instead: + +```bash +uv pip install "codeclone[mcp]" +``` + +Verify the launcher: + +```bash +codeclone-mcp --help +``` + +When you run the CLI inside an interactive VS Code terminal, CodeClone may also +show a one-time extension hint after the summary. It is suppressed in quiet, +CI, and non-interactive runs, and is remembered per CodeClone version next to +the resolved project cache path. + +## Main views + +### Overview + +Compact health, current run state, baseline drift, and next-best review action. +When the current run includes external Cobertura join facts, Overview also +shows a factual `Coverage Join` section sourced from canonical MCP metrics. +When MCP exposes `security_surfaces`, Overview also shows a compact report-only +`Security Surfaces` section. + +### Hotspots + +Primary operational view for: + +- new regressions +- production hotspots +- changed-files findings +- report-only Security Surfaces +- report-only Overloaded Module candidates + +### Runs & Session + +Session-local state: + +- local server availability +- current run identity +- reviewed findings +- MCP help topics, including the optional `coverage` topic on newer + CodeClone/MCP servers + +### Memory + +Engineering Memory inbox: draft records, stale list, status, refresh/sync +actions, and human approve/reject through the IDE governance channel +(`prepare_governance` / `commit_governance` with session HMAC attestation). The +extension launches MCP with `--ide-governance-channel` and registers a +`SecretStorage` governance key on connect. 
+ +## Review model + +The extension stays source-first: + +- `Review Priorities` and `Next Hotspot` / `Previous Hotspot` drive the review + loop +- `Reveal Source` is the default action for findings +- editor-local actions appear only when the current file matches the active + review target +- Explorer decorations stay lightweight and focus on new, production, or + changed-scope relevance +- report-only Security Surfaces stay source-first: reveal source, open compact + detail, or copy a review brief without promoting them to findings + +`Open in HTML Report` exists as an explicit bridge to the richer human report, +not as the primary IDE workflow. + +## Blast radius, session, and audit commands + +The extension also exposes structural change-controller helpers over MCP: + +- **Show Blast Radius** — `get_blast_radius` for a repo-relative file path +- **Copy Blast Radius Brief** — same payload formatted for review notes +- **Show Session Stats** / **Show Controller Audit Trail** — IDE-only MCP tools + (`get_workspace_session_stats`, `get_controller_audit_trail`) registered only + when the extension launches `codeclone-mcp` with `--ide-governance-channel`. + Payloads match CLI `--session-stats` and `--audit` via + `codeclone/controller_insights/`. +- **Clear Session** — `clear_session_runs` (in-memory runs, reviewed markers, + and workspace intent registry state for the MCP process) + +These commands require workspace trust and an active MCP connection. + +## Engineering Memory in the IDE + +- **Memory** view — draft inbox, approve/reject through the IDE governance + channel (`prepare_governance` / `commit_governance`), sync from run. +- **Search Engineering Memory** — QuickPick (`mode=search`; FTS + optional + semantic per `codeclone.memory.searchSemantic`, default **on** in the extension). +- **Memory for Active File** — `mode=for_path` for the active editor path. +- **Open Memory Search Panel** / **Refresh Memory Search** — results webview. 
+- **Configure Memory Search** — workspace wizard for semantic, drafts, stale, and + result limit (see the extension's **Engineering Memory search** settings). +- **Show Trajectory Dashboard** — projection health, quality/outcome aggregates, + anomalies, and recent trajectories. +- **Show Trajectory Detail** — full passport with quality/complexity + calculations, Patch Trail, contract gates, incidents, steps, and evidence. +- **Copy Trajectory Dashboard Brief** — Markdown summary for review notes. + +Server-side semantic search still requires `[tool.codeclone.memory.semantic] enabled`, +the semantic sidecar, and a successful rebuild (`manage_engineering_memory` +`action=rebuild_semantic_index` for MCP agents, or `codeclone memory semantic +rebuild` for CLI/CI). Install +`codeclone[semantic-local]` and set `embedding_provider = "fastembed"` for local +semantic-quality recall; `codeclone[semantic-lancedb]` alone can run only the +deterministic diagnostic provider. See +[Engineering Memory](../../../book/13-engineering-memory/index.md). +Trajectory semantics: +[Trajectory quality and passport](../../../book/13-engineering-memory/trajectory-quality-and-passport.md). + +## Open Triage + +**Open Triage** (`orenlab.codeclone.openTriage`) calls `get_production_triage` for +the current run before opening the markdown panel. Repeated opens reuse the cached +payload for 5 seconds when the run is unchanged and not marked stale; concurrent +opens share one in-flight request. + +## First-run path + +1. Open the `CodeClone` view container. +2. Run `Analyze Workspace`. +3. Use `Review Priorities` or `Review Changes`. +4. If the first pass looks clean but you want smaller repeated units, open + `Set Analysis Depth`. +5. Reveal source before opening deeper detail. + +If the launcher is missing, use `Open Setup Help` from the extension. 
diff --git a/docs/guide/mcp/README.md b/docs/guide/mcp/README.md new file mode 100644 index 00000000..fbb0e58e --- /dev/null +++ b/docs/guide/mcp/README.md @@ -0,0 +1,56 @@ + + +# MCP for AI Agents + +Use CodeClone through `codeclone-mcp` — same pipeline and report as the CLI. + +**Analysis truth is read-only:** MCP never mutates source, baselines, analysis +cache, or canonical reports. It **may** write session-local coordination +(workspace intents), Engineering Memory **drafts**, and optional audit rows when +enabled. Opt-in Platform Observability writes separate local development +telemetry and never becomes repository truth. + +Install: [Getting started — MCP extra](../../getting-started.md#install). + +!!! tip "Guide vs contract" + This section is **how to work** with MCP. Tool names, parameters, and response + shapes are normative in the [MCP interface contract](../../book/25-mcp-interface/index.md). + +## Setup + +| Step | Page | +|----------------------|-----------------------------------------------| +| Register a client | [Client setup](client-setup.md) | +| Launcher & transport | [Server & transport](server-and-transport.md) | +| Layer diagram | [Architecture](architecture.md) | +| Troubleshooting & issues | [MCP troubleshooting](troubleshooting.md) | + +## Workflows (recommended order) + +| Phase | Recipe | +|--------------------------|----------------------------------------------------------------------------| +| 1. Baseline-aware triage | [Analyze & triage](workflows/analyze-and-triage.md) | +| 2. Focused inspection | [Drill down & checks](workflows/drill-down-and-checks.md) | +| 3. Live code context | [Analyze & triage](workflows/analyze-and-triage.md#implementation-context) | +| 4. Governed edits | [Change control](workflows/change-control.md) | +| 5. Durable scope context | [Memory recipes](workflows/memory-recipes.md) | +| 6. 
Optional: coverage & session markers | [Coverage join & session markers](workflows/session-and-coverage.md) | + +**Maintainers only** (developing CodeClone itself — not user repo review): + +| Phase | Recipe | +|-------------------------------|-----------------------------------------------------------------------------| +| M. Observer setup & MCP drill | [Platform Observability recipes](workflows/observability-recipes.md) | +| M. Maintainer playbook | [Developing CodeClone with observer](../observability/maintainer-workflow.md) | + +## Reference shortcuts + +| Need | Page | +|---------------------------------|---------------------------------------------------------------------------------------| +| Prompt patterns | [Prompt patterns](prompts.md) | +| Payload field cheat sheet | [Payload cheatsheet](payload-cheatsheet.md) | +| Change control contract | [Structural Change Controller](../../book/12-structural-change-controller/index.md) | +| Implementation-context contract | [Implementation context](../../book/25-mcp-interface/tools/implementation-context.md) | +| `help()` topics | [Help topics](../../book/25-mcp-interface/tools/help-and-topics.md) | +| Engineering Memory contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | +| Runtime diagnostics (maintainer-only) | [Platform Observability](../observability/maintainer-workflow.md) | diff --git a/docs/guide/mcp/architecture.md b/docs/guide/mcp/architecture.md new file mode 100644 index 00000000..4ae4401f --- /dev/null +++ b/docs/guide/mcp/architecture.md @@ -0,0 +1,88 @@ + + +# MCP architecture + +## Where MCP fits + +MCP is an **integration surface**, not a second analyzer. It composes over the +same canonical pipeline and report contracts as the CLI and HTML report. 
+ +```mermaid +graph LR + A[Source Code] --> B[Core Pipeline] + B --> C[Canonical Report] + C --> D[CLI] + C --> E[HTML] + C --> F[MCP] + C --> G[SARIF] + style F stroke: #6366f1, stroke-width: 2px +``` + +## Session architecture + +Every `codeclone-mcp` process owns an isolated session. Session state lives +entirely in process memory and does not survive restart. + +```mermaid +graph TD + subgraph MCPSession["MCPSession (in-memory)"] + RS[Run Store
bounded history] + AI[Active Intents
change control] + RM[Review Markers
session-local] + BRC[Blast Radius Cache] + GR[Gate Results] + end + + subgraph Disk["Disk (coordination + optional sidecars)"] + WIR["Workspace Intent Registry
.codeclone/intents/ or intents.sqlite3"] + MEM["Engineering Memory SQLite
.codeclone/memory/"] + AUD["Audit trail (optional)
.codeclone/db/"] + OBS["Platform Observability (dev-only)
platform_observability.sqlite3"] + end + + MCPSession -->|" coordination + drafts "| Disk + MCPSession -->|" never writes "| BL[Baselines] + MCPSession -->|" never writes "| CA["Analysis cache (.codeclone/cache.json)"] + MCPSession -->|" never writes "| RP[Canonical reports] + MCPSession -->|" never writes "| SC[Source Files] + style BL fill: #fee2e2 + style CA fill: #fee2e2 + style RP fill: #fee2e2 + style SC fill: #fee2e2 +``` + +**Read-only contract (analysis truth):** MCP never mutates source files, +baselines, analysis cache, or canonical report artifacts. It **may** write +ephemeral workspace intent records, Engineering Memory **drafts** (human approval +required for promotion), optional audit evidence, and opt-in development +telemetry when enabled. Platform Observability remains separate from repository +findings, reports, gates, baselines, and memory facts. + +## Mixin chain + +`MCPSession` is composed from focused mixins (`codeclone/surfaces/mcp/session.py`). +In Python MRO, the **first** listed mixin wins method resolution — workflow tools +sit outermost. + +```mermaid +graph BT + STM["_MCPSessionStateMixin
runs, markers, gates, observability query"] + INS["_MCPSessionInsightsMixin
session stats, audit queries"] + BR["_MCPSessionBlastRadiusMixin"] + MM["_MCPSessionMemoryMixin"] + IM["_MCPSessionIntentMixin"] + PC["_MCPSessionPatchContractMixin"] + RR["_MCPSessionReviewReceiptMixin"] + CG["_MCPSessionClaimGuardMixin"] + WF["_MCPSessionWorkflowMixin
start/finish orchestration"] + S["MCPSession"] + STM --> INS --> BR --> MM --> IM --> PC --> RR --> CG --> WF --> S + style S stroke: #6366f1, stroke-width: 2px + style WF fill: #eff6ff + style MM fill: #ecfdf5 +``` + +New capabilities extend the chain by adding a mixin **before** `MCPSession` in +the class definition — not by editing lower layers. + +--- diff --git a/docs/guide/mcp/client-setup.md b/docs/guide/mcp/client-setup.md new file mode 100644 index 00000000..77600b4c --- /dev/null +++ b/docs/guide/mcp/client-setup.md @@ -0,0 +1,75 @@ + + +# MCP client setup + +## Client setup + +All clients use the same server. Only the registration format differs. + +=== "Claude Code" + + ```bash + claude plugin marketplace add orenlab/codeclone-claude-code + claude plugin install codeclone@orenlab-codeclone + ``` + + The native plugin supplies the MCP definition and CodeClone skills. See the + [Claude Code plugin guide](../integrations/claude-code/setup.md). + + Manual MCP registration without the plugin remains available: + + ```bash + claude mcp add --scope project codeclone -- codeclone-mcp --transport stdio + ``` + +=== "Codex" + + ```bash + codex plugin marketplace add orenlab/codeclone-codex + codex plugin add codeclone@orenlab-codeclone + ``` + + The native plugin includes the MCP definition and CodeClone skills. + Manual MCP registration without the plugin is also valid: + + ```bash + codex mcp add codeclone -- codeclone-mcp --transport stdio + ``` + + See [Codex plugin guide](../integrations/codex/setup.md). + +=== "Cursor" + + For the complete integration, import + `https://github.com/orenlab/codeclone-cursor` through + **Dashboard → Settings → Plugins → Team Marketplaces → Add Marketplace → + Import from Repo**, then install **CodeClone**. + + The bundled [Cursor plugin](../integrations/cursor/install-and-skills.md) + includes MCP registration, skills, rules, and project hooks. 
Manual + `.cursor/mcp.json` registration is covered under generic setup below, but + does not install the rest of that surface. + +=== "Claude Desktop" + + A local `.mcpb` bundle ships in `extensions/claude-desktop-codeclone/`. + See [Claude Desktop bundle guide](../integrations/claude-desktop/setup.md). + +=== "JSON config (generic)" + + ```json + { + "mcpServers": { + "codeclone": { + "command": "codeclone-mcp", + "args": ["--transport", "stdio"] + } + } + } + ``` + + Works with Copilot Chat, Gemini CLI, and other MCP-capable clients. + +If `codeclone-mcp` is not on `PATH`, use the full launcher path. + +--- diff --git a/docs/guide/mcp/payload-cheatsheet.md b/docs/guide/mcp/payload-cheatsheet.md new file mode 100644 index 00000000..120d7c34 --- /dev/null +++ b/docs/guide/mcp/payload-cheatsheet.md @@ -0,0 +1,38 @@ + + +# Payload cheat sheet + +!!! warning "Non-normative" + Normative conventions: [MCP payload conventions](../../book/25-mcp-interface/payload-conventions.md). + +## Payload conventions + +Short reference for response structure patterns across the tool surface. + +**IDs** — Run IDs are 8-char hex handles. Finding IDs are short prefixed +forms. Both accept the full canonical form as input. + +**Detail levels** — `summary` (default for lists), `normal` (default for +single finding), `full` (compatibility payload with URIs). + +**Pagination** — `list_findings` and +`get_report_section(section="metrics_detail")` support `offset` and `limit`. +`list_hotspots` supports `limit` and `max_results` only (no `offset`). + +**Changed-scope filters** — `list_findings`, `list_hotspots`, and +`generate_pr_summary` accept `changed_paths` or `git_diff_ref` for PR +projection. + +**Threshold context** — Empty `check_*` responses include +`threshold_context` showing whether the run is genuinely quiet or simply +below the active threshold. + +**Engineering Memory** — `get_relevant_memory` omits routine `run:*` trajectories +from `trajectories[]` by default. 
Use `query_engineering_memory` trajectory +modes with `filters.include_routine=true` to include them. Scoped retrieval +defaults to `detail_level=compact`; use `full` or +`query_engineering_memory(mode=get)` for complete payloads. + +… + +[Full reference →](../../book/25-mcp-interface/payload-conventions.md) diff --git a/docs/guide/mcp/prompts.md b/docs/guide/mcp/prompts.md new file mode 100644 index 00000000..ee60de25 --- /dev/null +++ b/docs/guide/mcp/prompts.md @@ -0,0 +1,42 @@ + + +# MCP prompt patterns + +## Prompt patterns + +Good prompts include **scope**, **goal**, and **constraint**: + +```text title="Health check" +Use codeclone MCP to analyze this repository. +Give me a concise structural health summary and the top findings to look at first. +``` + +```text title="Changed-files review" +Use codeclone MCP in changed-files mode for my latest edits. +Focus only on findings that touch changed files and rank them by priority. +``` + +```text title="Gate preview" +Run codeclone through MCP and preview gating with fail_on_new. +Explain the exact reasons. Do not change any files. +``` + +```text title="AI-generated code check" +I added code with an AI agent. Use codeclone MCP to check for new structural drift. +Separate accepted baseline debt from patch-local before/after regressions. +``` + +!!! tip "Best practices" + + - Use `analyze_changed_paths` for PRs, not full analysis. + - Prefer `get_run_summary` or `get_production_triage` as the first pass. + - Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. + - Use `get_finding` / `get_remediation` for one finding instead of raising + `detail_level` on larger lists. + - Pass an absolute `root` — MCP rejects relative roots like `.`. + - Use `coverage_xml` only with `analysis_mode="full"`. + - Use `source_kind="production"` (or `tests`, `fixtures`, `mixed`, `other`) to + cut test/fixture noise. + - Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. 
+ +--- diff --git a/docs/guide/mcp/server-and-transport.md b/docs/guide/mcp/server-and-transport.md new file mode 100644 index 00000000..892e6f9b --- /dev/null +++ b/docs/guide/mcp/server-and-transport.md @@ -0,0 +1,57 @@ + + +# MCP server & transport + +## Server + +### Transports + +| Transport | Default | Use case | +|-------------------|---------|---------------------------------| +| `stdio` | Yes | Local agents, IDEs, CLI clients | +| `streamable-http` | No | Remote clients, Responses API | + +```bash title="Local (default)" +codeclone-mcp --transport stdio +``` + +```bash title="HTTP (loopback)" +export CODECLONE_MCP_AUTH_TOKEN="$(openssl rand -hex 32)" +codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 +``` + +!!! warning "HTTP auth is mandatory" + `streamable-http` **always** requires `CODECLONE_MCP_AUTH_TOKEN` with at + least 32 characters. The server refuses to start without it — there is no + unauthenticated HTTP mode. Non-loopback hosts additionally require + `--allow-remote`. See + [Environment variable overrides](../../book/10-config-and-defaults.md#mcp-http-authentication) + and [Security Model](../../book/21-security-model.md#remote-mcp-transport). 
+ +### Server flags + +| Flag | Default | Applies when | Effect | +|----------------------------|---------|-------------------|--------------------------------------------------------| +| `--history-limit` | `4` | all transports | In-memory run retention (`1`–`10`) | +| `--json-response` | on | `streamable-http` | JSON responses for Streamable HTTP | +| `--stateless-http` | on | `streamable-http` | Stateless Streamable HTTP mode | +| `--debug` | off | all transports | FastMCP debug mode | +| `--log-level` | `INFO` | all transports | `DEBUG`, `INFO`, `WARNING`, `ERROR`, or `CRITICAL` | +| `--allow-remote` | off | `streamable-http` | Bind non-loopback hosts (auth still required) | +| `--ide-governance-channel` | off | all transports | VS Code only — registers session-stats and audit tools | + +`--host` (default `127.0.0.1`) and `--port` (default `8000`) apply to +`streamable-http` only. Agent launchers must not pass `--ide-governance-channel`. + +### Run retention + +Run history is bounded: default `4`, max `10` (`--history-limit`). +Runs are in-memory only and do not survive process restart. + +### Absolute roots + +All analysis tools require an **absolute** repository root. Relative roots +like `.` are rejected because the server working directory may differ from +the client workspace. + +--- diff --git a/docs/guide/mcp/troubleshooting.md b/docs/guide/mcp/troubleshooting.md new file mode 100644 index 00000000..d004bff7 --- /dev/null +++ b/docs/guide/mcp/troubleshooting.md @@ -0,0 +1,97 @@ + + +# MCP troubleshooting + +When MCP setup, tool calls, or change-control responses fail — start here. +Install and transport basics: +[Client setup](client-setup.md), +[Server & transport](server-and-transport.md). + +Normative contracts: +[MCP interface](../../book/25-mcp-interface/index.md), +[Change controller](../../book/12-structural-change-controller/index.md). 
+ +## Install and launcher + +| Symptom | Fix | +|---------|-----| +| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` or `pip install 'codeclone[mcp]'` | +| Client cannot find `codeclone-mcp` | Install the extra above, or point `command` at the full launcher path in MCP config | +| Wrong / missing tools after upgrade | Restart the MCP process; confirm `codeclone --version` matches the client bundle | +| Plugin installed but MCP silent | Check client MCP logs; verify stdio command is `codeclone-mcp --transport stdio` | + +## Transport and HTTP + +| Symptom | Fix | +|---------|-----| +| HTTP server refuses to start | Set `CODECLONE_MCP_AUTH_TOKEN` to ≥32 characters before launch — no unauthenticated HTTP | +| Remote client cannot connect | Use `streamable-http`; pass Bearer token; for non-loopback hosts add `--allow-remote` | +| Client only accepts remote MCP | See [Server & transport](server-and-transport.md#transports) — stdio for local IDEs | + +## Analysis parameters + +| Symptom | Fix | +|---------|-----| +| `requires an absolute repository root` | Pass full path (`/Users/.../repo`), not `.` or a relative segment | +| `Repository root '…' does not exist` | Fix typo; ensure the path is the repo root on the machine running MCP | +| `path traversal not allowed` | Use repo-relative paths inside tools; do not pass `../` escapes | +| `changed_paths` rejected | Pass `list[str]` of repo-relative file paths, or use `git_diff_ref` | +| `analyze_changed_paths` fails | Provide **either** `changed_paths` **or** `git_diff_ref` — one of them is required | +| `cache_policy='refresh' is CLI-only` | MCP accepts `reuse` (default) or `off` only | +| `coverage_xml requires analysis_mode='full'` | Set `analysis_mode="full"` before joining Cobertura XML | +| Stale or wrong findings | Call `analyze_repository` again; runs are in-memory and bounded (`--history-limit`) | + +## Session and workflow state + +| Symptom | Fix | +|---------|-----| +| Agent
reads results from an old run | Re-analyze, or pass the explicit `run_id` you intend | +| Review markers out of sync | `mark_finding_reviewed` + `list_findings(exclude_reviewed=true)`; markers are session-local | +| Need a clean MCP session | `clear_session_runs` — also clears workspace intents; see [Session markers](workflows/session-and-coverage.md#session-review-loop-in-memory-markers) | +| Process restarted — intents gone | Expected: intent registry is ephemeral; re-run `analyze_repository` → `start_controlled_change` | + +## Change control responses + +| `status` / message | What to do | +|--------------------|------------| +| `needs_analysis` | Call `analyze_repository(root=)` before `start_controlled_change` | +| `queued`, `edit_allowed: false` | Another intent is active — `manage_change_intent(action="promote")` or narrow scope | +| `blocked`, dirty scope overlap | Inspect git diff; commit/stash/revert, narrow scope, or `dirty_scope_policy="continue_own_wip"` for own WIP | +| `finish` → `unverified` | Follow `next_step` in the response (often a new after-run + same `intent_id`) | +| `finish` → `violated` | Fix scope or regressions; or `start` again with expanded `allowed_files` | +| Foreign intent overlap | Coordinate with the user — do not kill foreign PIDs without confirmation | + +Full workflow: +[Change control](workflows/change-control.md). + +## Engineering Memory + +| Symptom | Fix | +|---------|-----| +| `get_relevant_memory` fails without `root` | Always pass the same absolute `root` as analysis — `intent_id` alone is invalid | +| Empty memory on first use | Normal — `bootstrap_if_missing` ingests on first scoped call after `analyze_repository` | +| Cannot approve drafts via MCP | By design — use VS Code **Memory** view; agents only `record_candidate` | + +## Quick diagnostic checklist + +1. `codeclone --version` and `codeclone-mcp --help` succeed on the host that runs MCP. +2. 
`root` is **absolute** and points at the repository the client has open. +3. `analyze_repository` → `get_run_summary` works before deeper tools. +4. Call `help(topic="engineering_memory")` or `help(topic="change_control")` for the contract text. +5. Enable server debug: `codeclone-mcp --log-level DEBUG` (stdio clients: check MCP stderr). + +## Report a bug or false positive + +If the steps above do not match what you see, open a GitHub issue: + +**[github.com/orenlab/codeclone/issues](https://github.com/orenlab/codeclone/issues)** + +Include: + +- CodeClone version (`codeclone --version`) +- Client (Cursor, Codex, Claude Code, VS Code, Claude Desktop, other) +- Transport (`stdio` or `streamable-http`) +- Tool name and parameters (redact tokens and private paths) +- Full error text or MCP log excerpt + +--- diff --git a/docs/guide/mcp/workflows/analyze-and-triage.md b/docs/guide/mcp/workflows/analyze-and-triage.md new file mode 100644 index 00000000..1ba6cd79 --- /dev/null +++ b/docs/guide/mcp/workflows/analyze-and-triage.md @@ -0,0 +1,127 @@ + + +# Analyze & triage + +### Phase 1: Analyze + +| Tool | Purpose | +|-------------------------|---------------------------------------------------| +| `analyze_repository` | Full deterministic analysis of one repo root | +| `analyze_changed_paths` | Diff-aware analysis with changed-files projection | + +Both register the result as an in-memory run. All other tools read from +stored runs. + +### Phase 2: Triage + +| Tool | Purpose | +|-------------------------|------------------------------------------------------------| +| `get_run_summary` | Cheapest snapshot: health, findings, baseline status | +| `get_production_triage` | Production-first view: hotspots, suggestions, thresholds | +| `list_hotspots` | Priority-ranked hotspot views by kind | +| `compare_runs` | Run-to-run delta: regressions, improvements, health change | + +!!! tip "Start here" + After analysis, call `get_run_summary` or `get_production_triage` first.
+ Prefer `list_hotspots` or `check_*` before broad `list_findings` calls. + +### Workspace hygiene tips + +Selected MCP responses may include a non-blocking `tips[]` array with +structured workspace guidance. The first tip checks whether the repository +root `.gitignore` covers `.codeclone/` (or the broader `.cache/` tree). + +| Field | Example | +|-------------------|-----------------------------| +| `id` | `gitignore-codeclone-cache` | +| `severity` | `info` | +| `category` | `workspace_hygiene` | +| `suggested_entry` | `.codeclone/` | + +Tips are advisory only — not findings, gates, or failures. MCP never edits +`.gitignore` automatically; agents must declare scope before changing it. + +Surfaces: `analyze_repository`, `get_run_summary`, `get_production_triage`, +`start_controlled_change`, and the CLI after a normal interactive analysis run +(suppressed in `--quiet`, CI, and non-TTY contexts). + +## Health check + +``` +analyze_repository(root=) + -> get_run_summary or get_production_triage + -> list_hotspots or check_* + -> get_finding -> get_remediation +``` + +## PR review + +``` +analyze_changed_paths(root=, changed_paths=[...] or git_diff_ref="HEAD~1") + -> list_findings(sort_by="priority") + -> get_finding -> get_remediation + -> generate_pr_summary +``` + +## Implementation context + +After analysis and triage, ask for bounded context around the files you expect +to inspect: + +``` +get_implementation_context( + root=, + paths=["codeclone/surfaces/mcp/service.py"], + mode="implementation", +) +``` + +The response combines canonical structural facts with a live freshness delta. +Use `context_artifact_digest` to identify the source context artifact and +`context_projection_digest` when citing the exact bounded response. If +`freshness.status` is `drifted`, analyze again. This step informs scope; only +`start_controlled_change` can return `edit_allowed=true`. + +With no explicit subject, the tool resolves current work deterministically: + +1. 
active intent `allowed_files`; +2. otherwise the bounded live git-dirty set; +3. otherwise `status="no_current_work"`. + +Use `changed_scope=true` to request the dirty set explicitly. Do not combine it +with `paths` or `symbols`. + +Exact qualnames are also valid subjects: + +``` +get_implementation_context( + root=, + symbols=["codeclone.surfaces.mcp.service:CodeCloneMCPService"], + mode="implementation", +) +``` + +Symbol resolution uses the analyzed Unit inventory plus public API rows. +Inspect both `subject.resolved_symbols` and `subject.unresolved_symbols`; +CodeClone reports unknown qualnames instead of inferring a likely match. + +Structural import, importer, and test-importer roles appear as collapsed +`related_modules` entries with explicit `relations`. Read each collection's +summary: the global budget is shared across the response. Safety context is +allocated first; `safety_context_overflow` means even the hard cap could not +show every safety entry. + +Once an intent is active, pass its `intent_id` with the same explicit paths. +The response then shows the declared scope, review context, do-not-touch +boundaries, and guards beside lane-separated memory evidence. Use +`mode="impact"` when you need transitive dependency context and +baseline-sensitive findings. The context tool mirrors authorization evidence; +it does not grant or widen authorization. 
+ +### Change control tool tiers + +| Tier | Tools | When to use | +|------|-------|-------------| +| Normal workflow | `analyze_repository`, `start_controlled_change`, `finish_controlled_change` | Every edit cycle | +| Queue/recovery | `manage_change_intent` (promote, recover, reset, renew) | Multi-agent coordination, crash recovery | +| Advanced/diagnostic | `get_blast_radius`, `check_patch_contract`, `validate_review_claims`, `create_review_receipt` | Deep inspection, step-by-step debugging | diff --git a/docs/guide/mcp/workflows/change-control.md b/docs/guide/mcp/workflows/change-control.md new file mode 100644 index 00000000..7085f4a0 --- /dev/null +++ b/docs/guide/mcp/workflows/change-control.md @@ -0,0 +1,46 @@ + + +# Change control workflow + +Primary MCP edit cycle (sole sequence diagram for change control in the guide): + +```mermaid +sequenceDiagram + participant Agent + participant MCP as CodeClone MCP + Agent ->> MCP: analyze_repository(root=) + MCP -->> Agent: run_id + Agent ->> MCP: start_controlled_change(root=, scope, intent, dirty_scope_policy?) + MCP -->> Agent: intent_id, blast_radius, budget, edit_allowed + Agent ->> MCP: get_relevant_memory(root, scope|intent_id) + MCP -->> Agent: ranked memory context + Agent ->> MCP: get_implementation_context(root, paths, intent_id?) + MCP -->> Agent: bounded structural context + Note over Agent: edit files + opt Python structural / governance config + Agent ->> MCP: analyze_repository + MCP -->> Agent: after_run_id + end + Agent ->> MCP: finish_controlled_change(intent_id, changed_files|diff_ref, after_run_id?, claims_text?) 
+ MCP -->> Agent: status, summary, workspace_hygiene_after, intent_cleared +``` + +## Tool tiers + +| Tier | Tools | When | +|----------------|-------------------------------------------------------|---------------------| +| Normal | `start_controlled_change`, `finish_controlled_change` | Every edit cycle | +| Queue/recovery | `manage_change_intent` (promote, recover, …) | Multi-agent / crash | +| Advanced | `get_blast_radius`, `check_patch_contract`, … | Debugging only | + +Normative tool params: [MCP workflow tools](../../../book/25-mcp-interface/tools/workflow.md). +Finish pipeline and +hygiene: [finish_controlled_change](../../../book/12-structural-change-controller/finish-controlled-change.md), +[Finish hygiene](../../../book/12-structural-change-controller/finish-hygiene.md). + +## Related recipes + +- [Agent edit cycle](../../change-control/agent-cycle.md) +- [Queue & recovery](../../change-control/queue-and-recovery.md) +- [Atomic debug path](../../change-control/atomic-debug.md) +- [Engineering Memory recipes](memory-recipes.md) diff --git a/docs/guide/mcp/workflows/drill-down-and-checks.md b/docs/guide/mcp/workflows/drill-down-and-checks.md new file mode 100644 index 00000000..5ec06eaf --- /dev/null +++ b/docs/guide/mcp/workflows/drill-down-and-checks.md @@ -0,0 +1,27 @@ + + +# Drill down & focused checks + +### Phase 3: Drill down + +| Tool | Purpose | +|-----------------------|-------------------------------------------------------------| +| `list_findings` | Filtered, paginated findings with novelty and scope filters | +| `get_finding` | Single finding detail by short or canonical ID | +| `get_remediation` | Remediation and explainability for one finding | +| `get_report_section` | Read report sections; `metrics_detail` is paginated | +| `evaluate_gates` | Preview CI gating decisions without mutating state | +| `generate_pr_summary` | PR-friendly markdown or JSON summary | + +### Phase 4: Focused checks + +Narrow queries over a single quality 
dimension. Cheaper than `list_findings` +when you know which dimension to inspect. + +| Tool | Dimension | +|--------------------|--------------------------------| +| `check_clones` | Clone groups | +| `check_complexity` | Cyclomatic complexity hotspots | +| `check_coupling` | Afferent/efferent coupling | +| `check_cohesion` | Module cohesion | +| `check_dead_code` | Dead code candidates | diff --git a/docs/guide/mcp/workflows/memory-recipes.md b/docs/guide/mcp/workflows/memory-recipes.md new file mode 100644 index 00000000..d936b5c0 --- /dev/null +++ b/docs/guide/mcp/workflows/memory-recipes.md @@ -0,0 +1,91 @@ + + +# Engineering Memory recipes (MCP) + +Ranked scope context and governed drafts — **not** a second analyzer. Normative +tool shapes: [Engineering Memory MCP surface](../../../book/13-engineering-memory/mcp-surface.md). + +Session-local review markers live in +[Coverage join & session markers](session-and-coverage.md). + +## 1. Bootstrap before first scoped retrieval + +When the store is missing, default `mcp_sync_policy=bootstrap_if_missing` ingests +from the latest MCP run on the first scoped `get_relevant_memory`. + +| Step | Tool / action | +|--------------------------|------------------------------------------------------------------| +| Analyze | `analyze_repository(root=)` | +| Optional explicit ingest | `manage_engineering_memory(action=refresh_from_run, root=)` | +| Offline init | `codeclone memory init` (CI/offline; same ingest contract) | + +## 2. Scope context after `start_controlled_change` + +Call only after `edit_allowed=true`. **`root` is required** (same absolute path +as analysis). + +```text +get_relevant_memory(root=, intent_id=) + # or scope=["path/to/file.py", ...] +``` + +Read `memory_sync`, stale warnings, and `contradiction_note` entries before editing. +Do not treat `draft` / `inferred` rows as established facts. + +## 3. 
Draft observations during the cycle + +```text +manage_engineering_memory( + action=record_candidate, + root=, + record_type=risk_note | change_rationale | ..., + statement="", + subject_path="path/to/main/file.py", +) +``` + +Agents **cannot** `approve` / `reject` / `archive` via MCP. Humans promote drafts +in the VS Code Memory view or with +`codeclone memory approve --i-know-what-im-doing` (optional `--by NAME`). + +## 4. Finish proposals + +On accepted finish: + +```text +finish_controlled_change(..., propose_memory=true) +``` + +Returns `memory_candidates`, `memory_staleness`, `memory_coverage_delta`, and may +enqueue projection rebuild when configured. + +## 5. Search and drill-down + +| Goal | Call | +|----------------------|------------------------------------------------------------------------------------------------| +| Keyword search | `query_engineering_memory(mode=search, query=..., root=, filters={match_mode: any\|all})` | +| Semantic blend | same + `semantic=true` when semantic index is built | +| One path | `query_engineering_memory(mode=for_path, path=..., root=)` | +| Trajectory detail | `query_engineering_memory(mode=trajectory_get, record_id=, root=)` | +| Trajectory dashboard | `query_engineering_memory(mode=trajectory_dashboard, root=)` | +| Playbook | `help(topic=engineering_memory)` | + +## 6. Semantic index maintenance + +When `[tool.codeclone.memory.semantic] enabled=true`: + +```text +manage_engineering_memory(action=rebuild_semantic_index, root=) +``` + +Contract: [Semantic search](../../../book/13-engineering-memory/search-semantic.md). + +## 7. Trajectory and Experience evidence + +Scoped `get_relevant_memory` keeps governed records, trajectory precedents, and +advisory Experiences in separate response lanes. Inspect the workflow in +[Trajectories and Experiences](../../memory/trajectories-and-experiences.md); +use `promote_experience` only when a recurring pattern deserves human review as +a draft memory record. 
+ +--- diff --git a/docs/guide/mcp/workflows/observability-recipes.md b/docs/guide/mcp/workflows/observability-recipes.md new file mode 100644 index 00000000..b46601b5 --- /dev/null +++ b/docs/guide/mcp/workflows/observability-recipes.md @@ -0,0 +1,107 @@ +# Platform Observability recipes (MCP) + + + +**Maintainer-only.** These recipes apply when you develop **CodeClone itself** +(MCP server, CLI instrumentation, memory pipelines, observer storage). They do +**not** help users analyze their Python repositories — for that use +[Analyze & triage](analyze-and-triage.md) and [Change control](change-control.md). + +Prerequisite: [Explicit observer enable](../../observability/maintainer-workflow.md#explicit-enable-required). + +Skill: `/codeclone-platform-observability` in bundled plugins. + +## 0. Confirm you need this surface + +| Question | Tool | +|-------------------------------------------------|------------------------------------------------------------| +| Health / clones / metrics of **user repo** | `get_production_triage`, `check_*` — **not** observability | +| Slow **CodeClone** MCP handler or DB during dev | `query_platform_observability` | +| Patch verify / edit scope | change-control workflow — **not** observability | + +If the user is not a CodeClone maintainer, **do not** call +`query_platform_observability`. + +## 1. Enable observer on the producing process + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +# restart codeclone-mcp (or CLI) with this env in the same shell / IDE config +``` + +Re-run the workflow under test. Without enablement every section returns +`status=disabled` or `status=no_store`. + +## 2. Read contract + +```text +help(topic="observability", detail="normal") +``` + +Covers sections, anti-inference rules, and inert disabled states. + +## 3. Start broad + +```json +{ + "root": "/absolute/path/to/codeclone", + "section": "summary", + "window": "latest", + "detail_level": "compact" +} +``` + +Tool: `query_platform_observability`. 
+ +Follow `recommended_next_sections` in the response — **one section per call**. + +## 4. Common drill paths + +### Slow MCP session + +1. `summary` +2. `slow_operations` +3. `mcp_tool_matrix` +4. `correlated_chains` (if multi-step) + +### Memory / semantic rebuild cost + +1. `summary` +2. `memory_pipeline_cost` +3. `db_cost` (if SQL-heavy) + +### Pipeline analysis cost + +1. `summary` +2. `pipeline` +3. `costly_noops` + +### One workflow across CLI + MCP + worker + +1. Reproduce with shared correlation (same env-enabled processes) +2. `correlated_chains` with `window=` when known + +## 5. Interpretation rules (mandatory) + +- Audience is **CodeClone development** — the envelope states this explicitly. +- Metrics are diagnostic hints, not findings or vulnerabilities. +- Do **not** tell end users their repo is unhealthy based on observer output. +- Do **not** use observer data in `finish_controlled_change` claims or review + receipts about repository quality. + +## 6. Human full trace + +MCP sections are bounded (≤50 rows). For waterfall HTML, maintainers run the CLI +locally: + +```bash +codeclone observability trace --root . --html /tmp/codeclone-observer.html +``` + +Agents should not substitute CLI output for repository analysis. + +## Related + +- [Maintainer workflow](../../observability/maintainer-workflow.md) +- [Tool contract](../../../book/25-mcp-interface/tools/platform-observability.md) +- [Help topic catalog](../../../book/25-mcp-interface/tools/help-and-topics.md) diff --git a/docs/guide/mcp/workflows/session-and-coverage.md b/docs/guide/mcp/workflows/session-and-coverage.md new file mode 100644 index 00000000..4b69cc3b --- /dev/null +++ b/docs/guide/mcp/workflows/session-and-coverage.md @@ -0,0 +1,70 @@ + + +# Coverage join & session review markers + +Two **optional** MCP workflows that most agents skip on the first pass: + +1. **Coverage Join** — attach an external Cobertura XML from your test run and + preview untested-hotspot gating. +2.
**Session review markers** — track which findings you already triaged inside + one long MCP process. + +Start with [Analyze & triage](analyze-and-triage.md) for health checks and PR +review. Use this page when you already have a coverage artifact or a long +finding backlog in the same chat session. + +Normative tool shapes: +[session tools](../../../book/25-mcp-interface/tools/session-and-memory.md), +[analysis & gates](../../../book/25-mcp-interface/tools/analysis.md), +[Coverage Join config](../../../book/10-config-and-defaults.md). + +## Coverage Join (Cobertura + gates) + +Join measured coverage to function hotspots for the **current run only**. Coverage +Join does not update baseline, cache, or canonical report persistence. + +| Requirement | Detail | +|-------------|--------| +| Analysis mode | `analysis_mode="full"` — `coverage_xml` is rejected in `clones_only` | +| Input | Cobertura XML path on `analyze_repository` (`coverage_xml`) | +| Typical follow-up | `get_report_section(section="metrics_detail", family="coverage_join")` | +| Gate preview | `evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50)` | + +``` +analyze_repository(root="/absolute/path/to/repo", coverage_xml="coverage.xml") + -> get_report_section(section="metrics_detail", family="coverage_join") + -> evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50) +``` + +!!! tip "CLI equivalent" + `codeclone --coverage coverage.xml --fail-on-untested-hotspots` uses the same + join semantics. MCP `evaluate_gates` previews exit reasons without mutating + repository state. + +## Session review loop (in-memory markers) + +MCP keeps run snapshots and **session-local** reviewed markers in the server +process. They survive across tool calls but disappear on process restart — not +Engineering Memory, not baseline truth. + +Use when triaging many findings in one agent session: mark handled items, then +filter them out on the next pass.
+ +| Tool | Purpose | +|------|---------| +| `mark_finding_reviewed` | Mark one finding reviewed (optional `note`) | +| `list_findings(exclude_reviewed=true)` | Omit findings already marked in this session | +| `list_reviewed_findings` | List markers for audit | +| `clear_session_runs` | Reset in-memory runs, markers, and workspace intent registry | + +``` +list_findings + -> get_finding -> mark_finding_reviewed + -> list_findings(exclude_reviewed=true) -> ... + -> clear_session_runs # full session reset — also clears active intents +``` + +For durable facts across sessions, use [Memory recipes](memory-recipes.md) +instead of review markers. + +--- diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md new file mode 100644 index 00000000..5d71e2df --- /dev/null +++ b/docs/guide/memory/overview.md @@ -0,0 +1,19 @@ + + +# Engineering Memory overview + +Local SQLite store of evidence-linked repository facts. Complements change +control with scoped context before edits. + +| Task | Page | +|----------------------------|----------------------------------------------------------------------------------------------| +| Bootstrap / sync | [MCP memory recipes](../mcp/workflows/memory-recipes.md) | +| MCP contract | [Engineering Memory](../../book/13-engineering-memory/index.md) | +| Trajectories / Experiences | [Practical guide](trajectories-and-experiences.md) | +| Trajectory contract | [Projection and Patch Trail](../../book/13-engineering-memory/trajectory-and-patch-trail.md) | +| Quality passport | [Quality and analytics](../../book/13-engineering-memory/trajectory-quality-and-passport.md) | +| Experience contract | [Experience Layer](../../book/13-engineering-memory/experience-layer.md) | + +Humans **approve** drafts through the VS Code Memory view **or** +`codeclone memory approve --i-know-what-im-doing` (optional `--by NAME`). Neither +path is an MCP agent tool.
diff --git a/docs/guide/memory/trajectories-and-experiences.md b/docs/guide/memory/trajectories-and-experiences.md new file mode 100644 index 00000000..939bcdb2 --- /dev/null +++ b/docs/guide/memory/trajectories-and-experiences.md @@ -0,0 +1,77 @@ +# Work with Trajectories and Experiences + + + +Engineering Memory exposes two evidence layers beyond curated records: + +- trajectories reconstruct what happened during agent work; +- Experiences distill recurring patterns across those trajectories. + +Neither layer grants permission to edit. Use them to prepare and review work, +then use change control for authorization. + +```mermaid +flowchart LR + A["Audit evidence"] --> B["Trajectories"] + B --> C["Quality passport and anomalies"] + B --> D["Experience distillation"] + D --> E["Scoped advisory patterns"] + E --> F["Optional draft promotion"] + F --> G["Human governance"] +``` + +## Inspect trajectory health + +```bash +codeclone memory trajectory status --root . +codeclone memory trajectory dashboard --root . +codeclone memory trajectory anomalies --root . +codeclone memory trajectory agents --root . +``` + +Routine run projections are hidden by default. Add `--include-routine` when +you are diagnosing those workflows too. + +Search and inspect one trajectory: + +```bash +codeclone memory trajectory search "verification" --root . +codeclone memory trajectory show TRAJECTORY_ID --root . +``` + +The detail view explains the quality score, complexity band, incidents, +anomalies, evidence, and patch-trail verification. + +## Rebuild projections + +```bash +codeclone memory trajectory rebuild --root . +codeclone memory jobs run-once --root . +``` + +The background projection job refreshes trajectory, semantic, and Experience +projections in that execution order. See +[Projection jobs](../../book/13-engineering-memory/projection-jobs.md). 
+ +## Retrieve Experiences + +Experiences are returned automatically by scoped memory retrieval when their +directory family matches the requested scope. They are kept separate from +memory records and trajectory precedents so callers cannot confuse advisory +patterns with governed facts. + +Through MCP, call `get_relevant_memory` with `scope` or an active `intent_id`. +The response may include: + +- `records`: governed memory records; +- `trajectories`: relevant precedents; +- `experiences`: recurring project patterns. + +To inspect a known Experience in full, use the Engineering Memory query +surface. To turn it into a reviewable draft, use +`manage_engineering_memory(action="promote_experience", experience_id="...")`. +Promotion is idempotent and does not approve the draft. + +The normative contracts +are [Trajectory quality and passport](../../book/13-engineering-memory/trajectory-quality-and-passport.md) +and [Experience layer](../../book/13-engineering-memory/experience-layer.md). diff --git a/docs/guide/observability/diagnostics.md b/docs/guide/observability/diagnostics.md new file mode 100644 index 00000000..6f1b30c3 --- /dev/null +++ b/docs/guide/observability/diagnostics.md @@ -0,0 +1,102 @@ +# Diagnose CodeClone with Platform Observability + + + +!!! warning "Maintainer-only — not for end users" + Platform Observability diagnoses **CodeClone's own runtime** while you + **develop CodeClone**. It does **not** help users analyze their Python + repositories (clones, health, gates, MCP review). For that, use the + [MCP guide](../mcp/README.md). + + Observation is **disabled by default** and requires explicit environment + setup before any data exists. See + [Maintainer workflow](maintainer-workflow.md). + + Platform Observability is for diagnosing CodeClone itself: slow MCP calls, + projection work, database query cost, redundant work, and correlated + CLI/MCP/worker activity. It is not a repository quality report. 
+ + The normative contract is + [Platform Observability](../../book/26-platform-observability.md). + + Full maintainer playbook: + [Developing CodeClone with Platform Observability](maintainer-workflow.md). + +## Enable it locally + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +``` + +Run the CodeClone workflow you want to inspect, then query the local store: + +```bash +codeclone observability trace --root . +``` + +For optional process metrics: + +```bash +uv pip install "codeclone[perf]" +export CODECLONE_OBSERVABILITY_PROFILE=1 +``` + +In CI, observation remains off unless it is explicitly enabled: + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +``` + +`CODECLONE_OBSERVABILITY_FORCE=1` is an explicit CI-gate override but never +enables collection by itself. + +## Render the cockpit + +```bash +codeclone observability trace \ + --root . \ + --last 50 \ + --html /tmp/codeclone-observer.html +``` + +The self-contained page visualizes: + +```mermaid +flowchart LR + A["Operation chains"] --> B["Span waterfall"] + B --> C["Pipeline and memory costs"] + C --> D["MCP and DB aggregates"] + D --> E["SQL fingerprints and no-op hints"] +``` + +Use `--operation` to isolate one operation or `--correlation` to follow a +workflow across process boundaries. Use `--json` for a machine-readable export. + +## Query through MCP + +Start broad: + +```json +{ + "root": "/absolute/repository", + "section": "summary", + "window": "latest", + "detail_level": "compact" +} +``` + +Then select one bounded section such as `slow_operations`, `db_cost`, +`memory_pipeline_cost`, `mcp_tool_matrix`, or `correlated_chains`. + +Do not infer repository quality from these numbers. High database activity +means CodeClone executed database work; it does not mean the analyzed project +has a database problem. See +[MCP observability tool](../../book/25-mcp-interface/tools/platform-observability.md). + +## Local data lifecycle + +The store is `.codeclone/db/platform_observability.sqlite3`. 
CodeClone does not +send it to a remote telemetry service. Automatic pruning is not currently +enforced, so remove the file when you no longer need the diagnostics. + +Raw prompts, payload bodies, and SQL literals are not stored. diff --git a/docs/guide/observability/maintainer-workflow.md b/docs/guide/observability/maintainer-workflow.md new file mode 100644 index 00000000..95e837f7 --- /dev/null +++ b/docs/guide/observability/maintainer-workflow.md @@ -0,0 +1,120 @@ +# Developing CodeClone with Platform Observability + + + +Platform Observability is **maintainer tooling only**. It helps people who +**build and debug CodeClone itself** — not users who run CodeClone against their +own Python projects. + +If you want structural review, clones, health score, or CI gates for **your** +repository, use the normal CLI/MCP workflow ([MCP guide](../mcp/README.md), +[Production triage](../mcp/workflows/analyze-and-triage.md)). Observer data will +not answer those questions and must never be treated as repository quality +evidence. + +Normative contract: [Platform Observability](../../book/26-platform-observability.md). + +## Audience boundary + +| You are… | Use observer? | +|--------------------------------------------------------------|-------------------------------------------------| +| CodeClone contributor debugging MCP/CLI/memory/observer code | **Yes** (after explicit enable) | +| Application team using CodeClone on their repo | **No** | +| Agent reviewing user Python for clones/metrics | **No** — use review / hotspots / change control | + +Anti-inference: high `db_cost` means CodeClone executed SQL during its work, not +that the analyzed project has a database defect. High MCP payload sizes reflect +CodeClone's tool traffic, not low code quality in the target repo. + +## Explicit enable (required) + +Observation is **off by default**. No pyproject toggle — environment variables +only. 
+ +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +``` + +Every process that should emit telemetry must start **with this variable set**: + +- terminal `codeclone …` runs; +- `codeclone-mcp` (restart the MCP server after exporting); +- background projection workers spawned during memory rebuild. + +Optional: + +| Variable | Effect | +|-------------------------------------|--------------------------------------------------| +| `CODECLONE_OBSERVABILITY_PROFILE=1` | Process metrics (`codeclone[perf]`) | +| `CODECLONE_OBSERVABILITY_PERSIST=0` | Instrument without writing completed ops | +| `CODECLONE_OBSERVABILITY_FORCE=1` | CI override only — **does not** enable by itself | + +Until a reproducer runs under `CODECLONE_OBSERVABILITY_ENABLED=1`, there is no +store. MCP `query_platform_observability` returns `status=disabled` or +`status=no_store` — inert, not an error. + +Store path: `.codeclone/db/platform_observability.sqlite3` + +## Maintainer workflow + +```mermaid +flowchart TD + A["Export CODECLONE_OBSERVABILITY_ENABLED=1"] --> B["Restart MCP / CLI / worker"] + B --> C["Reproduce the slow or costly path"] + C --> D{"Agent or human?"} + D -->|Agent| E["query_platform_observability summary → drill sections"] + D -->|Human| F["codeclone observability trace --html …"] + E --> G["Change codeclone/observability or instrumentation"] + F --> G + G --> H["pytest tests/test_observability_*.py"] +``` + +### 1. Reproduce under observation + +Example — exercise MCP after enabling observer on the server process: + +```bash +export CODECLONE_OBSERVABILITY_ENABLED=1 +codeclone-mcp --transport stdio # or your IDE launcher with env inherited +``` + +Then run the MCP workflow you are debugging (analysis, memory rebuild, finish, +etc.). + +### 2. Agent path (bounded MCP) + +See [MCP observability recipes](../mcp/workflows/observability-recipes.md). + +Skill: `/codeclone-platform-observability` (bundled in CodeClone plugins). + +### 3. 
Human path (full cockpit) + +```bash +codeclone observability trace --root . --last 50 --html /tmp/codeclone-observer.html +``` + +Self-contained HTML: operation chains, span waterfall, MCP matrix, DB +fingerprints, memory pipeline costs. No external assets. + +### 4. Verify instrumentation changes + +```bash +uv run pytest -q tests/test_observability_*.py +``` + +Also run MCP registrar tests when touching server wiring: +`tests/test_observability_mcp_registrar.py`. + +## What observer never does + +- Does not affect reports, gates, baselines, cache, or finding identity +- Does not authorize edits or expand change-control scope +- Does not store raw MCP/prompt bodies or SQL literals +- Does not send data to a remote telemetry service + +## Related + +- [Diagnostics quick start](diagnostics.md) +- [MCP observability recipes](../mcp/workflows/observability-recipes.md) +- [MCP tool contract](../../book/25-mcp-interface/tools/platform-observability.md) +- [CONTRIBUTING.md — Platform Observability](https://github.com/orenlab/codeclone/blob/main/CONTRIBUTING.md) diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..316e41e6 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,93 @@ + + +# CodeClone Docs + +> Structural Change Controller for AI-assisted Python development — +> deterministic, baseline-aware, built for CI and AI agents. + +CodeClone runs one deterministic analysis pipeline and emits a canonical JSON +report. Every surface — CLI, HTML, MCP, IDE — is a projection of that report. +Humans and AI agents operate on the same structural facts. + +The v2.1 change controller starts before the first edit: an agent declares what +it intends to change, CodeClone maps the structural blast radius, verifies the +patch against the declared boundary, and generates an auditable review receipt. + +!!! 
note "Documentation for the in-development v2.1 line" + This site tracks the unreleased **v2.1** line; for the current stable release see [CodeClone v2.0.2](https://github.com/orenlab/codeclone/tree/v2.0.2). + +## New here? Follow the path + +1. [**Install & first run**](getting-started.md) — install, analyze a repo, read the report. +2. [**Connect your agent**](getting-started.md#mcp-setup) — wire CodeClone into your IDE or agent. +3. [**Your first governed edit**](start/first-governed-edit.md) — declare → edit → verify, end to end. + +!!! tip "Two tabs — pick one mental model" + **Guide** — install, run, MCP workflows, IDE setup, recipes. + Start at the [Guide hub](guide/README.md). + + **Contracts** — normative guarantees, schemas, enums, payload semantics. + Start at the [Contracts book](book/README.md). + +!!! note "Licensing" + Source code: MPL-2.0. Documentation and docs-site content: MIT. + +--- + +## Getting Started + +| Goal | Start here | +|-----------------------|---------------------------------------------------| +| First install and run | [Getting started](getting-started.md) | +| Understand the model | [How it works](guide/explanation/how-it-works.md) | +| Terminology lookup | [Terminology](book/01-terminology.md) | + +## CI and Gating + +| Goal | Start here | +|-------------------------------|-----------------------------------------------------------| +| Baseline-aware CI | [Getting started: CI setup](getting-started.md#ci-setup) | +| Exit codes and failure policy | [Exit codes](book/09-exit-codes.md) | +| Quality gates and metrics | [Metrics and gates](book/16-metrics-and-quality-gates.md) | +| Baseline contract | [Baseline](book/07-baseline.md) | + +## AI Agent Governance + +| Goal | Start here | +|-------------------------------------|-------------------------------------------------------------------------------| +| MCP usage (workflows, setup) | [MCP guide](guide/mcp/README.md) | +| First governed edit (tutorial) | [Your first governed 
edit](start/first-governed-edit.md) | +| Change controller workflow | [Structural Change Controller](book/12-structural-change-controller/index.md) | +| Engineering Memory (scope context) | [Engineering Memory](book/13-engineering-memory/index.md) | +| Trajectories and recurring patterns | [Trajectories and Experiences](guide/memory/trajectories-and-experiences.md) | +| MCP interface contract | [MCP interface](book/25-mcp-interface/index.md) | + +## IDE and Agent Clients + +| Surface | Guide (how to) | Contract (guarantees) | +|-----------------------|---------------------------------------------------------------------|-----------------------------------------------------------------------| +| VS Code extension | [Setup](guide/integrations/vscode/setup.md) | [VS Code contract](book/integrations/vs-code-extension.md) | +| Cursor plugin | [Install & skills](guide/integrations/cursor/install-and-skills.md) | [Cursor contract](book/integrations/cursor-plugin.md) | +| Claude Code plugin | [Install](guide/integrations/claude-code/setup.md) | [Claude Code contract](book/integrations/claude-code-plugin.md) | +| Codex plugin | [Install](guide/integrations/codex/setup.md) | [Codex contract](book/integrations/codex-plugin.md) | +| Claude Desktop bundle | [Setup](guide/integrations/claude-desktop/setup.md) | [Claude Desktop contract](book/integrations/claude-desktop-bundle.md) | +| SARIF & code scanning | [Export](guide/integrations/sarif/export.md) | [SARIF contract](book/integrations/sarif.md) | + +## Reports + +| Goal | Start here | +|-------------------------|---------------------------------------| +| Report model and schema | [Report contract](book/05-report.md) | +| HTML rendering | [HTML render](book/06-html-render.md) | +| Live sample | [Sample report](examples/report.md) | + +## Maintainers & internals + +Operating or building CodeClone itself? 
See [Platform Observability](guide/observability/diagnostics.md) +and [Corpus Analytics](guide/analytics/overview.md) under the **Maintainers** tab. + +**Editions & plans** — CodeClone is open source and runs locally; Team and Enterprise add scaled retention, managed options, and support. Pick the level that fits your needs: [Plans and retention](plans-and-retention.md). diff --git a/docs/mcp.md b/docs/mcp.md deleted file mode 100644 index bdeda025..00000000 --- a/docs/mcp.md +++ /dev/null @@ -1,359 +0,0 @@ -# MCP Usage Guide - -CodeClone MCP is a **read-only, baseline-aware** analysis server for AI agents -and MCP-capable clients. It exposes the deterministic pipeline without mutating -source files, baselines, cache, or report artifacts. Session-local review/run -state is mutable in memory only. - -Works with any MCP-capable client regardless of backend model. - -!!! note "Read-only by contract" - MCP is an integration surface over the same canonical pipeline and report - contracts as the CLI. It does not create a second analysis engine or write - back to repository state. - -## Install - -=== "Standalone tool" - - ```bash title="Install the MCP launcher as a standalone tool" - uv tool install "codeclone[mcp]" - ``` - -=== "Existing environment" - - ```bash title="Install the MCP extra into the current environment" - uv pip install "codeclone[mcp]" - ``` - -## Quick client setup - -If `codeclone-mcp` is already on your `PATH`, both Claude Code and Codex can -register it directly as a local stdio server. - -### Claude Code - -```bash -claude mcp add codeclone -- codeclone-mcp --transport stdio -claude mcp list -``` - -Use `--scope project` if you want Claude Code to store the shared config in -`.mcp.json` for the repository instead of your local user state. 
- -### Codex - -```bash -codex mcp add codeclone -- codeclone-mcp --transport stdio -codex mcp list -``` - -If you installed CodeClone into a project virtual environment rather than a -global tool path, use the full launcher path instead of bare `codeclone-mcp`. - -### Codex plugin - -A native Codex plugin ships in `plugins/codeclone/` with repo-local -discovery, a `.mcp.json` definition, and two skills (review + hotspots). -See [Codex plugin guide](codex-plugin.md). - -### Claude Desktop bundle - -A local `.mcpb` bundle ships in `extensions/claude-desktop-codeclone/` with -pre-loaded instructions and auto-discovery of the launcher. -See [Claude Desktop bundle guide](claude-desktop-bundle.md). - -## Start the server - -**Local agents** (Claude Code, Codex, Copilot Chat, Gemini CLI): - -```bash title="Start a local stdio MCP server" -codeclone-mcp --transport stdio -``` - -MCP analysis tools require an absolute repository root. Relative roots such as -`.` are rejected, because the server process working directory may differ from -the client workspace. The same absolute-path rule applies to `check_*` tools -when a `root` filter is provided. - -!!! note "Absolute roots are required" - MCP tool requests must pass an absolute repository root. This keeps runs - deterministic across clients whose working directories may differ from the - visible workspace path. - -**Remote / HTTP-only clients:** - -```bash title="Start the optional HTTP transport locally" -codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 -``` - -!!! warning "Remote exposure is opt-in" - Non-loopback hosts require `--allow-remote`, and the built-in HTTP server - does not provide authentication. Use it only on trusted networks or behind - your own authenticated reverse proxy. - -Non-loopback hosts require `--allow-remote` (no built-in auth). 
-When `--allow-remote` is enabled, any reachable network client can trigger -CPU-intensive analysis, read results, and probe repository-relative paths -through MCP request parameters. Use it only on trusted networks. For anything -production-adjacent, put the server behind a firewall or a reverse proxy with -authentication. - -Run retention is bounded: default `4`, max `10` (`--history-limit`). -If a tool request omits `processes`, MCP defers process-count policy to the -core CodeClone runtime. - -Current CodeClone `2.0` MCP surface: `21` tools, `7` fixed resources, and `3` -run-scoped URI templates. - -## Tool surface - -| Tool | Purpose | -|--------------------------|----------------------------------------------------------------------------------------------------------| -| `analyze_repository` | Full analysis → compact summary; use `get_run_summary` or `get_production_triage` as the first pass | -| `analyze_changed_paths` | Diff-aware analysis via `changed_paths` or `git_diff_ref`; compact changed-files snapshot | -| `get_run_summary` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | -| `get_production_triage` | Production-first view: health, hotspots, suggestions, active thresholds; best first pass for noisy repos | -| `help` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope | -| `compare_runs` | Run-to-run delta: regressions, improvements, health change | -| `list_findings` | Filtered, paginated findings; use after hotspots or `check_*` | -| `get_finding` | Single finding detail by id; defaults to `normal` detail level | -| `get_remediation` | Remediation payload for one finding | -| `list_hotspots` | Priority-ranked hotspot views; preferred before broad listing | -| `get_report_section` | Read report sections; `metrics_detail` is paginated with family/path filters | -| `evaluate_gates` | Evaluate CI gating decisions | -| `check_clones` | Clone findings only; narrower than 
`list_findings` | -| `check_complexity` | Complexity hotspots only | -| `check_coupling` | Coupling hotspots only | -| `check_cohesion` | Cohesion hotspots only | -| `check_dead_code` | Dead-code findings only | -| `generate_pr_summary` | PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | Session-local review marker (in-memory) | -| `list_reviewed_findings` | List reviewed findings for a run | -| `clear_session_runs` | Reset in-memory runs and session state | - -> `check_*` tools query stored runs only. Call `analyze_repository` or -> `analyze_changed_paths` first. - -**Payload conventions:** - -- `check_*` responses include only the relevant health dimension. -- Empty design `check_*` responses may also include a compact - `threshold_context` (`metric`, `threshold`, `measured_units`, - `highest_below_threshold`) to show whether the run is genuinely quiet or - simply below the active threshold. -- Finding responses use short MCP IDs and relative paths by default; - `detail_level=full` restores the compatibility payload with URIs. -- Summary and triage projections keep interpretation compact: `health_scope` - explains what the health score covers, `focus` explains the active view, and - `new_by_source_kind` attributes new findings without widening the payload. -- When baseline comparison is untrusted, summary and triage also expose - `baseline.compared_without_valid_baseline` plus baseline/runtime python tags. -- Summary `diff` also carries compact adoption/API deltas: - `typing_param_permille_delta`, `typing_return_permille_delta`, - `docstring_permille_delta`, `api_breaking_changes`, and `new_api_symbols`. -- When `analyze_repository` or `analyze_changed_paths` receives - `coverage_xml`, summaries include compact `coverage_join` facts. The XML path - may be absolute or relative to the analysis root, and the join remains a - current-run signal rather than baseline truth. 
-- Run summaries may also include compact `security_surfaces` facts: - item count, category count, production/test split, and `report_only=true`. - This layer inventories exact security-relevant capability surfaces and trust - boundaries; it does not claim vulnerabilities or exploitability. -- When `respect_pyproject=true`, MCP also applies `golden_fixture_paths`. - Fully matching golden-fixture clone groups are excluded from active clone and - gate projections but remain visible in the canonical report under the - optional `findings.groups.clones.suppressed.*` bucket. -- Invalid Cobertura XML does not fail `analyze_*`; summaries expose - `coverage_join.status="invalid"` plus `invalid_reason`. Coverage hotspot gate - preview still requires a valid join. -- Run IDs are 8-char hex handles; finding IDs are short prefixed forms. - Both accept the full canonical form as input. -- `metrics_detail(family="overloaded_modules")` exposes the report-only - module-hotspot layer without turning it into findings or gate data. -- `metrics_detail` also accepts `coverage_adoption`, `coverage_join`, - `security_surfaces`, and - `api_surface`. -- `help(topic=...)` is static: meaning, anti-patterns, next step, doc links. -- Start with repo defaults or `pyproject`-resolved thresholds, then lower them - only for an explicit higher-sensitivity exploratory pass. 
- -## Resource surface - -Fixed resources: - -| Resource | Content | -|----------------------------------|--------------------------------------------| -| `codeclone://latest/summary` | Latest run summary | -| `codeclone://latest/triage` | Latest production-first triage | -| `codeclone://latest/report.json` | Full canonical report | -| `codeclone://latest/health` | Health score and dimensions | -| `codeclone://latest/gates` | Last gate evaluation result | -| `codeclone://latest/changed` | Changed-files projection (diff-aware runs) | -| `codeclone://schema` | Canonical report shape descriptor | - -Run-scoped resource templates: - -| URI template | Content | -|---------------------------------------------------|---------------------------------| -| `codeclone://runs/{run_id}/summary` | Summary for a specific run | -| `codeclone://runs/{run_id}/report.json` | Report for a specific run | -| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | - -Resources and URI templates are read-only views over stored runs; they do not -trigger analysis. - -`codeclone://latest/*` always resolves to the most recent run registered in the -current MCP server session. A later `analyze_repository` or -`analyze_changed_paths` call moves that pointer. -`mark_finding_reviewed` and `clear_session_runs` mutate only in-memory session -state. They never touch source files, baselines, cache, or report artifacts. 
- -## Recommended workflows - -### Budget-aware first pass - -``` -analyze_repository → get_run_summary or get_production_triage -→ list_hotspots or check_* → get_finding → get_remediation -``` - -### Semantic uncertainty recovery - -``` -help(topic="workflow" | "analysis_profile" | "baseline" | "coverage" | "suppressions" | "latest_runs" | "review_state" | "changed_scope") -``` - -### Full repository review - -``` -analyze_repository → get_production_triage -→ list_hotspots(kind="highest_priority") → get_finding → evaluate_gates -``` - -### Conservative first pass, then deeper review - -``` -analyze_repository(api_surface=true) # when you need API inventory/diff -→ help(topic="analysis_profile") when you need finer-grained local review -→ analyze_repository(min_loc=..., min_stmt=..., ...) as an explicit higher-sensitivity pass -→ compare_runs -``` - -### Coverage hotspot review - -``` -analyze_repository(coverage_xml="coverage.xml") -→ metrics_detail(family="coverage_join") -→ evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50) - -Coverage Join in MCP separates measured `coverage_hotspots` from -`scope_gap_hotspots` (functions outside the supplied `coverage.xml` scope). -``` - -### Changed-files review (PR / patch) - -``` -analyze_changed_paths → get_report_section(section="changed") -→ list_findings(changed_paths=..., sort_by="priority") → get_remediation → generate_pr_summary -``` - -### Session-based review loop - -``` -list_findings → get_finding → mark_finding_reviewed -→ list_findings(exclude_reviewed=true) → … → clear_session_runs -``` - -## Prompt patterns - -Good prompts include **scope**, **goal**, and **constraint**: - -```text -# Health check -Use codeclone MCP to analyze this repository. -Give me a concise structural health summary and the top findings to look at first. - -# Changed-files review -Use codeclone MCP in changed-files mode for my latest edits. -Focus only on findings that touch changed files and rank them by priority. 
- -# Gate preview -Run codeclone through MCP and preview gating with fail_on_new. -Explain the exact reasons. Do not change any files. - -# AI-generated code check -I added code with an AI agent. Use codeclone MCP to check for new structural drift. -Separate accepted baseline debt from new regressions. -``` - -**Tips:** - -- Use `analyze_changed_paths` for PRs, not full analysis. -- Prefer `get_run_summary` or `get_production_triage` as the first pass. -- Prefer `list_hotspots` or narrow `check_*` tools before broad `list_findings`. -- Use `get_finding` / `get_remediation` for one finding instead of raising - `detail_level` on larger lists. -- Keep `git_diff_ref` to a safe single revision expression; option-like, - whitespace-containing, and punctuated shell-style inputs are rejected. -- Pass an absolute `root` — MCP rejects relative roots like `.`. -- Use `coverage_xml` only with `analysis_mode="full"`; clones-only analysis does - not collect the function-span facts needed for coverage join. -- Use `"production-only"` / `source_kind` filters to cut test/fixture noise. -- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. - -## Client configuration - -All clients use the same server — only the registration format differs. - -### JSON clients (Claude Code, Copilot Chat, Gemini CLI) - -```json -{ - "mcpServers": { - "codeclone": { - "command": "codeclone-mcp", - "args": [ - "--transport", - "stdio" - ] - } - } -} -``` - -### Codex / OpenAI - -```toml -[mcp_servers.codeclone] -enabled = true -command = "codeclone-mcp" -args = ["--transport", "stdio"] -``` - -For the Responses API or remote-only clients, use `streamable-http`. - -If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. - -## Security - -- Read-only by design: no source mutation, no baseline/cache writes. -- Run history and review markers are in-memory only — lost on process stop. -- Repository access is limited to what the server process can read locally. 
-- `streamable-http` binds to loopback by default; `--allow-remote` is explicit opt-in. - -## Troubleshooting - -| Problem | Fix | -|-----------------------------------------------------------|--------------------------------------------------------------------------------| -| `CodeClone MCP support requires the optional 'mcp' extra` | `uv tool install "codeclone[mcp]"` or `uv pip install "codeclone[mcp]"` | -| Client cannot find `codeclone-mcp` | `uv tool install "codeclone[mcp]"` or use an absolute launcher path | -| Client only accepts remote MCP | Use `streamable-http` transport | -| Agent reads stale results | Call `analyze_repository` again; `latest` always points to the most recent run | -| `changed_paths` rejected | Pass a `list[str]` of repo-relative paths, not a comma-separated string | - -## See also - -- [book/20-mcp-interface.md](book/20-mcp-interface.md) — formal interface contract -- [book/08-report.md](book/08-report.md) — canonical report contract -- [book/09-cli.md](book/09-cli.md) — CLI reference diff --git a/docs/plans-and-retention.md b/docs/plans-and-retention.md new file mode 100644 index 00000000..aede221d --- /dev/null +++ b/docs/plans-and-retention.md @@ -0,0 +1,170 @@ + + +# Plans and Retention + +CodeClone is open source and runs **fully locally**. Every edition — including +open source — ships the complete analysis, change-control, memory, and +integration product; nothing that runs on your machine is paywalled or +edition-capped. Team and Enterprise add **support and managed/hosted services** +on top of the same local core. Hosted capabilities are in development and are +marked **roadmap** below. + +--- + +## What every edition includes (open source) + +**The full local product is open source (MPL-2.0) and free.** No analysis, +change-control, memory, or integration capability is gated. 
+ +- **Structural analysis & CI** — clones, complexity, coupling, cohesion, dead + code, dependency cycles, [Health Score](book/15-health-score.md), and + baseline-aware [quality gates](book/16-metrics-and-quality-gates.md). +- **Report surfaces** — canonical JSON, HTML, Markdown, text, and + [SARIF](guide/integrations/sarif/export.md), plus the + [GitHub Action](getting-started.md#github-action) (gating, SARIF upload, PR comments). +- **Report-only signals** — Security Surfaces, Overloaded Modules, API-surface + inventory with breaking-change detection, and external Coverage Join. +- **Structural Change Controller** — intent → blast radius → bounded edit → + patch verify → receipt, with Patch Trail and multi-agent coordination + ([change control](book/12-structural-change-controller/index.md)). +- **Live Implementation Context** — bounded structural, call-graph, and contract evidence. +- **Engineering Memory** — typed evidence-linked facts, FTS + local `fastembed` + semantic search, Trajectory Memory, quality passports, anomaly detection, and + the [Experience Layer](book/13-engineering-memory/experience-layer.md). +- **Corpus Analytics** — offline clustering of change-control intents (`codeclone[analytics]`). +- **33 MCP tools and native integrations** — VS Code, Cursor, Claude Code, + Codex, and Claude Desktop on one canonical analysis. +- **Platform Observability** — opt-in local runtime diagnostics. + +Local storage (intent registry, audit trail, Engineering Memory) is SQLite/file +and **configurable without an edition cap** — retention windows are plain +`[tool.codeclone]` settings, not license-gated. 
+ +--- + +## Editions + +| Capability | Community (OSS) | Team | Enterprise | +|-------------------------------------------------------------|-----------------|----------------|-----------------| +| Full local analysis, change control, memory, integrations | full | full | full | +| Local semantic search (`fastembed`) | full | full | full | +| Local retention (registry / audit / memory) | configurable | configurable | configurable | +| Support | community | priority + SLA | dedicated + SLA | +| Managed control plane (hosted registry / audit / retention) | — | roadmap | roadmap | +| Hosted embedding / retrieval service | — | roadmap | roadmap | +| Cross-repo / org-wide trajectory & analytics dashboards | — | roadmap | roadmap | +| Managed PostgreSQL backends | — | — | roadmap | +| On-prem embedding model (`local_model`) | — | — | roadmap | + +!!! note "Roadmap items are not yet available" + Today CodeClone ships as a single open-source build. Selecting an unbuilt + provider or backend (`api`, `local_model`, PostgreSQL) returns a + "not available yet" error. Team and Enterprise are available now as + **support and licensing** tiers; managed/hosted services are in development. + [Contact us](#contact) to shape priorities. + +--- + +## Retention (local, configurable) + +The intent registry, audit trail, and Engineering Memory store data locally in +SQLite. Retention windows are configured in `[tool.codeclone]` and are **not +capped by edition** — full key reference in +[Config and Defaults](book/10-config-and-defaults.md). + +| Store | Key | Default | +|-------------------------------|----------------------------------|---------| +| Intent registry (closed rows) | `intent_registry_retention_days` | `14` | +| Audit trail | `audit_retention_days` | `30` | +| Memory drafts | `draft_retention_days` | `14` | + +You can already set any local window you need. 
Longer **managed** retention — +central storage, backup, compliance attestations, and cross-session forensics — +is the roadmap Team/Enterprise value. + +--- + +## Semantic retrieval providers + +| Provider | Status | What it is | +|---------------|--------------------------|------------------------------------------------------------------------------------------------------------| +| `diagnostic` | available | Deterministic hash vectors. For tests, not real recall. | +| `fastembed` | available (all editions) | Local `BAAI/bge-small-en-v1.5` via FastEmbed. No network, no API key. Install `codeclone[semantic-local]`. | +| `api` | roadmap | Hosted embedding / retrieval service. Currently returns "not available yet". | +| `local_model` | roadmap | On-prem custom embedding model for air-gapped deployments. Currently returns "not available yet". | + +Open source already includes **full local semantic search** with `fastembed` — +no functionality is removed. Hosted and on-prem providers are in development. + +--- + +## Audit trail + +The controller audit trail records intent lifecycle, lease transitions, and +workspace coordination in a local SQLite database when `audit_enabled=true` in +effective config. CLI display uses `--audit` / `--audit-json`. Payload mode +(`audit_payloads`) is `off` / `compact` / `full`; retention +(`audit_retention_days`, default `30`) is configurable. Managed/hosted audit +storage is a roadmap Team/Enterprise option. + +--- + +## Platform Observability + +Platform Observability is a development diagnostic store — not controller audit +retention and not repository quality history. It is disabled by default and +local in every edition; operators own the lifecycle of +`.codeclone/db/platform_observability.sqlite3`. The observer stores no raw +MCP/prompt bodies and never contributes findings, gates, baselines, memory +facts, or edit authorization. See +[Platform Observability](book/26-platform-observability.md). 
+ +--- + +## Team and Enterprise + +**Available now** — priority/dedicated support, SLA, and onboarding for MCP, +VS Code, and controller workflows, plus help with CI gating and rollout. + +**In development (roadmap)** — a managed control plane: hosted registry / audit +/ retention, cross-repo and org-wide dashboards, hosted and on-prem embedding +providers, and PostgreSQL backends. + +The open-source contracts (integrity-protected intents, signed memory payloads, +deterministic reports) are identical across editions. Managed options add +operation and scale; they never weaken validation. + +## Contact + +For support tiers, roadmap timelines, managed-service interest, or compliance +requirements: + +**[sudo@secuapp.ru](mailto:sudo@secuapp.ru)** + +## Related configuration + +See [Config and Defaults](book/10-config-and-defaults.md) and +[Structural Change Controller — intent registry](book/12-structural-change-controller/index.md). + +```toml +[tool.codeclone] +intent_registry_backend = "sqlite" +intent_registry_path = ".codeclone/db/intents.sqlite3" +intent_registry_retention_days = 14 # default; any positive value, no edition cap + +[tool.codeclone.memory] +max_records = 10000 +max_candidates = 1000 +draft_retention_days = 14 + +[tool.codeclone.memory.semantic] +enabled = true +embedding_provider = "fastembed" # "diagnostic" or "fastembed" today; "api" / "local_model" are roadmap +allow_model_download = true +``` + +Environment overrides: +[Config and Defaults — environment variable overrides](book/10-config-and-defaults.md#environment-variable-overrides). 
diff --git a/docs/privacy-policy.md b/docs/privacy-policy.md index dd90ee4d..d1d1f567 100644 --- a/docs/privacy-policy.md +++ b/docs/privacy-policy.md @@ -1,3 +1,8 @@ + + # Privacy Policy This page describes the privacy behavior of CodeClone's local integration @@ -13,8 +18,18 @@ For the CLI, MCP server, VS Code extension, and Claude Desktop bundle: - CodeClone does not send repository contents to an external CodeClone backend - CodeClone reads local repository files, local git state, baselines, and cache only to perform the requested structural analysis +- Engineering Memory, trajectory/Experience projections, Controller audit, and + Platform Observability are optional local SQLite state under `.codeclone/` +- Platform Observability records bounded metadata, counters, timings, and + literal-free SQL fingerprints; it does not store raw prompts or payload bodies - the Claude Desktop bundle is only a local wrapper around `codeclone-mcp` +CodeClone does not provide a remote telemetry exporter. Automatic pruning of +the Platform Observability database is not currently enforced; users who enable +persistence control that local file's lifecycle. See +[Platform Observability](book/26-platform-observability.md) and +[Plans and Retention](plans-and-retention.md). + ## Claude Desktop bundle specifics The bundle in `extensions/claude-desktop-codeclone/`: diff --git a/docs/publishing.md b/docs/publishing.md index fe0be95f..cc5f8946 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -1,4 +1,10 @@ -# Publishing and Docs Site + + +# Publishing the Docs Site ## Purpose @@ -10,13 +16,15 @@ remains the current repository code and CI workflow. !!! note "Scope" This page covers docs-site build and publishing mechanics. Public behavior contracts still live in the book chapters and in the repository code. + For integration distribution (storefront sync), see + [Releasing & storefront sync](releasing.md). 
## Current stack -- Site generator: `MkDocs` -- Theme: `Material for MkDocs` +- Site generator: `Zensical` +- Theme: Zensical built-in theme (Material-derived) - Docs root: `docs/` -- Site config: `mkdocs.yml` +- Site config: `zensical.toml` - Publish workflow: `.github/workflows/docs.yml` ## What gets published @@ -25,23 +33,28 @@ The published site contains: - the documentation tree under `docs/` - the contract book under `docs/book/` -- deep-dive pages such as architecture and CFG notes +- guide pages such as architecture narrative and integration pages - a live sample report for the current repository build under `Examples / Sample Report` ## Build flow -The docs workflow follows this order: +The docs workflow (`.github/workflows/docs.yml`) follows this order: 1. install project dependencies -2. build the MkDocs site with `mkdocs build --strict` +2. build the site with `zensical build --clean --strict` 3. generate a live sample report into `site/examples/report/live` 4. upload the built site as a GitHub Pages artifact 5. deploy on pushes to `main` +Admonition indentation (`!!!` / `???` body must be indented 4 spaces) is enforced +in the main test workflow via `tests/test_docs_build_contract.py`, not in +`docs.yml`. Repair locally with +`python3 scripts/lint_admonitions.py docs/ --fix`. + Relevant files: -- `mkdocs.yml` +- `zensical.toml` - `.github/workflows/docs.yml` - `scripts/build_docs_example_report.py` @@ -67,14 +80,14 @@ git. `site/` remains ignored. 
=== "Build the site" - ```bash title="Validate the MkDocs site" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict + ```bash title="Validate the Zensical site" + uv run --with zensical==0.0.43 zensical build --clean --strict ``` === "Build the site and sample report" ```bash title="Generate the live sample report into site/" - uv run --with mkdocs --with mkdocs-material mkdocs build --strict + uv run --with zensical==0.0.43 zensical build --clean --strict uv run python scripts/build_docs_example_report.py --output-dir site/examples/report/live ``` @@ -89,14 +102,14 @@ Then open: - Do not commit generated `site/` artifacts. - Keep docs publishing deterministic: no timestamps in published docs paths. - Keep the sample report generated from the same commit as the site itself. -- Prefer documenting docs-site mechanics here or in adjacent deep-dive pages, - not inside contract chapters unless a public contract is affected. +- Prefer documenting docs-site mechanics here, not inside contract chapters + unless a public contract is affected. ## When to update this page Update this page when you change: -- `mkdocs.yml` +- `zensical.toml` - `.github/workflows/docs.yml` - `scripts/build_docs_example_report.py` - the site navigation model diff --git a/docs/releasing.md b/docs/releasing.md new file mode 100644 index 00000000..eb88e489 --- /dev/null +++ b/docs/releasing.md @@ -0,0 +1,177 @@ + + +# Releasing & Storefront Sync + +## Integration distribution repos (storefronts) + +Public IDE/agent installs are mirrored from this monorepo into **sibling git +repositories** under a shared parent directory. The sync driver is +`scripts/sync_integrations.py`; contract tests live in +`tests/test_sync_integrations.py`. 
+ +| CLI `--target` | Distribution directory | GitHub / marketplace | Monorepo source paths | +|------------------|-----------------------------|---------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `codex` | `codeclone-codex/` | `orenlab/codeclone-codex` | `plugins/codeclone/` + overlays under `scripts/integration_dist/` (root `README.md`, `.gitignore`, public `marketplace.json`) | +| `claude-code` | `codeclone-claude-code/` | `orenlab/codeclone-claude-code` | `plugins/claude-code-codeclone/` → `plugins/codeclone/` + shared standalone launcher + root `README.md`, `.gitignore`, and public `marketplace.json` overlays | +| `cursor` | `codeclone-cursor/` | `orenlab/codeclone-cursor` | `plugins/cursor-codeclone/` + `plugins/codeclone/scripts/launch_mcp.py` → `scripts/launch_mcp.py` + `gitignore.cursor` | +| `vscode` | `codeclone-vscode/` | VS Code Marketplace | `extensions/vscode-codeclone/` (flat) + `gitignore.vscode` | +| `claude-desktop` | `codeclone-claude-desktop/` | Claude Desktop `.mcpb` bundle | `extensions/claude-desktop-codeclone/` (flat) + `gitignore.claude-desktop` | + +Each target must be a **git repository** named exactly `codeclone-{target}` (for +example `codeclone-cursor`). The script refuses wrong directory names or +non-git targets. + +### What sync copies (and what it does not) + +**Copied:** plugin/extension trees listed above, **distribution overlays** from +`scripts/integration_dist/` (per-target `.gitignore`; Codex and Claude Code root +`README.md` plus their public marketplace manifests), plus generated +`SYNC_MANIFEST.json` at the distribution repo root (commit, package version from +`pyproject.toml`, file counts, UTC timestamp). + +**Codex README rule:** GitHub renders only the **repository root** `README.md`. +The plugin guide stays at `plugins/codeclone/README.md`. 
Sync writes a separate +root file from `scripts/integration_dist/README.codex.root.md` — do not copy the +plugin README to the repo root. + +**Codex marketplace rule:** Monorepo dev uses `.agents/plugins/marketplace.json` +(`orenlab-local`). The public `codeclone-codex` repo gets +`scripts/integration_dist/marketplace.codex.json` (`orenlab-codeclone` / +`displayName: CodeClone`). + +**Claude Code marketplace rule:** the public `codeclone-claude-code` repo gets +`.claude-plugin/marketplace.json` from +`scripts/integration_dist/marketplace.claude-code.json`. The distributable +plugin stays nested under `plugins/codeclone/`, while its root README comes from +`scripts/integration_dist/README.claude-code.root.md`. + +**Not copied:** the Python package (`codeclone/`), baselines, analysis cache, +canonical reports, monorepo `.cursor/rules` (developer-only; Cursor users get +`plugins/cursor-codeclone/rules/`), or arbitrary files already present in a +distribution repo (for example `.github/workflows/`, extra CI-only files). + +**Flat targets (Cursor, VS Code, Claude Desktop):** product `README.md` still +comes from the synced extension/plugin tree at the distribution repo root (same +file as in the monorepo). Codex and Claude Code use separate, +distribution-specific root READMEs. + +**Denied globally during copy:** `.git`, `__pycache__`, `*.pyc`, `node_modules`, +`dist/`, `build/`, `.coverage`. VS Code sync also skips `node_modules/**` and +`.coverage` under the extension tree. + +### Layout models + +- **Nested (Codex and Claude Code):** `plugins/codeclone/` stays under + `plugins/codeclone/` in the distribution repository. Stale files inside that + subtree are removed before copy. +- **Flat (Cursor, VS Code, Claude Desktop):** extension/plugin files land at the + distribution repo root. Sync deletes only **top-level names that still exist** + in the current source tree, then recopies. 
If you **remove an entire top-level + directory** from the monorepo source, sync does **not** delete the old copy in + the distribution repo — remove it manually or restore a stub directory before + syncing. + +### Standalone launcher overrides + +`plugins/cursor-codeclone/scripts/launch_mcp.py` in the monorepo is a thin +`runpy` delegate to the shared Codex launcher. Distribution **`codeclone-cursor`** +must ship the **full** `plugins/codeclone/scripts/launch_mcp.py` body so +`mcp.json` (`python3` + `./scripts/launch_mcp.py`) works standalone. Sync always +applies a second copy pair for that file after the plugin tree (see +`test_cursor_sync_ships_standalone_launcher`). + +The Claude Code source plugin uses the same monorepo delegation pattern. +Distribution **`codeclone-claude-code`** therefore replaces +`plugins/codeclone/scripts/launch_mcp.py` with the same full standalone +implementation (see `test_claude_code_sync_ships_standalone_launcher`). + +## Sync workflow (maintainers) + +Run from the **monorepo root** (`codeclone/`), with sibling repos checked out +next to it (default `--base-dir ..`) or pass an absolute parent path. + +=== "Dry run (plan only)" + + ```bash title="Print copy/delete counts without writing" + cd /path/to/codeclone + uv run python scripts/sync_integrations.py --dry-run --all --base-dir .. + ``` + +=== "Sync one storefront" + + ```bash title="Sync Codex marketplace repo only" + uv run python scripts/sync_integrations.py --target codex --base-dir .. + ``` + +=== "Sync all five storefronts" + + ```bash title="Update every distribution repo" + uv run python scripts/sync_integrations.py --all --base-dir .. + ``` + +=== "Dirty monorepo (emergency only)" + + ```bash title="Allow sync from uncommitted source" + uv run python scripts/sync_integrations.py --all --base-dir .. --allow-dirty + ``` + + Exit codes: **0** success, **1** validation error (missing source path, dirty + tree, bad target name), **2** copy/delete failure. 
+ + After sync, commit and push **each distribution repository** separately. The + monorepo commit recorded in `SYNC_MANIFEST.json` is the sync source of truth + for audits. + +### Post-sync verification checklist + +Use this after `--all` or a single `--target` before tagging a plugin release: + +1. **`SYNC_MANIFEST.json`** — `target` matches repo; `codeclone_version` matches + monorepo `pyproject.toml`; `source_dirty` is `false` for release builds; + `files_copied` is stable for the same source tree. +2. **`.gitignore` (all five)** — present at distribution repo root; includes + `.idea/`, `.DS_Store`; VS Code copy also lists `node_modules/`, `*.vsix`, `out/`. +3. **Codex (`codeclone-codex`)** — root `README.md` is the distribution stub (not + a duplicate of `plugins/codeclone/README.md`); + `plugins/codeclone/skills/` has four skills; + `plugins/codeclone/.mcp.json` and `scripts/launch_mcp.py` present; + `.agents/plugins/marketplace.json` has `name: orenlab-codeclone`. +4. **Claude Code (`codeclone-claude-code`)** — root `README.md` documents the + two-step marketplace install; `.claude-plugin/marketplace.json` has + `name: orenlab-codeclone`; `plugins/codeclone/.claude-plugin/plugin.json`, + `.mcp.json`, four skills, and the standalone launcher are present. The plugin + manifest omits `version` intentionally so Git commit identity drives cache + updates. +5. **Cursor (`codeclone-cursor`)** — six skills including `production-triage/` and + `blast-radius/`; three rules under `rules/` (including `change-control-gate.mdc`); + `scripts/launch_mcp.py` contains `resolve_launch_target` and **not** `runpy`; + `mcp.json` still points at `./scripts/launch_mcp.py`. +6. **VS Code (`codeclone-vscode`)** — `package.json` and `src/` at repo root (no + `extensions/` mirror path); `codeclone.memory.searchSemantic` and related memory + search settings present when the monorepo extension ships them. +7. 
**Claude Desktop (`codeclone-claude-desktop`)** — `manifest.json`, `server/index.js`, + `src/launcher.js` at repo root; bundle build smoke: + `node extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs` in monorepo + or the equivalent script path in the distribution repo after sync. + +Automated regression: `uv run pytest -q tests/test_sync_integrations.py`. + +Byte-for-byte parity: for each synced file, the distribution copy should match +the monorepo source file that sync last wrote for that destination (remember +Cursor and Claude Code standalone launchers come from +`plugins/codeclone/scripts/`, not from their monorepo delegate stubs). + +## When to update this page + +Update this page when you change: + +- `scripts/sync_integrations.py` or `scripts/integration_dist/*` +- `tests/test_sync_integrations.py` +- integration distribution layout or sibling repo naming +- any integration surface under `plugins/` or `extensions/` — then also run + sync and the post-sync checklist before publishing marketplace/plugin releases diff --git a/docs/start/first-governed-edit.md b/docs/start/first-governed-edit.md new file mode 100644 index 00000000..a3b4bd18 --- /dev/null +++ b/docs/start/first-governed-edit.md @@ -0,0 +1,133 @@ + + +# Your first governed edit + +CodeClone turns an ordinary edit into a *governed* edit: you declare what you +intend to change, edit inside that boundary, and the controller verifies the +patch and clears the intent with an auditable receipt. This tutorial walks one +small fix through the full cycle. + +**Before you start:** [install CodeClone and connect your agent](../getting-started.md). +Every call below uses an **absolute** repository root — relative roots like `.` +are rejected.
+ +## The cycle at a glance + +```text +analyze_repository → register a baseline "before" run +start_controlled_change → declare scope; get edit permission + blast radius +get_relevant_memory → load evidence-linked context for your scope +edit → change only files inside the declared scope +analyze_repository → an "after" run for structural verification +finish_controlled_change → scope check + verify + receipt + clear intent +``` + +## 1. Analyze — register a before-run + +```text +analyze_repository(root="/abs/path/to/repo") +``` + +Returns a `run_id` and a health snapshot. This run is the **before** state the +controller compares your edit against. Run it first — `start` needs an existing +run for the root. + +## 2. Declare your intent + +Say what you will touch *before you touch it*: + +```text +start_controlled_change( + root="/abs/path/to/repo", + scope={"allowed_files": ["myapp/formatting.py"]}, + intent="Fix default rounding in format_ratio", +) +``` + +Read three fields in the response: + +| Field | What it tells you | +|-------|-------------------| +| `edit_allowed` | `true` means you may edit — nothing before this authorizes a write | +| `scope.allowed_files` | the exact boundary; edits outside it are violations | +| `blast_radius.radius_level` | how far the change reaches (`low`/`medium`/`high`), plus dependents to review | + +Keep the returned `intent_id` — `finish` needs it. If the response is `queued`, +another agent holds an overlapping intent: wait and promote rather than editing. + +## 3. Load scoped memory + +```text +get_relevant_memory(root="/abs/path/to/repo", intent_id="<intent_id>") +``` + +Returns evidence-linked `records` (asserted facts), `trajectories` (past workflow +runs over these files), and `experiences` (recurring patterns). Read any stale or +contradiction notes before you edit — they flag context that has changed. Memory +informs your edit; it never authorizes one. + +## 4.
Edit inside the boundary + +Make the change — and only inside `allowed_files`: + +```diff +- def format_ratio(value, digits=1): ++ def format_ratio(value, digits=2): +``` + +If the fix needs a file outside scope, **stop**. Re-run `start_controlled_change` +with a wider scope instead of silently editing extra files. + +## 5. Analyze again — the after-run + +```text +analyze_repository(root="/abs/path/to/repo") +``` + +For any Python change the controller needs this **after** run to verify there are +no structural regressions. Keep its `run_id`. + +## 6. Finish — verify and clear + +```text +finish_controlled_change( + intent_id="<intent_id>", + changed_files=["myapp/formatting.py"], + after_run_id="<after_run_id>", +) +``` + +The controller runs hygiene, scope check, and structural verify, builds the patch +trail, and on success issues a receipt and clears the intent: + +| Field | Accept when | +|-------|-------------| +| `status` | `accepted` (or `accepted_with_external_changes`) | +| `scope_check.status` | `clean` or `expanded` | +| `intent_cleared` | `true` | + +## The completion gate + +Do **not** report the edit as done, verified, or ready until **all three** hold: + +1. `finish` returned `accepted`, +2. `scope_check.status` is `clean` (or `expanded`), and +3. `intent_cleared` is `true`. + +If `status` is `unverified` or `violated`, the intent stays active — follow the +`next_step` hint (often: re-run `analyze_repository` for a fresh after-run, then +`finish` again on the **same** `intent_id`). Never present an unverified patch as +finished. + +## Where to go next + +- [Change control recipe](../guide/mcp/workflows/change-control.md) — the how-to, + including queue, promote, and the atomic fallback path. +- [Structural Change Controller](../book/12-structural-change-controller/index.md) + — the normative contract: verification profiles, finish hygiene, receipts. +- [Engineering Memory](../guide/memory/overview.md) — what to record before you finish.
diff --git a/docs/terms-of-use.md b/docs/terms-of-use.md index 0404835e..8d686fa1 100644 --- a/docs/terms-of-use.md +++ b/docs/terms-of-use.md @@ -1,3 +1,8 @@ + + # Terms of Use These terms describe the intended operational and integration boundaries of @@ -33,13 +38,15 @@ Integrations: CodeClone integrations do not modify or replace the security, account, privacy, or usage policies of third-party host applications such as -Claude Desktop, Codex, VS Code, Anthropic services, or OpenAI services. +Claude Desktop, Claude Code, Codex, Cursor, VS Code, Anthropic services, or +OpenAI services. Those platforms remain governed by their own applicable terms and policies. ## MCP and automation surfaces -The MCP interface is read-only by contract. +The MCP interface is read-only by contract with respect to source files, +baselines, analysis cache, and canonical report artifacts. CodeClone MCP integrations are intended for deterministic structural analysis, review, and triage workflows. They expose canonical findings, metrics, and @@ -48,9 +55,15 @@ review data, but do not mutate: - source files - git history - baselines -- repository state +- analysis cache or canonical report artifacts - CI configuration +Ephemeral controller coordination (workspace intent registry: file backend under +`.codeclone/intents/`, or SQLite under `.codeclone/db/intents.sqlite3` +when configured) and optional audit trail +(`.codeclone/db/audit.sqlite3` when `audit_enabled=true`) are the only +allowed repo-local writes. + Remote, shared, or network-exposed MCP deployments are the responsibility of the operator securing and governing those environments. diff --git a/docs/vscode-extension.md b/docs/vscode-extension.md deleted file mode 100644 index 0c3ca7b5..00000000 --- a/docs/vscode-extension.md +++ /dev/null @@ -1,181 +0,0 @@ -# VS Code Extension - -CodeClone ships a stable VS Code extension in `extensions/vscode-codeclone/`. 
- -It is a native IDE surface over `codeclone-mcp` and is designed for -baseline-aware, triage-first structural review inside the editor. - -Marketplace: [CodeClone for VS Code](https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone) - -## What it is for - -The extension helps you: - -- analyze the current workspace -- review changed files against a git diff -- start with a conservative first pass and lower thresholds only when you need - a more sensitive follow-up -- focus on new regressions and production hotspots first -- jump directly to source locations -- open canonical finding or remediation detail only when needed -- inspect current-run `Coverage Join` facts without inventing extension-local interpretations -- inspect report-only `Security Surfaces` as security-relevant boundary inventory -- inspect report-only Overloaded Module candidates without treating them like findings - -It does not create a second truth model and it does not mutate the repository. - -## Install requirements - -The extension needs a local `codeclone-mcp` launcher. - -Minimum supported CodeClone version: `2.0.0`. - -In `auto` mode, it checks the current workspace virtualenv before falling back -to `PATH`. Runtime and version-mismatch messages identify that resolved launcher source. - -Recommended install: - -```bash -uv tool install "codeclone[mcp]" -``` - -If you want the launcher inside the current environment instead: - -```bash -uv pip install "codeclone[mcp]" -``` - -Verify the launcher: - -```bash -codeclone-mcp --help -``` - -When you run the CLI inside an interactive VS Code terminal, CodeClone may also -show a one-time extension hint after the summary. It is suppressed in quiet, -CI, and non-interactive runs, and is remembered per CodeClone version next to -the resolved project cache path. - -## Main views - -### Overview - -Compact health, current run state, baseline drift, and next-best review action. 
-When the current run includes external Cobertura join facts, Overview also -shows a factual `Coverage Join` section sourced from canonical MCP metrics. -When MCP exposes `security_surfaces`, Overview also shows a compact report-only -`Security Surfaces` section. - -### Hotspots - -Primary operational view for: - -- new regressions -- production hotspots -- changed-files findings -- report-only Security Surfaces -- report-only Overloaded Module candidates - -### Runs & Session - -Session-local state: - -- local server availability -- current run identity -- reviewed findings -- MCP help topics, including the optional `coverage` topic on newer - CodeClone/MCP servers - -## Review model - -The extension stays source-first: - -- `Review Priorities` and `Next Hotspot` / `Previous Hotspot` drive the review - loop -- `Reveal Source` is the default action for findings -- editor-local actions appear only when the current file matches the active - review target -- Explorer decorations stay lightweight and focus on new, production, or - changed-scope relevance -- report-only Security Surfaces stay source-first: reveal source, open compact - detail, or copy a review brief without promoting them to findings - -`Open in HTML Report` exists as an explicit bridge to the richer human report, -not as the primary IDE workflow. - -## First-run path - -1. Open the `CodeClone` view container. -2. Run `Analyze Workspace`. -3. Use `Review Priorities` or `Review Changes`. -4. If the first pass looks clean but you want smaller repeated units, open - `Set Analysis Depth`. -5. Reveal source before opening deeper detail. - -If the launcher is missing, use `Open Setup Help` from the extension. - -## Trust model - -The extension uses a **limited Restricted Mode**: - -- onboarding and setup help remain available in untrusted workspaces -- local analysis and the local MCP server stay disabled until workspace trust - is granted - -The extension is not intended for virtual workspaces. 
- -That is intentional: CodeClone reads repository contents, local git state, and -the local MCP launcher. - -## Design decisions - -- Native VS Code views first, not a custom report dashboard -- Baseline-aware review instead of broad lint-style listing -- Conservative first pass by default; deeper sensitivity is explicit - -## Current limits - -- no always-on background analysis -- no `Problems`-panel duplication -- no persistent reviewed markers across MCP sessions -- `Open in HTML Report` opens a local HTML report only when it exists and looks - fresh enough for the current run - -## Settings that shape analysis depth - -- `codeclone.mcp.command` and `codeclone.mcp.args` are machine-scoped launcher - settings, so they belong in user or remote settings. -- `codeclone.analysis.profile` keeps the default conservative first pass - explicit and exposes `Deeper review` and `Custom` as deliberate follow-ups -- `codeclone.analysis.cachePolicy` and the threshold settings below are - resource-scoped, so they can vary by workspace or folder -- `codeclone.analysis.changedDiffRef` selects the git revision used by - changed-files review -- `codeclone.analysis.coverageXml` passes an explicit Cobertura XML path to - Coverage Join -- `codeclone.analysis.autoDetectCoverageXml` passes workspace-root - `coverage.xml` when present and `coverageXml` is empty -- `codeclone.analysis.minLoc` -- `codeclone.analysis.minStmt` -- `codeclone.analysis.blockMinLoc` -- `codeclone.analysis.blockMinStmt` -- `codeclone.analysis.segmentMinLoc` -- `codeclone.analysis.segmentMinStmt` - -Custom thresholds apply only when the profile is set to `custom`. - -`codeclone.ui.showStatusBar` is a window-scoped presentation setting. 
- -## Source of truth - -The extension reads the same canonical analysis semantics already exposed by: - -- CodeClone CLI -- canonical report JSON -- CodeClone MCP - -For the underlying interface contract, see: - -- [MCP usage guide](mcp.md) -- [MCP interface contract](book/20-mcp-interface.md) -- [VS Code extension contract](book/21-vscode-extension.md) diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 83da9e9e..130466bf 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -1,10 +1,14 @@ # CodeClone for Claude Desktop -Local MCP bundle wrapper for `codeclone-mcp` — installs as a `.mcpb` package -instead of manual JSON editing. - -Same canonical MCP surface used by CLI, VS Code, Codex, and Claude Code. -Read-only, baseline-aware, local stdio only. +**Structural Change Controller for AI-assisted Python development** — local MCP +bundle wrapper for `codeclone-mcp`. Installs as a `.mcpb` package instead of manual JSON editing. + +Same canonical default agent MCP surface used by CLI, VS Code, Codex, and +Claude Code. +Repository read-only (source, baselines, cache, canonical reports); local stdio +only. The bundle proxies the full MCP server, including change-control and +session tools — ephemeral coordination under `.codeclone/intents/` and +optional audit records when enabled. As the local `codeclone-mcp` server gains new canonical surfaces, the bundle exposes them without adding a second client-side interpretation layer. @@ -48,11 +52,34 @@ command** in the extension settings to an absolute path. 
| Setting | Purpose | |--------------------------------|------------------------------------------------------| +| **Workspace root path** | Optional absolute project root; launcher prefers that workspace `.venv` when Claude starts outside the repo | | **CodeClone launcher command** | Absolute path or bare command for `codeclone-mcp` | | **Advanced launcher args** | JSON array of extra args (transport is always stdio) | ## Usage +### Change controller workflow + +```text +# 1. Analyze the repository +Use CodeClone to analyze this repository. + +# 2. Declare intent before editing +Declare a change intent for refactoring codeclone/analysis/parser.py — I plan to +extract the CFG builder into a separate module. + +# 3. Check blast radius +Show the blast radius for codeclone/analysis/parser.py. + +# 4. After editing — verify the patch +Check my change intent against the current diff. + +# 5. Generate the audit artifact +Create a review receipt for the verified change. +``` + +### Analysis and review + ```text # Conservative first pass Use CodeClone to analyze this repository and show the top production hotspots. 
@@ -82,6 +109,7 @@ npm run pack # build .mcpb ## Links -- [Claude Desktop bundle guide](https://orenlab.github.io/codeclone/claude-desktop-bundle/) -- [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) +- [Claude Desktop bundle guide](https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/) +- [MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) +- [Change controller docs](https://orenlab.github.io/codeclone/book/12-structural-change-controller/) - [Issues](https://github.com/orenlab/codeclone/issues) diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index ae51e65b..c6baeba1 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -2,9 +2,9 @@ "manifest_version": "0.3", "name": "codeclone", "display_name": "CodeClone", - "version": "2.0.0", - "description": "Baseline-aware structural review for Claude Desktop through a local CodeClone MCP launcher.", - "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only.", + "version": "2.1.0", + "description": "Structural Change Controller for AI-assisted Python development — local MCP bundle for Claude Desktop.", + "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. Structural change controller for AI-assisted Python development — same canonical default agent MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin. Repository read-only (source, baselines, cache, reports), baseline-aware, local stdio only. 
Includes intent declaration, blast radius, patch contract, review receipt, engineering memory, platform observability, and claim validation via validate_review_claims and finish_controlled_change. Ephemeral coordination may be stored under .codeclone/intents/ (file backend) or .codeclone/db/ (SQLite backend).", "author": { "name": "Den Rozhnovskiy", "email": "pytelemonbot@mail.ru", @@ -15,7 +15,7 @@ "url": "https://github.com/orenlab/codeclone.git" }, "homepage": "https://github.com/orenlab/codeclone", - "documentation": "https://orenlab.github.io/codeclone/claude-desktop-bundle/", + "documentation": "https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/", "support": "https://github.com/orenlab/codeclone/issues", "privacy_policies": [ "https://orenlab.github.io/codeclone/privacy-policy/" @@ -25,38 +25,116 @@ "mcp", "claude-desktop", "codeclone", + "change-controller", "structural-review", "baseline-aware", + "blast-radius", "code-quality" ], "tools": [ { "name": "analyze_repository", - "description": "Run a baseline-aware CodeClone analysis for the current repository." + "description": "Run a deterministic CodeClone analysis and register it as the latest MCP run." }, { "name": "analyze_changed_paths", - "description": "Run changed-files analysis for PR-style structural review." + "description": "Run a deterministic CodeClone analysis and return a changed-files projection." + }, + { + "name": "get_run_summary", + "description": "Cheapest run-level snapshot: health, findings, baseline, inventory." }, { "name": "get_production_triage", - "description": "Return the cheapest production-first hotspot and health snapshot." + "description": "Production-first triage: health, hotspots, suggestions, source-kind counters." }, { - "name": "get_run_summary", - "description": "Cheapest run snapshot: health, findings, baseline, inventory." 
+ "name": "get_blast_radius", + "description": "Structural risk boundary: dependents, clone cohorts, do-not-touch paths, review context." + }, + { + "name": "get_relevant_memory", + "description": "Ranked, evidence-linked records, experiences, and trajectories for the declared edit scope. Read-only." + }, + { + "name": "query_engineering_memory", + "description": "Engineering Memory router: record search/status, trajectory detail/search/status, anomalies, agents, and dashboard views. Read-only." + }, + { + "name": "manage_engineering_memory", + "description": "Engineering Memory actions: refresh, draft recording, experience promotion, claim validation, semantic/trajectory rebuilds, and projection jobs. Human approve/reject/archive require the VS Code Memory view." + }, + { + "name": "query_platform_observability", + "description": "Read-only, development-only diagnostics over CodeClone runtime telemetry; never a repository quality signal." + }, + { + "name": "manage_change_intent", + "description": "Change intent lifecycle: list, declare, check, renew, queue/promote, recover, clear, and workspace cleanup." + }, + { + "name": "start_controlled_change", + "description": "Pre-edit workflow: declare change intent, compute blast radius, and return patch budget in one call." + }, + { + "name": "finish_controlled_change", + "description": "Post-edit workflow: verify scope, run patch contract, validate claims, generate receipt, and clear intent." + }, + { + "name": "check_patch_contract", + "description": "Patch contract checks: budget before editing, verify before/after runs after editing." + }, + { + "name": "create_review_receipt", + "description": "Deterministic audit artifact: provenance, intent scope, blast radius, patch status, and claims-not-made." + }, + { + "name": "validate_review_claims", + "description": "Validate cited review text against canonical report semantics and detect deterministic mischaracterizations." 
}, { "name": "list_hotspots", "description": "Priority-ranked hotspot views by kind." }, + { + "name": "list_findings", + "description": "List canonical finding groups with filters, pagination, and summary cards." + }, { "name": "get_finding", - "description": "Open one canonical finding by id." + "description": "Return a single canonical finding group by short or full id." + }, + { + "name": "get_implementation_context", + "description": "Return deterministic, bounded implementation context from one existing analysis run. Resolves explicit repo-relative paths and module:symbol qualnames, then projects module, dependency, API-surface, call/reference, blast-radius, cache-origin, and workspace-freshness facts without re-analysis or edit authorization." }, { "name": "get_remediation", - "description": "Return remediation guidance for one canonical finding." + "description": "Return actionable remediation guidance for a single finding." + }, + { + "name": "get_report_section", + "description": "Return a canonical report section for the latest or specified run." + }, + { + "name": "check_clones", + "description": "Return clone findings from a compatible stored run." + }, + { + "name": "check_complexity", + "description": "Return complexity hotspots from a compatible stored run." + }, + { + "name": "check_coupling", + "description": "Return coupling hotspots from a compatible stored run." + }, + { + "name": "check_cohesion", + "description": "Return cohesion hotspots from a compatible stored run." + }, + { + "name": "check_dead_code", + "description": "Return dead-code findings from a compatible stored run." }, { "name": "compare_runs", @@ -64,15 +142,27 @@ }, { "name": "evaluate_gates", - "description": "Evaluate CI gating decisions for the current run." + "description": "Evaluate CI gate conditions against an existing MCP run." }, { "name": "generate_pr_summary", - "description": "PR-friendly markdown or JSON summary of the analysis." 
+ "description": "PR-friendly markdown or JSON summary of changed files." + }, + { + "name": "mark_finding_reviewed", + "description": "Mark a finding as reviewed in the current in-memory MCP session." + }, + { + "name": "list_reviewed_findings", + "description": "List in-memory reviewed findings for the current or specified run." + }, + { + "name": "clear_session_runs", + "description": "Clear all in-memory MCP analysis runs and ephemeral session state." }, { "name": "help", - "description": "Semantic guide for workflow, analysis profile, baseline, coverage, and review state." + "description": "Explain a CodeClone workflow or contract topic and suggest the safest next step." } ], "tools_generated": true, diff --git a/extensions/claude-desktop-codeclone/package.json b/extensions/claude-desktop-codeclone/package.json index dd3847cb..1f7685c1 100644 --- a/extensions/claude-desktop-codeclone/package.json +++ b/extensions/claude-desktop-codeclone/package.json @@ -1,8 +1,8 @@ { "name": "@orenlab/codeclone-claude-desktop", - "version": "2.0.0", + "version": "2.1.0", "private": true, - "description": "Claude Desktop MCP bundle wrapper for the local CodeClone MCP launcher.", + "description": "Structural Change Controller for AI-assisted Python development — Claude Desktop MCP bundle.", "license": "MPL-2.0", "type": "commonjs", "engines": { diff --git a/extensions/claude-desktop-codeclone/src/launcher.js b/extensions/claude-desktop-codeclone/src/launcher.js index 509f86ac..feaec085 100644 --- a/extensions/claude-desktop-codeclone/src/launcher.js +++ b/extensions/claude-desktop-codeclone/src/launcher.js @@ -15,6 +15,33 @@ const BLOCKED_ARGS = new Set([ "--json-response", "--stateless-http", ]); +const SPAWN_ENV_EXACT_KEYS = new Set([ + "PATH", + "HOME", + "USERPROFILE", + "APPDATA", + "LOCALAPPDATA", + "SystemRoot", + "WINDIR", + "TEMP", + "TMP", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "PWD", + "OS", + "COMSPEC", + "PATHEXT", +]); +const SPAWN_ENV_PREFIXES = 
[ + "CODECLONE_", + "PYTHON", + "UV_", + "VIRTUAL_ENV", + "POETRY_", +]; /** * @typedef {{ @@ -116,6 +143,87 @@ function validateAdditionalArgs(args) { } } +/** + * Decide whether an inherited environment variable may be forwarded to the + * spawned launcher. + * + * Windows environment names are case-insensitive, but enumerating the + * environment yields them in their stored casing (commonly `Path`, `ComSpec`, + * `windir`), so on win32 the allowlist is matched without regard to case. + * + * @param {string} key + * @param {NodeJS.Platform} [platform] + * @returns {boolean} + */ +function spawnEnvAllowsKey(key, platform = process.platform) { + if (SPAWN_ENV_EXACT_KEYS.has(key)) { + return true; + } + const foldCase = platform === "win32"; + const probe = foldCase ? key.toUpperCase() : key; + if (foldCase) { + for (const allowed of SPAWN_ENV_EXACT_KEYS) { + if (allowed.toUpperCase() === probe) { + return true; + } + } + } + return SPAWN_ENV_PREFIXES.some((prefix) => + probe.startsWith(foldCase ? prefix.toUpperCase() : prefix), + ); +} + +/** + * Build the child-process environment from the allowlisted, string-valued + * entries of `baseEnv`, then set CODECLONE_WORKSPACE_ROOT from `workspaceRoot` + * unless the copied environment already provides a non-empty value. + * + * @param {string | null | undefined} workspaceRoot + * @param {NodeJS.ProcessEnv} [baseEnv] + * @returns {NodeJS.ProcessEnv} + */ +function buildSpawnEnv(workspaceRoot, baseEnv = process.env) { + /** @type {NodeJS.ProcessEnv} */ + const env = {}; + for (const [key, value] of Object.entries(baseEnv)) { + if (typeof value === "string" && spawnEnvAllowsKey(key)) { + env[key] = value; + } + } + const root = normalizeConfiguredValue(workspaceRoot ?? ""); + if (root && !normalizeConfiguredValue(env.CODECLONE_WORKSPACE_ROOT)) { + env.CODECLONE_WORKSPACE_ROOT = root; + } + return env; +} + +/** + * Report whether `command`, after canonical path resolution, lies strictly + * inside `root`. Blank inputs and any resolution failure count as outside. + * + * @param {string} command + * @param {string} root + * @returns {boolean} + */ +function isLauncherWithinWorkspace(command, root) { + const launcher = String(command || "").trim(); + const workspaceRoot = String(root || "").trim(); + if (!launcher || !workspaceRoot) { + return false; + } + try { + const resolvedCommand = fsSync.realpathSync(launcher); + const resolvedRoot = fsSync.realpathSync(workspaceRoot); + const relative = path.relative(resolvedRoot, resolvedCommand); + return ( + relative !== "" && + !relative.startsWith("..") && + !path.isAbsolute(relative) + ); + } catch { + return false; + } +} + /** * @param {string} command * @returns {void} @@ -302,6 +385,9 @@ async function candidateWorkspaceCommands(env, platform, cwd) { continue; } if (await fileExists(candidate.command)) { + if (!isLauncherWithinWorkspace(candidate.command, candidate.root)) { + continue; + } existing.push(candidate); seen.add(candidate.command); } @@ -323,6 +409,9 @@ async function candidateWorkspaceCommands(env,
platform, cwd) { continue; } if (await fileExists(command)) { + if (!isLauncherWithinWorkspace(command, ancestor)) { + continue; + } existing.push({command, root: ancestor}); seen.add(command); } @@ -621,10 +710,7 @@ async function runProxy(options = {}) { } const spawnCwd = spec.cwd && spec.cwd.length > 0 ? spec.cwd : undefined; - const childEnv = {...process.env}; - if (spawnCwd && !normalizeConfiguredValue(childEnv.CODECLONE_WORKSPACE_ROOT)) { - childEnv.CODECLONE_WORKSPACE_ROOT = spawnCwd; - } + const childEnv = buildSpawnEnv(spawnCwd ?? null); /** @type {string} */ let resolvedCommand; @@ -689,9 +775,11 @@ async function runProxy(options = {}) { module.exports = { BLOCKED_ARGS, buildSetupMessage, + buildSpawnEnv, candidateAutoCommands, candidateWorkspaceCommands, exitProxy, + isLauncherWithinWorkspace, normalizeConfiguredValue, parseLauncherArgsJson, resolveLaunchSpec, diff --git a/extensions/claude-desktop-codeclone/test/manifest.test.js b/extensions/claude-desktop-codeclone/test/manifest.test.js index c619f64e..48438415 100644 --- a/extensions/claude-desktop-codeclone/test/manifest.test.js +++ b/extensions/claude-desktop-codeclone/test/manifest.test.js @@ -12,6 +12,15 @@ const manifest = JSON.parse( const packageJson = JSON.parse( fs.readFileSync(path.join(rootDir, "package.json"), "utf8"), ); +const contractSnapshot = JSON.parse( + fs.readFileSync( + path.join( + rootDir, + "../../tests/fixtures/contract_snapshots/mcp_tool_schemas.json", + ), + "utf8", + ), +); test("manifest and package metadata stay aligned", () => { assert.equal(manifest.version, packageJson.version); @@ -30,8 +39,17 @@ test("manifest keeps the setup surface bounded and local", () => { assert.deepEqual(manifest.privacy_policies, [ "https://orenlab.github.io/codeclone/privacy-policy/", ]); - assert.equal(manifest.documentation, "https://orenlab.github.io/codeclone/claude-desktop-bundle/"); + assert.equal(manifest.documentation, 
"https://orenlab.github.io/codeclone/guide/integrations/claude-desktop/setup/"); assert.equal(manifest.tools_generated, true); - assert.equal(manifest.tools.length, 11); + // Derive the expected count from the canonical MCP contract snapshot so the + // bundle stays in lockstep with the server surface instead of drifting + // against a hardcoded number. + assert.equal(manifest.tools.length, contractSnapshot.length); assert.equal("instructions" in manifest, false); }); + +test("manifest tools match MCP contract snapshot", () => { + const expected = contractSnapshot.map((entry) => entry.name).sort(); + const actual = manifest.tools.map((entry) => entry.name).sort(); + assert.deepEqual(actual, expected); +}); diff --git a/extensions/vscode-codeclone/CHANGELOG.md b/extensions/vscode-codeclone/CHANGELOG.md index 3fe9bd26..7d432bc4 100644 --- a/extensions/vscode-codeclone/CHANGELOG.md +++ b/extensions/vscode-codeclone/CHANGELOG.md @@ -1,5 +1,60 @@ # Change Log +## 0.3.0 + +- add **Show Trajectory Dashboard** — secure read-only webview over MCP + `query_engineering_memory(mode=trajectory_dashboard)` with stored trajectory + counts, per-agent aggregation, and anomaly highlights +- add **Show Trajectory Detail** — step timeline webview via + `query_engineering_memory(mode=trajectory_get)` QuickPick over recent trajectories +- add **Copy Trajectory Dashboard Brief** — Markdown summary for review notes +- Memory view toolbar entries when the workspace is trusted and MCP is connected +- webviews use `enableScripts: false`, nonce CSP, and theme-native styling +- trajectory detail webview uses a compact **Trajectory Passport** layout: full-width + KPI rail (contract quality, complexity band, duration, events, steps, incidents, + evidence), compact patch-trail row, and a two-column analysis grid (contract + gates | complexity factors) with expandable **Show calculation** for both + scores +- event timeline markers use green/yellow/red tone from step status; titles drop + 
redundant `(status)` suffix; status remains in marker tooltip +- show **Trajectory duration** in the KPI rail (hover for time range); deterministic + contract-quality and complexity scores from contract breakdown (rebuild + trajectories after Engineering Memory schema 1.5 upgrade) +- add **Show Blast Radius** command — concentric SVG diagram of structural + impact for the active file, rendered in a secure WebviewPanel with no scripts + and nonce-scoped CSP +- add **Copy Blast Radius Brief** command — structured Markdown summary of + origin, dependents, clone cohort, risk signals, and guardrails copied to + clipboard +- both commands available from the editor title menu when a run is active and + the workspace is trusted +- bump minimum version to reflect the new MCP `get_blast_radius` dependency +- upgrade `@vscode/vsce` from `2.25.0` to `3.9.1`, resolving the transitive + `tmp` path-traversal (GHSA-ph9p-34f9-6g65) and `qs` DoS + (GHSA-q8mj-m7cp-5q26) vulnerabilities +- upgrade `@types/node` to `25.9.1` and `typescript` to `6.0.3` +- add **Show Session Stats** and **Show Controller Audit Trail** — secure + webviews mirroring CLI `--session-stats` and `--audit` via IDE-only MCP tools + (`get_workspace_session_stats`, `get_controller_audit_trail`; not listed for + agent clients on the default launcher) +- add **Copy Session Stats Brief** and **Copy Controller Audit Brief** commands +- Session view toolbar entries when the workspace is trusted and connected +- add **Search Engineering Memory** — QuickPick keyword search over MCP + `query_engineering_memory` (FTS + optional semantic re-rank) +- add **Memory for Active File** — path-scoped memory for the active editor +- add **Open Memory Search Panel** — secure read-only webview (`enableScripts: + false`, nonce CSP, allowlisted `codeclone.openMemoryRecordById` command URIs) +- add workspace settings under `codeclone.memory.*` for search semantics and limits +- Memory view welcome and toolbar link search commands 
without a search tree section +- add **bulk Memory inbox governance** — checkbox selection on draft rows, + `canSelectMany` multi-select with context-menu approve/reject, toolbar + **Approve/Reject Checked Memory Drafts**, **Select All Memory Drafts**, + **Select Memory Drafts by Type**, and **Clear Memory Draft Selection** with + one modal confirmation and progress notification per batch +- extend bulk governance to **Stale** records — checkboxes, inline approve, + multi-select approve, **Select All Stale** / **Select Stale by Type**; fix + silent no-op when approving stale rows (hydration previously inbox-only) + ## 0.2.7 - surface Coverage Join review items in Hotspots when coverage data is available diff --git a/extensions/vscode-codeclone/README.md b/extensions/vscode-codeclone/README.md index 992274ca..d4ffa452 100644 --- a/extensions/vscode-codeclone/README.md +++ b/extensions/vscode-codeclone/README.md @@ -3,9 +3,12 @@ [![License](https://img.shields.io/github/license/orenlab/codeclone?style=flat-square&color=6366f1)](LICENSE) [![Requires CodeClone](https://img.shields.io/badge/requires-codeclone_%3E%3D2.0.0-6366f1?style=flat-square)](https://orenlab.github.io/codeclone/) -Native VS Code surface for [codeclone-mcp](https://orenlab.github.io/codeclone/mcp/). -Brings baseline-aware structural analysis into the editor — triage-first, read-only, -and driven by the same canonical report as the CLI and HTML output. +Native VS Code surface for [codeclone-mcp](https://orenlab.github.io/codeclone/guide/mcp/) — +**Structural Change Controller for AI-assisted Python development**. Brings +baseline-aware structural analysis into the editor — triage-first, repository +read-only, and driven by the same canonical report as the CLI and HTML output. Session tools (`mark_finding_reviewed`, `clear_session_runs`) update +ephemeral MCP state only; they never mutate source, baselines, cache, or report +artifacts. 
> **Not a linter panel.** CodeClone for VS Code is designed for structural review and > refactoring flow, not diagnostics or Problems integration. @@ -18,6 +21,12 @@ and driven by the same canonical report as the CLI and HTML output. at a glance; report-only Security Surfaces and Overloaded Modules kept visually separate - **Baseline-aware** — distinguishes known debt from new regressions against the stored baseline - **Changed-files review** — `Review Changes` scopes analysis to the current diff via a configurable git ref +- **Blast Radius** — `Show Blast Radius` renders a concentric SVG diagram of structural + impact for the active file; `Copy Blast Radius Brief` puts a Markdown summary on the clipboard +- **Session & audit insights** — `Show Session Stats` and `Show Controller Audit Trail` mirror + CLI `--session-stats` and `--audit` in read-only webviews (IDE-only MCP tools, not exposed to agents) +- **Trajectory passports** — dashboard and detail views expose quality, + complexity, anomalies, Patch Trail evidence, and agent aggregates - **Coverage Join** — integrates `coverage.xml` to surface untested hotspots when available - **Source-first navigation** — `Reveal Source` opens the exact location; `Next / Previous Hotspot` steps through active targets in the editor @@ -29,9 +38,11 @@ and driven by the same canonical report as the CLI and HTML output. ## Requirements -- VS Code `1.85+` +- VS Code `1.120.0+` (`engines.vscode` in `package.json`) - Python workspace (trusted) - `codeclone-mcp` launcher (`codeclone >= 2.0.0`) +- The **Memory** view (Engineering Memory) requires `codeclone >= 2.1.0a1`; + on older servers it stays inactive and reports the required version. --- @@ -98,20 +109,108 @@ Focus mode is explicit and persisted per workspace; `Recommended` is the default Bounded MCP session state: server availability, current run identity, reviewed findings, and help topics. Reviewed markers are session-local and do not mutate the repository or report. 
+### Blast Radius + +Visual structural impact analysis for the active file. + +- **Show Blast Radius** — opens a WebviewPanel with a concentric SVG diagram + showing origin, direct dependents, transitive dependents, and clone cohort. + Risk signals (complexity, coverage, overloaded modules) are overlaid as + colored dots. Do-not-touch boundaries and guardrails are listed below the + diagram. +- **Copy Blast Radius Brief** — copies a structured Markdown summary of the + same data to the clipboard for use in PR descriptions or review notes. + +Both commands are available from the editor title context menu and the command +palette when a run is active and the workspace is trusted. The webview uses +`enableScripts: false` and a nonce-scoped Content Security Policy with no +external resource access. + +### Session stats & controller audit + +Workspace coordination dashboards (no analysis run required beyond MCP connection): + +- **Show Session Stats** — live agents, change intents, lease health, latest cached + run summary, and audit token footprint (`get_workspace_session_stats`) +- **Show Controller Audit Trail** — recent controller events when `audit_enabled=true` + (`get_controller_audit_trail`) +- **Copy Session Stats Brief** / **Copy Controller Audit Brief** — Markdown summaries + for review notes + +Available from the **Session** view title bar when the workspace is trusted and connected. +These MCP tools register only when the extension launches the server with +`--ide-governance-channel`; agent clients on the default `codeclone-mcp` launcher do not +see them in `list_tools`. + +### Memory (inbox + search) + +The **Memory** view remains the governance inbox (draft approve/reject). 
Search +is separate so the tree stays focused on human review work: + +- **Search Engineering Memory** — keyword QuickPick (`query_engineering_memory` + mode=search; optional semantic re-rank via settings) +- **Memory for Active File** — records bound to the current editor path + (mode=for_path) +- **Open Memory Search Panel** — read-only results webview (CSP, no scripts, + allowlisted `command:` links to open a record) +- **Show Trajectory Dashboard** — status, agent/outcome aggregates, anomalies, + and recent trajectories +- **Show Trajectory Detail** — quality passport, complexity factors, Patch + Trail, contract gates, incidents, steps, and evidence +- **Copy Trajectory Dashboard Brief** — Markdown summary for review notes + +Use **Configure Memory Search** to adjust semantic recall, drafts/stale filters, +and result limits per workspace. + +Trajectory views are read-only projections from +`query_engineering_memory`; they do not create IDE-local workflow truth. + --- ## Settings -| Setting | Default | Scope | Description | -|-------------------------------------|----------------|----------|-----------------------------------------------------------------------------------------------------| -| `codeclone.mcp.command` | `auto` | Machine | Launcher used to start the local CodeClone server. `auto` checks workspace virtualenv, then `PATH`. | -| `codeclone.mcp.args` | `[]` | Machine | Extra arguments passed to the launcher. | -| `codeclone.analysis.cachePolicy` | — | Resource | Default cache policy for analysis requests. Can differ per workspace or folder. | -| `codeclone.analysis.changedDiffRef` | — | Resource | Git revision used by **Review Changes**. | -| `codeclone.analysis.profile` | `conservative` | Resource | Analysis sensitivity. Use `deeper` or `custom` only as deliberate follow-ups. | -| `codeclone.analysis.minLoc` | — | Resource | Function/block/segment thresholds — active only when profile is `custom`. 
| -| `codeclone.analysis.coverageXml` | — | Resource | Path to `coverage.xml`. Auto-detects workspace-root file when unset. | -| `codeclone.ui.showStatusBar` | `true` | Window | Show or hide the workspace-level status bar item. | +Defaults and scopes match `package.json` → `contributes.configuration.properties`. + +### Launcher + +| Setting | Default | Scope | Description | +|---------|---------|-------|-------------| +| `codeclone.mcp.command` | `auto` | Machine | Launcher for `codeclone-mcp` (`auto`: workspace venv, then `PATH`). | +| `codeclone.mcp.args` | `[]` | Machine | Extra launcher argv. The extension injects `--ide-governance-channel` for Memory governance and session/audit MCP tools. | + +### Analysis + +| Setting | Default | Scope | Description | +|---------|---------|-------|-------------| +| `codeclone.analysis.profile` | `defaults` | Resource | `defaults`, `deeperReview`, or `custom`. | +| `codeclone.analysis.cachePolicy` | `reuse` | Resource | `reuse` or `off` for analysis requests. | +| `codeclone.analysis.changedDiffRef` | `HEAD` | Resource | Git ref for **Review Changes**. | +| `codeclone.analysis.coverageXml` | `""` | Resource | Cobertura path for Coverage Join. | +| `codeclone.analysis.autoDetectCoverageXml` | `true` | Resource | Use workspace-root `coverage.xml` when path is empty. | +| `codeclone.analysis.minLoc` | `10` | Resource | Custom clone thresholds (only when `profile=custom`). | +| `codeclone.analysis.minStmt` | `6` | Resource | Same. | +| `codeclone.analysis.blockMinLoc` | `20` | Resource | Same. | +| `codeclone.analysis.blockMinStmt` | `8` | Resource | Same. | +| `codeclone.analysis.segmentMinLoc` | `20` | Resource | Same. | +| `codeclone.analysis.segmentMinStmt` | `10` | Resource | Same. | + +### UI + +| Setting | Default | Scope | Description | +|---------|---------|-------|-------------| +| `codeclone.ui.showStatusBar` | `true` | Window | Workspace-level status bar item. 
| + +### Engineering Memory search + +| Setting | Default | Scope | Description | +|---------|---------|-------|-------------| +| `codeclone.memory.searchSemantic` | `true` | Resource | Pass `semantic=true` to MCP keyword search. FTS still runs when the server index is missing; server needs `memory.semantic` enabled + `semantic-lancedb` + rebuild for real blend. | +| `codeclone.memory.searchIncludeDrafts` | `false` | Resource | Include draft records in search / search panel (`include_drafts`). | +| `codeclone.memory.searchIncludeStale` | `false` | Resource | Include stale records (`include_stale`; also used for **Memory for Active File**). | +| `codeclone.memory.searchMaxResults` | `20` | Resource | Cap per search (5–50). | +| `codeclone.memory.searchDetailLevel` | `compact` | Resource | `compact` or `full` statement payloads in list modes. | + +**Configure Memory Search** sets semantic, drafts, stale, and max results per workspace folder. **Detail level** is editor-settings only. --- @@ -136,7 +235,13 @@ full analysis and MCP are disabled until workspace trust is granted. - **No second truth model** — health, findings, and drift come exclusively from `codeclone-mcp` and canonical report semantics. -- **Read-only** — the extension never edits source files, baselines, caches, or report artifacts. +- **Repository read-only** — the extension never edits source files, baselines, + caches, or report artifacts. **Mark Reviewed** and **Clear Session** call + ephemeral MCP session tools only. +- **Curated MCP surface** — IDE commands invoke a fixed subset of MCP tools + (analysis, triage, blast radius, review markers, session clear). Change-control + tools remain on the server for agent clients but are not wired to VS Code UI + commands. - **Report-only separation** — Security Surfaces and Overloaded Modules are visible but intentionally excluded from findings, gates, and health scoring. 
- **Source-first** — the default review action moves you to code before opening deeper detail. @@ -146,8 +251,8 @@ full analysis and MCP are disabled until workspace trust is granted. ## Documentation - [CodeClone documentation](https://orenlab.github.io/codeclone/) -- [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) -- [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) +- [MCP usage guide](https://orenlab.github.io/codeclone/guide/mcp/) +- [MCP interface contract](https://orenlab.github.io/codeclone/book/25-mcp-interface/) --- diff --git a/extensions/vscode-codeclone/media/view-memory.svg b/extensions/vscode-codeclone/media/view-memory.svg new file mode 100644 index 00000000..b6534544 --- /dev/null +++ b/extensions/vscode-codeclone/media/view-memory.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/extensions/vscode-codeclone/package-lock.json b/extensions/vscode-codeclone/package-lock.json index b65a76cb..6ac4cc0c 100644 --- a/extensions/vscode-codeclone/package-lock.json +++ b/extensions/vscode-codeclone/package-lock.json @@ -1,28 +1,238 @@ { "name": "codeclone", - "version": "0.2.7", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "codeclone", - "version": "0.2.7", + "version": "0.3.0", "license": "MPL-2.0", "devDependencies": { - "@types/node": "^25.5.2", - "@types/vscode": "1.100.0", - "@vscode/vsce": "2.25.0", + "@types/node": "^25.9.1", + "@types/vscode": "1.120.0", + "@vscode/vsce": "3.9.1", "esbuild": "^0.28.0", - "typescript": "^6.0.2" + "typescript": "^6.0.3" + }, + "engines": { + "vscode": "^1.120.0" + } + }, + "node_modules/@azu/format-text": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@azu/format-text/-/format-text-1.0.2.tgz", + "integrity": "sha512-Swi4N7Edy1Eqq82GxgEECXSSLyn6GOb5htRFPzBDdUkECGXtlf12ynO5oJSpWKPwCaUssOu7NfhDcCWpIC6Ywg==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@azu/style-format": { + "version": "1.0.1", + 
"resolved": "https://registry.npmjs.org/@azu/style-format/-/style-format-1.0.1.tgz", + "integrity": "sha512-AHcTojlNBdD/3/KxIKlg8sxIWHfOtQszLvOpagLTO+bjC3u7SAszu1lf//u7JJC50aUSH+BVWDD/KvaA6Gfn5g==", + "dev": true, + "license": "WTFPL", + "dependencies": { + "@azu/format-text": "^1.0.1" + } + }, + "node_modules/@azure/abort-controller": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz", + "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-auth": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.10.1.tgz", + "integrity": "sha512-ykRMW8PjVAn+RS6ww5cmK9U2CyH9p4Q88YJwvUslfuMmN98w/2rdGRLPqJYObapBCdzBVeDgYWdJnFPFb7qzpg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-util": "^1.13.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-client": { + "version": "1.10.2", + "resolved": "https://registry.npmjs.org/@azure/core-client/-/core-client-1.10.2.tgz", + "integrity": "sha512-1D2LpsU7y9xrqKjdIbsB7PlrRePw0xsVV8p+AKTlzITrWmscajryfJCdDJB/oGwvDI5HmRo04eMMADB67uwAwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline": { + "version": "1.24.0", + "resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.24.0.tgz", + "integrity": 
"sha512-PpLsoDQ3AMmKZ0VU+0GrmqMxgp/sExjlVm4R+nLWngeoEGAzOIPVifaxKGU5gMv+nWELUoHfvrolWD+ZS/nFJg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.4", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-tracing": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.3.1.tgz", + "integrity": "sha512-9MWKevR7Hz8kNzzPLfX4EAtGM2b8mr50HPDBvio96bURP/9C+HjdH3sBlLSNNrvRAr5/k/svoH457gB5IKpmwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-util": { + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/@azure/core-util/-/core-util-1.13.1.tgz", + "integrity": "sha512-XPArKLzsvl0Hf0CaGyKHUyVgF7oDnhKoP85Xv6M4StF/1AhfORhZudHtOyf2s+FcbuQ9dPRAjB8J2KvRRMUK2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/identity": { + "version": "4.13.1", + "resolved": "https://registry.npmjs.org/@azure/identity/-/identity-4.13.1.tgz", + "integrity": "sha512-5C/2WD5Vb1lHnZS16dNQRPMjN6oV/Upba+C9nBIs15PmOi6A3ZGs4Lr2u60zw4S04gi+u3cEXiqTVP7M4Pz3kw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.9.0", + "@azure/core-client": "^1.9.2", + "@azure/core-rest-pipeline": "^1.17.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "@azure/msal-browser": "^5.5.0", + "@azure/msal-node": "^5.1.0", + "open": "^10.1.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, 
+ "node_modules/@azure/logger": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.3.0.tgz", + "integrity": "sha512-fCqPIfOcLE+CGqGPd66c8bZpwAji98tZ4JI9i/mlTNTlsIWslCfpg48s/ypyLxZTump5sypjrKn2/kY7q8oAbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/msal-browser": { + "version": "5.14.0", + "resolved": "https://registry.npmjs.org/@azure/msal-browser/-/msal-browser-5.14.0.tgz", + "integrity": "sha512-Dfl7hPZe9/JJwRhFFXHq2z1oHYBuGubmff3kWXOsd1AGgyXlqjNYAWuN/1JL/ZrcZBs8TKMjGSil6Rcc7E8VPQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/msal-common": "16.9.0" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-common": { + "version": "16.9.0", + "resolved": "https://registry.npmjs.org/@azure/msal-common/-/msal-common-16.9.0.tgz", + "integrity": "sha512-1MWGjqgUCRAYgLmVFZKp7fs3Rg1TFvIMgywY8ze2olNVvLlJoRThuoziWSDJuwwyJI5L4rnLb9Tyt5D9GvSLPw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-node": { + "version": "5.2.5", + "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-5.2.5.tgz", + "integrity": "sha512-RUuewWk9JvWJS5Yiy8/74Lm1rQAWlrU/qg/Bgtk1jIauVRtnb9XKwS5Xg0J+Whwjesq9EVrBIFgQEP8vHxgezA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/msal-common": "16.9.0", + "jsonwebtoken": "^9.0.0" }, "engines": { - "vscode": "^1.100.0" + "node": ">=20" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.7.tgz", + "integrity": "sha512-Aup7aUOfpbAUg2ROOJN6Iw5f9DMBlzu0mIkm/malLQFN/YQgO48wCj0Kxa3sEHJvPVFg7siR+qRInwXd2qhQKw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.29.7", + "js-tokens": "^4.0.0", + 
"picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.29.7.tgz", + "integrity": "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" } }, "node_modules/@esbuild/aix-ppc64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.0.tgz", - "integrity": "sha512-lhRUCeuOyJQURhTxl4WkpFTjIsbDayJHih5kZC1giwE+MhIzAb7mEsQMqMf18rHLsrb5qI1tafG20mLxEWcWlA==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.1.tgz", + "integrity": "sha512-Svl7tq8k/08+p6CXPpRjQ1fKX+1odH/BQbb48fV6fj3CWHhsoIOoY87w1oHXm0qEpkIK3ZfVgp0hed3XBXzXMQ==", "cpu": [ "ppc64" ], @@ -37,9 +247,9 @@ } }, "node_modules/@esbuild/android-arm": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.28.0.tgz", - "integrity": "sha512-wqh0ByljabXLKHeWXYLqoJ5jKC4XBaw6Hk08OfMrCRd2nP2ZQ5eleDZC41XHyCNgktBGYMbqnrJKq/K/lzPMSQ==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.28.1.tgz", + "integrity": "sha512-0k2F129Xdio1TdJfzJ8sy1Q47vUD2NnwdhiAf7drUN1EBTfPf4hsFCtmMgu/6m8JSzsBrlmVjudMBQqOfG8usQ==", "cpu": [ "arm" ], @@ -54,9 +264,9 @@ } }, "node_modules/@esbuild/android-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.28.0.tgz", - "integrity": "sha512-+WzIXQOSaGs33tLEgYPYe/yQHf0WTU0X42Jca3y8NWMbUVhp7rUnw+vAsRC/QiDrdD31IszMrZy+qwPOPjd+rw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.28.1.tgz", + "integrity": 
"sha512-34EGEbCIAgosYz6goLcopX6Mo7NyGv9tfwEM2/7Ce2VcVRk568iSvniGWcUXIy7wEDR1wzolcxcriFVrWYcwBg==", "cpu": [ "arm64" ], @@ -71,9 +281,9 @@ } }, "node_modules/@esbuild/android-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.28.0.tgz", - "integrity": "sha512-+VJggoaKhk2VNNqVL7f6S189UzShHC/mR9EE8rDdSkdpN0KflSwWY/gWjDrNxxisg8Fp1ZCD9jLMo4m0OUfeUA==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.28.1.tgz", + "integrity": "sha512-dbwY7ltSMDWsRatcRpCnES4F+im88OCUgGZjy52shC7GqHRE/cYlxNbB4Z4UpJswpcc4Qxd2oE/ufM0p61IKng==", "cpu": [ "x64" ], @@ -88,9 +298,9 @@ } }, "node_modules/@esbuild/darwin-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.28.0.tgz", - "integrity": "sha512-0T+A9WZm+bZ84nZBtk1ckYsOvyA3x7e2Acj1KdVfV4/2tdG4fzUp91YHx+GArWLtwqp77pBXVCPn2We7Letr0Q==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.28.1.tgz", + "integrity": "sha512-TZbWkQY7kvTAXbXUT7uVACR5cMHsDiSz9z7ZKAX/RTq/WJEk3QyRr0wZpNhBDX+/0CtdqUIJlOiodQcta6tY3Q==", "cpu": [ "arm64" ], @@ -105,9 +315,9 @@ } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.28.0.tgz", - "integrity": "sha512-fyzLm/DLDl/84OCfp2f/XQ4flmORsjU7VKt8HLjvIXChJoFFOIL6pLJPH4Yhd1n1gGFF9mPwtlN5Wf82DZs+LQ==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.28.1.tgz", + "integrity": "sha512-zfdzgK9ACBNZLI/CyHTOx81SyNbM6YXn7rxSgX97VjyiPl9W1i4Ka4fgKECEoFCKGpvBj5qArWIGgQjOwkgskQ==", "cpu": [ "x64" ], @@ -122,9 +332,9 @@ } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.28.0.tgz", - "integrity": 
"sha512-l9GeW5UZBT9k9brBYI+0WDffcRxgHQD8ShN2Ur4xWq/NFzUKm3k5lsH4PdaRgb2w7mI9u61nr2gI2mLI27Nh3Q==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.28.1.tgz", + "integrity": "sha512-wG2EA8ENdEI0qhkSZMjfqrdY+ziCYCPMmtZjjIwOmXFjmyzEHn+UUxk5of+SYsjtfs3VpnlC7QLzSI5hY/rOAw==", "cpu": [ "arm64" ], @@ -139,9 +349,9 @@ } }, "node_modules/@esbuild/freebsd-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.28.0.tgz", - "integrity": "sha512-BXoQai/A0wPO6Es3yFJ7APCiKGc1tdAEOgeTNy3SsB491S3aHn4S4r3e976eUnPdU+NbdtmBuLncYir2tMU9Nw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.28.1.tgz", + "integrity": "sha512-i7dZ9vQgnvSCzi/rYCXNgtF/U+eKZNJBzu3eTQbRgHnM7tNSizLOkRFAl3qzVc/Op/u5YkHHa4pf/3DOYHthLQ==", "cpu": [ "x64" ], @@ -156,9 +366,9 @@ } }, "node_modules/@esbuild/linux-arm": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.28.0.tgz", - "integrity": "sha512-CjaaREJagqJp7iTaNQjjidaNbCKYcd4IDkzbwwxtSvjI7NZm79qiHc8HqciMddQ6CKvJT6aBd8lO9kN/ZudLlw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.28.1.tgz", + "integrity": "sha512-qVXBOHQS+d5Y722GwJzJUtOLlX7km3CraOaGormF1pDtPd2C/l1SHRPgjLunLGe51Sh5YYWKMFDyV4SxgMQYTQ==", "cpu": [ "arm" ], @@ -173,9 +383,9 @@ } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.28.0.tgz", - "integrity": "sha512-RVyzfb3FWsGA55n6WY0MEIEPURL1FcbhFE6BffZEMEekfCzCIMtB5yyDcFnVbTnwk+CLAgTujmV/Lgvih56W+A==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.28.1.tgz", + "integrity": "sha512-yHs+0uc8+nvEAfAfxrWQKK5peSNzBc4PegcMO0EJ2hT71uA7vB8Ihg2e77R2P7SG5uYjPbHlLLmve4LLLRCf0g==", "cpu": [ "arm64" ], @@ -190,9 +400,9 @@ } }, 
"node_modules/@esbuild/linux-ia32": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.28.0.tgz", - "integrity": "sha512-KBnSTt1kxl9x70q+ydterVdl+Cn0H18ngRMRCEQfrbqdUuntQQ0LoMZv47uB97NljZFzY6HcfqEZ2SAyIUTQBQ==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.28.1.tgz", + "integrity": "sha512-d1z4ZuP0ajrfz/FhGT4vv278rX8KnPPJx8i5+AtK7TYbx9Le9F1hyzurZpkEyjkGa9dUGhQow4C1NmeGvqxN2w==", "cpu": [ "ia32" ], @@ -207,9 +417,9 @@ } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.28.0.tgz", - "integrity": "sha512-zpSlUce1mnxzgBADvxKXX5sl8aYQHo2ezvMNI8I0lbblJtp8V4odlm3Yzlj7gPyt3T8ReksE6bK+pT3WD+aJRg==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.28.1.tgz", + "integrity": "sha512-M5sRjUVZrkm1OAPR3dlOYzNmN+loZKGVi1VUQGrwuqLcbR6qeAz+famMhjASeH3YVKvZz+zT1jlh/keC3Rj/lg==", "cpu": [ "loong64" ], @@ -224,9 +434,9 @@ } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.28.0.tgz", - "integrity": "sha512-2jIfP6mmjkdmeTlsX/9vmdmhBmKADrWqN7zcdtHIeNSCH1SqIoNI63cYsjQR8J+wGa4Y5izRcSHSm8K3QWmk3w==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.28.1.tgz", + "integrity": "sha512-mRObBZeHh2OxcBFPWE/FjylkRgZdYuiTR3vaTozquCGOH14iP9oN4x4Ge81CoIDYQrXmIxpFumJBu5MtZpnQJQ==", "cpu": [ "mips64el" ], @@ -241,9 +451,9 @@ } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.28.0.tgz", - "integrity": "sha512-bc0FE9wWeC0WBm49IQMPSPILRocGTQt3j5KPCA8os6VprfuJ7KD+5PzESSrJ6GmPIPJK965ZJHTUlSA6GNYEhg==", + "version": "0.28.1", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.28.1.tgz", + "integrity": "sha512-slScBsMAb3GFDcdrCgLwZtPYRoH2H/youv10QiZyRjmsP48fznoveWytSgCI/R0ZcUgpc0ZhIUEx6LHts8yrfQ==", "cpu": [ "ppc64" ], @@ -258,9 +468,9 @@ } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.28.0.tgz", - "integrity": "sha512-SQPZOwoTTT/HXFXQJG/vBX8sOFagGqvZyXcgLA3NhIqcBv1BJU1d46c0rGcrij2B56Z2rNiSLaZOYW5cUk7yLQ==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.28.1.tgz", + "integrity": "sha512-kw0owk1o0GFETUJyW0jc0G4Yzs0BHZn0JDZ8JRT088vjJYX777BAs1fDGxAC+q831qOs2DTC96mNsG2opdfyyQ==", "cpu": [ "riscv64" ], @@ -275,9 +485,9 @@ } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.28.0.tgz", - "integrity": "sha512-SCfR0HN8CEEjnYnySJTd2cw0k9OHB/YFzt5zgJEwa+wL/T/raGWYMBqwDNAC6dqFKmJYZoQBRfHjgwLHGSrn3Q==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.28.1.tgz", + "integrity": "sha512-/lAIjX8aYFRByhh6L5rYtPEDRqa9de/4V/juOXcta5frjvzXO4/sqEtyytse0g3zZFuWu5cDN0MkLz2qRDD2Ag==", "cpu": [ "s390x" ], @@ -292,9 +502,9 @@ } }, "node_modules/@esbuild/linux-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.28.0.tgz", - "integrity": "sha512-us0dSb9iFxIi8srnpl931Nvs65it/Jd2a2K3qs7fz2WfGPHqzfzZTfec7oxZJRNPXPnNYZtanmRc4AL/JwVzHQ==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.28.1.tgz", + "integrity": "sha512-u/anNYF2mmVOEDwLtnQ1wOr3EZ9sTNGLWrsYGYwHWzGA3Si84IOkHXlbWTD1NB+9/1lcnweYKO54uhxZydNzfA==", "cpu": [ "x64" ], @@ -309,9 +519,9 @@ } }, "node_modules/@esbuild/netbsd-arm64": { - "version": "0.28.0", - "resolved": 
"https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.28.0.tgz", - "integrity": "sha512-CR/RYotgtCKwtftMwJlUU7xCVNg3lMYZ0RzTmAHSfLCXw3NtZtNpswLEj/Kkf6kEL3Gw+BpOekRX0BYCtklhUw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.28.1.tgz", + "integrity": "sha512-oks0DYbLwWMmaakTsCb+zL4E+aHRVLom9IJZOAthMQEPiQmydXHkziYEsGYRx0uNV/IjEKGAV941JzH02pflqw==", "cpu": [ "arm64" ], @@ -326,9 +536,9 @@ } }, "node_modules/@esbuild/netbsd-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.28.0.tgz", - "integrity": "sha512-nU1yhmYutL+fQ71Kxnhg8uEOdC0pwEW9entHykTgEbna2pw2dkbFSMeqjjyHZoCmt8SBkOSvV+yNmm94aUrrqw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.28.1.tgz", + "integrity": "sha512-aeL6lAnN89Hz43Mlh1G8ARasbuoYvSITDEx0tHh5b7jJnHcssqgjy9Yx430GDpmCa6OyrKoS0aNRjKundRizGg==", "cpu": [ "x64" ], @@ -343,9 +553,9 @@ } }, "node_modules/@esbuild/openbsd-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.28.0.tgz", - "integrity": "sha512-cXb5vApOsRsxsEl4mcZ1XY3D4DzcoMxR/nnc4IyqYs0rTI8ZKmW6kyyg+11Z8yvgMfAEldKzP7AdP64HnSC/6g==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.28.1.tgz", + "integrity": "sha512-MEFJe5C3R8pwXdZ5Y21oo6m7ePiS0d9pWucn99O/wvyJZChoIQKrQDxKrGeW8F5+T0okTHesAmDeiHDTIq0V/Q==", "cpu": [ "arm64" ], @@ -360,9 +570,9 @@ } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.28.0.tgz", - "integrity": "sha512-8wZM2qqtv9UP3mzy7HiGYNH/zjTA355mpeuA+859TyR+e+Tc08IHYpLJuMsfpDJwoLo1ikIJI8jC3GFjnRClzA==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.28.1.tgz", + "integrity": 
"sha512-i/ZLIOafE0Z8cI/XANJAixoJL/uRAoS2xOA3rb0xN+KK0K177cMAsQYkzHtBrtMXAKuAc7HGgcWiZ/sRC1Nxgw==", "cpu": [ "x64" ], @@ -377,9 +587,9 @@ } }, "node_modules/@esbuild/openharmony-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.28.0.tgz", - "integrity": "sha512-FLGfyizszcef5C3YtoyQDACyg95+dndv79i2EekILBofh5wpCa1KuBqOWKrEHZg3zrL3t5ouE5jgr94vA+Wb2w==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.28.1.tgz", + "integrity": "sha512-ge+Z7EXFNt2BO1oAMsVpiQ8EwndV9i1xXerAeTIK7AtPs3bKFXQM7nlRxDSIUIMeueR1CNXxqztLzdNeReKBJg==", "cpu": [ "arm64" ], @@ -394,9 +604,9 @@ } }, "node_modules/@esbuild/sunos-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.28.0.tgz", - "integrity": "sha512-1ZgjUoEdHZZl/YlV76TSCz9Hqj9h9YmMGAgAPYd+q4SicWNX3G5GCyx9uhQWSLcbvPW8Ni7lj4gDa1T40akdlw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.28.1.tgz", + "integrity": "sha512-BEjgtECkL3vY+SaSQ6nzVfiALUeFxpawyp8Jmf5PtYhf1Ug40N1h/hxlhts+f1FvSvarEigdxS3BlSMI2PJLcQ==", "cpu": [ "x64" ], @@ -411,9 +621,9 @@ } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.28.0.tgz", - "integrity": "sha512-Q9StnDmQ/enxnpxCCLSg0oo4+34B9TdXpuyPeTedN/6+iXBJ4J+zwfQI28u/Jl40nOYAxGoNi7mFP40RUtkmUA==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.28.1.tgz", + "integrity": "sha512-lCv9eK/H6ZJWbE7bh2nw54CZ9M2nupBxJcTsdk/QQnWkdSjKGuxmmH8/GWrlT1eMmZfn4dGcCjRte397WqfQXA==", "cpu": [ "arm64" ], @@ -428,9 +638,9 @@ } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.28.0.tgz", - "integrity": 
"sha512-zF3ag/gfiCe6U2iczcRzSYJKH1DCI+ByzSENHlM2FcDbEeo5Zd2C86Aq0tKUYAJJ1obRP84ymxIAksZUcdztHA==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.28.1.tgz", + "integrity": "sha512-zvb/mB2bSCoJOpoCBgYKKpX6YM6mJBlBUVUtVj41DlZJVEB6/0CKlRYxP5wWl1C1ILiCoAU5wZZ4q1P3qeS6Eg==", "cpu": [ "ia32" ], @@ -445,9 +655,9 @@ } }, "node_modules/@esbuild/win32-x64": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.28.0.tgz", - "integrity": "sha512-pEl1bO9mfAmIC+tW5btTmrKaujg3zGtUmWNdCw/xs70FBjwAL3o9OEKNHvNmnyylD6ubxUERiEhdsL0xBQ9efw==", + "version": "0.28.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.28.1.tgz", + "integrity": "sha512-bm4Mowrv+GXMlpWX++EcXw/iLyd1o3+bJkC2DkWXYVvgZCqD/bSj9ctZeAMC3cIxgjRVR2Dufaiu4YPxr5gW1A==", "cpu": [ "x64" ], @@ -461,209 +671,837 @@ "node": ">=18" } }, - "node_modules/@types/node": { - "version": "25.5.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz", - "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==", + "node_modules/@isaacs/cliui": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-9.0.0.tgz", + "integrity": "sha512-AokJm4tuBHillT+FpMtxQ60n8ObyXBatq7jD2/JA9dxbDDokKQm8KMht5ibGzLVU9IJDIKK4TPKgMHEYMn3lMg==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", "dev": true, "license": "MIT", "dependencies": { - "undici-types": "~7.18.0" + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" } }, - "node_modules/@types/vscode": { - "version": "1.100.0", - "resolved": 
"https://registry.npmjs.org/@types/vscode/-/vscode-1.100.0.tgz", - "integrity": "sha512-4uNyvzHoraXEeCamR3+fzcBlh7Afs4Ifjs4epINyUX/jvdk0uzLnwiDY35UKDKnkCHP5Nu3dljl2H8lR6s+rQw==", + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", "dev": true, - "license": "MIT" + "license": "MIT", + "engines": { + "node": ">= 8" + } }, - "node_modules/@vscode/vsce": { - "version": "2.25.0", - "resolved": "https://registry.npmjs.org/@vscode/vsce/-/vsce-2.25.0.tgz", - "integrity": "sha512-VXMCGUaP6wKBadA7vFQdsksxkBAMoh4ecZgXBwauZMASAgnwYesHyLnqIyWYeRwjy2uEpitHvz/1w5ENnR30pg==", + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", "dev": true, "license": "MIT", "dependencies": { - "azure-devops-node-api": "^12.5.0", - "chalk": "^2.4.2", - "cheerio": "^1.0.0-rc.9", - "cockatiel": "^3.1.2", - "commander": "^6.2.1", - "form-data": "^4.0.0", - "glob": "^7.0.6", - "hosted-git-info": "^4.0.2", - "jsonc-parser": "^3.2.0", - "leven": "^3.1.0", - "markdown-it": "^12.3.2", - "mime": "^1.3.4", - "minimatch": "^3.0.3", - "parse-semver": "^1.1.1", - "read": "^1.0.7", - "semver": "^7.5.2", - "tmp": "^0.2.1", - "typed-rest-client": "^1.8.4", - "url-join": "^4.0.1", - "xml2js": "^0.5.0", - "yauzl": "^2.3.1", - "yazl": "^2.2.2" - }, - "bin": { - "vsce": "vsce" + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" }, "engines": { - "node": ">= 16" - }, - "optionalDependencies": { - "keytar": "^7.7.0" + "node": ">= 8" } }, - "node_modules/ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": 
"sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "node_modules/@secretlint/config-creator": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/config-creator/-/config-creator-10.2.2.tgz", + "integrity": "sha512-BynOBe7Hn3LJjb3CqCHZjeNB09s/vgf0baBaHVw67w7gHF0d25c3ZsZ5+vv8TgwSchRdUCRrbbcq5i2B1fJ2QQ==", "dev": true, "license": "MIT", "dependencies": { - "color-convert": "^1.9.0" + "@secretlint/types": "^10.2.2" }, "engines": { - "node": ">=4" + "node": ">=20.0.0" } }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, - "license": "Python-2.0" - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "node_modules/@secretlint/config-loader": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/config-loader/-/config-loader-10.2.2.tgz", + "integrity": "sha512-ndjjQNgLg4DIcMJp4iaRD6xb9ijWQZVbd9694Ol2IszBIbGPPkwZHzJYKICbTBmh6AH/pLr0CiCaWdGJU7RbpQ==", "dev": true, - "license": "MIT" + "license": "MIT", + "dependencies": { + "@secretlint/profiler": "^10.2.2", + "@secretlint/resolver": "^10.2.2", + "@secretlint/types": "^10.2.2", + "ajv": "^8.17.1", + "debug": "^4.4.1", + "rc-config-loader": "^4.1.3" + }, + "engines": { + "node": ">=20.0.0" + } }, - "node_modules/azure-devops-node-api": { - "version": "12.5.0", - "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-12.5.0.tgz", - "integrity": "sha512-R5eFskGvOm3U/GzeAuxRkUsAl0hrAwGgWn6zAd2KrZmrEhWZVqLew4OOupbQlXUuojUzpGtq62SmdhJ06N88og==", + "node_modules/@secretlint/core": { + "version": "10.2.2", + 
"resolved": "https://registry.npmjs.org/@secretlint/core/-/core-10.2.2.tgz", + "integrity": "sha512-6rdwBwLP9+TO3rRjMVW1tX+lQeo5gBbxl1I5F8nh8bgGtKwdlCMhMKsBWzWg1ostxx/tIG7OjZI0/BxsP8bUgw==", "dev": true, "license": "MIT", "dependencies": { - "tunnel": "0.0.6", - "typed-rest-client": "^1.8.4" + "@secretlint/profiler": "^10.2.2", + "@secretlint/types": "^10.2.2", + "debug": "^4.4.1", + "structured-source": "^4.0.0" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "node_modules/@secretlint/formatter": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/formatter/-/formatter-10.2.2.tgz", + "integrity": "sha512-10f/eKV+8YdGKNQmoDUD1QnYL7TzhI2kzyx95vsJKbEa8akzLAR5ZrWIZ3LbcMmBLzxlSQMMccRmi05yDQ5YDA==", "dev": true, - "license": "MIT" + "license": "MIT", + "dependencies": { + "@secretlint/resolver": "^10.2.2", + "@secretlint/types": "^10.2.2", + "@textlint/linter-formatter": "^15.2.0", + "@textlint/module-interop": "^15.2.0", + "@textlint/types": "^15.2.0", + "chalk": "^5.4.1", + "debug": "^4.4.1", + "pluralize": "^8.0.0", + "strip-ansi": "^7.1.0", + "table": "^6.9.0", + "terminal-link": "^4.0.0" + }, + "engines": { + "node": ">=20.0.0" + } }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "node_modules/@secretlint/formatter/node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", "dev": true, - "funding": [ - { - "type": "github", - 
"url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], "license": "MIT", - "optional": true + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } }, - "node_modules/bl": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", - "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "node_modules/@secretlint/node": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/node/-/node-10.2.2.tgz", + "integrity": "sha512-eZGJQgcg/3WRBwX1bRnss7RmHHK/YlP/l7zOQsrjexYt6l+JJa5YhUmHbuGXS94yW0++3YkEJp0kQGYhiw1DMQ==", "dev": true, "license": "MIT", - "optional": true, "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" + "@secretlint/config-loader": "^10.2.2", + "@secretlint/core": "^10.2.2", + "@secretlint/formatter": "^10.2.2", + "@secretlint/profiler": "^10.2.2", + "@secretlint/source-creator": "^10.2.2", + "@secretlint/types": "^10.2.2", + "debug": "^4.4.1", + "p-map": "^7.0.3" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/boolbase": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "node_modules/@secretlint/profiler": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/profiler/-/profiler-10.2.2.tgz", + "integrity": "sha512-qm9rWfkh/o8OvzMIfY8a5bCmgIniSpltbVlUVl983zDG1bUuQNd1/5lUEeWx5o/WJ99bXxS7yNI4/KIXfHexig==", "dev": true, - "license": "ISC" + "license": "MIT" }, - "node_modules/brace-expansion": { - "version": "1.1.13", - "resolved": 
"https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz", - "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==", + "node_modules/@secretlint/resolver": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/resolver/-/resolver-10.2.2.tgz", + "integrity": "sha512-3md0cp12e+Ae5V+crPQYGd6aaO7ahw95s28OlULGyclyyUtf861UoRGS2prnUrKh7MZb23kdDOyGCYb9br5e4w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@secretlint/secretlint-formatter-sarif": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-formatter-sarif/-/secretlint-formatter-sarif-10.2.2.tgz", + "integrity": "sha512-ojiF9TGRKJJw308DnYBucHxkpNovDNu1XvPh7IfUp0A12gzTtxuWDqdpuVezL7/IP8Ua7mp5/VkDMN9OLp1doQ==", "dev": true, "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "node-sarif-builder": "^3.2.0" } }, - "node_modules/buffer": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", - "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "node_modules/@secretlint/secretlint-rule-no-dotenv": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-rule-no-dotenv/-/secretlint-rule-no-dotenv-10.2.2.tgz", + "integrity": "sha512-KJRbIShA9DVc5Va3yArtJ6QDzGjg3PRa1uYp9As4RsyKtKSSZjI64jVca57FZ8gbuk4em0/0Jq+uy6485wxIdg==", "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], "license": "MIT", - "optional": true, "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" + "@secretlint/types": "^10.2.2" + }, + "engines": { + "node": ">=20.0.0" } }, - "node_modules/buffer-crc32": { - "version": "0.2.13", - 
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", - "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "node_modules/@secretlint/secretlint-rule-preset-recommend": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/secretlint-rule-preset-recommend/-/secretlint-rule-preset-recommend-10.2.2.tgz", + "integrity": "sha512-K3jPqjva8bQndDKJqctnGfwuAxU2n9XNCPtbXVI5JvC7FnQiNg/yWlQPbMUlBXtBoBGFYp08A94m6fvtc9v+zA==", "dev": true, "license": "MIT", "engines": { - "node": "*" + "node": ">=20.0.0" } }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "node_modules/@secretlint/source-creator": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/source-creator/-/source-creator-10.2.2.tgz", + "integrity": "sha512-h6I87xJfwfUTgQ7irWq7UTdq/Bm1RuQ/fYhA3dtTIAop5BwSFmZyrchph4WcoEvbN460BWKmk4RYSvPElIIvxw==", "dev": true, "license": "MIT", "dependencies": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" + "@secretlint/types": "^10.2.2", + "istextorbinary": "^9.5.0" }, "engines": { - "node": ">= 0.4" + "node": ">=20.0.0" + } + }, + "node_modules/@secretlint/types": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/@secretlint/types/-/types-10.2.2.tgz", + "integrity": "sha512-Nqc90v4lWCXyakD6xNyNACBJNJ0tNCwj2WNk/7ivyacYHxiITVgmLUFXTBOeCdy79iz6HtN9Y31uw/jbLrdOAg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@sindresorhus/merge-streams": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-2.3.0.tgz", + "integrity": 
"sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@textlint/ast-node-types": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/ast-node-types/-/ast-node-types-15.7.1.tgz", + "integrity": "sha512-Wii5UgUKFEh9Uv6wbq1zr4/Kf+dtjiUuzPrrXzKp8H+ifkvKNzi23V4Nz+6wVyHQn5T28AFuc8VH8OtzvGYecA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/linter-formatter": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/linter-formatter/-/linter-formatter-15.7.1.tgz", + "integrity": "sha512-TdwZ/debWYFD05K3CcoHtwvnCrza29wZxD+BjDTk/V5N7iRqkK1dTTHSD4A8AIgROLiDkHJmIKQbasbmsg8AvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azu/format-text": "^1.0.2", + "@azu/style-format": "^1.0.1", + "@textlint/module-interop": "15.7.1", + "@textlint/resolver": "15.7.1", + "@textlint/types": "15.7.1", + "chalk": "^4.1.2", + "debug": "^4.4.3", + "js-yaml": "^4.1.1", + "lodash": "^4.18.1", + "pluralize": "^2.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "table": "^6.9.0", + "text-table": "^0.2.0" + } + }, + "node_modules/@textlint/linter-formatter/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/@textlint/linter-formatter/node_modules/pluralize": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-2.0.0.tgz", + "integrity": "sha512-TqNZzQCD4S42De9IfnnBvILN7HAW7riLqsCyp8lgjXeysyPlX5HhqKAcJHHHb9XskE4/a+7VGC9zzx8Ls0jOAw==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/@textlint/linter-formatter/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@textlint/module-interop": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/module-interop/-/module-interop-15.7.1.tgz", + "integrity": "sha512-Jg+sQW2L/cRJypk59wtcMUVVpt8vmit5ZMT3gUnFwevP3A6Qp1HfOtUy9ObT4hBX3lOSGT/ekcCDxR1pL7uH1g==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/resolver": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/resolver/-/resolver-15.7.1.tgz", + "integrity": "sha512-8XnO0pgF6mXnm41VvWmBbEIdGPhiCUt31uLZkOis1ECeg/1SoUcIT6Mx/F0e1rukq8l0UlOSeY9a31CsvRMK0g==", + "dev": true, + "license": "MIT" + }, + "node_modules/@textlint/types": { + "version": "15.7.1", + "resolved": "https://registry.npmjs.org/@textlint/types/-/types-15.7.1.tgz", + "integrity": "sha512-Vye/GmFNBTgVzZFtIFJTmLB+s2A7oIADxNG6r9UhfPuY+Czv0z5G3xeyFZZudPlfxURsKUyPIU5XsjOFqVp33A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@textlint/ast-node-types": "15.7.1" + } + }, + "node_modules/@types/node": { + "version": "25.9.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.9.3.tgz", + "integrity": "sha512-603BddQMv3pUcr4U2dhujk83N2tTDVr/34wII2B6bJy6g+8WD6yUb11jszNs0gdi4PesVWl7ABt8nYMVpnLUcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": ">=7.24.0 <7.24.7" + } + }, + "node_modules/@types/normalize-package-data": { + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz", + "integrity": 
"sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/sarif": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@types/sarif/-/sarif-2.1.7.tgz", + "integrity": "sha512-kRz0VEkJqWLf1LLVN4pT1cg1Z9wAuvI6L97V3m2f5B76Tg8d413ddvLBPTEHAZJlnn4XSvu0FkZtViCQGVyrXQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/vscode": { + "version": "1.120.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.120.0.tgz", + "integrity": "sha512-feaT4Rst+FkTch5zz/ZbNCxoIvo55YU80Be2kiL7OJcod4+CUYf2lUBPdIJzozNnSEMq1VRTGrWEcCGFB3fBmA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@typespec/ts-http-runtime": { + "version": "0.3.6", + "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-0.3.6.tgz", + "integrity": "sha512-jIXhD0eWQ1JA6ln/5Dltyx22UxWNrw0hZmhy2rlv6m6KgF7kplHx3g0fzi09lNmTJQRR91OlemYp3xFnvDK9og==", + "dev": true, + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@vscode/vsce": { + "version": "3.9.1", + "resolved": "https://registry.npmjs.org/@vscode/vsce/-/vsce-3.9.1.tgz", + "integrity": "sha512-MPn5p+DoudI+3GfJSpAZZraE1lgLv0LcwbH3+xy7RgEhty3UIkmUMUA+5jPTDaxXae00AnX5u77FxGM8FhfKKA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@azure/identity": "^4.1.0", + "@secretlint/node": "^10.1.2", + "@secretlint/secretlint-formatter-sarif": "^10.1.2", + "@secretlint/secretlint-rule-no-dotenv": "^10.1.2", + "@secretlint/secretlint-rule-preset-recommend": "^10.1.2", + "@vscode/vsce-sign": "^2.0.0", + "azure-devops-node-api": "^12.5.0", + "chalk": "^4.1.2", + "cheerio": "^1.0.0-rc.9", + "cockatiel": "^3.1.2", + "commander": "^12.1.0", + "form-data": "^4.0.0", + "glob": "^11.0.0", + "hosted-git-info": "^4.0.2", + "jsonc-parser": "^3.2.0", + "leven": 
"^3.1.0", + "markdown-it": "^14.1.0", + "mime": "^1.3.4", + "minimatch": "^3.0.3", + "parse-semver": "^1.1.1", + "read": "^1.0.7", + "secretlint": "^10.1.2", + "semver": "^7.5.2", + "tmp": "^0.2.3", + "typed-rest-client": "^1.8.4", + "url-join": "^4.0.1", + "xml2js": "^0.5.0", + "yauzl": "^3.2.1", + "yazl": "^2.2.2" + }, + "bin": { + "vsce": "vsce" + }, + "engines": { + "node": ">= 20" + }, + "optionalDependencies": { + "keytar": "^7.7.0" + } + }, + "node_modules/@vscode/vsce-sign": { + "version": "2.0.9", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign/-/vsce-sign-2.0.9.tgz", + "integrity": "sha512-8IvaRvtFyzUnGGl3f5+1Cnor3LqaUWvhaUjAYO8Y39OUYlOf3cRd+dowuQYLpZcP3uwSG+mURwjEBOSq4SOJ0g==", + "dev": true, + "hasInstallScript": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optionalDependencies": { + "@vscode/vsce-sign-alpine-arm64": "2.0.6", + "@vscode/vsce-sign-alpine-x64": "2.0.6", + "@vscode/vsce-sign-darwin-arm64": "2.0.6", + "@vscode/vsce-sign-darwin-x64": "2.0.6", + "@vscode/vsce-sign-linux-arm": "2.0.6", + "@vscode/vsce-sign-linux-arm64": "2.0.6", + "@vscode/vsce-sign-linux-x64": "2.0.6", + "@vscode/vsce-sign-win32-arm64": "2.0.6", + "@vscode/vsce-sign-win32-x64": "2.0.6" + } + }, + "node_modules/@vscode/vsce-sign-alpine-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-alpine-arm64/-/vsce-sign-alpine-arm64-2.0.6.tgz", + "integrity": "sha512-wKkJBsvKF+f0GfsUuGT0tSW0kZL87QggEiqNqK6/8hvqsXvpx8OsTEc3mnE1kejkh5r+qUyQ7PtF8jZYN0mo8Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "alpine" + ] + }, + "node_modules/@vscode/vsce-sign-alpine-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-alpine-x64/-/vsce-sign-alpine-x64-2.0.6.tgz", + "integrity": "sha512-YoAGlmdK39vKi9jA18i4ufBbd95OqGJxRvF3n6ZbCyziwy3O+JgOpIUPxv5tjeO6gQfx29qBivQ8ZZTUF2Ba0w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": 
"SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "alpine" + ] + }, + "node_modules/@vscode/vsce-sign-darwin-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-darwin-arm64/-/vsce-sign-darwin-arm64-2.0.6.tgz", + "integrity": "sha512-5HMHaJRIQuozm/XQIiJiA0W9uhdblwwl2ZNDSSAeXGO9YhB9MH5C4KIHOmvyjUnKy4UCuiP43VKpIxW1VWP4tQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@vscode/vsce-sign-darwin-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-darwin-x64/-/vsce-sign-darwin-x64-2.0.6.tgz", + "integrity": "sha512-25GsUbTAiNfHSuRItoQafXOIpxlYj+IXb4/qarrXu7kmbH94jlm5sdWSCKrrREs8+GsXF1b+l3OB7VJy5jsykw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@vscode/vsce-sign-linux-arm": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-arm/-/vsce-sign-linux-arm-2.0.6.tgz", + "integrity": "sha512-UndEc2Xlq4HsuMPnwu7420uqceXjs4yb5W8E2/UkaHBB9OWCwMd3/bRe/1eLe3D8kPpxzcaeTyXiK3RdzS/1CA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-linux-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-arm64/-/vsce-sign-linux-arm64-2.0.6.tgz", + "integrity": "sha512-cfb1qK7lygtMa4NUl2582nP7aliLYuDEVpAbXJMkDq1qE+olIw/es+C8j1LJwvcRq1I2yWGtSn3EkDp9Dq5FdA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-linux-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-linux-x64/-/vsce-sign-linux-x64-2.0.6.tgz", + "integrity": 
"sha512-/olerl1A4sOqdP+hjvJ1sbQjKN07Y3DVnxO4gnbn/ahtQvFrdhUi0G1VsZXDNjfqmXw57DmPi5ASnj/8PGZhAA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@vscode/vsce-sign-win32-arm64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-win32-arm64/-/vsce-sign-win32-arm64-2.0.6.tgz", + "integrity": "sha512-ivM/MiGIY0PJNZBoGtlRBM/xDpwbdlCWomUWuLmIxbi1Cxe/1nooYrEQoaHD8ojVRgzdQEUzMsRbyF5cJJgYOg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@vscode/vsce-sign-win32-x64": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@vscode/vsce-sign-win32-x64/-/vsce-sign-win32-x64-2.0.6.tgz", + "integrity": "sha512-mgth9Kvze+u8CruYMmhHw6Zgy3GRX2S+Ed5oSokDEK5vPEwGGKnmuXua9tmFhomeAnhgJnL4DCna3TiNuGrBTQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "SEE LICENSE IN LICENSE.txt", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-escapes": { + "version": "7.3.0", + "resolved": 
"https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.3.0.tgz", + "integrity": "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==", + "dev": true, + "license": "MIT", + "dependencies": { + "environment": "^1.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": 
"sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/azure-devops-node-api": { + "version": "12.5.0", + "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-12.5.0.tgz", + "integrity": "sha512-R5eFskGvOm3U/GzeAuxRkUsAl0hrAwGgWn6zAd2KrZmrEhWZVqLew4OOupbQlXUuojUzpGtq62SmdhJ06N88og==", + "dev": true, + "license": "MIT", + "dependencies": { + "tunnel": "0.0.6", + "typed-rest-client": "^1.8.4" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/binaryextensions": { + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz", + "integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "editions": "^6.21.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": 
"sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "dev": true, + "license": "ISC" + }, + "node_modules/boundary": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/boundary/-/boundary-2.0.0.tgz", + "integrity": "sha512-rJKn5ooC9u8q13IMCrW0RSp31pxBCHE3y9V/tp3TdWSLf8Em3p6Di4NBpfzbJge9YjjFEsD0RtFEjtvHL5VyEA==", + "dev": true, + "license": "BSD-2-Clause" + }, + "node_modules/brace-expansion": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.15.tgz", + "integrity": "sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", 
+ "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/bundle-name": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz", + "integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "run-applescript": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" } }, "node_modules/call-bound": { @@ -684,18 +1522,20 @@ } }, "node_modules/chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": 
"sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", "dev": true, "license": "MIT", "dependencies": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" }, "engines": { - "node": ">=4" + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" } }, "node_modules/cheerio": { @@ -761,19 +1601,22 @@ } }, "node_modules/color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", "dev": true, "license": "MIT", "dependencies": { - "color-name": "1.1.3" + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" } }, "node_modules/color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true, "license": "MIT" }, @@ -791,13 +1634,13 @@ } }, "node_modules/commander": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", - "integrity": 
"sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", "dev": true, "license": "MIT", "engines": { - "node": ">= 6" + "node": ">=18" } }, "node_modules/concat-map": { @@ -807,6 +1650,21 @@ "dev": true, "license": "MIT" }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/css-select": { "version": "5.2.2", "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", @@ -837,6 +1695,24 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, "node_modules/decompress-response": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", @@ -865,6 +1741,49 @@ "node": ">=4.0.0" } }, + "node_modules/default-browser": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.5.0.tgz", + "integrity": "sha512-H9LMLr5zwIbSxrmvikGuI/5KGhZ8E2zH3stkMgM5LpOWDutGM2JZaj460Udnf1a+946zc7YBgrqEWwbk7zHvGw==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "bundle-name": "^4.1.0", + "default-browser-id": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/default-browser-id": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.1.tgz", + "integrity": "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/define-lazy-prop": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-3.0.0.tgz", + "integrity": "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -960,6 +1879,40 @@ "node": ">= 0.4" } }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/editions": { + "version": "6.22.0", + "resolved": "https://registry.npmjs.org/editions/-/editions-6.22.0.tgz", + "integrity": "sha512-UgGlf8IW75je7HZjNDpJdCv4cGJWIi6yumFdZ0R7A8/CIhQiWUjyGLCxdHpd8bmyD1gnkfUNK0oeOXqUS2cpfQ==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "version-range": "^4.15.0" + }, + "engines": { + "ecmascript": ">= es5", + "node": ">=4" + }, + 
"funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, "node_modules/encoding-sniffer": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", @@ -998,6 +1951,19 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/environment": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", + "integrity": "sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -1019,9 +1985,9 @@ } }, "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.2.tgz", + "integrity": "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==", "dev": true, "license": "MIT", "dependencies": { @@ -1048,9 +2014,9 @@ } }, "node_modules/esbuild": { - "version": "0.28.0", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.28.0.tgz", - "integrity": "sha512-sNR9MHpXSUV/XB4zmsFKN+QgVG82Cc7+/aaxJ8Adi8hyOac+EXptIp45QBPaVyX3N70664wRbTcLTOemCAnyqw==", + "version": "0.28.1", + "resolved": 
"https://registry.npmjs.org/esbuild/-/esbuild-0.28.1.tgz", + "integrity": "sha512-HrJrvZv5ayxBzPfwphOoNzkzOIIlifzk0KJrGK2c8R4+LKpMtpYLQeUdjnwjWv/LZlkH2laZk+4w78pi99D4Vw==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -1061,42 +2027,32 @@ "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.28.0", - "@esbuild/android-arm": "0.28.0", - "@esbuild/android-arm64": "0.28.0", - "@esbuild/android-x64": "0.28.0", - "@esbuild/darwin-arm64": "0.28.0", - "@esbuild/darwin-x64": "0.28.0", - "@esbuild/freebsd-arm64": "0.28.0", - "@esbuild/freebsd-x64": "0.28.0", - "@esbuild/linux-arm": "0.28.0", - "@esbuild/linux-arm64": "0.28.0", - "@esbuild/linux-ia32": "0.28.0", - "@esbuild/linux-loong64": "0.28.0", - "@esbuild/linux-mips64el": "0.28.0", - "@esbuild/linux-ppc64": "0.28.0", - "@esbuild/linux-riscv64": "0.28.0", - "@esbuild/linux-s390x": "0.28.0", - "@esbuild/linux-x64": "0.28.0", - "@esbuild/netbsd-arm64": "0.28.0", - "@esbuild/netbsd-x64": "0.28.0", - "@esbuild/openbsd-arm64": "0.28.0", - "@esbuild/openbsd-x64": "0.28.0", - "@esbuild/openharmony-arm64": "0.28.0", - "@esbuild/sunos-x64": "0.28.0", - "@esbuild/win32-arm64": "0.28.0", - "@esbuild/win32-ia32": "0.28.0", - "@esbuild/win32-x64": "0.28.0" - } - }, - "node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.8.0" + "@esbuild/aix-ppc64": "0.28.1", + "@esbuild/android-arm": "0.28.1", + "@esbuild/android-arm64": "0.28.1", + "@esbuild/android-x64": "0.28.1", + "@esbuild/darwin-arm64": "0.28.1", + "@esbuild/darwin-x64": "0.28.1", + "@esbuild/freebsd-arm64": "0.28.1", + "@esbuild/freebsd-x64": "0.28.1", + "@esbuild/linux-arm": "0.28.1", + "@esbuild/linux-arm64": "0.28.1", + "@esbuild/linux-ia32": "0.28.1", + 
"@esbuild/linux-loong64": "0.28.1", + "@esbuild/linux-mips64el": "0.28.1", + "@esbuild/linux-ppc64": "0.28.1", + "@esbuild/linux-riscv64": "0.28.1", + "@esbuild/linux-s390x": "0.28.1", + "@esbuild/linux-x64": "0.28.1", + "@esbuild/netbsd-arm64": "0.28.1", + "@esbuild/netbsd-x64": "0.28.1", + "@esbuild/openbsd-arm64": "0.28.1", + "@esbuild/openbsd-x64": "0.28.1", + "@esbuild/openharmony-arm64": "0.28.1", + "@esbuild/sunos-x64": "0.28.1", + "@esbuild/win32-arm64": "0.28.1", + "@esbuild/win32-ia32": "0.28.1", + "@esbuild/win32-x64": "0.28.1" } }, "node_modules/expand-template": { @@ -1110,28 +2066,99 @@ "node": ">=6" } }, - "node_modules/fd-slicer": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", - "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-glob": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", + "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", "dev": true, "license": "MIT", "dependencies": { - "pend": "~1.2.0" + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.8" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fast-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", + "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fastq": { + "version": "1.20.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz", + "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==", + "dev": true, + "license": "ISC", + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "dev": true, + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/form-data": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", - "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.6.tgz", + "integrity": "sha512-vKatAh4SlVfgbv+YtmhiRjhEMJsYpsG1Y2rMQtR+SVSbytsSD1YGzDIcrAJmdFec88u/+VoGmxnl+80gL1tRCQ==", "dev": true, "license": "MIT", "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" + "hasown": "^2.0.4", + 
"mime-types": "^2.1.35" }, "engines": { "node": ">= 6" @@ -1145,12 +2172,20 @@ "license": "MIT", "optional": true }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "node_modules/fs-extra": { + "version": "11.3.5", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.5.tgz", + "integrity": "sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==", "dev": true, - "license": "ISC" + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } }, "node_modules/function-bind": { "version": "1.1.2", @@ -1194,41 +2229,117 @@ "dev": true, "license": "MIT", "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "dev": true, + "license": "MIT", + "optional": true + }, + "node_modules/glob": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-11.1.0.tgz", + "integrity": "sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "foreground-child": "^3.3.1", + "jackspeak": "^4.1.1", + "minimatch": "^10.1.1", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^2.0.0" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/glob/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/glob/node_modules/brace-expansion": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz", + "integrity": "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" }, "engines": { - "node": ">= 0.4" + "node": "18 || 20 || >=22" } }, - "node_modules/github-from-package": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", - "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "node_modules/glob/node_modules/minimatch": { + "version": "10.2.5", + 
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", + "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", "dev": true, - "license": "MIT", - "optional": true + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.5" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } }, - "node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "node_modules/globby": { + "version": "14.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-14.1.0.tgz", + "integrity": "sha512-0Ia46fDOaT7k4og1PDW4YbodWWr3scS2vAr2lTbsplOt2WkKp0vQbkI9wKis/T5LV/dqPjO3bpS/z6GTJB82LA==", "dev": true, - "license": "ISC", + "license": "MIT", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "@sindresorhus/merge-streams": "^2.1.0", + "fast-glob": "^3.3.3", + "ignore": "^7.0.3", + "path-type": "^6.0.0", + "slash": "^5.1.0", + "unicorn-magic": "^0.3.0" }, "engines": { - "node": "*" + "node": ">=18" }, "funding": { - "url": "https://github.com/sponsors/isaacs" + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/gopd": { @@ -1244,14 +2355,21 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": 
"sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, "node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", "dev": true, "license": "MIT", "engines": { - "node": ">=4" + "node": ">=8" } }, "node_modules/has-symbols": { @@ -1284,9 +2402,9 @@ } }, "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.4.tgz", + "integrity": "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==", "dev": true, "license": "MIT", "dependencies": { @@ -1342,6 +2460,34 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": 
"MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -1377,16 +2523,27 @@ "license": "BSD-3-Clause", "optional": true }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "node_modules/ignore": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", "dev": true, - "license": "ISC", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/index-to-position": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/index-to-position/-/index-to-position-1.2.0.tgz", + "integrity": "sha512-Yg7+ztRkqslMAS2iFaU+Oa4KTSidr63OsFGlOrJoW981kIYO3CGCS3wA95P1mUi/IVSJkn0D479KTJpVpvFNuw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/inherits": { @@ -1394,7 +2551,8 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true, - "license": "ISC" + "license": "ISC", + "optional": true }, "node_modules/ini": { "version": "1.3.8", @@ -1404,6 +2562,191 @@ "license": "ISC", "optional": true }, + 
"node_modules/is-docker": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-3.0.0.tgz", + "integrity": "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==", + "dev": true, + "license": "MIT", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-inside-container": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-inside-container/-/is-inside-container-1.0.0.tgz", + "integrity": "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-docker": "^3.0.0" + }, + "bin": { + "is-inside-container": "cli.js" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-wsl": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.1.tgz", + "integrity": "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-inside-container": "^1.0.0" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/istextorbinary": { + "version": "9.5.0", + "resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz", + "integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "binaryextensions": "^6.11.0", + "editions": "^6.21.0", + "textextensions": "^6.11.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, + "node_modules/jackspeak": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.2.3.tgz", + "integrity": "sha512-ykkVRwrYvFm1nb2AJfKKYPr0emF6IiXDYUaFx4Zn9ZuIH7MrzEZ3sD5RlqGXNRpHtvUHJyOnCEFxOlNDtGo7wg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^9.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.2.0.tgz", + "integrity": "sha512-ePWsvanv0DWuDRsW8dnt+R4jQ31SCRCQ7hhNcPXZPsoBZiemuZNYGf7adZdqX2D86j6rvKp3RpCxVTSb8WQlOw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/puzrin" + }, + { + "type": "github", + "url": "https://github.com/sponsors/nodeca" + } + ], + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true, + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", @@ -1411,6 +2754,65 @@ "dev": true, "license": "MIT" }, + "node_modules/jsonfile": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.1.tgz", + "integrity": "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/jsonwebtoken": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.3.tgz", + "integrity": "sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==", + "dev": true, + "license": "MIT", + "dependencies": { + "jws": "^4.0.1", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, "node_modules/keytar": { "version": "7.9.0", "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", @@ -1429,20 +2831,93 @@ "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/linkify-it": { + "version": "5.0.1", + "resolved": 
"https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.1.tgz", + "integrity": "sha512-wVoTjP4Q6R0NW5hiZkVJaFZPWgtXfoGF+6LucL3/FtiNjmcHhYjEr5f1Kqjirc1nBW07J/ZuRFumqr2oqccEWg==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/puzrin" + }, + { + "type": "github", + "url": "https://github.com/sponsors/markdown-it" + } + ], + "license": "MIT", + "dependencies": { + "uc.micro": "^2.0.0" + } + }, + "node_modules/lodash": { + "version": "4.18.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isplainobject": { + "version": "4.0.6", + "resolved": 
"https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", + "dev": true, + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==", + "dev": true, + "license": "MIT" }, - "node_modules/linkify-it": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", - "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==", "dev": true, - "license": "MIT", - "dependencies": { - "uc.micro": "^1.0.1" - } + "license": "MIT" + }, + "node_modules/lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha512-jttmRe7bRse52OsWIMDLaXxWqRAmtIUccAQ3garviCqJjafXOfNMO0yMfNpdD6zbGaTU0P5Nz7e7gAT6cKmJRw==", + "dev": true, + "license": "MIT" }, "node_modules/lru-cache": { "version": "6.0.0", @@ -1458,30 +2933,31 @@ } }, "node_modules/markdown-it": { - "version": "12.3.2", - "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", - "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.2.0.tgz", + "integrity": "sha512-1TGiQiJVRQ3NPmZH6sx5Cfnmg6GQm9jvC1ch4TK511NjSJvjzKLzn5pPfZRNZkRPZP0HqCioSndqH8v2nRaWVQ==", "dev": true, + "funding": [ 
+ { + "type": "github", + "url": "https://github.com/sponsors/puzrin" + }, + { + "type": "github", + "url": "https://github.com/sponsors/markdown-it" + } + ], "license": "MIT", "dependencies": { "argparse": "^2.0.1", - "entities": "~2.1.0", - "linkify-it": "^3.0.1", - "mdurl": "^1.0.1", - "uc.micro": "^1.0.5" + "entities": "^4.4.0", + "linkify-it": "^5.0.1", + "mdurl": "^2.0.0", + "punycode.js": "^2.3.1", + "uc.micro": "^2.1.0" }, "bin": { - "markdown-it": "bin/markdown-it.js" - } - }, - "node_modules/markdown-it/node_modules/entities": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", - "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", - "dev": true, - "license": "BSD-2-Clause", - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" + "markdown-it": "bin/markdown-it.mjs" } }, "node_modules/math-intrinsics": { @@ -1495,12 +2971,36 @@ } }, "node_modules/mdurl": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", - "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==", "dev": true, "license": "MIT" }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + 
"dev": true, + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, "node_modules/mime": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", @@ -1575,6 +3075,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/minipass": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz", + "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", @@ -1583,6 +3093,13 @@ "license": "MIT", "optional": true }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, "node_modules/mute-stream": { "version": "0.0.8", "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz", @@ -1599,9 +3116,9 @@ "optional": true }, "node_modules/node-abi": { - "version": "3.89.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", - "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", + "version": "3.92.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.92.0.tgz", + "integrity": "sha512-KdHvFWZjEKDf0cakgFjebl371GPsISX2oZHcuyKqM7DtogIsHrqKeLTo8wBHxaXRAQlY2PsPlZmfo+9ZCxEREQ==", "dev": true, "license": "MIT", "optional": true, @@ -1620,6 +3137,55 @@ "license": "MIT", "optional": true }, + "node_modules/node-sarif-builder": { + "version": "3.4.0", + "resolved": 
"https://registry.npmjs.org/node-sarif-builder/-/node-sarif-builder-3.4.0.tgz", + "integrity": "sha512-tGnJW6OKRii9u/b2WiUViTJS+h7Apxx17qsMUjsUeNDiMMX5ZFf8F8Fcz7PAQ6omvOxHZtvDTmOYKJQwmfpjeg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/sarif": "^2.1.7", + "fs-extra": "^11.1.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/normalize-package-data": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-6.0.2.tgz", + "integrity": "sha512-V6gygoYb/5EmNI+MEGrWkC+e6+Rr7mTmfHrxDbLzxQogBkgzo76rkok0Am6thgSF7Mv2nLOajAJj5vDJZEFn7g==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "hosted-git-info": "^7.0.0", + "semver": "^7.3.5", + "validate-npm-package-license": "^3.0.4" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/normalize-package-data/node_modules/hosted-git-info": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-7.0.2.tgz", + "integrity": "sha512-puUZAUKT5m8Zzvs72XWy3HtvVbTWljRE66cP60bxJzAqf2DgICo7lYTY2IHUmLnNpjYvw5bvmoHvPc0QO2a62w==", + "dev": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^10.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/normalize-package-data/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true, + "license": "ISC" + }, "node_modules/nth-check": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", @@ -1652,10 +3218,68 @@ "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", "dev": true, "license": "ISC", + "optional": true, "dependencies": { "wrappy": "1" } }, + "node_modules/open": { + "version": 
"10.2.0", + "resolved": "https://registry.npmjs.org/open/-/open-10.2.0.tgz", + "integrity": "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA==", + "dev": true, + "license": "MIT", + "dependencies": { + "default-browser": "^5.2.1", + "define-lazy-prop": "^3.0.0", + "is-inside-container": "^1.0.0", + "wsl-utils": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-map": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.4.tgz", + "integrity": "sha512-tkAQEw8ysMzmkhgw8k+1U/iPhWNhykKnSk4Rd5zLoPJCuJaGRPo6YposrZgaxHKzDHdDWWZvE/Sk7hsL2X/CpQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "dev": true, + "license": "BlueOak-1.0.0" + }, + "node_modules/parse-json": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-8.3.0.tgz", + "integrity": "sha512-ybiGyvspI+fAoRQbIPRddCcSTV9/LsJbf0e/S85VLowVGzRmokfneg2kwVW/KU5rOXrPSbF1qAKPMgNTqqROQQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.26.2", + "index-to-position": "^1.1.0", + "type-fest": "^4.39.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/parse-semver": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/parse-semver/-/parse-semver-1.1.1.tgz", @@ -1729,14 +3353,54 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": 
"https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "dev": true, "license": "MIT", "engines": { - "node": ">=0.10.0" + "node": ">=8" + } + }, + "node_modules/path-scurry": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", + "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-scurry/node_modules/lru-cache": { + "version": "11.5.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", + "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/path-type": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-6.0.0.tgz", + "integrity": "sha512-Vj7sf++t5pBD637NSfkxpHSMfWaeig5+DKWLhcqIYx6mWQz5hdJTGDVMQiJcw1ZYkhs7AazKDGpRVji1LJCZUQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/pend": { @@ -1746,6 +3410,36 @@ "dev": true, "license": "MIT" }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": 
"sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pluralize": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz", + "integrity": "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/prebuild-install": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", @@ -1787,10 +3481,20 @@ "once": "^1.3.1" } }, + "node_modules/punycode.js": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz", + "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/qs": { - "version": "6.15.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.0.tgz", - "integrity": "sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==", + "version": "6.15.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.2.tgz", + "integrity": "sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw==", "dev": true, "license": "BSD-3-Clause", "dependencies": { @@ -1803,6 +3507,27 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": 
"https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -1820,6 +3545,19 @@ "rc": "cli.js" } }, + "node_modules/rc-config-loader": { + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/rc-config-loader/-/rc-config-loader-4.1.4.tgz", + "integrity": "sha512-3GiwEzklkbXTDp52UR5nT8iXgYAx1V9ZG/kDZT7p60u2GCv2XTwQq4NzinMoMpNtXhmt3WkhYXcj6HH8HdwCEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "js-yaml": "^4.1.1", + "json5": "^2.2.3", + "require-from-string": "^2.0.2" + } + }, "node_modules/read": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz", @@ -1833,20 +3571,111 @@ "node": ">=0.8" } }, + "node_modules/read-pkg": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-9.0.1.tgz", + "integrity": "sha512-9viLL4/n1BJUCT1NXVTdS1jtm80yDEgR5T4yCelII49Mbj0v1rZdKqj7zCiYdbB0CuCgdrvHcNogAKTFPBocFA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/normalize-package-data": "^2.4.3", + "normalize-package-data": "^6.0.0", + "parse-json": "^8.0.0", + "type-fest": "^4.6.0", + "unicorn-magic": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/read-pkg/node_modules/unicorn-magic": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.1.0.tgz", + "integrity": 
"sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/readable-stream": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", "dev": true, "license": "MIT", - "optional": true, + "optional": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/reusify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", + "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", + "dev": true, + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/run-applescript": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.1.0.tgz", + "integrity": "sha512-DPe5pVFaAsinSaV6QjQ6gdiedWDcRCbUuiQfQa2wmWV7+xC9bGulGI8+TdRmoFkAPaBXk8CrAbnlY2ISniJ47Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": 
"sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" + "queue-microtask": "^1.2.2" } }, "node_modules/safe-buffer": { @@ -1868,8 +3697,7 @@ "url": "https://feross.org/support" } ], - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/safer-buffer": { "version": "2.1.2", @@ -1888,10 +3716,32 @@ "node": ">=11.0.0" } }, + "node_modules/secretlint": { + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/secretlint/-/secretlint-10.2.2.tgz", + "integrity": "sha512-xVpkeHV/aoWe4vP4TansF622nBEImzCY73y/0042DuJ29iKIaqgoJ8fGxre3rVSHHbxar4FdJobmTnLp9AU0eg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@secretlint/config-creator": "^10.2.2", + "@secretlint/formatter": "^10.2.2", + "@secretlint/node": "^10.2.2", + "@secretlint/profiler": "^10.2.2", + "debug": "^4.4.1", + "globby": "^14.1.0", + "read-pkg": "^9.0.1" + }, + "bin": { + "secretlint": "bin/secretlint.js" + }, + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/semver": { - "version": "7.7.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", - "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.4.tgz", + "integrity": "sha512-rUCObTnP32Q08R2uuIrt7r9PlEonuTmtuXYcW6s5kjdlj3xbnwe+21yXptAUYcMAABLkYYTtnmzb3w3EDZfueA==", "dev": true, "license": "ISC", "bin": { @@ -1901,16 +3751,39 @@ "node": ">=10" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + 
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/side-channel": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", - "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.1.tgz", + "integrity": "sha512-6x6dK6zJdpTzF4sQeNYxwtvBzf6Eg4GtlesS94HOvTudUeyK2WXAaIfmDgsyslYrRBeFIlsi54AYsFGUuhmvrQ==", "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", - "object-inspect": "^1.13.3", - "side-channel-list": "^1.0.0", + "object-inspect": "^1.13.4", + "side-channel-list": "^1.0.1", "side-channel-map": "^1.0.1", "side-channel-weakmap": "^1.0.2" }, @@ -1922,14 +3795,14 @@ } }, "node_modules/side-channel-list": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", - "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz", + "integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==", "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", - 
"object-inspect": "^1.13.3" + "object-inspect": "^1.13.4" }, "engines": { "node": ">= 0.4" @@ -1977,6 +3850,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/simple-concat": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", @@ -2026,6 +3912,73 @@ "simple-concat": "^1.0.0" } }, + "node_modules/slash": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", + "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/slice-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/spdx-correct": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.2.0.tgz", + "integrity": "sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-expression-parse": "^3.0.0", + 
"spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-exceptions": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.5.0.tgz", + "integrity": "sha512-PiU42r+xO4UbUS1buo3LPJkjlO7430Xn5SVAhdpzzsPHsjbYVflnnFdATgabnLude+Cqu25p6N+g2lw/PFsa4w==", + "dev": true, + "license": "CC-BY-3.0" + }, + "node_modules/spdx-expression-parse": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", + "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-license-ids": { + "version": "3.0.23", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.23.tgz", + "integrity": "sha512-CWLcCCH7VLu13TgOH+r8p1O/Znwhqv/dbb6lqWy67G+pT1kHmeD/+V36AVb/vq8QMIQwVShJ6Ssl5FPh0fuSdw==", + "dev": true, + "license": "CC0-1.0" + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -2037,6 +3990,60 @@ "safe-buffer": "~5.2.0" } }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + 
"engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz", + "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.2.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/strip-json-comments": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", @@ -2048,17 +4055,84 @@ "node": ">=0.10.0" } }, + "node_modules/structured-source": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/structured-source/-/structured-source-4.0.0.tgz", + "integrity": "sha512-qGzRFNJDjFieQkl/sVOI2dUjHKRyL9dAJi2gCPGJLbJHBIkyOHxjuocpIEfbLioX+qSJpvbYdT49/YCdMznKxA==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boundary": "^2.0.0" + } + }, "node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", "dev": true, "license": "MIT", "dependencies": { - "has-flag": "^3.0.0" + 
"has-flag": "^4.0.0" }, "engines": { - "node": ">=4" + "node": ">=8" + } + }, + "node_modules/supports-hyperlinks": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/supports-hyperlinks/-/supports-hyperlinks-3.2.0.tgz", + "integrity": "sha512-zFObLMyZeEwzAoKCyu1B91U79K2t7ApXuQfo8OuxwXLDgcKxuwM+YvcbIhm6QWqz7mHUH1TVytR1PwVVjEuMig==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0", + "supports-color": "^7.0.0" + }, + "engines": { + "node": ">=14.18" + }, + "funding": { + "url": "https://github.com/chalk/supports-hyperlinks?sponsor=1" + } + }, + "node_modules/table": { + "version": "6.9.0", + "resolved": "https://registry.npmjs.org/table/-/table-6.9.0.tgz", + "integrity": "sha512-9kY+CygyYM6j02t5YFHbNz2FN5QmYGv9zAjVp4lCDjlCw7amdckXlEt/bjMhUIfj4ThGRE4gCUH5+yGnNuPo5A==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/table/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/table/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" } }, "node_modules/tar-fs": { @@ -2093,16 +4167,76 @@ "node": ">=6" } }, + "node_modules/terminal-link": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/terminal-link/-/terminal-link-4.0.0.tgz", + 
"integrity": "sha512-lk+vH+MccxNqgVqSnkMVKx4VLJfnLjDBGzH16JVZjKE2DoxP57s6/vt6JmXV5I3jBcfGrxNrYtC+mPtU7WJztA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-escapes": "^7.0.0", + "supports-hyperlinks": "^3.2.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true, + "license": "MIT" + }, + "node_modules/textextensions": { + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz", + "integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==", + "dev": true, + "license": "Artistic-2.0", + "dependencies": { + "editions": "^6.21.0" + }, + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, "node_modules/tmp": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", - "integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==", + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.7.tgz", + "integrity": "sha512-e0votIpp4Uo2AJYSzVHV6xCcawuiez3DzqDAbrTc3YxBkplN6e+dM13ZeIcZnDg/QpSuU2zfZ3rzwY8ukEnaXw==", "dev": true, "license": "MIT", "engines": { "node": ">=14.14" } }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + 
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD" + }, "node_modules/tunnel": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", @@ -2127,6 +4261,19 @@ "node": "*" } }, + "node_modules/type-fest": { + "version": "4.41.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", + "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typed-rest-client": { "version": "1.8.11", "resolved": "https://registry.npmjs.org/typed-rest-client/-/typed-rest-client-1.8.11.tgz", @@ -2140,9 +4287,9 @@ } }, "node_modules/typescript": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.2.tgz", - "integrity": "sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==", + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz", + "integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==", "dev": true, "license": "Apache-2.0", "bin": { @@ -2154,9 +4301,9 @@ } }, "node_modules/uc.micro": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", - "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", + "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", "dev": true, "license": "MIT" }, @@ -2168,9 +4315,9 @@ "license": 
"MIT" }, "node_modules/undici": { - "version": "7.24.7", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.7.tgz", - "integrity": "sha512-H/nlJ/h0ggGC+uRL3ovD+G0i4bqhvsDOpbDv7At5eFLlj2b41L8QliGbnl2H7SnDiYhENphh1tQFJZf+MyfLsQ==", + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.28.0.tgz", + "integrity": "sha512-cRZYrTDwWznlnRiPjggAGxZXanty6M8RV1ff8Wm4LWXBp7/IG8v5DnOm74DtUBp9OONpK75YlPnIjQqX0dBDtA==", "dev": true, "license": "MIT", "engines": { @@ -2178,12 +4325,35 @@ } }, "node_modules/undici-types": { - "version": "7.18.2", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", - "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "version": "7.24.6", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.24.6.tgz", + "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==", "dev": true, "license": "MIT" }, + "node_modules/unicorn-magic": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz", + "integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/url-join": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", @@ -2199,6 +4369,30 @@ "license": "MIT", "optional": true }, + "node_modules/validate-npm-package-license": { + 
"version": "3.0.4", + "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", + "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0" + } + }, + "node_modules/version-range": { + "version": "4.15.0", + "resolved": "https://registry.npmjs.org/version-range/-/version-range-4.15.0.tgz", + "integrity": "sha512-Ck0EJbAGxHwprkzFO966t4/5QkRuzh+/I1RxhLgUKKwEn+Cd8NwM60mE3AqBZg5gYODoXW0EFsQvbZjRlvdqbg==", + "dev": true, + "license": "Artistic-2.0", + "engines": { + "node": ">=4" + }, + "funding": { + "url": "https://bevry.me/fund" + } + }, "node_modules/whatwg-encoding": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", @@ -2223,12 +4417,45 @@ "node": ">=18" } }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "dev": true, - "license": "ISC" + "license": "ISC", + "optional": true + }, + "node_modules/wsl-utils": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/wsl-utils/-/wsl-utils-0.1.0.tgz", + "integrity": "sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0" + }, + "engines": { 
+ "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } }, "node_modules/xml2js": { "version": "0.5.0", @@ -2262,14 +4489,16 @@ "license": "ISC" }, "node_modules/yauzl": { - "version": "2.10.0", - "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", - "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.4.0.tgz", + "integrity": "sha512-jIH9yLR9wqr0wOS0TpBvo/g/2UgZH5qePVbjgRliiF0BYvOZyaBknKsF+x9Iht0O6sqgnB93rCICdOZFecJuDw==", "dev": true, "license": "MIT", "dependencies": { - "buffer-crc32": "~0.2.3", - "fd-slicer": "~1.1.0" + "pend": "~1.2.0" + }, + "engines": { + "node": ">=12" } }, "node_modules/yazl": { diff --git a/extensions/vscode-codeclone/package.json b/extensions/vscode-codeclone/package.json index fdce31b9..90bd90f9 100644 --- a/extensions/vscode-codeclone/package.json +++ b/extensions/vscode-codeclone/package.json @@ -1,8 +1,8 @@ { "name": "codeclone", "displayName": "CodeClone", - "description": "Baseline-aware, triage-first structural review for Python, powered by CodeClone MCP.", - "version": "0.2.7", + "description": "Structural Change Controller for AI-assisted Python development — baseline-aware, triage-first structural review in VS Code, powered by codeclone-mcp.", + "version": "0.3.0", "publisher": "orenlab", "license": "MPL-2.0", "repository": { @@ -42,13 +42,13 @@ } }, "engines": { - "vscode": "^1.100.0" + "vscode": "^1.120.0" }, "scripts": { "build": "node esbuild.config.mjs", "watch": "node esbuild.config.mjs --watch", "typecheck": "tsc -p jsconfig.json --noEmit", - "check": "npm run build && npm run typecheck && node --check src/constants.js && node --check src/formatters.js && node --check src/runtime.js && node --check src/renderers.js && node --check src/providers.js && node --check src/support.js && node --check src/runArtifacts.js && node --check 
src/mcpClient.js && node --check src/extension.js", + "check": "npm run build && npm run typecheck && node --check src/constants.js && node --check src/formatters.js && node --check src/runtime.js && node --check src/renderers.js && node --check src/providers.js && node --check src/support.js && node --check src/runArtifacts.js && node --check src/memoryGovernance.js && node --check src/memoryBulkSelection.js && node --check src/memoryController.js && node --check src/memorySearch.js && node --check src/memorySearchRenderer.js && node --check src/workspaceInsightsRenderer.js && node --check src/trajectoryViewerRenderer.js && node --check src/mcpClient.js && node --check src/extension.js", "test": "node --test test/*.test.js", "test:host": "npm run build && node test/runExtensionHost.js", "vscode:prepublish": "npm run build" @@ -62,6 +62,7 @@ "onView:codeclone.overview", "onView:codeclone.hotspots", "onView:codeclone.session", + "onView:codeclone.memory", "onCommand:codeclone.connectMcp", "onCommand:codeclone.analyzeWorkspace", "onCommand:codeclone.analyzeChangedFiles", @@ -83,7 +84,32 @@ "onCommand:codeclone.copyCoverageJoinBrief", "onCommand:codeclone.openSecuritySurface", "onCommand:codeclone.copySecuritySurfaceBrief", - "onCommand:codeclone.manageWorkspaceTrust" + "onCommand:codeclone.manageWorkspaceTrust", + "onCommand:codeclone.refreshMemory", + "onCommand:codeclone.syncMemoryFromRun", + "onCommand:codeclone.approveMemoryRecord", + "onCommand:codeclone.rejectMemoryRecord", + "onCommand:codeclone.approveCheckedMemoryDrafts", + "onCommand:codeclone.rejectCheckedMemoryDrafts", + "onCommand:codeclone.selectAllMemoryDrafts", + "onCommand:codeclone.selectMemoryDraftsByType", + "onCommand:codeclone.clearMemoryDraftSelection", + "onCommand:codeclone.selectAllMemoryStale", + "onCommand:codeclone.selectMemoryStaleByType", + "onCommand:codeclone.openMemoryRecord", + "onCommand:codeclone.openMemoryRecordById", + "onCommand:codeclone.searchEngineeringMemory", + 
"onCommand:codeclone.memoryForActiveFile", + "onCommand:codeclone.openMemorySearchPanel", + "onCommand:codeclone.refreshMemorySearch", + "onCommand:codeclone.configureMemorySearch", + "onCommand:codeclone.showWorkspaceSessionStats", + "onCommand:codeclone.showControllerAuditTrail", + "onCommand:codeclone.showTrajectoryDashboard", + "onCommand:codeclone.showTrajectoryDetail", + "onCommand:codeclone.copyTrajectoryDashboardBrief", + "onCommand:codeclone.copyWorkspaceSessionStatsBrief", + "onCommand:codeclone.copyControllerAuditTrailBrief" ], "main": "./dist/extension.js", "contributes": { @@ -115,6 +141,12 @@ "name": "Runs & Session", "type": "tree", "icon": "media/view-session.svg" + }, + { + "id": "codeclone.memory", + "name": "Memory", + "type": "tree", + "icon": "media/view-memory.svg" } ] }, @@ -158,6 +190,21 @@ "view": "codeclone.session", "when": "isWorkspaceTrusted && codeclone.connected && !codeclone.hasRun", "contents": "The local CodeClone server is ready. Run [Analyze Workspace](command:codeclone.analyzeWorkspace) or [Review Changes](command:codeclone.analyzeChangedFiles) to create the first run." + }, + { + "view": "codeclone.memory", + "when": "!isWorkspaceTrusted", + "contents": "Engineering Memory requires a trusted workspace.\n\nOpen [Manage Workspace Trust](command:codeclone.manageWorkspaceTrust) to review and approve draft memory records." + }, + { + "view": "codeclone.memory", + "when": "isWorkspaceTrusted && !codeclone.connected", + "contents": "Connect CodeClone to load Engineering Memory.\n\nRun [Analyze Workspace](command:codeclone.analyzeWorkspace) or [Verify Local Server](command:codeclone.connectMcp), then [Sync from latest run](command:codeclone.syncMemoryFromRun) to ingest system facts." 
+ }, + { + "view": "codeclone.memory", + "when": "isWorkspaceTrusted && codeclone.connected", + "contents": "Check drafts in **Inbox**, then use the toolbar or context menu to approve or reject in bulk with one confirmation.\n\n[Search memory](command:codeclone.searchEngineeringMemory) · [Memory for active file](command:codeclone.memoryForActiveFile) · [Search panel](command:codeclone.openMemorySearchPanel)\n\n[Refresh memory](command:codeclone.refreshMemory) · [Sync from latest run](command:codeclone.syncMemoryFromRun)" } ], "commands": [ @@ -359,6 +406,173 @@ "title": "Open Overview", "category": "CodeClone" }, + { + "command": "codeclone.showBlastRadius", + "title": "Show Blast Radius", + "category": "CodeClone", + "icon": "$(target)" + }, + { + "command": "codeclone.copyBlastRadiusBrief", + "title": "Copy Blast Radius Brief", + "category": "CodeClone", + "icon": "$(copy)" + }, + { + "command": "codeclone.showWorkspaceSessionStats", + "title": "Show Session Stats", + "category": "CodeClone", + "icon": "$(debug-console)" + }, + { + "command": "codeclone.showControllerAuditTrail", + "title": "Show Controller Audit Trail", + "category": "CodeClone", + "icon": "$(history)" + }, + { + "command": "codeclone.copyWorkspaceSessionStatsBrief", + "title": "Copy Session Stats Brief", + "category": "CodeClone", + "icon": "$(copy)" + }, + { + "command": "codeclone.copyControllerAuditTrailBrief", + "title": "Copy Controller Audit Brief", + "category": "CodeClone", + "icon": "$(copy)" + }, + { + "command": "codeclone.showTrajectoryDashboard", + "title": "Show Trajectory Dashboard", + "category": "CodeClone", + "icon": "$(history)" + }, + { + "command": "codeclone.showTrajectoryDetail", + "title": "Show Trajectory Detail", + "category": "CodeClone", + "icon": "$(list-tree)" + }, + { + "command": "codeclone.copyTrajectoryDashboardBrief", + "title": "Copy Trajectory Dashboard Brief", + "category": "CodeClone", + "icon": "$(copy)" + }, + { + "command": "codeclone.refreshMemory", + 
"title": "Refresh Memory", + "category": "CodeClone", + "icon": "$(refresh)" + }, + { + "command": "codeclone.syncMemoryFromRun", + "title": "Sync Memory From Run", + "category": "CodeClone", + "icon": "$(cloud-download)" + }, + { + "command": "codeclone.approveMemoryRecord", + "title": "Approve Memory Record", + "category": "CodeClone", + "icon": "$(check)" + }, + { + "command": "codeclone.rejectMemoryRecord", + "title": "Reject Memory Record", + "category": "CodeClone", + "icon": "$(close)" + }, + { + "command": "codeclone.approveCheckedMemoryDrafts", + "title": "Approve Checked Memory Drafts", + "category": "CodeClone", + "icon": "$(check-all)" + }, + { + "command": "codeclone.rejectCheckedMemoryDrafts", + "title": "Reject Checked Memory Drafts", + "category": "CodeClone", + "icon": "$(close-all)" + }, + { + "command": "codeclone.selectAllMemoryDrafts", + "title": "Select All Memory Drafts", + "category": "CodeClone", + "icon": "$(checklist)" + }, + { + "command": "codeclone.selectMemoryDraftsByType", + "title": "Select Memory Drafts by Type", + "category": "CodeClone", + "icon": "$(filter)" + }, + { + "command": "codeclone.clearMemoryDraftSelection", + "title": "Clear Memory Draft Selection", + "category": "CodeClone", + "icon": "$(clear-all)" + }, + { + "command": "codeclone.selectAllMemoryStale", + "title": "Select All Stale Memory Records", + "category": "CodeClone", + "icon": "$(checklist)" + }, + { + "command": "codeclone.selectMemoryStaleByType", + "title": "Select Stale Memory Records by Type", + "category": "CodeClone", + "icon": "$(filter)" + }, + { + "command": "codeclone.openMemoryRecord", + "title": "Open Memory Record", + "category": "CodeClone", + "icon": "$(open-preview)" + }, + { + "command": "codeclone.openMemoryView", + "title": "Open Memory View", + "category": "CodeClone", + "icon": "$(inbox)" + }, + { + "command": "codeclone.searchEngineeringMemory", + "title": "Search Engineering Memory", + "category": "CodeClone", + "icon": "$(search)" + 
}, + { + "command": "codeclone.memoryForActiveFile", + "title": "Memory for Active File", + "category": "CodeClone", + "icon": "$(file)" + }, + { + "command": "codeclone.openMemorySearchPanel", + "title": "Open Memory Search Panel", + "category": "CodeClone", + "icon": "$(open-preview)" + }, + { + "command": "codeclone.refreshMemorySearch", + "title": "Refresh Memory Search", + "category": "CodeClone", + "icon": "$(refresh)" + }, + { + "command": "codeclone.configureMemorySearch", + "title": "Configure Memory Search", + "category": "CodeClone", + "icon": "$(settings-gear)" + }, + { + "command": "codeclone.openMemoryRecordById", + "title": "Open Memory Record By Id", + "category": "CodeClone" + }, { "command": "codeclone.clearSessionState", "title": "Clear Session", @@ -444,9 +658,17 @@ "command": "codeclone.copySecuritySurfaceBrief", "when": "false" }, + { + "command": "codeclone.openMemoryRecordById", + "when": "false" + }, { "command": "codeclone.reviewSecuritySurface", "when": "false" + }, + { + "command": "codeclone.copyBlastRadiusBrief", + "when": "false" } ], "view/title": [ @@ -550,6 +772,126 @@ "when": "view == codeclone.session && isWorkspaceTrusted", "group": "secondary@1" }, + { + "command": "codeclone.showWorkspaceSessionStats", + "when": "view == codeclone.session && isWorkspaceTrusted && codeclone.connected", + "group": "navigation@3" + }, + { + "command": "codeclone.showControllerAuditTrail", + "when": "view == codeclone.session && isWorkspaceTrusted && codeclone.connected", + "group": "navigation@4" + }, + { + "command": "codeclone.copyWorkspaceSessionStatsBrief", + "when": "view == codeclone.session && isWorkspaceTrusted && codeclone.connected", + "group": "secondary@3" + }, + { + "command": "codeclone.copyControllerAuditTrailBrief", + "when": "view == codeclone.session && isWorkspaceTrusted && codeclone.connected", + "group": "secondary@4" + }, + { + "command": "codeclone.searchEngineeringMemory", + "when": "view == codeclone.memory && 
isWorkspaceTrusted", + "group": "navigation@2" + }, + { + "command": "codeclone.memoryForActiveFile", + "when": "view == codeclone.memory && isWorkspaceTrusted", + "group": "navigation@3" + }, + { + "command": "codeclone.openMemorySearchPanel", + "when": "view == codeclone.memory && isWorkspaceTrusted", + "group": "navigation@4" + }, + { + "command": "codeclone.refreshMemory", + "when": "view == codeclone.memory && isWorkspaceTrusted", + "group": "navigation@1" + }, + { + "command": "codeclone.showTrajectoryDashboard", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.connected", + "group": "navigation@5" + }, + { + "command": "codeclone.showTrajectoryDetail", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.connected", + "group": "navigation@6" + }, + { + "command": "codeclone.copyTrajectoryDashboardBrief", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.connected", + "group": "secondary@1" + }, + { + "command": "codeclone.syncMemoryFromRun", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.connected", + "group": "navigation@5" + }, + { + "command": "codeclone.approveCheckedMemoryDrafts", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasCheckedDrafts", + "group": "navigation@6" + }, + { + "command": "codeclone.rejectCheckedMemoryDrafts", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasCheckedDrafts", + "group": "navigation@7" + }, + { + "command": "codeclone.selectAllMemoryDrafts", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasDrafts", + "group": "secondary@2" + }, + { + "command": "codeclone.selectMemoryDraftsByType", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasDrafts", + "group": "secondary@3" + }, + { + "command": "codeclone.clearMemoryDraftSelection", + "when": "view == codeclone.memory && isWorkspaceTrusted && 
codeclone.memoryHasCheckedDrafts", + "group": "secondary@4" + }, + { + "command": "codeclone.selectAllMemoryStale", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasStale", + "group": "secondary@5" + }, + { + "command": "codeclone.selectMemoryStaleByType", + "when": "view == codeclone.memory && isWorkspaceTrusted && codeclone.memoryHasStale", + "group": "secondary@6" + }, + { + "command": "codeclone.manageWorkspaceTrust", + "when": "view == codeclone.memory && !isWorkspaceTrusted", + "group": "navigation@1" + }, + { + "command": "codeclone.connectMcp", + "when": "view == codeclone.memory && isWorkspaceTrusted && !codeclone.connected", + "group": "secondary@1" + }, + { + "command": "codeclone.approveMemoryRecord", + "when": "view == codeclone.memory && (viewItem == codeclone.memoryDraft || viewItem == codeclone.memoryStale)", + "group": "inline@1" + }, + { + "command": "codeclone.rejectMemoryRecord", + "when": "view == codeclone.memory && viewItem == codeclone.memoryDraft", + "group": "inline@2" + }, + { + "command": "codeclone.openMemoryRecord", + "when": "view == codeclone.memory && (viewItem == codeclone.memoryDraft || viewItem == codeclone.memoryStale)", + "group": "1_memory@1" + }, { "command": "codeclone.openSetupHelp", "when": "view == codeclone.session && (!isWorkspaceTrusted || !codeclone.connected)", @@ -641,6 +983,66 @@ "command": "codeclone.copySecuritySurfaceBrief", "when": "viewItem == codeclone.securitySurface", "group": "navigation@1" + }, + { + "command": "codeclone.approveMemoryRecord", + "when": "view == codeclone.memory && (viewItem == codeclone.memoryDraft || viewItem == codeclone.memoryStale)", + "group": "1_memory@1" + }, + { + "command": "codeclone.rejectMemoryRecord", + "when": "view == codeclone.memory && viewItem == codeclone.memoryDraft", + "group": "1_memory@2" + }, + { + "command": "codeclone.openMemoryRecord", + "when": "view == codeclone.memory && (viewItem == codeclone.memoryDraft || viewItem == 
codeclone.memoryStale)", + "group": "1_memory@3" + }, + { + "command": "codeclone.selectAllMemoryDrafts", + "when": "view == codeclone.memory && viewItem == codeclone.memoryInbox", + "group": "1_memory@1" + }, + { + "command": "codeclone.selectMemoryDraftsByType", + "when": "view == codeclone.memory && viewItem == codeclone.memoryInbox", + "group": "1_memory@2" + }, + { + "command": "codeclone.approveCheckedMemoryDrafts", + "when": "view == codeclone.memory && viewItem == codeclone.memoryInbox && codeclone.memoryHasCheckedDrafts", + "group": "1_memory@3" + }, + { + "command": "codeclone.rejectCheckedMemoryDrafts", + "when": "view == codeclone.memory && viewItem == codeclone.memoryInbox && codeclone.memoryHasCheckedDrafts", + "group": "1_memory@4" + }, + { + "command": "codeclone.clearMemoryDraftSelection", + "when": "view == codeclone.memory && viewItem == codeclone.memoryInbox && codeclone.memoryHasCheckedDrafts", + "group": "1_memory@5" + }, + { + "command": "codeclone.selectAllMemoryStale", + "when": "view == codeclone.memory && viewItem == codeclone.memoryStaleSection", + "group": "1_memory@1" + }, + { + "command": "codeclone.selectMemoryStaleByType", + "when": "view == codeclone.memory && viewItem == codeclone.memoryStaleSection", + "group": "1_memory@2" + }, + { + "command": "codeclone.approveCheckedMemoryDrafts", + "when": "view == codeclone.memory && viewItem == codeclone.memoryStaleSection && codeclone.memoryHasCheckedDrafts", + "group": "1_memory@3" + }, + { + "command": "codeclone.clearMemoryDraftSelection", + "when": "view == codeclone.memory && viewItem == codeclone.memoryStaleSection && codeclone.memoryHasCheckedDrafts", + "group": "1_memory@4" } ], "editor/title": [ @@ -683,6 +1085,21 @@ "command": "codeclone.copySecuritySurfaceBrief", "when": "editorTextFocus && codeclone.activeReviewTargetVisibleInEditor && codeclone.activeReviewTargetIsSecuritySurface", "group": "secondary@4" + }, + { + "command": "codeclone.showBlastRadius", + "when": 
"editorTextFocus && codeclone.hasRun && isWorkspaceTrusted", + "group": "secondary@6" + }, + { + "command": "codeclone.copyBlastRadiusBrief", + "when": "editorTextFocus && codeclone.hasRun && isWorkspaceTrusted", + "group": "secondary@7" + }, + { + "command": "codeclone.memoryForActiveFile", + "when": "editorTextFocus && isWorkspaceTrusted && codeclone.connected", + "group": "secondary@8" } ] }, @@ -827,15 +1244,51 @@ "scope": "window", "default": true, "description": "Show a single workspace-level CodeClone status bar item." + }, + "codeclone.memory.searchSemantic": { + "type": "boolean", + "scope": "resource", + "default": true, + "description": "Blend semantic recall into Engineering Memory keyword search (FTS fallback when the index is unavailable)." + }, + "codeclone.memory.searchIncludeDrafts": { + "type": "boolean", + "scope": "resource", + "default": false, + "description": "Include draft records in memory search and the search panel." + }, + "codeclone.memory.searchIncludeStale": { + "type": "boolean", + "scope": "resource", + "default": false, + "description": "Include stale records in memory search and the search panel." + }, + "codeclone.memory.searchMaxResults": { + "type": "integer", + "scope": "resource", + "default": 20, + "minimum": 5, + "maximum": 50, + "description": "Maximum Engineering Memory records returned per search." + }, + "codeclone.memory.searchDetailLevel": { + "type": "string", + "enum": [ + "compact", + "full" + ], + "scope": "resource", + "default": "compact", + "description": "Statement detail level for search/list modes (get always returns full records)." 
} } } }, "devDependencies": { - "@types/node": "^25.5.2", - "@types/vscode": "1.100.0", - "@vscode/vsce": "2.25.0", + "@types/node": "^25.9.1", + "@types/vscode": "1.120.0", + "@vscode/vsce": "3.9.1", "esbuild": "^0.28.0", - "typescript": "^6.0.2" + "typescript": "^6.0.3" } } diff --git a/extensions/vscode-codeclone/src/constants.js b/extensions/vscode-codeclone/src/constants.js index 74f71b36..12c8fede 100644 --- a/extensions/vscode-codeclone/src/constants.js +++ b/extensions/vscode-codeclone/src/constants.js @@ -113,6 +113,9 @@ const REVIEW_DECORATION_THEMES = { const WORKSPACE_STATE_HOTSPOT_FOCUS_MODE = "codeclone.hotspotFocusMode"; const WORKSPACE_STATE_LAST_HELP_TOPIC = "codeclone.lastHelpTopic"; +/** Minimum interval between live get_production_triage calls for Open Triage. */ +const TRIAGE_LIVE_REFRESH_COOLDOWN_MS = 5000; + module.exports = { HELP_TOPICS, KNOWN_HELP_TOPICS, @@ -124,4 +127,5 @@ module.exports = { REVIEW_DECORATION_THEMES, WORKSPACE_STATE_HOTSPOT_FOCUS_MODE, WORKSPACE_STATE_LAST_HELP_TOPIC, + TRIAGE_LIVE_REFRESH_COOLDOWN_MS, }; diff --git a/extensions/vscode-codeclone/src/extension.js b/extensions/vscode-codeclone/src/extension.js index 74a2e93c..c611e6e0 100644 --- a/extensions/vscode-codeclone/src/extension.js +++ b/extensions/vscode-codeclone/src/extension.js @@ -1,5 +1,6 @@ "use strict"; +const crypto = require("node:crypto"); const fs = require("node:fs/promises"); const path = require("node:path"); /** @type {any} */ @@ -16,6 +17,7 @@ const { REVIEW_DECORATION_THEMES, WORKSPACE_STATE_HOTSPOT_FOCUS_MODE, WORKSPACE_STATE_LAST_HELP_TOPIC, + TRIAGE_LIVE_REFRESH_COOLDOWN_MS, } = require("./constants"); const { capitalize, @@ -67,6 +69,8 @@ const { const {CodeCloneMcpClient, MCPClientError} = require("./mcpClient"); const { markdownBulletList, + renderBlastRadiusMarkdown, + renderBlastRadiusSvgHtml, renderCoverageJoinMarkdown, renderFindingMarkdown, renderOverloadedModuleMarkdown, @@ -77,9 +81,40 @@ const { renderSetupMarkdown, 
renderTriageMarkdown, } = require("./renderers"); -const {loadRunArtifacts} = require("./runArtifacts"); +const { + renderAuditTrailHtml, + renderAuditTrailMarkdown, + renderSessionStatsHtml, + renderSessionStatsMarkdown, +} = require("./workspaceInsightsRenderer"); +const { + renderTrajectoryDashboardHtml, + renderTrajectoryDetailHtml, + renderTrajectoryDashboardMarkdown, + formatTrajectoryPickDescription, +} = require("./trajectoryViewerRenderer"); +const {fetchProductionTriage, loadRunArtifacts, shouldUseCachedTriage} = require("./runArtifacts"); +const {MemoryController, recordStatement} = require("./memoryController"); +const { + buildBulkConfirmDetail, + dedupeGovernanceNodes, + distinctRecordTypes, + formatBulkResultSummary, + recordIdFromTreeItemId, + resolveGovernanceTargets, +} = require("./memoryBulkSelection"); +const { + MemorySearchController, + activeEditorMemoryPath, + isValidMemoryRecordId, +} = require("./memorySearch"); +const { + ensureIdeGovernanceRegistered, + withIdeGovernanceChannel, +} = require("./memoryGovernance"); const { HotspotsTreeProvider, + MemoryTreeProvider, OverviewTreeProvider, ReviewCodeLensProvider, ReviewFileDecorationProvider, @@ -102,6 +137,7 @@ const { STALE_REASON_EDITOR, STALE_REASON_WORKSPACE, isMinimumSupportedCodeCloneVersion, + isLauncherWithinWorkspace, launchSpecOrigin, resolveAnalysisSettings, sameAnalysisSettings, @@ -125,6 +161,8 @@ class CodeCloneController { log: true, }); this.client = new CodeCloneMcpClient(this.outputChannel); + this.memoryController = new MemoryController(this); + this.memorySearchController = new MemorySearchController(this); this.states = new Map(); this.hotspotFocusMode = this.loadHotspotFocusMode(); const storedHelpTopic = this.context.workspaceState.get( @@ -162,6 +200,7 @@ class CodeCloneController { this.overviewProvider = new OverviewTreeProvider(this); this.hotspotsProvider = new HotspotsTreeProvider(this); this.sessionProvider = new SessionTreeProvider(this); + 
this.memoryProvider = new MemoryTreeProvider(this); this.reviewCodeLensProvider = new ReviewCodeLensProvider(this); this.reviewFileDecorationProvider = new ReviewFileDecorationProvider(this); this.overviewView = vscode.window.createTreeView("codeclone.overview", { @@ -176,6 +215,30 @@ class CodeCloneController { treeDataProvider: this.sessionProvider, showCollapseAll: false, }); + this.memoryView = vscode.window.createTreeView("codeclone.memory", { + treeDataProvider: this.memoryProvider, + showCollapseAll: true, + canSelectMany: true, + }); + this.memoryView.onDidChangeCheckboxState((event) => { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + for (const [treeItem, state] of event.items) { + const recordId = recordIdFromTreeItemId(treeItem.id); + if (!recordId) { + continue; + } + this.memoryController.setDraftChecked( + folder, + recordId, + state === vscode.TreeItemCheckboxState.Checked + ); + } + this.memoryProvider.refresh(); + this.updateContextKeys(); + }); this.onClientState = (state) => { if (this.disposed) { return; @@ -211,6 +274,8 @@ class CodeCloneController { this.overviewProvider, this.hotspotsProvider, this.sessionProvider, + this.memoryProvider, + this.memoryView, this.reviewCodeLensProvider, this.reviewFileDecorationProvider, this.overviewView, @@ -377,6 +442,94 @@ class CodeCloneController { vscode.commands.registerCommand("codeclone.reviewSecuritySurface", (node) => this.reviewSecuritySurface(node) ), + vscode.commands.registerCommand("codeclone.showBlastRadius", () => + this.showBlastRadius() + ), + vscode.commands.registerCommand("codeclone.copyBlastRadiusBrief", () => + this.copyBlastRadiusBrief() + ), + vscode.commands.registerCommand("codeclone.showWorkspaceSessionStats", () => + this.showWorkspaceSessionStats() + ), + vscode.commands.registerCommand("codeclone.showControllerAuditTrail", () => + this.showControllerAuditTrail() + ), + 
vscode.commands.registerCommand("codeclone.copyWorkspaceSessionStatsBrief", () => + this.copyWorkspaceSessionStatsBrief() + ), + vscode.commands.registerCommand("codeclone.copyControllerAuditTrailBrief", () => + this.copyControllerAuditTrailBrief() + ), + vscode.commands.registerCommand("codeclone.showTrajectoryDashboard", () => + this.showTrajectoryDashboard() + ), + vscode.commands.registerCommand("codeclone.showTrajectoryDetail", () => + this.showTrajectoryDetail() + ), + vscode.commands.registerCommand("codeclone.copyTrajectoryDashboardBrief", () => + this.copyTrajectoryDashboardBrief() + ), + vscode.commands.registerCommand("codeclone.refreshMemory", () => + this.refreshMemoryView() + ), + vscode.commands.registerCommand("codeclone.syncMemoryFromRun", () => + this.syncMemoryFromRun() + ), + vscode.commands.registerCommand( + "codeclone.approveMemoryRecord", + (node, selectedItems) => + this.governMemoryRecordSelection(node, selectedItems, "approve") + ), + vscode.commands.registerCommand( + "codeclone.rejectMemoryRecord", + (node, selectedItems) => + this.governMemoryRecordSelection(node, selectedItems, "reject") + ), + vscode.commands.registerCommand("codeclone.approveCheckedMemoryDrafts", () => + this.governCheckedMemoryDrafts("approve") + ), + vscode.commands.registerCommand("codeclone.rejectCheckedMemoryDrafts", () => + this.governCheckedMemoryDrafts("reject") + ), + vscode.commands.registerCommand("codeclone.selectAllMemoryDrafts", () => + this.selectAllMemoryDrafts() + ), + vscode.commands.registerCommand("codeclone.selectMemoryDraftsByType", () => + this.selectMemoryDraftsByType() + ), + vscode.commands.registerCommand("codeclone.clearMemoryDraftSelection", () => + this.clearMemoryDraftSelection() + ), + vscode.commands.registerCommand("codeclone.selectAllMemoryStale", () => + this.selectAllMemoryStale() + ), + vscode.commands.registerCommand("codeclone.selectMemoryStaleByType", () => + this.selectMemoryStaleByType() + ), + 
vscode.commands.registerCommand("codeclone.openMemoryRecord", (node) => + this.openMemoryRecord(node) + ), + vscode.commands.registerCommand("codeclone.openMemoryRecordById", (recordId) => + this.openMemoryRecordById(recordId) + ), + vscode.commands.registerCommand("codeclone.searchEngineeringMemory", () => + this.searchEngineeringMemory() + ), + vscode.commands.registerCommand("codeclone.memoryForActiveFile", () => + this.memoryForActiveFile() + ), + vscode.commands.registerCommand("codeclone.openMemorySearchPanel", () => + this.openMemorySearchPanel() + ), + vscode.commands.registerCommand("codeclone.refreshMemorySearch", () => + this.refreshMemorySearchPanel() + ), + vscode.commands.registerCommand("codeclone.configureMemorySearch", () => + this.configureMemorySearch() + ), + vscode.commands.registerCommand("codeclone.openMemoryView", () => + vscode.commands.executeCommand("codeclone.memory.focus") + ), ]; this.context.subscriptions.push(...subscriptions); } @@ -389,6 +542,15 @@ class CodeCloneController { return this.states.get(key); } + getMemoryWorkspaceFolder() { + const state = this.getPrimaryState(); + if (state?.folder) { + return state.folder; + } + const folders = vscode.workspace.workspaceFolders; + return folders && folders.length > 0 ? folders[0] : undefined; + } + getPrimaryState() { const activeFolder = this.getPreferredFolder(); if (activeFolder) { @@ -640,10 +802,13 @@ class CodeCloneController { const config = vscode.workspace.getConfiguration("codeclone", folder.uri); const configuredCommand = config.get("mcp.command", "auto"); const configuredArgs = config.get("mcp.args", []); + const governanceArgs = withIdeGovernanceChannel( + Array.isArray(configuredArgs) ? configuredArgs : [] + ); if (configuredCommand && configuredCommand !== "auto") { return normalizedLaunchSpec({ command: configuredCommand, - args: Array.isArray(configuredArgs) ? 
configuredArgs : [], + args: governanceArgs, cwd: folder.uri.fsPath, source: "configured", }); @@ -656,17 +821,20 @@ class CodeCloneController { })) ); const localLauncher = candidateChecks.find((entry) => entry.exists)?.candidate; - if (localLauncher) { + if ( + localLauncher && + isLauncherWithinWorkspace(localLauncher, folder.uri.fsPath) + ) { return normalizedLaunchSpec({ command: localLauncher, - args: Array.isArray(configuredArgs) ? configuredArgs : [], + args: governanceArgs, cwd: folder.uri.fsPath, source: "workspaceLocal", }); } const primary = /** @type {any} */ (normalizedLaunchSpec({ command: "codeclone-mcp", - args: Array.isArray(configuredArgs) ? configuredArgs : [], + args: governanceArgs, cwd: folder.uri.fsPath, source: "path", })); @@ -682,6 +850,11 @@ class CodeCloneController { } async ensureConnected(folder) { + if (!(await this.ensureWorkspaceTrust())) { + throw new MCPClientError( + "CodeClone requires a trusted workspace before starting the local MCP server." + ); + } const launchSpec = await this.resolveLaunchSpec(folder); if (this.client.isConnected() && this.connectionInfo.launchSpec) { const activeLaunchSpec = this.connectionInfo.launchSpec; @@ -735,8 +908,29 @@ class CodeCloneController { ) ); } + try { + const registration = await ensureIdeGovernanceRegistered( + this.client, + this.context, + effectiveLaunchSpec.cwd + ); + if (registration.status !== "ok") { + logChannelMessage( + this.outputChannel, + "warn", + `[codeclone] IDE governance registration returned ${registration.status}.` + ); + } + } catch (error) { + logChannelMessage( + this.outputChannel, + "warn", + `[codeclone] IDE governance registration failed: ${error.message}` + ); + } this.updateContextKeys(); this.updateStatusBar(); + this.memoryProvider.refresh(); return connection; } @@ -1141,6 +1335,8 @@ class CodeCloneController { state.currentRunId = runId; state.latestSummary = artifacts.summary; state.latestTriage = artifacts.triage; + state.lastTriageFetchAt = 
Date.now(); + state.lastTriageFetchRunId = runId; state.metricsSummary = artifacts.metricsSummary; state.changedSummary = changedMode ? analysisPayload : null; state.analysisSettings = analysisSettings; @@ -1191,13 +1387,73 @@ class CodeCloneController { async openProductionTriage() { const state = this.getPrimaryState(); - if (!state || !state.latestTriage) { + if (!state || !state.currentRunId) { await vscode.window.showInformationMessage( "Start with Analyze Workspace or Review Changes before opening triage." ); return; } - await this.showMarkdownDocument(renderTriageMarkdown(state)); + try { + await this.ensureConnected(state.folder); + const triage = await this.resolveLiveTriage(state); + if (!triage) { + await vscode.window.showWarningMessage( + "Could not load production triage for the current run." + ); + return; + } + await this.showMarkdownDocument(renderTriageMarkdown(state)); + } catch (error) { + this.handleError(error, "Could not open production triage."); + } + } + + async resolveLiveTriage(state) { + const runId = state.currentRunId; + if (!runId) { + return null; + } + if (state.triageFetchPromise) { + return state.triageFetchPromise; + } + const now = Date.now(); + if ( + shouldUseCachedTriage( + { + now, + currentRunId: runId, + lastTriageFetchAt: state.lastTriageFetchAt, + lastTriageFetchRunId: state.lastTriageFetchRunId, + stale: state.stale, + cooldownMs: TRIAGE_LIVE_REFRESH_COOLDOWN_MS, + }, + Boolean(state.latestTriage) + ) + ) { + return state.latestTriage; + } + const fetchPromise = vscode.window + .withProgress( + { + location: vscode.ProgressLocation.Notification, + title: "Refreshing production triage", + cancellable: false, + }, + async () => fetchProductionTriage(this.client, runId) + ) + .then((triage) => { + state.latestTriage = triage; + state.lastTriageFetchAt = Date.now(); + state.lastTriageFetchRunId = runId; + return triage; + }) + .finally(() => { + if (state.triageFetchPromise === fetchPromise) { + state.triageFetchPromise = 
null; + } + }); + state.triageFetchPromise = fetchPromise; + return fetchPromise; } setActiveReviewTarget(target) { @@ -2786,84 +3042,464 @@ class CodeCloneController { ); } - async clearSessionState() { + async showBlastRadius() { const folder = this.getPreferredFolder(); if (!folder) { return; } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + const state = this.getWorkspaceState(folder); + if (!state.currentRunId) { + const choice = await vscode.window.showInformationMessage( + "No CodeClone run is available. Analyze the workspace first.", + "Analyze Workspace" + ); + if (choice === "Analyze Workspace") { + await this.analyzeWorkspace(); + } + return; + } + const files = this.resolveBlastRadiusFiles(folder); + if (files.length === 0) { + const input = await vscode.window.showInputBox({ + title: "Blast Radius", + prompt: "Enter a workspace-relative file path", + placeHolder: "src/module.py", + }); + if (!input || !input.trim()) { + return; + } + files.push(this.normalizeBlastRadiusFileInput(folder, input)); + } try { await this.ensureConnected(folder); - await this.client.callTool("clear_session_runs", {}); - for (const state of this.states.values()) { - state.currentRunId = null; - state.latestSummary = null; - state.metricsSummary = null; - state.latestTriage = null; - state.changedSummary = null; - state.analysisSettings = null; - state.reviewed = []; - state.reviewArtifacts = emptyReviewArtifacts(); - state.gitSnapshot = null; - state.stale = false; - state.staleReason = null; - state.groupCache.clear(); + const payload = await this.client.callTool("get_blast_radius", { + files, + run_id: state.currentRunId, + depth: "transitive", + }); + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.blastRadius", + `Blast Radius: ${files.map((f) => path.basename(f)).join(", ")}`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + } + ); + panel.iconPath = new 
vscode.ThemeIcon("target"); + panel.webview.html = renderBlastRadiusSvgHtml( + payload, + folder.name, + nonce + ); + } catch (error) { + this.handleError(error, "Could not compute blast radius."); + } + } + + async copyBlastRadiusBrief() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + const state = this.getWorkspaceState(folder); + if (!state.currentRunId) { + await vscode.window.showInformationMessage( + "No CodeClone run is available. Analyze the workspace first." + ); + return; + } + const files = this.resolveBlastRadiusFiles(folder); + if (files.length === 0) { + const input = await vscode.window.showInputBox({ + title: "Blast Radius Brief", + prompt: "Enter a workspace-relative file path", + placeHolder: "src/module.py", + }); + if (!input || !input.trim()) { + return; } - this.clearActiveReviewTarget(); - this.rebuildFileDecorations(); - this.updateContextKeys(); - this.updateStatusBar(); - this.refreshAllViews(); + files.push(this.normalizeBlastRadiusFileInput(folder, input)); + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_blast_radius", { + files, + run_id: state.currentRunId, + depth: "transitive", + }); + const brief = renderBlastRadiusMarkdown(payload, folder.name); + await vscode.env.clipboard.writeText(brief); await vscode.window.showInformationMessage( - "CodeClone MCP session state cleared." + `Copied blast radius brief for ${files.join(", ")}.` ); } catch (error) { - this.handleError(error, "Could not clear CodeClone MCP session state."); + this.handleError(error, "Could not compute blast radius for brief."); } } - async pickHelpTopic() { - const topics = this.availableHelpTopics(); - const picked = await vscode.window.showQuickPick( - topics.map((topic) => ({ - label: topic.replace(/_/g, " "), - description: - topic === this.lastHelpTopic ? 
"Last opened" : "CodeClone MCP help topic", - topic, - })), - { - title: "Open Help Topic", - placeHolder: "Select a CodeClone MCP help topic", - } - ); - return picked ? picked.topic : null; + async showWorkspaceSessionStats() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_workspace_session_stats", { + root: folder.uri.fsPath, + }); + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.sessionStats", + `Session Stats: ${folder.name}`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + } + ); + panel.iconPath = new vscode.ThemeIcon("debug-console"); + panel.webview.html = renderSessionStatsHtml(payload, folder.name, nonce); + } catch (error) { + this.handleError(error, "Could not load workspace session stats."); + } } - async showMarkdownDocument(markdown) { - const document = await vscode.workspace.openTextDocument({ - content: markdown, - language: "markdown", - }); - await vscode.window.showTextDocument(document, { - preview: true, - }); + async showControllerAuditTrail() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_controller_audit_trail", { + root: folder.uri.fsPath, + limit: 50, + }); + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.controllerAudit", + `Controller Audit: ${folder.name}`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + } + ); + panel.iconPath = new vscode.ThemeIcon("history"); + panel.webview.html = renderAuditTrailHtml(payload, folder.name, nonce); + } catch (error) { + 
this.handleError(error, "Could not load controller audit trail."); + } } - provideReviewCodeLenses(document) { - const target = this.activeReviewTarget; - if (!target) { - return []; + async showTrajectoryDashboard() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; } - if (target.nodeType === "overloadedModule") { - const state = this.states.get(target.workspaceKey); - if (!state) { - return []; + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const result = await this.client.callTool("query_engineering_memory", { + root: folder.uri.fsPath, + mode: "trajectory_dashboard", + max_results: 25, + }); + const payload = + result && typeof result.payload === "object" && result.payload + ? result.payload + : result; + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.trajectoryDashboard", + `Trajectories: ${folder.name}`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + retainContextWhenHidden: true, + } + ); + panel.iconPath = new vscode.ThemeIcon("history"); + panel.webview.html = renderTrajectoryDashboardHtml(payload, folder.name, nonce); + } catch (error) { + this.handleError(error, "Could not load trajectory dashboard."); + } + } + + async showTrajectoryDetail() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const listResult = await this.client.callTool("query_engineering_memory", { + root: folder.uri.fsPath, + mode: "trajectory_dashboard", + max_results: 25, + }); + const listPayload = + listResult && typeof listResult.payload === "object" && listResult.payload + ? listResult.payload + : listResult; + const recent = Array.isArray(listPayload?.recent_trajectories) + ? 
listPayload.recent_trajectories + : []; + if (recent.length === 0) { + await vscode.window.showInformationMessage( + "No stored trajectories. Run `codeclone memory trajectory rebuild` first." + ); + return; } - const relativePath = workspaceRelativePath(state.folder, document.uri.fsPath); - if (relativePath !== normalizeRelativePath(target.item.path)) { - return []; + const picked = await vscode.window.showQuickPick( + recent.map((item) => ({ + label: String(item.trajectory_id || "?"), + description: `${item.outcome}/${item.quality_tier} · ${item.workflow_id || ""}`, + detail: formatTrajectoryPickDescription(item), + trajectoryId: String(item.trajectory_id || ""), + })), + { + title: "Open trajectory detail", + placeHolder: "Select a stored trajectory", + } + ); + if (!picked?.trajectoryId) { + return; } - const range = new vscode.Range(0, 0, 0, 0); - return [ + const detailResult = await this.client.callTool("query_engineering_memory", { + root: folder.uri.fsPath, + mode: "trajectory_get", + record_id: picked.trajectoryId, + }); + const detailPayload = + detailResult && typeof detailResult.payload === "object" && detailResult.payload + ? detailResult.payload + : detailResult; + const trajectory = + detailPayload && typeof detailPayload.trajectory === "object" + ? 
detailPayload.trajectory + : null; + if (!trajectory) { + await vscode.window.showWarningMessage("Trajectory detail not found."); + return; + } + const nonce = crypto.randomBytes(16).toString("hex"); + const panel = vscode.window.createWebviewPanel( + "codeclone.trajectoryDetail", + `Trajectory: ${picked.trajectoryId.slice(0, 18)}…`, + vscode.ViewColumn.Beside, + { + enableScripts: false, + localResourceRoots: [], + retainContextWhenHidden: true, + } + ); + panel.iconPath = new vscode.ThemeIcon("list-tree"); + panel.webview.html = renderTrajectoryDetailHtml(trajectory, folder.name, nonce); + } catch (error) { + this.handleError(error, "Could not load trajectory detail."); + } + } + + async copyTrajectoryDashboardBrief() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const result = await this.client.callTool("query_engineering_memory", { + root: folder.uri.fsPath, + mode: "trajectory_dashboard", + max_results: 25, + }); + const payload = + result && typeof result.payload === "object" && result.payload + ? result.payload + : result; + const brief = renderTrajectoryDashboardMarkdown(payload); + await vscode.env.clipboard.writeText(brief); + await vscode.window.showInformationMessage("Copied trajectory dashboard brief."); + } catch (error) { + this.handleError(error, "Could not copy trajectory dashboard brief."); + } + } + + async copyWorkspaceSessionStatsBrief() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_workspace_session_stats", { + root: folder.uri.fsPath, + }); + await vscode.env.clipboard.writeText(renderSessionStatsMarkdown(payload)); + await vscode.window.showInformationMessage( + "Copied workspace session stats brief." 
+ ); + } catch (error) { + this.handleError(error, "Could not copy session stats brief."); + } + } + + async copyControllerAuditTrailBrief() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + if (!(await this.ensureWorkspaceTrust())) { + return; + } + try { + await this.ensureConnected(folder); + const payload = await this.client.callTool("get_controller_audit_trail", { + root: folder.uri.fsPath, + limit: 50, + }); + await vscode.env.clipboard.writeText(renderAuditTrailMarkdown(payload)); + await vscode.window.showInformationMessage( + "Copied controller audit trail brief." + ); + } catch (error) { + this.handleError(error, "Could not copy controller audit brief."); + } + } + + /** + * @param {any} folder + * @returns {string[]} + */ + resolveBlastRadiusFiles(folder) { + const editor = vscode.window.activeTextEditor; + if (!editor) { + return []; + } + const relativePath = workspaceRelativePath(folder, editor.document.uri.fsPath); + if (relativePath && !relativePath.startsWith("..")) { + return [relativePath]; + } + return []; + } + + normalizeBlastRadiusFileInput(folder, input) { + const resolved = resolveWorkspacePath(folder.uri.fsPath, input); + if (!resolved) { + throw new MCPClientError( + "Blast radius path must be a workspace-relative file inside the open folder." 
+ ); + } + return path.relative(folder.uri.fsPath, resolved).split(path.sep).join("/"); + } + + async clearSessionState() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + try { + await this.ensureConnected(folder); + await this.client.callTool("clear_session_runs", {}); + for (const state of this.states.values()) { + state.currentRunId = null; + state.latestSummary = null; + state.metricsSummary = null; + state.latestTriage = null; + state.lastTriageFetchAt = 0; + state.lastTriageFetchRunId = null; + state.triageFetchPromise = null; + state.changedSummary = null; + state.analysisSettings = null; + state.reviewed = []; + state.reviewArtifacts = emptyReviewArtifacts(); + state.gitSnapshot = null; + state.stale = false; + state.staleReason = null; + state.groupCache.clear(); + } + this.clearActiveReviewTarget(); + this.rebuildFileDecorations(); + this.updateContextKeys(); + this.updateStatusBar(); + this.refreshAllViews(); + await vscode.window.showInformationMessage( + "CodeClone MCP session state cleared." + ); + } catch (error) { + this.handleError(error, "Could not clear CodeClone MCP session state."); + } + } + + async pickHelpTopic() { + const topics = this.availableHelpTopics(); + const picked = await vscode.window.showQuickPick( + topics.map((topic) => ({ + label: topic.replace(/_/g, " "), + description: + topic === this.lastHelpTopic ? "Last opened" : "CodeClone MCP help topic", + topic, + })), + { + title: "Open Help Topic", + placeHolder: "Select a CodeClone MCP help topic", + } + ); + return picked ? 
picked.topic : null; + } + + async showMarkdownDocument(markdown) { + const document = await vscode.workspace.openTextDocument({ + content: markdown, + language: "markdown", + }); + await vscode.window.showTextDocument(document, { + preview: true, + }); + } + + provideReviewCodeLenses(document) { + const target = this.activeReviewTarget; + if (!target) { + return []; + } + if (target.nodeType === "overloadedModule") { + const state = this.states.get(target.workspaceKey); + if (!state) { + return []; + } + const relativePath = workspaceRelativePath(state.folder, document.uri.fsPath); + if (relativePath !== normalizeRelativePath(target.item.path)) { + return []; + } + const range = new vscode.Range(0, 0, 0, 0); + return [ new vscode.CodeLens(range, { command: "codeclone.previousReviewItem", title: "$(arrow-up) Previous hotspot", @@ -3477,6 +4113,525 @@ class CodeCloneController { return []; } + async getMemoryChildren(node) { + const folder = this.getMemoryWorkspaceFolder(); + try { + return await this.memoryController.getChildren(folder, node); + } catch (error) { + return [ + { + nodeType: "message", + label: `Error: ${error.message}`, + icon: new vscode.ThemeIcon("error"), + }, + ]; + } + } + + async refreshMemoryView() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + this.memoryController.invalidate(folder); + this.memoryProvider.refresh(); + this.updateViewChrome(); + this.updateContextKeys(); + } + + async syncMemoryFromRun() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: "Syncing engineering memory from analysis run", + }, + async () => { + await this.ensureConnected(folder); + await this.client.callTool("manage_engineering_memory", { + root: folder.uri.fsPath, + action: "refresh_from_run", + }); + } + ); + this.memoryController.invalidate(folder); + this.memoryProvider.refresh(); + 
this.updateViewChrome(); + await vscode.window.showInformationMessage( + "Engineering memory synced from the latest analysis run." + ); + } catch (error) { + this.handleError(error, "Could not sync engineering memory."); + } + } + + async governMemoryRecordSelection(node, selectedItems, decision) { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + const resolved = resolveGovernanceTargets(node, selectedItems); + if (!resolved.length) { + await vscode.window.showWarningMessage( + "No memory records selected for governance." + ); + return; + } + await this.governMemoryRecords(folder, resolved, decision); + } + + async governCheckedMemoryDrafts(decision) { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + const checked = this.memoryController.getCheckedGovernanceNodes(folder); + if (!checked.length) { + await vscode.window.showWarningMessage( + "No memory records are checked. Use inbox or stale checkboxes, or Select all." + ); + return; + } + await this.governMemoryRecords(folder, checked, decision); + } + + async selectAllMemoryDrafts() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const snapshot = await this.memoryController.ensureSnapshot(folder); + const recordIds = snapshot.drafts + .map((record) => String(record.id || "")) + .filter((recordId) => recordId.length > 0); + this.memoryController.setDraftsChecked(folder, recordIds, true); + this.memoryProvider.refresh(); + this.updateContextKeys(); + } catch (error) { + this.handleError(error, "Could not select memory drafts."); + } + } + + async selectMemoryDraftsByType() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const snapshot = await this.memoryController.ensureSnapshot(folder); + const types = distinctRecordTypes(snapshot.drafts); + if (!types.length) { + await vscode.window.showInformationMessage( + "No draft memory records in the inbox." 
+ ); + return; + } + const picked = await vscode.window.showQuickPick(types, { + placeHolder: "Select record type to check in the inbox", + canPickMany: true, + }); + if (!picked?.length) { + return; + } + const typeSet = new Set(picked); + const recordIds = snapshot.drafts + .filter((record) => typeSet.has(String(record.type || ""))) + .map((record) => String(record.id || "")) + .filter((recordId) => recordId.length > 0); + this.memoryController.setDraftsChecked(folder, recordIds, true); + this.memoryProvider.refresh(); + this.updateContextKeys(); + } catch (error) { + this.handleError(error, "Could not select memory drafts by type."); + } + } + + async clearMemoryDraftSelection() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + this.memoryController.clearCheckedDrafts(folder); + this.memoryProvider.refresh(); + this.updateContextKeys(); + } + + async selectAllMemoryStale() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const snapshot = await this.memoryController.ensureSnapshot(folder); + const recordIds = snapshot.stale + .map((record) => String(record.id || "")) + .filter((recordId) => recordId.length > 0); + this.memoryController.setDraftsChecked(folder, recordIds, true); + this.memoryProvider.refresh(); + this.updateContextKeys(); + } catch (error) { + this.handleError(error, "Could not select stale memory records."); + } + } + + async selectMemoryStaleByType() { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const snapshot = await this.memoryController.ensureSnapshot(folder); + const types = distinctRecordTypes(snapshot.stale); + if (!types.length) { + await vscode.window.showInformationMessage( + "No stale memory records." 
+ ); + return; + } + const picked = await vscode.window.showQuickPick(types, { + placeHolder: "Select record type to check in stale", + canPickMany: true, + }); + if (!picked?.length) { + return; + } + const typeSet = new Set(picked); + const recordIds = snapshot.stale + .filter((record) => typeSet.has(String(record.type || ""))) + .map((record) => String(record.id || "")) + .filter((recordId) => recordId.length > 0); + this.memoryController.setDraftsChecked(folder, recordIds, true); + this.memoryProvider.refresh(); + this.updateContextKeys(); + } catch (error) { + this.handleError(error, "Could not select stale memory records by type."); + } + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {object[]} nodes + * @param {"approve"|"reject"|"archive"} decision + */ + async governMemoryRecords(folder, nodes, decision) { + await this.memoryController.ensureSnapshot(folder); + const hydrated = this.memoryController.hydrateGovernanceNodes( + folder, + dedupeGovernanceNodes(nodes) + ); + if (!hydrated.length) { + await vscode.window.showWarningMessage( + "No memory records selected for governance." + ); + return; + } + let workingTargets = hydrated; + if (decision === "reject") { + const draftTargets = hydrated.filter( + (node) => String(node.record?.status || "draft") === "draft" + ); + if (!draftTargets.length) { + await vscode.window.showWarningMessage( + "Only draft records can be rejected. Stale records can be approved or opened." 
+ ); + return; + } + workingTargets = draftTargets; + } + const labels = { + approve: {verb: "Approve", gerund: "Approving", past: "approved"}, + reject: {verb: "Reject", gerund: "Rejecting", past: "rejected"}, + archive: {verb: "Archive", gerund: "Archiving", past: "archived"}, + }; + const label = labels[decision] || labels.approve; + const validTargets = []; + const skipped = []; + for (const node of workingTargets) { + const record = safeObject(node.record); + try { + this.memoryController.assertGovernanceAllowed( + String(record.status || "draft"), + decision + ); + validTargets.push(node); + } catch (error) { + skipped.push({ + recordId: String(record.id || ""), + message: + error instanceof Error ? error.message : String(error), + }); + } + } + if (!validTargets.length) { + const first = skipped[0]; + await vscode.window.showWarningMessage( + first?.message || "Selected memory records cannot be updated." + ); + return; + } + const confirmLabel = + validTargets.length === 1 + ? label.verb + : `${label.verb} ${validTargets.length}`; + const confirmPrompt = + validTargets.length === 1 + ? `${label.verb} this memory record?` + : `${label.verb} ${validTargets.length} memory records?`; + const detail = buildBulkConfirmDetail(validTargets, decision); + const confirm = await vscode.window.showWarningMessage( + confirmPrompt, + {modal: true, detail}, + confirmLabel + ); + if (confirm !== confirmLabel) { + return; + } + /** @type {{succeeded: string[], failed: {recordId: string, message: string}[]}} */ + const results = {succeeded: [], failed: [...skipped]}; + try { + await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: + validTargets.length === 1 + ? 
`${label.gerund} memory record` + : `${label.gerund} ${validTargets.length} memory records`, + cancellable: true, + }, + async (progress, token) => { + await this.ensureConnected(folder); + for (let index = 0; index < validTargets.length; index += 1) { + if (token.isCancellationRequested) { + break; + } + const node = validTargets[index]; + const recordId = String(node.record?.id || ""); + progress.report({ + message: `${index + 1}/${validTargets.length} — ${recordId}`, + }); + try { + await this.memoryController.runGovernance( + folder, + node, + decision, + { + progress, + token, + deferInvalidate: true, + } + ); + results.succeeded.push(recordId); + this.memoryController.setDraftChecked( + folder, + recordId, + false + ); + } catch (error) { + if ( + error instanceof Error && + error.message === "Canceled" + ) { + throw error; + } + results.failed.push({ + recordId, + message: + error instanceof Error + ? error.message + : String(error), + }); + } + } + } + ); + } catch (error) { + if (error instanceof Error && error.message === "Canceled") { + return; + } + this.handleError(error, "Could not update the memory records."); + return; + } + this.memoryController.invalidate(folder); + this.memoryProvider.refresh(); + this.updateViewChrome(); + this.updateContextKeys(); + const summary = formatBulkResultSummary(results, decision); + if (results.succeeded.length) { + await vscode.window.showInformationMessage(summary); + } else if (results.failed.length) { + await vscode.window.showWarningMessage(summary); + } + } + + async openMemoryRecord(node) { + const folder = this.getMemoryWorkspaceFolder(); + if (!folder || !node) { + return; + } + try { + await this.memoryController.openRecordDetail(folder, node); + } catch (error) { + this.handleError(error, "Could not open the memory record."); + } + } + + async openMemoryRecordById(recordId) { + const folder = this.getPreferredFolder() || this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + if 
(!isValidMemoryRecordId(recordId)) { + this.handleError( + new Error("Invalid memory record id."), + "Could not open the memory record." + ); + return; + } + try { + const ready = await this.memorySearchController.ensureMemoryReady(folder); + if (!ready.ok) { + return; + } + await this.memorySearchController.openRecordById(folder, recordId); + } catch (error) { + this.handleError(error, "Could not open the memory record."); + } + } + + async searchEngineeringMemory() { + const folder = this.getPreferredFolder() || this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const ready = await this.memorySearchController.ensureMemoryReady(folder); + if (!ready.ok) { + return; + } + const query = await this.memorySearchController.promptSearchQuery(folder); + if (!query) { + return; + } + const result = await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: "Searching engineering memory", + }, + async () => this.memorySearchController.querySearch(folder, query) + ); + const records = this.memorySearchController.extractRecords(result); + const picked = await this.memorySearchController.pickRecord( + records, + "Engineering Memory Search" + ); + if (picked) { + await this.memorySearchController.openRecord(folder, picked); + } + } catch (error) { + this.handleError(error, "Could not search engineering memory."); + } + } + + async memoryForActiveFile() { + const folder = this.getPreferredFolder(); + if (!folder) { + return; + } + const relPath = activeEditorMemoryPath(folder); + if (!relPath) { + await vscode.window.showInformationMessage( + "Open a workspace file in the editor to load memory for that path." 
+ ); + return; + } + try { + const ready = await this.memorySearchController.ensureMemoryReady(folder); + if (!ready.ok) { + return; + } + const result = await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: `Memory for ${relPath}`, + }, + async () => this.memorySearchController.queryForPath(folder, relPath) + ); + const records = this.memorySearchController.extractRecords(result); + const picked = await this.memorySearchController.pickRecord( + records, + `Memory: ${relPath}` + ); + if (picked) { + await this.memorySearchController.openRecord(folder, picked); + } + } catch (error) { + this.handleError(error, "Could not load memory for the active file."); + } + } + + async openMemorySearchPanel() { + const folder = this.getPreferredFolder() || this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const ready = await this.memorySearchController.ensureMemoryReady(folder); + if (!ready.ok) { + return; + } + const query = await this.memorySearchController.promptSearchQuery(folder); + if (!query) { + return; + } + const result = await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: "Searching engineering memory", + }, + async () => this.memorySearchController.querySearch(folder, query) + ); + this.memorySearchController.showSearchPanel(folder, query, result); + } catch (error) { + this.handleError(error, "Could not open memory search."); + } + } + + async refreshMemorySearchPanel() { + try { + await this.memorySearchController.refreshActivePanel(); + } catch (error) { + this.handleError(error, "Could not refresh memory search."); + } + } + + async configureMemorySearch() { + const folder = this.getPreferredFolder() || this.getMemoryWorkspaceFolder(); + if (!folder) { + return; + } + try { + const updated = await this.memorySearchController.configureSearchFilters( + folder + ); + if (!updated) { + return; + } + if (this.memorySearchController.activePanel) { + await 
this.memorySearchController.refreshActivePanel(); + } + await vscode.window.showInformationMessage( + "Engineering memory search filters updated for this workspace." + ); + } catch (error) { + this.handleError(error, "Could not update memory search settings."); + } + } + async getHotspotGroupChildren(state, groupId) { if (state.groupCache.has(groupId)) { return state.groupCache.get(groupId); @@ -3770,6 +4925,7 @@ class CodeCloneController { item.id = node.id; item.description = node.description; item.iconPath = node.icon; + item.contextValue = node.contextValue; item.command = node.command; break; } @@ -3846,6 +5002,47 @@ class CodeCloneController { }; break; } + case "memoryDraft": { + item = new vscode.TreeItem( + node.label, + vscode.TreeItemCollapsibleState.Collapsed + ); + item.id = node.id; + item.description = node.description; + item.tooltip = node.tooltip; + item.iconPath = node.icon; + item.contextValue = node.contextValue || "codeclone.memoryDraft"; + item.command = node.command; + if (node.checkboxState !== undefined) { + item.checkboxState = node.checkboxState; + } + break; + } + case "memoryStale": { + item = new vscode.TreeItem( + node.label, + vscode.TreeItemCollapsibleState.Collapsed + ); + item.id = node.id; + item.description = node.description; + item.tooltip = node.tooltip; + item.iconPath = node.icon; + item.contextValue = node.contextValue || "codeclone.memoryStale"; + item.command = node.command; + if (node.checkboxState !== undefined) { + item.checkboxState = node.checkboxState; + } + break; + } + case "action": { + item = new vscode.TreeItem( + node.label, + vscode.TreeItemCollapsibleState.None + ); + item.iconPath = node.icon; + item.command = node.command; + break; + } case "detail": { item = new vscode.TreeItem( node.label, @@ -3878,6 +5075,7 @@ class CodeCloneController { this.overviewProvider.refresh(); this.hotspotsProvider.refresh(); this.sessionProvider.refresh(); + this.memoryProvider.refresh(); 
this.reviewCodeLensProvider.refresh(); this.updateViewChrome(); } @@ -3987,6 +5185,19 @@ class CodeCloneController { this.sessionView.description = state && state.reviewed.length > 0 ? `${state.reviewed.length} reviewed` : undefined; } + if (this.memoryView) { + const folder = this.getMemoryWorkspaceFolder(); + const draftCount = folder ? this.memoryController.draftCount(folder) : 0; + this.memoryView.badge = + draftCount > 0 + ? { + value: draftCount, + tooltip: `${draftCount} draft memory record(s) awaiting review`, + } + : undefined; + this.memoryView.description = + draftCount > 0 ? `${draftCount} draft` : undefined; + } } updateContextKeys() { @@ -4056,6 +5267,31 @@ class CodeCloneController { "codeclone.hotspotFocusMode", this.hotspotFocusMode ); + const memoryFolder = this.getMemoryWorkspaceFolder(); + const draftCount = memoryFolder + ? this.memoryController.draftCount(memoryFolder) + : 0; + const staleCount = memoryFolder + ? this.memoryController.staleCount(memoryFolder) + : 0; + const checkedDraftCount = memoryFolder + ? 
this.memoryController.checkedDraftCount(memoryFolder) + : 0; + void vscode.commands.executeCommand( + "setContext", + "codeclone.memoryHasDrafts", + draftCount > 0 + ); + void vscode.commands.executeCommand( + "setContext", + "codeclone.memoryHasStale", + staleCount > 0 + ); + void vscode.commands.executeCommand( + "setContext", + "codeclone.memoryHasCheckedDrafts", + checkedDraftCount > 0 + ); } updateStatusBar() { diff --git a/extensions/vscode-codeclone/src/mcpClient.js b/extensions/vscode-codeclone/src/mcpClient.js index 898a3e2d..da2a67bb 100644 --- a/extensions/vscode-codeclone/src/mcpClient.js +++ b/extensions/vscode-codeclone/src/mcpClient.js @@ -4,10 +4,12 @@ const {spawn} = require("node:child_process"); const {EventEmitter} = require("node:events"); const {version: EXTENSION_VERSION} = require("../package.json"); -const {logChannelMessage, trimTail} = require("./support"); +const {logChannelMessage, spawnEnvForMcp, trimTail} = require("./support"); const MCP_PROTOCOL_VERSION = "2025-03-26"; const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; +/** Per-step ceiling for memory governance tool calls (separate from RPC timeout). 
*/ +const GOVERNANCE_TOOL_TIMEOUT_MS = 90 * 1000; const MAX_STDOUT_BUFFER_CHARS = 4 * 1024 * 1024; const MAX_STDERR_BUFFER_CHARS = 256 * 1024; const MAX_LOG_LINE_CHARS = 4096; @@ -34,6 +36,64 @@ class MCPClientError extends Error { } } +/** + * @param {unknown} result + * @param {string} toolName + * @returns {string|null} + */ +function extractToolErrorMessage(result, toolName) { + if (!result || typeof result !== "object") { + return null; + } + const content = /** @type {{content?: unknown}} */ (result).content; + if (!Array.isArray(content)) { + return null; + } + for (const entry of content) { + if ( + entry && + typeof entry === "object" && + /** @type {{type?: string}} */ (entry).type === "text" && + typeof /** @type {{text?: string}} */ (entry).text === "string" + ) { + const text = /** @type {{text: string}} */ (entry).text.trim(); + const prefix = `Error executing tool ${toolName}:`; + if (text.startsWith(prefix)) { + return text.slice(prefix.length).trim(); + } + return text; + } + } + return null; +} + +/** + * @param {Promise} promise + * @param {number} timeoutMs + * @param {string} label + */ +function withToolTimeout(promise, timeoutMs, label) { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + reject( + new MCPClientError( + `${label} timed out after ${Math.round(timeoutMs / 1000)}s.` + ) + ); + }, timeoutMs); + Promise.resolve(promise).then( + (value) => { + clearTimeout(timer); + resolve(value); + }, + (error) => { + clearTimeout(timer); + reject(error); + } + ); + }); +} + class CodeCloneMcpClient extends EventEmitter { constructor(outputChannel) { super(); @@ -141,17 +201,29 @@ class CodeCloneMcpClient extends EventEmitter { } } - async callTool(name, args = {}) { + async callTool(name, args = {}, options = {}) { if (!this.connected) { throw new MCPClientError("CodeClone MCP is not connected."); } - const result = await this.request("tools/call", { + const timeoutMs = + typeof options.timeoutMs === "number" + ? 
options.timeoutMs + : REQUEST_TIMEOUT_MS; + const label = + typeof options.timeoutLabel === "string" + ? options.timeoutLabel + : `CodeClone MCP tool ${name}`; + const request = this.request("tools/call", { name, arguments: args, }); + const result = await withToolTimeout(request, timeoutMs, label); if (result && result.isError) { + const detail = extractToolErrorMessage(result, name); throw new MCPClientError( - `Tool ${name} returned an error response from CodeClone MCP.` + detail + ? `CodeClone MCP (${name}): ${detail}` + : `Tool ${name} returned an error response from CodeClone MCP.` ); } if (result && result.structuredContent !== undefined) { @@ -233,7 +305,7 @@ class CodeCloneMcpClient extends EventEmitter { await new Promise((resolve, reject) => { const child = spawn(launchSpec.command, launchSpec.args, { cwd: launchSpec.cwd, - env: process.env, + env: spawnEnvForMcp(launchSpec.cwd), shell: false, stdio: ["pipe", "pipe", "pipe"], }); @@ -436,4 +508,7 @@ class CodeCloneMcpClient extends EventEmitter { module.exports = { CodeCloneMcpClient, MCPClientError, + GOVERNANCE_TOOL_TIMEOUT_MS, + extractToolErrorMessage, + withToolTimeout, }; diff --git a/extensions/vscode-codeclone/src/memoryBulkSelection.js b/extensions/vscode-codeclone/src/memoryBulkSelection.js new file mode 100644 index 00000000..3d8923f5 --- /dev/null +++ b/extensions/vscode-codeclone/src/memoryBulkSelection.js @@ -0,0 +1,220 @@ +"use strict"; + +const DRAFT_TREE_ID_PREFIX = "memory-draft-"; +const STALE_TREE_ID_PREFIX = "memory-stale-"; + +/** + * @param {object} record + * @returns {string} + */ +function recordStatement(record) { + return String(record?.statement || "").trim(); +} + +/** + * @param {string|undefined} treeItemId + * @returns {string} + */ +function recordIdFromTreeItemId(treeItemId) { + const id = String(treeItemId || ""); + if (id.startsWith(DRAFT_TREE_ID_PREFIX)) { + return id.slice(DRAFT_TREE_ID_PREFIX.length); + } + if (id.startsWith(STALE_TREE_ID_PREFIX)) { + return 
id.slice(STALE_TREE_ID_PREFIX.length); + } + return ""; +} + +/** + * @param {string} recordId + * @param {"memoryDraft"|"memoryStale"} nodeType + * @returns {string} + */ +function treeItemIdForGovernanceNode(recordId, nodeType) { + const prefix = + nodeType === "memoryStale" ? STALE_TREE_ID_PREFIX : DRAFT_TREE_ID_PREFIX; + return `${prefix}${recordId}`; +} + +/** + * @param {object|undefined} node + * @returns {string} + */ +function recordIdFromGovernanceNode(node) { + if (!node) { + return ""; + } + if (node.nodeType === "memoryDraft" || node.nodeType === "memoryStale") { + return String(node.record?.id || ""); + } + return ""; +} + +/** @deprecated Use recordIdFromGovernanceNode */ +function recordIdFromDraftNode(node) { + return recordIdFromGovernanceNode(node); +} + +/** + * @param {object[]} nodes + * @returns {object[]} + */ +function dedupeGovernanceNodes(nodes) { + const seen = new Set(); + const result = []; + for (const node of nodes) { + const id = recordIdFromGovernanceNode(node); + if (!id || seen.has(id)) { + continue; + } + seen.add(id); + result.push(node); + } + return result; +} + +/** @deprecated Use dedupeGovernanceNodes */ +function dedupeDraftNodes(nodes) { + return dedupeGovernanceNodes(nodes); +} + +/** + * @param {string} treeItemId + * @returns {"memoryDraft"|"memoryStale"|null} + */ +function governanceNodeTypeFromTreeItemId(treeItemId) { + const id = String(treeItemId || ""); + if (id.startsWith(DRAFT_TREE_ID_PREFIX)) { + return "memoryDraft"; + } + if (id.startsWith(STALE_TREE_ID_PREFIX)) { + return "memoryStale"; + } + return null; +} + +/** + * Resolve governance targets from a primary tree/command node and optional + * multi-select tree items from `canSelectMany`. 
+ * + * @param {object|undefined} primary + * @param {object[]|undefined} selectedItems + * @returns {object[]} + */ +function resolveGovernanceTargets(primary, selectedItems) { + const candidates = []; + if (Array.isArray(selectedItems) && selectedItems.length > 0) { + for (const item of selectedItems) { + const id = recordIdFromTreeItemId(item?.id); + const nodeType = governanceNodeTypeFromTreeItemId(item?.id); + if (!id || !nodeType) { + continue; + } + candidates.push({ + nodeType, + id: treeItemIdForGovernanceNode(id, nodeType), + record: {id}, + }); + } + } else if (primary) { + candidates.push(primary); + } + return dedupeGovernanceNodes(candidates); +} + +/** + * @param {object[]} nodes + * @param {"approve"|"reject"|"archive"} decision + * @param {number} [previewLimit] + * @returns {string} + */ +function buildBulkConfirmDetail(nodes, decision, previewLimit = 3) { + const lines = nodes.slice(0, previewLimit).map((node) => { + const id = recordIdFromGovernanceNode(node); + const statement = recordStatement(node.record || {}); + const type = String(node.record?.type || "record"); + const preview = + statement.length > 120 ? `${statement.slice(0, 117)}…` : statement; + return `• [${type}] ${preview || id}`; + }); + if (nodes.length > previewLimit) { + lines.push(`…and ${nodes.length - previewLimit} more`); + } + const verb = + decision === "reject" + ? "Rejected drafts are removed from the inbox." + : "Approved records become active engineering memory."; + return [...lines, "", verb].join("\n"); +} + +/** + * @param {{succeeded: string[], failed: {recordId: string, message: string}[]}} results + * @param {"approve"|"reject"|"archive"} decision + * @returns {string} + */ +function formatBulkResultSummary(results, decision) { + const past = + decision === "reject" + ? "rejected" + : decision === "archive" + ? 
"archived" + : "approved"; + const parts = []; + if (results.succeeded.length) { + parts.push( + `${results.succeeded.length} memory record(s) ${past}.` + ); + } + if (results.failed.length) { + const failedIds = results.failed + .slice(0, 3) + .map((item) => item.recordId) + .join(", "); + const suffix = + results.failed.length > 3 + ? ` (+${results.failed.length - 3} more)` + : ""; + parts.push( + `${results.failed.length} failed: ${failedIds}${suffix}.` + ); + } + return parts.join(" "); +} + +/** + * @param {object[]} records + * @returns {string[]} + */ +function distinctRecordTypes(records) { + const types = new Set(); + for (const record of records) { + const type = String(record?.type || "").trim(); + if (type) { + types.add(type); + } + } + return [...types].sort(); +} + +/** @deprecated Use distinctRecordTypes */ +function distinctDraftTypes(drafts) { + return distinctRecordTypes(drafts); +} + +module.exports = { + DRAFT_TREE_ID_PREFIX, + STALE_TREE_ID_PREFIX, + recordIdFromTreeItemId, + recordIdFromGovernanceNode, + recordIdFromDraftNode, + dedupeGovernanceNodes, + dedupeDraftNodes, + resolveGovernanceTargets, + buildBulkConfirmDetail, + formatBulkResultSummary, + distinctRecordTypes, + distinctDraftTypes, + governanceNodeTypeFromTreeItemId, + treeItemIdForGovernanceNode, +}; diff --git a/extensions/vscode-codeclone/src/memoryController.js b/extensions/vscode-codeclone/src/memoryController.js new file mode 100644 index 00000000..4f65ccc8 --- /dev/null +++ b/extensions/vscode-codeclone/src/memoryController.js @@ -0,0 +1,778 @@ +"use strict"; + +/** @type {any} */ +const vscode = require("vscode"); + +const { + commitGovernance, + ensureIdeGovernanceRegistered, + prepareGovernance, +} = require("./memoryGovernance"); +const {MCPClientError} = require("./mcpClient"); +const {safeArray, safeObject} = require("./formatters"); +const {recordIdFromGovernanceNode} = require("./memoryBulkSelection"); + +const MEMORY_CACHE_TTL_MS = 15_000; + +class 
MemoryController { + /** + * @param {object} extension + */ + constructor(extension) { + this.extension = extension; + /** @type {Map} */ + this.cacheByRoot = new Map(); + /** @type {Map>} */ + this.checkedDraftIdsByRoot = new Map(); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @returns {Set} + */ + _checkedDraftIds(folder) { + const key = this.workspaceKey(folder); + let checked = this.checkedDraftIdsByRoot.get(key); + if (!checked) { + checked = new Set(); + this.checkedDraftIdsByRoot.set(key, checked); + } + return checked; + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {object[]} drafts + */ + _syncCheckedDraftIds(folder, drafts, stale) { + const validIds = new Set( + [...drafts, ...stale] + .map((record) => String(record.id || "")) + .filter((recordId) => recordId.length > 0) + ); + const checked = this._checkedDraftIds(folder); + for (const recordId of [...checked]) { + if (!validIds.has(recordId)) { + checked.delete(recordId); + } + } + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} recordId + */ + isDraftChecked(folder, recordId) { + return this._checkedDraftIds(folder).has(recordId); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} recordId + * @param {boolean} checked + */ + setDraftChecked(folder, recordId, checked) { + const ids = this._checkedDraftIds(folder); + if (checked) { + ids.add(recordId); + } else { + ids.delete(recordId); + } + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {string[]} recordIds + * @param {boolean} checked + */ + setDraftsChecked(folder, recordIds, checked) { + const ids = this._checkedDraftIds(folder); + for (const recordId of recordIds) { + if (checked) { + ids.add(recordId); + } else { + ids.delete(recordId); + } + } + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + clearCheckedDrafts(folder) { + 
this.checkedDraftIdsByRoot.delete(this.workspaceKey(folder)); + } + + /** + * @param {import("vscode").WorkspaceFolder | undefined} folder + */ + checkedDraftCount(folder) { + if (!folder) { + return 0; + } + return this._checkedDraftIds(folder).size; + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + getCheckedGovernanceNodes(folder) { + const snapshot = this.cacheByRoot.get(this.workspaceKey(folder)); + if (!snapshot) { + return []; + } + const checked = this._checkedDraftIds(folder); + const nodes = []; + for (const record of snapshot.drafts) { + const recordId = String(record.id || ""); + if (checked.has(recordId)) { + nodes.push(this.draftNodeFromRecord(record, folder)); + } + } + for (const record of snapshot.stale) { + const recordId = String(record.id || ""); + if (checked.has(recordId)) { + nodes.push(this.staleNodeFromRecord(record, folder)); + } + } + return nodes; + } + + /** + * @param {import("vscode").WorkspaceFolder | undefined} folder + */ + staleCount(folder) { + if (!folder) { + return 0; + } + const cached = this.cacheByRoot.get(this.workspaceKey(folder)); + return cached ? 
cached.stale.length : 0; + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {object[]} nodes + */ + hydrateGovernanceNodes(folder, nodes) { + const snapshot = this.cacheByRoot.get(this.workspaceKey(folder)); + const records = [ + ...(snapshot?.drafts || []), + ...(snapshot?.stale || []), + ]; + const byId = new Map( + records.map((record) => [String(record.id || ""), record]) + ); + const hydrated = []; + const seen = new Set(); + for (const node of nodes) { + const recordId = recordIdFromGovernanceNode(node); + if (!recordId || seen.has(recordId)) { + continue; + } + const record = byId.get(recordId); + if (!record) { + continue; + } + seen.add(recordId); + hydrated.push(this.governanceNodeFromRecord(record, folder)); + } + return hydrated; + } + + /** + * @param {object} record + * @param {import("vscode").WorkspaceFolder} [folder] + */ + governanceNodeFromRecord(record, folder) { + const status = String(record.status || "draft"); + if (status === "stale") { + return this.staleNodeFromRecord(record, folder); + } + return this.draftNodeFromRecord(record, folder); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + workspaceKey(folder) { + return folder.uri.toString(); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + async ensureSnapshot(folder) { + const key = this.workspaceKey(folder); + const cached = this.cacheByRoot.get(key); + const now = Date.now(); + if (cached && now - cached.loadedAt < MEMORY_CACHE_TTL_MS) { + return cached; + } + // The Memory view reflects the current connection — it never starts + // the local server from a tree render. Auto-connecting here would + // surface connect-time prompts and local-store records on restart + // simply because the view was expanded. When disconnected, return an + // empty snapshot and let the view-welcome content guide the user. 
+ if (!this.extension.client.isConnected()) { + const snapshot = { + loadedAt: now, + connected: false, + memorySupported: true, + status: null, + drafts: [], + stale: [], + }; + this.cacheByRoot.set(key, snapshot); + return snapshot; + } + // Engineering Memory tools exist only on CodeClone 2.1.0a1+. The + // extension's minimum-version gate (2.0.0) still admits older servers, + // so detect the capability from the connected server's advertised tool + // list rather than guessing from a version string. Without this, the + // Memory view would call missing tools and surface a misleading + // "not initialized" state on older servers. + const toolNames = safeArray( + this.extension.client.getConnectionSnapshot().toolNames + ).map((name) => String(name)); + if (!toolNames.includes("query_engineering_memory")) { + const snapshot = { + loadedAt: now, + connected: true, + memorySupported: false, + status: null, + drafts: [], + stale: [], + }; + this.cacheByRoot.set(key, snapshot); + return snapshot; + } + const root = folder.uri.fsPath; + const client = this.extension.client; + let status = null; + let drafts = []; + let stale = []; + try { + const statusPayload = await client.callTool("query_engineering_memory", { + root, + mode: "status", + }); + status = safeObject(statusPayload.payload); + } catch { + status = null; + } + try { + const draftsPayload = await client.callTool("query_engineering_memory", { + root, + mode: "drafts", + max_results: 50, + }); + drafts = safeArray(safeObject(draftsPayload.payload).records); + } catch { + drafts = []; + } + try { + const stalePayload = await client.callTool("query_engineering_memory", { + root, + mode: "stale", + max_results: 50, + }); + stale = safeArray(safeObject(stalePayload.payload).records); + } catch { + stale = []; + } + const snapshot = { + loadedAt: now, + connected: true, + memorySupported: true, + status, + drafts, + stale, + }; + this._syncCheckedDraftIds(folder, drafts, stale); + this.cacheByRoot.set(key, 
snapshot); + return snapshot; + } + + invalidate(folder) { + if (folder) { + this.cacheByRoot.delete(this.workspaceKey(folder)); + return; + } + this.cacheByRoot.clear(); + } + + /** + * @param {import("vscode").WorkspaceFolder | undefined} folder + * @param {object|undefined} node + */ + async getChildren(folder, node) { + // Empty states (no folder / untrusted / disconnected) are handled by + // the view-welcome content in package.json, keeping the Memory view + // consistent with the other CodeClone views. Returning [] lets that + // guidance render instead of ad-hoc message rows. + if (!folder || !vscode.workspace.isTrusted) { + return []; + } + const snapshot = await this.ensureSnapshot(folder); + if (!snapshot.connected) { + return []; + } + if (!snapshot.memorySupported) { + if (node) { + return []; + } + return [ + { + nodeType: "message", + label: "Engineering Memory requires CodeClone 2.1.0a1 or newer.", + icon: new vscode.ThemeIcon("info"), + }, + ]; + } + if (!node) { + const children = [ + { + nodeType: "section", + id: "memory-status", + label: "Status", + icon: new vscode.ThemeIcon("database"), + contextValue: "codeclone.memorySection", + }, + { + nodeType: "section", + id: "memory-inbox", + label: "Inbox", + description: + snapshot.drafts.length > 0 + ? `${snapshot.drafts.length} draft` + : "empty", + icon: new vscode.ThemeIcon("inbox"), + contextValue: "codeclone.memoryInbox", + }, + { + nodeType: "section", + id: "memory-stale", + label: "Stale", + description: + snapshot.stale.length > 0 + ? 
`${snapshot.stale.length}` + : undefined, + icon: new vscode.ThemeIcon("history"), + contextValue: "codeclone.memoryStaleSection", + }, + { + nodeType: "section", + id: "memory-actions", + label: "Actions", + icon: new vscode.ThemeIcon("settings-gear"), + contextValue: "codeclone.memorySection", + }, + ]; + return children; + } + if (node.nodeType === "section") { + if (node.id === "memory-status") { + if (!snapshot.status) { + return [ + { + nodeType: "message", + label: "Memory database not initialized. Run analysis first.", + icon: new vscode.ThemeIcon("info"), + }, + ]; + } + const byStatus = safeObject(snapshot.status.records_by_status); + const draftTotal = + typeof byStatus.draft === "number" + ? byStatus.draft + : snapshot.drafts.length; + const activeTotal = + typeof byStatus.active === "number" ? byStatus.active : null; + const staleTotal = + typeof byStatus.stale === "number" + ? byStatus.stale + : snapshot.stale.length; + const lines = [ + `Backend: ${snapshot.status.backend || "unknown"}`, + `Records: ${snapshot.status.record_count ?? "—"}`, + `Drafts: ${draftTotal}`, + `Active: ${activeTotal ?? 
"—"}`, + `Stale: ${staleTotal}`, + ]; + return lines.map((label) => ({ + nodeType: "detail", + label, + icon: new vscode.ThemeIcon("circle-outline"), + })); + } + if (node.id === "memory-inbox") { + if (!snapshot.drafts.length) { + return [ + { + nodeType: "message", + label: "No draft records in the inbox.", + icon: new vscode.ThemeIcon("check"), + }, + ]; + } + return snapshot.drafts.map((record) => + this.draftNodeFromRecord(record, folder) + ); + } + if (node.id === "memory-stale") { + if (!snapshot.stale.length) { + return [ + { + nodeType: "message", + label: "No stale records.", + icon: new vscode.ThemeIcon("check"), + }, + ]; + } + return snapshot.stale.map((record) => + this.staleNodeFromRecord(record, folder) + ); + } + if (node.id === "memory-actions") { + return [ + { + nodeType: "action", + id: "refresh-memory", + label: "Refresh memory", + icon: new vscode.ThemeIcon("refresh"), + command: { + command: "codeclone.refreshMemory", + title: "Refresh Memory", + }, + }, + { + nodeType: "action", + id: "sync-from-run", + label: "Sync from latest run", + icon: new vscode.ThemeIcon("cloud-download"), + command: { + command: "codeclone.syncMemoryFromRun", + title: "Sync Memory From Run", + }, + }, + ]; + } + } + if (node.nodeType === "memoryDraft" && node.record) { + return this._recordActionChildren(node, {includeReject: true}); + } + if (node.nodeType === "memoryStale" && node.record) { + // Stale records can be re-verified (approve) once the linked code + // is confirmed, or inspected. Reject is reserved for drafts. + return this._recordActionChildren(node, {includeReject: false}); + } + return []; + } + + /** + * Build the expandable action rows shared by draft and stale records. 
+ * + * @param {object} node + * @param {{includeReject: boolean}} options + */ + _recordActionChildren(node, {includeReject}) { + const children = [ + { + nodeType: "detail", + label: String(recordStatement(node.record)), + icon: new vscode.ThemeIcon("note"), + }, + { + nodeType: "action", + id: "approve", + label: "Approve", + icon: new vscode.ThemeIcon("check"), + command: { + command: "codeclone.approveMemoryRecord", + title: "Approve Memory Record", + arguments: [node], + }, + }, + ]; + if (includeReject) { + children.push({ + nodeType: "action", + id: "reject", + label: "Reject", + icon: new vscode.ThemeIcon("close"), + command: { + command: "codeclone.rejectMemoryRecord", + title: "Reject Memory Record", + arguments: [node], + }, + }); + } + children.push({ + nodeType: "action", + id: "open-detail", + label: "Open detail", + icon: new vscode.ThemeIcon("open-preview"), + command: { + command: "codeclone.openMemoryRecord", + title: "Open Memory Record", + arguments: [node], + }, + }); + return children; + } + + /** + * @param {object} record + * @param {import("vscode").WorkspaceFolder} [folder] + */ + draftNodeFromRecord(record, folder) { + const statement = recordStatement(record); + const label = + statement.length > 72 ? `${statement.slice(0, 69)}…` : statement; + const recordId = String(record.id || "unknown"); + const node = { + nodeType: "memoryDraft", + id: `memory-draft-${recordId}`, + record, + label: label || recordId, + description: String(record.type || "record"), + tooltip: statement, + icon: new vscode.ThemeIcon("git-pull-request"), + contextValue: "codeclone.memoryDraft", + }; + if (folder) { + node.checkboxState = this.isDraftChecked(folder, recordId) + ? 
vscode.TreeItemCheckboxState.Checked + : vscode.TreeItemCheckboxState.Unchecked; + } + node.command = { + command: "codeclone.openMemoryRecord", + title: "Open Memory Record", + arguments: [node], + }; + return node; + } + + /** + * @param {object} record + * @param {import("vscode").WorkspaceFolder} [folder] + */ + staleNodeFromRecord(record, folder) { + const statement = recordStatement(record); + const label = + statement.length > 72 ? `${statement.slice(0, 69)}…` : statement; + const recordId = String(record.id || "unknown"); + const node = { + nodeType: "memoryStale", + id: `memory-stale-${recordId}`, + record, + label: label || recordId, + description: String(record.type || "record"), + tooltip: statement, + icon: new vscode.ThemeIcon("history"), + contextValue: "codeclone.memoryStale", + }; + if (folder) { + node.checkboxState = this.isDraftChecked(folder, recordId) + ? vscode.TreeItemCheckboxState.Checked + : vscode.TreeItemCheckboxState.Unchecked; + } + node.command = { + command: "codeclone.openMemoryRecord", + title: "Open Memory Record", + arguments: [node], + }; + return node; + } + + /** + * Execute an already-confirmed governance decision. The caller is + * responsible for validating status and confirming with the user + * *before* invoking this — keeping the confirmation out of any progress + * notification. Reports "Preparing…"/"Committing…" through `progress`. 
+ * + * @param {import("vscode").WorkspaceFolder} folder + * @param {object} node + * @param {"approve"|"reject"|"archive"} decision + * @param {{progress?: {report: Function}, token?: {isCancellationRequested: boolean}, deferInvalidate?: boolean}} [options] + */ + async runGovernance(folder, node, decision, options = {}) { + const {progress, token} = options; + const record = safeObject(node.record); + const recordId = String(record.id || ""); + if (!recordId) { + throw new Error("Memory record is missing an id."); + } + this.assertGovernanceAllowed(String(record.status || "draft"), decision); + const root = folder.uri.fsPath; + const client = this.extension.client; + if (token?.isCancellationRequested) { + return null; + } + progress?.report({message: "Preparing…"}); + const prepared = await this._prepareGovernanceWithRetry( + client, + root, + recordId, + decision + ); + if (token?.isCancellationRequested) { + return null; + } + if (prepared.status === "not_found") { + throw new Error(`Memory record not found: ${recordId}`); + } + if (prepared.status === "rejected") { + const nextStep = prepared.next_step ? ` ${prepared.next_step}` : ""; + throw new MCPClientError( + `${prepared.message || "Governance is not available."}${nextStep}` + ); + } + progress?.report({message: "Committing…"}); + const actor = vscode.env.userName || "vscode-user"; + const committed = await commitGovernance( + client, + this.extension.context, + prepared, + root, + actor, + decision + ); + if (committed.status === "rejected") { + const nextStep = committed.next_step ? ` ${committed.next_step}` : ""; + throw new MCPClientError( + `${committed.message || "Governance commit was rejected."}${nextStep}` + ); + } + if (!options.deferInvalidate) { + this.invalidate(folder); + } + return committed; + } + + /** + * Prepare a governance ticket. The IDE governance key is registered once + * on connect; only re-register (and retry) if the server reports that the + * session key went missing — e.g. 
after a server restart — instead of + * paying a registration round-trip on every decision. + * + * @param {import("./mcpClient").CodeCloneMcpClient} client + * @param {string} root + * @param {string} recordId + * @param {"approve"|"reject"|"archive"} decision + */ + async _prepareGovernanceWithRetry(client, root, recordId, decision) { + const prepared = await prepareGovernance(client, root, recordId, decision); + if ( + prepared.status === "rejected" && + prepared.reason === "governance_key_missing" + ) { + await ensureIdeGovernanceRegistered( + client, + this.extension.context, + root + ); + return prepareGovernance(client, root, recordId, decision); + } + return prepared; + } + + /** + * @param {string} status + * @param {"approve"|"reject"|"archive"} decision + */ + assertGovernanceAllowed(status, decision) { + if (decision === "approve" && status === "active") { + throw new Error("This memory record is already active."); + } + if (decision === "approve" && status === "rejected") { + throw new Error("Rejected records cannot be approved. 
Create a new draft."); + } + if (decision === "reject" && status !== "draft") { + throw new Error(`Only draft records can be rejected (status: ${status}).`); + } + if (decision === "archive" && status !== "active") { + throw new Error(`Only active records can be archived (status: ${status}).`); + } + if ( + decision === "approve" && + status !== "draft" && + status !== "stale" + ) { + throw new Error(`Cannot approve a record in status '${status}'.`); + } + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {object} node + */ + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} recordId + */ + async fetchRecordById(folder, recordId) { + const root = folder.uri.fsPath; + const response = await this.extension.client.callTool( + "query_engineering_memory", + { + root, + mode: "get", + record_id: recordId, + } + ); + const body = safeObject(safeObject(response).payload); + if (String(response.status || "") === "not_found" || !body.record) { + throw new Error(`Memory record not found: ${recordId}`); + } + return safeObject(body.record); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + * @param {object} node + */ + async openRecordDetail(folder, node) { + const record = safeObject(node.record); + const subjects = safeArray(record.subjects) + .map( + (item) => + `- ${item.subject_kind || item.subject_nodeType || "subject"}: ${item.subject_key} (${item.relation || "primary"})` + ) + .join("\n"); + const body = [ + `# ${record.type || "memory"}`, + "", + `**Status:** ${record.status || "unknown"}`, + `**Confidence:** ${record.confidence || "—"}`, + "", + recordStatement(record), + "", + subjects ? 
`## Subjects\n${subjects}` : "", + ] + .filter(Boolean) + .join("\n"); + const doc = await vscode.workspace.openTextDocument({ + language: "markdown", + content: body, + }); + await vscode.window.showTextDocument(doc, {preview: true}); + } + + draftCount(folder) { + const cached = this.cacheByRoot.get(this.workspaceKey(folder)); + return cached ? cached.drafts.length : 0; + } +} + +/** + * @param {object} record + */ +function recordStatement(record) { + return String(record.statement || "").trim(); +} + +module.exports = { + MemoryController, + recordStatement, +}; diff --git a/extensions/vscode-codeclone/src/memoryGovernance.js b/extensions/vscode-codeclone/src/memoryGovernance.js new file mode 100644 index 00000000..61eaa93d --- /dev/null +++ b/extensions/vscode-codeclone/src/memoryGovernance.js @@ -0,0 +1,184 @@ +"use strict"; + +const crypto = require("node:crypto"); + +const {version: EXTENSION_VERSION} = require("../package.json"); +const { + GOVERNANCE_TOOL_TIMEOUT_MS, + MCPClientError, +} = require("./mcpClient"); + +const GOVERNANCE_SECRET_KEY = "codeclone.ideGovernanceKey"; +const IDE_CLIENT_NAME = "CodeClone VS Code"; +const IDE_GOVERNANCE_PROTOCOL = 2; + +/** + * @param {string[]} args + * @returns {string[]} + */ +function withIdeGovernanceChannel(args) { + const next = Array.isArray(args) ? 
[...args] : []; + const disableIndex = next.indexOf("--no-ide-governance-channel"); + if (disableIndex !== -1) { + next.splice(disableIndex, 1); + } + if (!next.includes("--ide-governance-channel")) { + next.push("--ide-governance-channel"); + } + return next; +} + +/** + * @param {import("vscode").SecretStorage} secrets + * @returns {Promise} + */ +async function ensureGovernanceKey(secrets) { + let key = await secrets.get(GOVERNANCE_SECRET_KEY); + if (!key || key.length < 64) { + key = crypto.randomBytes(32).toString("hex"); + await secrets.store(GOVERNANCE_SECRET_KEY, key); + } + return key; +} + +/** + * @param {object} fields + * @returns {string} + */ +function computeGovernanceProof(keyHex, fields) { + const key = Buffer.from(keyHex, "hex"); + const message = + `v${fields.protocol}|${fields.ticketId}|${fields.recordId}|${fields.decision}|` + + `${fields.confirmationNonce}|${fields.projectId}|${fields.statementDigest}`; + return crypto.createHmac("sha256", key).update(message, "utf8").digest("hex"); +} + +/** + * @param {import("./mcpClient").CodeCloneMcpClient} client + * @param {import("vscode").ExtensionContext} context + * @param {string} root + */ +async function registerIdeGovernance(client, context, root) { + const key = await ensureGovernanceKey(context.secrets); + return client.callTool( + "manage_engineering_memory", + { + root, + action: "register_ide_governance", + ide_governance_key: key, + client_name: IDE_CLIENT_NAME, + client_version: EXTENSION_VERSION, + }, + { + timeoutMs: GOVERNANCE_TOOL_TIMEOUT_MS, + timeoutLabel: "Register IDE governance", + } + ); +} + +/** + * @param {import("./mcpClient").CodeCloneMcpClient} client + * @param {import("vscode").ExtensionContext} context + * @param {string} root + */ +async function ensureIdeGovernanceRegistered(client, context, root) { + const result = await registerIdeGovernance(client, context, root); + if (result.status === "ok") { + return result; + } + if (result.status === "rejected") { + const 
nextStep = result.next_step ? ` ${result.next_step}` : ""; + throw new MCPClientError( + `${result.message || "IDE governance is not available."}${nextStep}` + ); + } + throw new MCPClientError( + `Could not register IDE governance (status: ${String(result.status)}).` + ); +} + +/** + * @param {import("./mcpClient").CodeCloneMcpClient} client + * @param {string} root + * @param {string} recordId + * @param {"approve"|"reject"|"archive"} decision + */ +async function prepareGovernance(client, root, recordId, decision) { + return client.callTool( + "manage_engineering_memory", + { + root, + action: "prepare_governance", + record_id: recordId, + decision, + }, + { + timeoutMs: GOVERNANCE_TOOL_TIMEOUT_MS, + timeoutLabel: "Prepare memory governance", + } + ); +} + +/** + * @param {import("./mcpClient").CodeCloneMcpClient} client + * @param {import("vscode").ExtensionContext} context + * @param {object} prepared + * @param {string} root + * @param {string} actor + * @param {"approve"|"reject"|"archive"} decision + */ +async function commitGovernance( + client, + context, + prepared, + root, + actor, + decision +) { + const key = await ensureGovernanceKey(context.secrets); + const ticketId = String(prepared.governance_ticket || ""); + const recordId = String(prepared.record?.id || ""); + const confirmationNonce = String(prepared.confirmation_nonce || ""); + const projectId = String(prepared.project_id || ""); + const statementDigest = String(prepared.statement_digest || ""); + const proof = computeGovernanceProof(key, { + protocol: IDE_GOVERNANCE_PROTOCOL, + ticketId, + recordId, + decision, + confirmationNonce, + projectId, + statementDigest, + }); + return client.callTool( + "manage_engineering_memory", + { + root, + action: "commit_governance", + record_id: recordId, + decision, + governance_ticket: ticketId, + confirmation_nonce: confirmationNonce, + proof, + actor, + protocol: IDE_GOVERNANCE_PROTOCOL, + }, + { + timeoutMs: GOVERNANCE_TOOL_TIMEOUT_MS, + timeoutLabel: 
"Commit memory governance", + } + ); +} + +module.exports = { + IDE_CLIENT_NAME, + IDE_GOVERNANCE_PROTOCOL, + GOVERNANCE_TOOL_TIMEOUT_MS, + withIdeGovernanceChannel, + ensureGovernanceKey, + computeGovernanceProof, + registerIdeGovernance, + ensureIdeGovernanceRegistered, + prepareGovernance, + commitGovernance, +}; diff --git a/extensions/vscode-codeclone/src/memorySearch.js b/extensions/vscode-codeclone/src/memorySearch.js new file mode 100644 index 00000000..edeee74e --- /dev/null +++ b/extensions/vscode-codeclone/src/memorySearch.js @@ -0,0 +1,509 @@ +"use strict"; + +/** @type {any} */ +const vscode = require("vscode"); + +const {safeArray, safeObject, workspaceRelativePath} = require("./formatters"); +const {recordStatement} = require("./memoryController"); + +const MEMORY_SEARCH_MAX_QUERY = 200; +const MEMORY_SEARCH_MIN_QUERY = 2; +const MEMORY_RECORD_ID_PATTERN = /^mem-[0-9a-f]{32}$/; + +const DEFAULT_SEARCH_OPTIONS = { + semantic: true, + includeDrafts: false, + includeStale: false, + maxResults: 20, + detailLevel: "compact", +}; + +/** + * @param {string} query + * @returns {string|null} + */ +function sanitizeSearchQuery(query) { + const trimmed = String(query || "").trim(); + if (trimmed.length < MEMORY_SEARCH_MIN_QUERY) { + return `Enter at least ${MEMORY_SEARCH_MIN_QUERY} characters.`; + } + if (trimmed.length > MEMORY_SEARCH_MAX_QUERY) { + return `Query must be at most ${MEMORY_SEARCH_MAX_QUERY} characters.`; + } + if (/[\u0000-\u001f\u007f]/.test(trimmed)) { + return "Query contains unsupported control characters."; + } + return null; +} + +/** + * @param {string} recordId + */ +function isValidMemoryRecordId(recordId) { + return MEMORY_RECORD_ID_PATTERN.test(String(recordId || "")); +} + +/** + * @param {string} rawPath + * @returns {string|null} + */ +function normalizeMemorySearchPath(rawPath) { + const text = String(rawPath || "").replace(/\\/g, "/").trim().replace(/^\.\//, ""); + if (!text || text === "." 
|| text === "..") { + return null; + } + if (text.startsWith("/") || /^[a-zA-Z]:/.test(text)) { + return null; + } + if (text.split("/").includes("..")) { + return null; + } + return text; +} + +/** + * @param {import("vscode").WorkspaceFolder | null | undefined} folder + * @returns {string|null} + */ +function activeEditorMemoryPath(folder) { + const editor = vscode.window.activeTextEditor; + if (!editor || !folder) { + return null; + } + const docFolder = vscode.workspace.getWorkspaceFolder(editor.document.uri); + if (!docFolder || docFolder.uri.toString() !== folder.uri.toString()) { + return null; + } + if (editor.document.uri.scheme !== "file") { + return null; + } + const rel = workspaceRelativePath(folder, editor.document.uri.fsPath); + return normalizeMemorySearchPath(rel); +} + +/** + * @param {import("vscode").WorkspaceConfiguration} config + */ +function readMemorySearchSettings(config) { + return { + semantic: config.get("codeclone.memory.searchSemantic", DEFAULT_SEARCH_OPTIONS.semantic), + includeDrafts: config.get( + "codeclone.memory.searchIncludeDrafts", + DEFAULT_SEARCH_OPTIONS.includeDrafts + ), + includeStale: config.get( + "codeclone.memory.searchIncludeStale", + DEFAULT_SEARCH_OPTIONS.includeStale + ), + maxResults: Math.min( + 50, + Math.max(5, Number(config.get("codeclone.memory.searchMaxResults", 20)) || 20) + ), + detailLevel: + String(config.get("codeclone.memory.searchDetailLevel", "compact")) === + "full" + ? 
"full" + : "compact", + }; +} + +/** + * @param {string} root + * @param {string} query + * @param {object} options + */ +function buildSearchToolArgs(root, query, options) { + return { + root, + mode: "search", + query, + semantic: Boolean(options.semantic), + include_drafts: Boolean(options.includeDrafts), + include_stale: Boolean(options.includeStale), + max_results: options.maxResults, + detail_level: options.detailLevel, + }; +} + +/** + * @param {string} root + * @param {string} path + * @param {object} options + */ +function buildForPathToolArgs(root, path, options) { + return { + root, + mode: "for_path", + path, + include_stale: Boolean(options.includeStale), + max_results: options.maxResults, + detail_level: options.detailLevel, + }; +} + +/** + * @param {object} semantic + */ +function formatSemanticStatusLine(semantic) { + const block = safeObject(semantic); + if (!block || Object.keys(block).length === 0) { + return "Semantic recall: off"; + } + if (block.used) { + const provider = block.provider || block.backend || "provider"; + const model = block.model ? ` · ${block.model}` : ""; + return `Semantic recall: on (${provider}${model})`; + } + const reason = block.reason || "unavailable"; + return `Semantic recall: off (${reason})`; +} + +/** + * @param {object} record + * @param {number} index + */ +function recordToQuickPickItem(record, index) { + const item = safeObject(record); + const id = String(item.id || ""); + const type = String(item.type || "memory"); + const status = String(item.status || "unknown"); + const statement = recordStatement(item); + const preview = + statement.length > 120 ? `${statement.slice(0, 117)}…` : statement; + const subjects = safeArray(item.subjects) + .map((s) => safeObject(s).subject_key) + .filter(Boolean) + .slice(0, 2); + const subjectHint = subjects.length > 0 ? 
` · ${subjects.join(", ")}` : ""; + return { + label: `${type} · ${status}`, + description: id, + detail: `${preview}${subjectHint}`, + record: item, + index, + }; +} + +/** + * @param {string} recordId + */ +function memoryRecordCommandUri(recordId) { + const encoded = encodeURIComponent(JSON.stringify([recordId])); + return `command:codeclone.openMemoryRecordById?${encoded}`; +} + +class MemorySearchController { + /** + * @param {object} extension + */ + constructor(extension) { + this.extension = extension; + /** @type {import("vscode").WebviewPanel | null} */ + this.activePanel = null; + /** @type {{folderKey: string, query: string, folder: import("vscode").WorkspaceFolder, result: object, relPath?: string|null} | null} */ + this.activeSession = null; + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + sessionKey(folder) { + return folder.uri.toString(); + } + + /** + * @param {import("vscode").WorkspaceFolder} folder + */ + async ensureMemoryReady(folder) { + if (!(await this.extension.ensureWorkspaceTrust())) { + return {ok: false, reason: "trust"}; + } + const snapshot = await this.extension.memoryController.ensureSnapshot(folder); + if (!snapshot.connected) { + const choice = await vscode.window.showInformationMessage( + "Connect CodeClone to search Engineering Memory.", + "Verify Local Server", + "Analyze Workspace" + ); + if (choice === "Verify Local Server") { + await this.extension.connectMcp(); + } else if (choice === "Analyze Workspace") { + await this.extension.analyzeWorkspace(); + } + return {ok: false, reason: "disconnected"}; + } + if (!snapshot.memorySupported) { + await vscode.window.showWarningMessage( + "Engineering Memory search requires CodeClone 2.1.0a1 or newer with query_engineering_memory." 
+ ); + return {ok: false, reason: "unsupported"}; + } + await this.extension.ensureConnected(folder); + return {ok: true}; + } + + /** + * Ask the user for a search query through an input box. + * + * Resolves to whatever `showInputBox` resolves to. `folder` is not used + * in the body. + * + * NOTE(review): `validateInput` is expected to return an error message or + * a falsy value; confirm `sanitizeSearchQuery` does that rather than + * returning the cleaned-up query, which would be shown as an error. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} [initialQuery] + */ + async promptSearchQuery(folder, initialQuery = "") { + return vscode.window.showInputBox({ + title: "Search Engineering Memory", + prompt: "Keyword search (FTS; optional semantic re-rank when enabled in settings)", + placeHolder: "baseline trust, blast radius, MCP scope…", + value: initialQuery, + validateInput: (value) => sanitizeSearchQuery(value), + }); + } + + /** + * Run a keyword search through the `query_engineering_memory` tool. + * + * Options come from the folder-scoped `codeclone.memory` settings, with + * entries of `optionsOverride` taking precedence. Resolves to + * `{response, options, query}`. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} query + * @param {object} [optionsOverride] + */ + async querySearch(folder, query, optionsOverride = {}) { + const config = vscode.workspace.getConfiguration("codeclone.memory", folder.uri); + const options = {...readMemorySearchSettings(config), ...optionsOverride}; + const root = folder.uri.fsPath; + const response = await this.extension.client.callTool( + "query_engineering_memory", + buildSearchToolArgs(root, query, options) + ); + return {response, options, query}; + } + + /** + * Run a path-scoped query through the `query_engineering_memory` tool. + * + * Options are resolved the same way as in `querySearch`. Resolves to + * `{response, options, relPath}`. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} relPath + * @param {object} [optionsOverride] + */ + async queryForPath(folder, relPath, optionsOverride = {}) { + const config = vscode.workspace.getConfiguration("codeclone.memory", folder.uri); + const options = {...readMemorySearchSettings(config), ...optionsOverride}; + const root = folder.uri.fsPath; + const response = await this.extension.client.callTool( + "query_engineering_memory", + buildForPathToolArgs(root, relPath, options) + ); + return {response, options, relPath}; + } + + /** + * Let the user choose one record from a quick pick. + * + * With an empty list an information message is shown and the result is + * null. The result is also null when the pick is dismissed. + * + * @param {object[]} records + * @param {string} title + */ + async pickRecord(records, title) { + if (records.length === 0) { + await vscode.window.showInformationMessage("No engineering memory records matched."); + return null; + } + const items =
records.map((record, index) => recordToQuickPickItem(record, index)); + const picked = await vscode.window.showQuickPick(items, { + title, + placeHolder: "Open a record or press Escape to dismiss", + matchOnDescription: true, + matchOnDetail: true, + }); + return picked ? picked.record : null; + } + + /** + * Open a record by delegating to `memoryController.openRecordDetail`. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {object} record + */ + async openRecord(folder, record) { + await this.extension.memoryController.openRecordDetail(folder, {record}); + } + + /** + * Validate a record id, fetch the record and open it. + * + * Throws `Error("Invalid memory record id.")` when the id does not pass + * `isValidMemoryRecordId`; the fetch is not attempted in that case. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} recordId + */ + async openRecordById(folder, recordId) { + if (!isValidMemoryRecordId(recordId)) { + throw new Error("Invalid memory record id."); + } + const record = await this.extension.memoryController.fetchRecordById( + folder, + recordId + ); + await this.openRecord(folder, record); + } + + /** + * Render a query result into the search webview panel. + * + * An already open panel is updated and revealed; otherwise a new panel is + * created. The panel title carries the query, shortened to 45 characters + * plus an ellipsis when the query is longer than 48 characters. + * + * @param {import("vscode").WorkspaceFolder} folder + * @param {string} query + * @param {object} result + */ + showSearchPanel(folder, query, result) { + const {renderMemorySearchHtml} = require("./memorySearchRenderer"); + const nonce = require("node:crypto").randomBytes(16).toString("hex"); + const title = + query.length > 48 ?
`Memory Search: ${query.slice(0, 45)}…` : `Memory Search: ${query}`; + const folderKey = this.sessionKey(folder); + /* A panel is already open: update it in place instead of creating another. */ + if (this.activePanel) { + this.activePanel.title = title; + this.activePanel.webview.html = renderMemorySearchHtml({ + query, + result, + workspaceName: folder.name, + nonce, + }); + /* relPath falls back to the previous session's value when the new result has none. */ + this.activeSession = { + folderKey, + query, + result, + folder, + relPath: result.relPath || this.activeSession?.relPath || null, + }; + this.activePanel.reveal(vscode.ViewColumn.Beside); + return; + } + /* Scripts and forms are off; the only enabled command URI opens a record by id. */ + const panel = vscode.window.createWebviewPanel( + "codeclone.memorySearch", + title, + vscode.ViewColumn.Beside, + { + enableScripts: false, + enableForms: false, + enableCommandUris: ["codeclone.openMemoryRecordById"], + localResourceRoots: [], + retainContextWhenHidden: true, + } + ); + panel.iconPath = new vscode.ThemeIcon("search"); + panel.webview.html = renderMemorySearchHtml({ + query, + result, + workspaceName: folder.name, + nonce, + }); + /* Clear the stored panel and session only if this panel is still the active one. */ + panel.onDidDispose(() => { + if (this.activePanel === panel) { + this.activePanel = null; + this.activeSession = null; + } + }); + this.activePanel = panel; + this.activeSession = { + folderKey, + query, + result, + folder, + relPath: result.relPath || null, + }; + } + + /** + * Re-run the query behind the active panel and render the new result. + * + * Without a stored session that has both a folder and a query, an + * information message is shown and nothing else happens. + */ + async refreshActivePanel() { + const session = this.activeSession; + if (!session?.folder || !session.query) { + await vscode.window.showInformationMessage( + "Open a memory search panel first (Open Memory Search Panel)."
+ ); + return; + } + const {folder, query} = session; + /* The mode of the stored response decides which query is repeated; it defaults to "search". */ + const mode = String(safeObject(safeObject(session.result).response).mode || "search"); + try { + await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Window, + title: "Refreshing engineering memory search", + }, + async () => { + /* The path query is repeated only when a relPath was recorded for the session. */ + if (mode === "for_path" && session.relPath) { + const next = await this.queryForPath(folder, session.relPath); + this.showSearchPanel(folder, query, next); + } else { + const next = await this.querySearch(folder, query); + this.showSearchPanel(folder, query, next); + } + } + ); + } catch (error) { + this.extension.handleError(error, "Could not refresh memory search."); + } + } + + /** + * Walk the user through four quick picks (semantic recall, drafts, stale + * records, result limit) and store the choices in the folder's + * `codeclone.memory` settings. + * + * Resolves to null as soon as one pick is dismissed; nothing is written + * in that case because the updates only run after the last pick. + * + * NOTE(review): the first pick's placeholder is built from an empty + * object, so it does not depend on the current `searchSemantic` value; + * confirm that is intended. + * + * @param {import("vscode").WorkspaceFolder} folder + */ + async configureSearchFilters(folder) { + const config = vscode.workspace.getConfiguration("codeclone.memory", folder.uri); + const current = readMemorySearchSettings(config); + const semanticPick = await vscode.window.showQuickPick( + [ + {label: "Semantic recall on", value: true}, + {label: "FTS only (semantic off)", value: false}, + ], + { + title: "Memory search — semantic", + placeHolder: formatSemanticStatusLine({}), + } + ); + if (!semanticPick) { + return null; + } + const draftsPick = await vscode.window.showQuickPick( + [ + {label: "Hide drafts", value: false}, + {label: "Include drafts", value: true}, + ], + {title: "Memory search — drafts"} + ); + if (!draftsPick) { + return null; + } + const stalePick = await vscode.window.showQuickPick( + [ + {label: "Hide stale", value: false}, + {label: "Include stale", value: true}, + ], + {title: "Memory search — stale"} + ); + if (!stalePick) { + return null; + } + const maxPick = await vscode.window.showQuickPick( + [ + {label: "10 results", value: 10}, + {label: "20 results", value: 20}, + {label: "50 results", value: 50}, + ], + {title: "Memory search — limit", placeHolder: `Current: ${current.maxResults}`} + ); + if (!maxPick) { + return null; + } + const target =
vscode.ConfigurationTarget.WorkspaceFolder; + await config.update("searchSemantic", semanticPick.value, target, folder.uri); + await config.update("searchIncludeDrafts", draftsPick.value, target, folder.uri); + await config.update("searchIncludeStale", stalePick.value, target, folder.uri); + await config.update("searchMaxResults", maxPick.value, target, folder.uri); + return readMemorySearchSettings(config); + } + + /** + * @param {object} result + */ + extractRecords(result) { + const payload = safeObject(safeObject(result.response).payload); + return safeArray(payload.records); + } +} + +module.exports = { + MEMORY_SEARCH_MAX_QUERY, + MEMORY_RECORD_ID_PATTERN, + MemorySearchController, + sanitizeSearchQuery, + isValidMemoryRecordId, + normalizeMemorySearchPath, + activeEditorMemoryPath, + readMemorySearchSettings, + buildSearchToolArgs, + buildForPathToolArgs, + formatSemanticStatusLine, + recordToQuickPickItem, + memoryRecordCommandUri, +}; diff --git a/extensions/vscode-codeclone/src/memorySearchRenderer.js b/extensions/vscode-codeclone/src/memorySearchRenderer.js new file mode 100644 index 00000000..965e6919 --- /dev/null +++ b/extensions/vscode-codeclone/src/memorySearchRenderer.js @@ -0,0 +1,183 @@ +"use strict"; + +const {safeArray, safeObject} = require("./formatters"); +const {recordStatement} = require("./memoryController"); + +/** + * @param {object} semantic + */ +function formatSemanticStatusLine(semantic) { + const block = safeObject(semantic); + if (!block || Object.keys(block).length === 0) { + return "Semantic recall: off"; + } + if (block.used) { + const provider = block.provider || block.backend || "provider"; + const model = block.model ? 
` · ${block.model}` : ""; + return `Semantic recall: on (${provider}${model})`; + } + const reason = block.reason || "unavailable"; + return `Semantic recall: off (${reason})`; +} + +/** + * @param {string} recordId + */ +function memoryRecordCommandUri(recordId) { + const encoded = encodeURIComponent(JSON.stringify([recordId])); + return `command:codeclone.openMemoryRecordById?${encoded}`; +} + +function escapeHtml(text) { + return String(text) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +/** + * @param {object} record + */ +function recordCardHtml(record) { + const item = safeObject(record); + const id = String(item.id || ""); + if (!id) { + return ""; + } + const type = escapeHtml(item.type || "memory"); + const status = escapeHtml(item.status || "unknown"); + const confidence = escapeHtml(item.confidence || "—"); + const statement = escapeHtml(recordStatement(item)); + const subjects = safeArray(item.subjects) + .map((raw) => { + const subject = safeObject(raw); + const key = escapeHtml(subject.subject_key || ""); + const kind = escapeHtml(subject.subject_kind || "subject"); + return `
  • ${key} (${kind})
  • `; + }) + .join(""); + const openHref = memoryRecordCommandUri(id); + const truncated = item.statement_truncated ? " · truncated preview" : ""; + return [ + '
    ', + '
    ', + `${type}`, + `${status}`, + `${confidence}`, + "
    ", + `

    ${statement}

    `, + subjects ? `
      ${subjects}
    ` : "", + `

    ${escapeHtml(id)}${escapeHtml(truncated)}

    `, + `

    Open full record

    `, + "
    ", + ].join(""); +} + +/** + * @param {object[]} auditEvents + */ +function auditEventsSection(auditEvents) { + if (auditEvents.length === 0) { + return ""; + } + const items = auditEvents + .slice(0, 8) + .map((raw) => { + const event = safeObject(raw); + const summary = escapeHtml( + String(event.summary || event.event_type || event.kind || "audit event") + ); + const path = event.path ? `${escapeHtml(event.path)}` : ""; + return `
  • ${summary}${path ? ` · ${path}` : ""}
  • `; + }) + .join(""); + return [ + '
    ', + "

    Semantic audit incidents

    ", + '

    Typed separately from memory records — review before trusting semantic hits.

    ', + `
      ${items}
    `, + "
    ", + ].join(""); +} + +/** + * @param {object} params + */ +function renderMemorySearchHtml(params) { + const query = String(params.query || ""); + const workspaceName = String(params.workspaceName || "workspace"); + const nonce = String(params.nonce || "0"); + const result = safeObject(params.result); + const response = safeObject(result.response); + const payload = safeObject(response.payload); + const records = safeArray(payload.records); + const truncated = Boolean(payload.truncated); + const recordCount = Number(payload.record_count ?? records.length); + const semantic = safeObject(response.semantic); + const semanticLine = formatSemanticStatusLine(semantic); + const mode = escapeHtml(String(response.mode || "search")); + const detailLevel = escapeHtml(String(response.detail_level || "compact")); + const auditEvents = safeArray(payload.audit_events); + const policy = safeObject(payload.retrieval_policy); + const policyDrafts = policy.drafts_included ? "drafts included" : "drafts excluded"; + + const cards = + records.length > 0 + ? records.map((record) => recordCardHtml(record)).join("\n") + : '

    No records matched this query with the current filters.

    '; + + return [ + "", + '', + "", + '', + '', + ``, + `", + "", + "", + '
    ', + "

    Engineering Memory Search

    ", + `

    Workspace: ${escapeHtml(workspaceName)}`, + `Mode: ${mode}`, + `Detail: ${detailLevel}`, + `${escapeHtml(policyDrafts)}

    `, + `

    Query: ${escapeHtml(query)}`, + `${recordCount} record${recordCount === 1 ? "" : "s"}${truncated ? " (truncated)" : ""}

    `, + "
    ", + ``, + auditEventsSection(auditEvents), + `
    ${cards}
    `, + '

    Use the panel title bar Refresh or command palette filters. Open records via trusted command links only.

    ', + "", + "", + ].join("\n"); +} + +module.exports = { + escapeHtml, + renderMemorySearchHtml, + recordCardHtml, +}; diff --git a/extensions/vscode-codeclone/src/providers.js b/extensions/vscode-codeclone/src/providers.js index 0de5ae78..f8bf991a 100644 --- a/extensions/vscode-codeclone/src/providers.js +++ b/extensions/vscode-codeclone/src/providers.js @@ -29,6 +29,9 @@ class WorkspaceState { this.stale = false; this.staleReason = null; this.lastStaleCheckAt = 0; + this.lastTriageFetchAt = 0; + this.lastTriageFetchRunId = null; + this.triageFetchPromise = null; } } @@ -81,6 +84,17 @@ class SessionTreeProvider extends BaseTreeProvider { } } +/** @implements {VSCodeTreeDataProvider} */ +class MemoryTreeProvider extends BaseTreeProvider { + async getTreeItem(node) { + return this.controller.createTreeItem(node); + } + + async getChildren(node) { + return this.controller.getMemoryChildren(node); + } +} + /** @implements {VSCodeCodeLensProvider} */ class ReviewCodeLensProvider { constructor(controller) { @@ -125,6 +139,7 @@ class ReviewFileDecorationProvider { module.exports = { HotspotsTreeProvider, + MemoryTreeProvider, OverviewTreeProvider, ReviewCodeLensProvider, ReviewFileDecorationProvider, diff --git a/extensions/vscode-codeclone/src/renderers.js b/extensions/vscode-codeclone/src/renderers.js index 9b254896..e32352ff 100644 --- a/extensions/vscode-codeclone/src/renderers.js +++ b/extensions/vscode-codeclone/src/renderers.js @@ -381,8 +381,348 @@ function renderSecuritySurfaceMarkdown(item) { ].join("\n"); } +function escapeHtml(text) { + return String(text) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +function blastRadiusFileListSection(title, items, open) { + if (items.length === 0) { + return ""; + } + const openAttr = open ? " open" : ""; + const listItems = items + .map((f) => `
  • ${escapeHtml(f)}
  • `) + .join(""); + return `${escapeHtml(title)} (${items.length})
      ${listItems}
    `; +} + +function renderBlastRadiusMarkdown(payload, workspaceName) { + const origin = safeArray(payload.origin); + const direct = safeArray(payload.direct_dependents); + const transitive = safeArray(payload.transitive_dependents); + const cloneCohort = safeArray(payload.clone_cohort_members); + const inCycle = safeArray(payload.in_dependency_cycle); + const risk = safeObject(payload.structural_risk); + const doNotTouch = safeArray(payload.do_not_touch); + const reviewContext = safeArray(payload.review_context); + const guardrails = safeArray(payload.guardrails); + const radiusLevel = capitalize(String(payload.radius_level || "unknown")); + + const lines = [ + "# Blast Radius", + "", + `- Run: \`${payload.run_id || "unknown"}\``, + `- Workspace: \`${workspaceName || "unknown"}\``, + `- Depth: ${payload.depth || "direct"}`, + `- Radius level: **${radiusLevel}**`, + `- Origin: ${origin.length} files`, + `- Direct dependents: ${direct.length}`, + `- Transitive dependents: ${transitive.length}`, + `- Clone cohort: ${cloneCohort.length}`, + ]; + if (origin.length > 0) { + lines.push( + "", + "## Origin files", + markdownBulletList(origin.map((f) => `\`${f}\``)) + ); + } + if (direct.length > 0) { + lines.push( + "", + "## Direct dependents", + markdownBulletList(direct.map((f) => `\`${f}\``)) + ); + } + if (transitive.length > 0) { + lines.push( + "", + "## Transitive dependents", + markdownBulletList(transitive.map((f) => `\`${f}\``)) + ); + } + if (cloneCohort.length > 0) { + lines.push( + "", + "## Clone cohort members", + markdownBulletList(cloneCohort.map((f) => `\`${f}\``)) + ); + } + if (inCycle.length > 0) { + lines.push( + "", + "## In dependency cycle", + markdownBulletList(inCycle.map((f) => `\`${f}\``)) + ); + } + const riskEntries = Object.entries(risk).filter( + ([, paths]) => safeArray(paths).length > 0 + ); + if (riskEntries.length > 0) { + lines.push("", "## Structural risk"); + for (const [key, paths] of riskEntries) { + lines.push( + "", + `### 
${humanizeIdentifier(key)}`, + markdownBulletList(safeArray(paths).map((f) => `\`${f}\``)) + ); + } + } + if (doNotTouch.length > 0) { + lines.push( + "", + "## Do not touch", + markdownBulletList( + doNotTouch.map( + (e) => + `\`${safeObject(e).path}\` — ${safeObject(e).reason}` + ) + ) + ); + } + if (reviewContext.length > 0) { + lines.push( + "", + "## Review context", + markdownBulletList( + reviewContext.map( + (e) => + `\`${safeObject(e).path}\` — ${safeObject(e).reason}` + ) + ) + ); + } + if (guardrails.length > 0) { + lines.push("", "## Guardrails", markdownBulletList(guardrails)); + } + return lines.join("\n"); +} + +function renderBlastRadiusSvgHtml(payload, workspaceName, nonce) { + const origin = safeArray(payload.origin); + const direct = safeArray(payload.direct_dependents); + const transitive = safeArray(payload.transitive_dependents); + const cloneCohort = safeArray(payload.clone_cohort_members); + const inCycle = safeArray(payload.in_dependency_cycle); + const risk = safeObject(payload.structural_risk); + const doNotTouch = safeArray(payload.do_not_touch); + const reviewContext = safeArray(payload.review_context); + const guardrails = safeArray(payload.guardrails); + const radiusLevel = String(payload.radius_level || "unknown").toLowerCase(); + const depth = String(payload.depth || "direct"); + const runId = String(payload.run_id || "unknown"); + + const hasDirect = direct.length > 0; + const hasTransitive = transitive.length > 0; + const hasClones = cloneCohort.length > 0; + + const cx = hasClones ? 260 : 300; + const cy = 170; + const originR = 50; + const directR = hasDirect ? 105 : 0; + const transitiveR = hasTransitive ? 155 : 0; + const outerR = transitiveR || directR || originR; + const svgWidth = hasClones ? 
600 : 520; + + let svgContent = ""; + + if (hasTransitive) { + svgContent += ``; + svgContent += `Transitive (${transitive.length})`; + } + if (hasDirect) { + svgContent += ``; + svgContent += `Direct (${direct.length})`; + } + svgContent += ``; + svgContent += `Origin`; + svgContent += `${origin.length} file${origin.length !== 1 ? "s" : ""}`; + + if (hasClones) { + const boxX = cx + outerR + 30; + const boxW = Math.max(svgWidth - boxX - 10, 80); + svgContent += ``; + svgContent += `Clone cohort`; + svgContent += `${cloneCohort.length}`; + svgContent += ``; + } + + if (inCycle.length > 0) { + svgContent += ``; + svgContent += `${inCycle.length} in cycle`; + } + + const legendY = cy + outerR + 25; + let legendX = 20; + const legendItems = [{cssClass: "ring-origin", label: "Origin"}]; + if (hasDirect) { + legendItems.push({cssClass: "ring-direct", label: "Direct"}); + } + if (hasTransitive) { + legendItems.push({cssClass: "ring-transitive", label: "Transitive"}); + } + if (hasClones) { + legendItems.push({cssClass: "clone-box", label: "Clones"}); + } + + for (const item of legendItems) { + svgContent += ``; + svgContent += `${escapeHtml(item.label)}`; + legendX += 18 + item.label.length * 7 + 16; + } + + const svgHeight = legendY + 30; + + const svg = [ + ``, + svgContent, + "", + ].join(""); + + const detailSections = []; + detailSections.push(blastRadiusFileListSection("Origin files", origin, true)); + if (hasDirect) { + detailSections.push(blastRadiusFileListSection("Direct dependents", direct, false)); + } + if (hasTransitive) { + detailSections.push(blastRadiusFileListSection("Transitive dependents", transitive, false)); + } + if (hasClones) { + detailSections.push(blastRadiusFileListSection("Clone cohort members", cloneCohort, false)); + } + if (inCycle.length > 0) { + detailSections.push(blastRadiusFileListSection("In dependency cycle", inCycle, false)); + } + + const riskEntries = Object.entries(risk).filter( + ([, paths]) => safeArray(paths).length > 0 + ); 
+ if (riskEntries.length > 0) { + let riskHtml = "

    Structural risk

    "; + for (const [key, paths] of riskEntries) { + const riskClass = key.includes("complexity") + ? "risk-high" + : key.includes("coverage") + ? "risk-coverage" + : key.includes("overloaded") + ? "risk-overloaded" + : "risk-high"; + riskHtml += `

    ${escapeHtml(humanizeIdentifier(key))}

    `; + for (const p of safeArray(paths)) { + riskHtml += `
    ${escapeHtml(p)}
    `; + } + riskHtml += "
    "; + } + detailSections.push(riskHtml); + } + + if (doNotTouch.length > 0) { + let html = `

    Do not touch (${doNotTouch.length})

    `; + for (const entry of doNotTouch) { + const e = safeObject(entry); + html += `
    ${escapeHtml(e.path)}`; + html += ` — ${escapeHtml(e.reason)}
    `; + } + detailSections.push(html); + } + + if (reviewContext.length > 0) { + let html = `

    Review context (${reviewContext.length})

    `; + for (const entry of reviewContext) { + const e = safeObject(entry); + html += `
    ${escapeHtml(e.path)}`; + html += ` — ${escapeHtml(e.reason)}
    `; + } + detailSections.push(html); + } + + let guardrailsHtml = ""; + if (guardrails.length > 0) { + const items = guardrails.map((g) => `
  • ${escapeHtml(g)}
  • `).join(""); + guardrailsHtml = `

    Guardrails

      ${items}
    `; + } + + return [ + "", + '', + "", + '', + '', + ``, + `", + "", + "", + '
    ', + "

    Blast Radius

    ", + `${escapeHtml(capitalize(radiusLevel))}`, + "
    ", + '
    ', + `Run: ${escapeHtml(runId)}`, + `Depth: ${escapeHtml(depth)}`, + `Workspace: ${escapeHtml(workspaceName)}`, + "
    ", + `
    ${svg}
    `, + detailSections.filter(Boolean).join("\n"), + guardrailsHtml, + "", + "", + ].join("\n"); +} + module.exports = { markdownBulletList, + renderBlastRadiusMarkdown, + renderBlastRadiusSvgHtml, renderFindingMarkdown, renderCoverageJoinMarkdown, renderOverloadedModuleMarkdown, diff --git a/extensions/vscode-codeclone/src/runArtifacts.js b/extensions/vscode-codeclone/src/runArtifacts.js index f715426b..9f690cdf 100644 --- a/extensions/vscode-codeclone/src/runArtifacts.js +++ b/extensions/vscode-codeclone/src/runArtifacts.js @@ -6,6 +6,37 @@ function arrayItems(value) { return Array.isArray(value) ? value : []; } +function shouldUseCachedTriage( + { + now, + currentRunId, + lastTriageFetchAt, + lastTriageFetchRunId, + stale, + cooldownMs, + }, + hasCachedTriage +) { + if (!hasCachedTriage) { + return false; + } + if (stale) { + return false; + } + if (lastTriageFetchRunId !== currentRunId) { + return false; + } + return now - Number(lastTriageFetchAt || 0) < cooldownMs; +} + +async function fetchProductionTriage(client, runId) { + return client.callTool("get_production_triage", { + run_id: runId, + max_hotspots: 5, + max_suggestions: 5, + }); +} + async function loadRunArtifacts( client, folder, @@ -16,11 +47,7 @@ async function loadRunArtifacts( client.callTool("get_run_summary", { run_id: runId, }), - client.callTool("get_production_triage", { - run_id: runId, - max_hotspots: 5, - max_suggestions: 5, - }), + fetchProductionTriage(client, runId), client.callTool("get_report_section", { run_id: runId, section: "metrics", @@ -49,5 +76,7 @@ async function loadRunArtifacts( } module.exports = { + fetchProductionTriage, loadRunArtifacts, + shouldUseCachedTriage, }; diff --git a/extensions/vscode-codeclone/src/runtime.js b/extensions/vscode-codeclone/src/runtime.js index 7e7b7059..c6cdb0a2 100644 --- a/extensions/vscode-codeclone/src/runtime.js +++ b/extensions/vscode-codeclone/src/runtime.js @@ -75,6 +75,19 @@ function workspaceLocalPath(rootPath, candidatePath) { 
return null; } +function toRepoRelativeMcpPath(rootPath, resolvedPath) { + const root = String(rootPath || "").trim(); + const resolved = String(resolvedPath || "").trim(); + if (!root || !resolved) { + return null; + } + const relative = path.relative(root, resolved); + if (relative.startsWith("..") || path.isAbsolute(relative)) { + return null; + } + return relative.split(path.sep).join("/"); +} + async function resolveCoverageXmlPath( rootPath, configuredPath = "", @@ -83,7 +96,8 @@ async function resolveCoverageXmlPath( ) { const configured = String(configuredPath || "").trim(); if (configured) { - return workspaceLocalPath(rootPath, configured); + const local = workspaceLocalPath(rootPath, configured); + return local ? toRepoRelativeMcpPath(rootPath, local) : null; } if (!autoDetect) { return null; @@ -92,7 +106,10 @@ async function resolveCoverageXmlPath( if (!detected) { return null; } - return (await exists(detected)) ? detected : null; + if (!(await exists(detected))) { + return null; + } + return toRepoRelativeMcpPath(rootPath, detected); } async function looksLikeCodeCloneRepo(folderPath) { diff --git a/extensions/vscode-codeclone/src/support.js b/extensions/vscode-codeclone/src/support.js index 04a9468f..7612fbb2 100644 --- a/extensions/vscode-codeclone/src/support.js +++ b/extensions/vscode-codeclone/src/support.js @@ -1,5 +1,6 @@ "use strict"; +const fs = require("node:fs"); const path = require("node:path"); const STALE_REASON_EDITOR = "unsaved editor changes"; @@ -91,23 +92,162 @@ function staleMessage(reason) { return "Review data may be stale because the workspace changed after this run."; } +const BLOCKED_MCP_ARGS = new Set([ + "--transport", + "--host", + "--port", + "--allow-remote", + "--json-response", + "--stateless-http", +]); +const STDIO_TRANSPORT_ARGS = Object.freeze(["--transport", "stdio"]); +const SPAWN_ENV_EXACT_KEYS = new Set([ + "PATH", + "HOME", + "USERPROFILE", + "APPDATA", + "LOCALAPPDATA", + "SystemRoot", + "WINDIR", + "TEMP", 
+ "TMP", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TZ", + "TERM", + "PWD", + "OS", + "COMSPEC", + "PATHEXT", +]); +/* Variable name prefixes accepted by spawnEnvAllowsKey in addition to the exact names. */ +const SPAWN_ENV_PREFIXES = [ + "CODECLONE_", + "PYTHON", + "UV_", + "VIRTUAL_ENV", + "POETRY_", +]; + +/** True when the value contains a forward slash or a backslash. */ +function hasPathSeparator(value) { + return value.includes("/") || value.includes("\\"); +} + +/** + * Reject a launcher command that contains a path separator but is not an + * absolute path. An empty command is accepted here without error. + * + * @throws {Error} for a relative path such as "bin/tool". + */ +function validateConfiguredCommand(command) { + if (!command) { + return; + } + if (hasPathSeparator(command) && !path.isAbsolute(command)) { + throw new Error( + "Configured CodeClone launcher must be an absolute path or a bare command name." + ); + } +} + +/** + * Throw when any argument names an option listed in BLOCKED_MCP_ARGS. + * + * Only the text before the first "=" is compared, so both the + * `--port 1` and the `--port=1` spelling are caught. + * + * NOTE(review): the comparison is exact. If the server's argument parser + * accepts abbreviated long options, a shortened spelling would pass this + * check; confirm against the server CLI. + * + * @throws {Error} naming the offending argument. + */ +function assertSafeMcpArgs(args) { + for (const arg of args) { + const head = arg.split("=", 1)[0]; + if (BLOCKED_MCP_ARGS.has(head)) { + throw new Error( + `CodeClone MCP argument ${arg} is not allowed in the VS Code extension.` + ); + } + } +} + +/** Return a new array with STDIO_TRANSPORT_ARGS appended; `args` is not mutated. */ +function forceStdioTransportArgs(args) { + return [...args, ...STDIO_TRANSPORT_ARGS]; +} + +/** + * Resolve an absolute launcher path to its real path. + * + * A non-absolute command is returned unchanged. An error is thrown when the + * resolved target exists but is not a regular file. Any other failure of + * realpath or stat falls back to returning the command as given. + * + * @throws {Error} when the resolved launcher is not a regular file. + */ +function lockResolvedCommand(command) { + if (!path.isAbsolute(command)) { + return command; + } + try { + const real = fs.realpathSync(command); + const stat = fs.statSync(real); + if (!stat.isFile()) { + throw new Error(`Resolved launcher is not a regular file: ${real}`); + } + return real; + } catch (error) { + /* Re-throw only the error raised above, recognised by its message prefix. */ + if ( + error instanceof Error && + error.message.startsWith("Resolved launcher is not a regular file:") + ) { + throw error; + } + return command; + } +} + +/** + * True when the launcher's real path lies strictly inside the real path of + * the workspace root. Empty input and unresolvable paths yield false. + * + * NOTE(review): `startsWith("..")` also excludes a relative path whose + * first segment merely begins with two dots (for example "..tools/run"). + */ +function isLauncherWithinWorkspace(command, rootPath) { + const root = String(rootPath || "").trim(); + const launcher = String(command || "").trim(); + if (!root || !launcher) { + return false; + } + try { + const resolvedCommand = fs.realpathSync(launcher); + const resolvedRoot = fs.realpathSync(root); + const relative = path.relative(resolvedRoot, resolvedCommand); + return ( + relative !== "" && + !relative.startsWith("..") && + !path.isAbsolute(relative) + ); + } catch { + return false; + } +} + +/** + * True when the key is one of SPAWN_ENV_EXACT_KEYS or starts with one of + * SPAWN_ENV_PREFIXES. Both comparisons are case-sensitive. + */ +function spawnEnvAllowsKey(key) { + if (SPAWN_ENV_EXACT_KEYS.has(key)) { + return true; + } + return
SPAWN_ENV_PREFIXES.some((prefix) => key.startsWith(prefix)); +} + +function spawnEnvForMcp(workspaceRoot, baseEnv = process.env) { + /** @type {NodeJS.ProcessEnv} */ + const env = {}; + for (const [key, value] of Object.entries(baseEnv)) { + if (typeof value === "string" && spawnEnvAllowsKey(key)) { + env[key] = value; + } + } + const root = String(workspaceRoot || "").trim(); + if (root && !String(env.CODECLONE_WORKSPACE_ROOT || "").trim()) { + env.CODECLONE_WORKSPACE_ROOT = root; + } + return env; +} + function normalizedLaunchSpec(spec) { const command = String(spec?.command || "").trim(); if (!command) { throw new Error("CodeClone MCP launcher command must not be empty."); } - const args = Array.isArray(spec?.args) + validateConfiguredCommand(command); + const userArgs = Array.isArray(spec?.args) ? spec.args .filter((value) => typeof value === "string") .map((value) => value.trim()) .filter(Boolean) : []; + assertSafeMcpArgs(userArgs); + const args = forceStdioTransportArgs(userArgs); const cwd = String(spec?.cwd || "").trim(); if (!cwd) { throw new Error("CodeClone MCP launcher cwd must not be empty."); } const source = String(spec?.source || "").trim(); - return {command, args, cwd, source}; + return { + command: lockResolvedCommand(command), + args, + cwd, + source, + }; } function trimTail(value, maxChars) { @@ -395,9 +535,11 @@ module.exports = { analysisThresholdOverrides, compareCodeCloneVersions, customAnalysisThresholds, + isLauncherWithinWorkspace, isMinimumSupportedCodeCloneVersion, launchSpecOrigin, locationsNeedDetailHydration, + lockResolvedCommand, normalizedLaunchSpec, normalizeAnalysisProfile, parseUtcTimestamp, @@ -408,8 +550,10 @@ module.exports = { resolveAnalysisSettings, sameAnalysisSettings, signedInteger, + spawnEnvForMcp, staleMessage, trimTail, unsupportedVersionMessage, + validateConfiguredCommand, workspaceLocalLauncherCandidates, }; diff --git a/extensions/vscode-codeclone/src/trajectoryViewerRenderer.js 
b/extensions/vscode-codeclone/src/trajectoryViewerRenderer.js new file mode 100644 index 00000000..a7cb30f8 --- /dev/null +++ b/extensions/vscode-codeclone/src/trajectoryViewerRenderer.js @@ -0,0 +1,1096 @@ +"use strict"; + +const {safeArray, safeObject} = require("./formatters"); +const {SHARED_STYLES, escapeHtml, formatDurationSeconds} = require("./workspaceInsightsRenderer"); + +/** + * @param {unknown} value + */ +function pillClassForOutcome(value) { + const outcome = String(value || "").toLowerCase(); + if (outcome === "accepted" || outcome === "accepted_with_external_changes") { + return "pill pill-ok"; + } + if (outcome === "violated" || outcome === "blocked") { + return "pill pill-bad"; + } + return "pill pill-warn"; +} + +/** + * @param {unknown} value + */ +function pillClassForSeverity(value) { + return String(value) === "error" ? "pill pill-bad" : "pill pill-warn"; +} + +/** + * @param {object | null | undefined} status + */ +function statusSection(status) { + const root = safeObject(status); + if (!root) { + return '

    No trajectory status available.

    '; + } + const latest = safeObject(root.latest_projection); + const latestText = + latest && latest.finished_at_utc + ? `${latest.finished_at_utc} · ${latest.workflows_seen ?? 0} workflows · +${latest.created ?? 0}/~${latest.updated ?? 0}` + : "none"; + return [ + '', + "", + ``, + ``, + "", + "
    Stored trajectories${escapeHtml(String(root.trajectory_count ?? 0))}
    Latest projection${escapeHtml(latestText)}
    ", + ].join("\n"); +} + +/** + * @param {object | null | undefined} agents + */ +function agentsSection(agents) { + const root = safeObject(agents); + if (!root) { + return ""; + } + const rows = safeArray(root.agents); + if (rows.length === 0) { + return '

    Agents

    No agent-labeled trajectories yet. Rebuild trajectories after audit events include agent_label.

    '; + } + const body = rows + .map((raw) => { + const row = safeObject(raw); + if (!row) { + return ""; + } + return [ + "", + `${escapeHtml(String(row.agent_label || "?"))}`, + `${escapeHtml(String(row.trajectory_count ?? 0))}`, + `${escapeHtml(String(row.intent_count ?? 0))}`, + `${escapeHtml(String(row.failed_outcome_count ?? 0))}`, + `${escapeHtml(String(row.anomaly_count ?? 0))}`, + `${escapeHtml(String(row.incident_total ?? 0))}`, + "", + ].join(""); + }) + .join("\n"); + return [ + "

    Agents

    ", + `

    ${escapeHtml(String(root.agent_count ?? 0))} agents · ${escapeHtml(String(root.trajectory_count ?? 0))} trajectories · ${escapeHtml(String(root.unlabeled_trajectory_count ?? 0))} unlabeled

    `, + '', + "", + `${body}`, + "
    AgentTrajectoriesIntentsFailedAnomaliesIncidents
    ", + ].join("\n"); +} + +/** + * Renders the anomalies section of the dashboard. + * Returns an empty string when safeObject() yields nothing for anomalies. + * Otherwise emits an optional banner (only when a summary object is present) + * followed by either a no-anomalies placeholder or one card per entry of + * trajectories. + * NOTE(review): the banner template literal is empty in this copy - its markup + * is not visible here. + * + * @param {object | null | undefined} anomalies + * @returns {string} + */ +function anomaliesSection(anomalies) { + const root = safeObject(anomalies); + if (!root) { + return ""; + } + const summary = safeObject(root.summary); + const banner = summary + ? `` + : ""; + const items = safeArray(root.trajectories); + if (items.length === 0) { + return ["

    Anomalies

    ", banner, '

    No anomalies detected in stored trajectories.

    '].join("\n"); + } + const cards = items + .map((raw) => { + const item = safeObject(raw); + if (!item) { + return ""; + } + const outcome = String(item.outcome || "?"); + const tier = String(item.quality_tier || "?"); + const agent = item.agent_label ? ` · ${item.agent_label}` : ""; + const tags = safeArray(item.anomalies) + .map((tagRaw) => { + const tag = safeObject(tagRaw); + if (!tag) { + return ""; + } + return `
  • ${escapeHtml(String(tag.severity || "?"))} ${escapeHtml(String(tag.kind || "?"))} — ${escapeHtml(String(tag.message || ""))}
  • `; + }) + .join(""); + return [ + '
    ', + `
    ${escapeHtml(String(item.trajectory_id || "?"))} ${escapeHtml(outcome)} ${escapeHtml(tier)}
    `, + `

    ${escapeHtml(String(item.summary || ""))}${escapeHtml(agent)}

    `, + tags ? `
      ${tags}
    ` : "", + "
    ", + ].join(""); + }) + .join("\n"); + return ["

    Anomalies

    ", banner, `
    ${cards}
    `].join("\n"); +} + +/** + * @param {object | null | undefined} payload + * @param {string} workspaceName + * @param {string} nonce + */ +function renderTrajectoryDashboardHtml(payload, workspaceName, nonce) { + const root = safeObject(payload); + const extraStyles = [ + ".card-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(280px,1fr));gap:12px;margin:12px 0}", + ".card{border:1px solid var(--vscode-widget-border,#444);border-radius:8px;padding:12px;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.08))}", + ".card-head{display:flex;flex-wrap:wrap;gap:8px;align-items:center;margin-bottom:8px}", + ".tag-list{margin:8px 0 0;padding-left:18px}", + ".pill-ok{background:var(--vscode-testing-iconPassed,#2ea043);color:#fff}", + ".pill-bad{background:var(--vscode-inputValidation-errorBorder,#f14c4c);color:#fff}", + ".pill-warn{background:var(--vscode-inputValidation-warningBorder,#cca700);color:#000}", + ].join(""); + const body = [ + statusSection(safeObject(root?.status)), + agentsSection(safeObject(root?.agents)), + anomaliesSection(safeObject(root?.anomalies)), + ].join("\n"); + return [ + "", + '', + "", + '', + '', + ``, + ``, + "", + "", + '
    ', + "

    Trajectory Dashboard

    ", + `

    Workspace: ${escapeHtml(workspaceName)} · mirrors codeclone memory trajectory dashboard

    `, + "
    ", + body, + '

    Read-only forensics over Engineering Memory trajectories. Rebuild with codeclone memory trajectory rebuild after new audit events.

    ', + "", + "", + ].join("\n"); +} + +/** + * Turns an underscore-separated identifier into space-separated words with + * the first character of each word upper-cased. Empty segments are dropped + * and a falsy value yields an empty string. + * + * @param {unknown} value + * @returns {string} + */ +function humanizeLabel(value) { + return String(value || "") + .split("_") + .filter(Boolean) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(" "); +} + +/** + * Formats a timestamp as date, a middle dot, time and a UTC suffix, based on + * Date#toISOString with the T separator and the millisecond/Z tail replaced. + * Returns an empty string for blank input and the trimmed input unchanged + * when Date cannot parse it. + * + * @param {unknown} iso + * @returns {string} + */ +function formatUtcTimestamp(iso) { + const text = String(iso || "").trim(); + if (!text) { + return ""; + } + const date = new Date(text); + if (Number.isNaN(date.getTime())) { + return text; + } + return date.toISOString().replace("T", " · ").replace(/\.\d{3}Z$/, " UTC"); +} + +/** + * Joins the formatted start and finish timestamps with an arrow. + * When only one side formats to a non-empty string that side is returned + * alone; when neither does the result is the word unknown. + * + * @param {unknown} started + * @param {unknown} finished + * @returns {string} + */ +function formatTimeRange(started, finished) { + const startText = formatUtcTimestamp(started); + const finishText = formatUtcTimestamp(finished); + if (startText && finishText) { + return `${startText} → ${finishText}`; + } + return startText || finishText || "unknown"; +} + +/** + * Whole seconds elapsed between two timestamps parsed with Date.parse. + * Returns null when either timestamp fails to parse; a finish earlier than + * the start is clamped to 0 rather than reported as negative. + * + * @param {unknown} started + * @param {unknown} finished + * @returns {number | null} + */ +function trajectoryDurationSeconds(started, finished) { + const startMs = Date.parse(String(started || "")); + const finishMs = Date.parse(String(finished || "")); + if (Number.isNaN(startMs) || Number.isNaN(finishMs)) { + return null; + } + return Math.max(0, Math.floor((finishMs - startMs) / 1000)); +} + +/** + * Builds the trajectory duration row: the formatted duration followed by the + * time range, or only the time range when the duration cannot be computed. + * NOTE(review): the markup inside these string literals is not visible in + * this copy. + * NOTE(review): formatDurationSeconds (workspaceInsightsRenderer.js) returns + * the word expired for values <= 0, so a trajectory lasting under one second + * is rendered as expired here - confirm that is intended. + * + * @param {unknown} started + * @param {unknown} finished + * @returns {string} + */ +function durationTableRow(started, finished) { + const seconds = trajectoryDurationSeconds(started, finished); + const range = formatTimeRange(started, finished); + if (seconds === null) { + return `Trajectory duration(${escapeHtml(range)})`; + } + return [ + "Trajectory duration", + `${escapeHtml(formatDurationSeconds(seconds))} `, + `(${escapeHtml(range)})`, + "", + ].join(""); +} + +/** + * Maps a quality score to a CSS class: 90 and above is quality-high, 70 and + * above is quality-mid, anything lower is quality-low. A value that Number() + * cannot turn into a finite number falls back to quality-mid. + * + * @param {unknown} score + * @returns {string} + */ +function qualityScoreClass(score) { + const value = Number(score); + if (!Number.isFinite(value)) { + return "quality-mid"; + } + if (value >= 90) { + return "quality-high"; + } + if (value >= 70) { + return 
"quality-mid"; + } + return "quality-low"; +} + +/** + * Builds a small gauge for value relative to max. + * Returns an empty string when either number is not finite or max is <= 0; + * otherwise the fill percentage is clamped to the 0-100 range. + * NOTE(review): the gauge markup is not visible in this copy, so how pct and + * fillClass are used cannot be confirmed from here. + * + * @param {number} value + * @param {number} max + * @param {string} fillClass + * @returns {string} + */ +function microGauge(value, max, fillClass) { + const numeric = Number(value); + const ceiling = Number(max); + if (!Number.isFinite(numeric) || !Number.isFinite(ceiling) || ceiling <= 0) { + return ""; + } + const pct = Math.max(0, Math.min(100, (numeric / ceiling) * 100)); + return [ + '", + ].join(""); +} + +/** + * Maps a complexity score to a CSS class: 70 and above is complexity-high, + * 35 and above is complexity-mid, anything lower is complexity-low. A value + * that Number() cannot turn into a finite number falls back to complexity-mid. + * + * @param {unknown} score + * @returns {string} + */ +function complexityScoreClass(score) { + const value = Number(score); + if (!Number.isFinite(value)) { + return "complexity-mid"; + } + if (value >= 70) { + return "complexity-high"; + } + if (value >= 35) { + return "complexity-mid"; + } + return "complexity-low"; +} + +/** + * Renders the complexity calculation breakdown read from + * contract.complexity_calculation. Returns an empty string when that object + * or its lines list is missing or empty. A line counts as at-cap when its + * contribution is >= its cap and the cap is greater than 0. + * + * @param {unknown} contractRaw + * @returns {string} + */ +function complexityCalculationDetails(contractRaw) { + const contract = safeObject(contractRaw); + const calculation = safeObject(contract?.complexity_calculation); + if (!calculation) { + return ""; + } + const lines = safeArray(calculation.lines); + if (lines.length === 0) { + return ""; + } + const rows = lines + .map((raw) => { + const line = safeObject(raw); + if (!line) { + return ""; + } + const rawValue = line.raw ?? 0; + const unit = line.unit ? String(line.unit) : ""; + const contribution = line.contribution ?? 0; + const cap = line.cap ?? ""; + const atCap = Number(contribution) >= Number(cap) && Number(cap) > 0; + return [ + ``, + `${escapeHtml(String(line.label || line.id || ""))}`, + `${escapeHtml(String(rawValue))}${unit ? ` ${escapeHtml(unit)}` : ""}`, + `${escapeHtml(String(contribution))} / ${escapeHtml(String(cap))}`, + "", + ].join(""); + }) + .filter(Boolean) + .join(""); + const formula = calculation.formula + ? `

    ${escapeHtml(String(calculation.formula))}

    ` + : ""; + const hint = calculation.hint + ? `

    ${escapeHtml(String(calculation.hint))}

    ` + : ""; + const bandLabel = calculation.band_label ? String(calculation.band_label) : ""; + return [ + '
    ', + "Show calculation", + formula, + hint, + '', + rows, + '', + "", + ``, + ``, + "", + "
    Total${bandLabel ? escapeHtml(bandLabel) : ""}${escapeHtml(String(calculation.complexity_score ?? contract.complexity_score ?? ""))}
    ", + "
    ", + ].join(""); +} + +/** + * @param {unknown} contractRaw + */ +function qualityCalculationDetails(contractRaw) { + const contract = safeObject(contractRaw); + const calculation = safeObject(contract?.calculation); + if (!calculation) { + return ""; + } + const lines = safeArray(calculation.lines); + if (lines.length === 0) { + return ""; + } + const rows = lines + .map((raw) => { + const line = safeObject(raw); + if (!line) { + return ""; + } + const limits = line.limits_quality === true; + const rowClass = limits ? "calc-row-limit" : ""; + const marker = line.pass === true ? "pass" : "fail"; + return [ + ``, + `${escapeHtml(String(line.label || line.id || ""))}`, + `${escapeHtml(String(line.score ?? ""))}`, + `${limits ? "← limits score" : ""}`, + "", + ].join(""); + }) + .filter(Boolean) + .join(""); + const formula = calculation.formula + ? `

    ${escapeHtml(String(calculation.formula))}

    ` + : ""; + return [ + '
    ', + "Show calculation", + formula, + '', + rows, + '', + "", + ``, + "", + "", + "
    Total${escapeHtml(String(calculation.quality_score ?? contract.quality_score ?? ""))}
    ", + "
    ", + ].join(""); +} + +/** + * @param {unknown} contractRaw + */ +function complexityFactorsList(contractRaw) { + const contract = safeObject(contractRaw); + const calculation = safeObject(contract?.complexity_calculation); + const lines = safeArray(calculation?.lines); + if (lines.length === 0) { + return '

    Complexity breakdown unavailable.

    '; + } + const items = lines + .map((raw) => { + const line = safeObject(raw); + if (!line) { + return ""; + } + const contribution = line.contribution ?? 0; + const cap = line.cap ?? 100; + const fillClass = complexityScoreClass( + cap > 0 ? (Number(contribution) / Number(cap)) * 100 : 0, + ); + return [ + '
  • ', + `${escapeHtml(String(line.label || line.id || ""))}`, + `${escapeHtml(String(line.raw ?? 0))}`, + `${escapeHtml(String(contribution))}`, + microGauge(contribution, cap, fillClass), + "
  • ", + ].join(""); + }) + .filter(Boolean) + .join(""); + const bandLabel = calculation?.band_label ? String(calculation.band_label) : ""; + const hint = calculation?.hint ? String(calculation.hint) : ""; + return [ + bandLabel ? `

    ${escapeHtml(bandLabel)} complexity

    ` : "", + hint ? `

    ${escapeHtml(hint)}

    ` : "", + `
      ${items}
    `, + ].join(""); +} + +/** + * @param {unknown} contractRaw + */ +function trajectoryAnalysisSection(contractRaw) { + const contract = safeObject(contractRaw); + const score = contract?.quality_score ?? null; + if (score === null || score === undefined || score === "") { + return ""; + } + const components = safeArray(contract?.components); + const checklist = components + .map((raw) => { + const item = safeObject(raw); + if (!item) { + return ""; + } + const passed = item.pass === true; + const mark = passed ? "✓" : "✗"; + const klass = passed ? "quality-check-pass" : "quality-check-fail"; + const detail = item.label ? String(item.label) : String(item.id || ""); + return [ + `
  • `, + ``, + `${escapeHtml(detail)}`, + `${escapeHtml(String(item.score ?? ""))}`, + "
  • ", + ].join(""); + }) + .filter(Boolean) + .join(""); + return [ + '
    ', + '
    ', + '
    Contract gates
    ', + checklist + ? `
      ${checklist}
    ` + : '

    Contract breakdown unavailable.

    ', + qualityCalculationDetails(contract), + "
    ", + '
    ', + '
    Complexity factors
    ', + complexityFactorsList(contract), + complexityCalculationDetails(contract), + "
    ", + "
    ", + ].join(""); +} + +/** + * @param {string} label + * @param {string} valueHtml + * @param {string} [gaugeHtml] + * @param {string} [extraClass] + * @param {string} [title] + */ +function passportMetricCell(label, valueHtml, gaugeHtml = "", extraClass = "", title = "") { + const titleAttr = title ? ` title="${escapeHtml(title)}"` : ""; + return [ + `
    `, + `
    ${escapeHtml(label)}
    `, + `
    ${valueHtml}
    `, + gaugeHtml, + "
    ", + ].join(""); +} + +/** + * @param {object | null | undefined} root + */ +function passportStrip(root) { + if (!root) { + return ""; + } + const contract = safeObject(root.quality_contract); + const quality = contract?.quality_score ?? root.quality_score; + const complexity = contract?.complexity_score ?? root.complexity_score; + const complexityCalc = safeObject(contract?.complexity_calculation); + const bandLabel = complexityCalc?.band_label ? String(complexityCalc.band_label) : ""; + const durationSeconds = + contract?.duration_seconds ?? + root.duration_seconds ?? + trajectoryDurationSeconds(root.started_at_utc, root.finished_at_utc); + const durationText = + durationSeconds !== null && durationSeconds !== undefined && durationSeconds !== "" + ? formatDurationSeconds(Number(durationSeconds)) + : "—"; + const durationTitle = formatTimeRange(root.started_at_utc, root.finished_at_utc); + const qualityValue = + quality !== null && quality !== undefined && quality !== "" + ? `${escapeHtml(String(quality))}/100` + : "—"; + const complexityValue = + complexity !== null && complexity !== undefined && complexity !== "" + ? [ + `${escapeHtml(String(complexity))}/100`, + bandLabel ? `${escapeHtml(bandLabel)}` : "", + ].join("") + : "—"; + return [ + '
    ', + passportMetricCell( + "Contract quality", + qualityValue, + "", + "", + "Deterministic change-contract adherence (not code quality).", + ), + passportMetricCell("Complexity", complexityValue), + passportMetricCell( + "Duration", + escapeHtml(durationText), + "", + "", + durationTitle, + ), + passportMetricCell("Events", escapeHtml(String(root.event_count ?? 0))), + passportMetricCell("Steps", escapeHtml(String(root.step_count ?? 0))), + passportMetricCell("Incidents", escapeHtml(String(root.incident_count ?? 0))), + passportMetricCell("Evidence", escapeHtml(String(root.evidence_count ?? 0))), + "
    ", + ].join(""); +} + +/** + * @param {unknown} score + */ +function qualityScoreSection(score) { + if (score === null || score === undefined || score === "") { + return ""; + } + return [ + '
    ', + '
    Contract quality
    ', + `
    `, + `${escapeHtml(String(score))}/100`, + "
    ", + "
    ", + ].join(""); +} + +/** + * Returns the summary of the first step whose event_type is intent.declared + * and whose summary is truthy; an empty string when no such step exists. + * + * @param {unknown} stepsRaw + * @returns {string} + */ +function intentDescriptionFromSteps(stepsRaw) { + for (const raw of safeArray(stepsRaw)) { + const step = safeObject(raw); + if (!step) { + continue; + } + const eventType = String(step.event_type || ""); + if (eventType === "intent.declared" && step.summary) { + return String(step.summary); + } + } + return ""; +} + +/** + * Extracts everything after the first first_summary= marker in a summary + * string, trimmed. Returns an empty string when the marker is absent. + * The rest of the string is taken as-is, so any fields following the marker + * are included in the result. + * + * @param {unknown} summary + * @returns {string} + */ +function firstSummaryFromMachineSummary(summary) { + const text = String(summary || ""); + const marker = "first_summary="; + const index = text.indexOf(marker); + if (index < 0) { + return ""; + } + return text.slice(index + marker.length).trim(); +} + +/** + * Chooses the pill CSS classes for a patch-trail status (lower-cased first). + * For kind scope: clean is ok, violated is bad, expanded is warn. + * For any other kind: accepted and accepted_with_external_changes are ok, + * unverified is warn, violated and blocked are bad. + * Unrecognised statuses give pill-muted in both cases. + * + * @param {unknown} value + * @param {"scope"|"verification"} kind + * @returns {string} + */ +function pillClassForTrailStatus(value, kind) { + const status = String(value || "").toLowerCase(); + if (kind === "scope") { + if (status === "clean") { + return "pill pill-ok"; + } + if (status === "violated") { + return "pill pill-bad"; + } + if (status === "expanded") { + return "pill pill-warn"; + } + return "pill pill-muted"; + } + if (status === "accepted" || status === "accepted_with_external_changes") { + return "pill pill-ok"; + } + if (status === "unverified") { + return "pill pill-warn"; + } + if (status === "violated" || status === "blocked") { + return "pill pill-bad"; + } + return "pill pill-muted"; +} + +/** + * Builds the visible text for a patch-trail status pill: a Scope prefix for + * kind scope, otherwise a Verification prefix with underscores in the status + * replaced by spaces. A falsy status is shown as a question mark. + * + * @param {unknown} value + * @param {unknown} kind + * @returns {string} + */ +function trailStatusLabel(value, kind) { + const status = String(value || "?"); + if (kind === "scope") { + return `Scope ${status}`; + } + return `Verification ${status.replace(/_/g, " ")}`; +} + +/** + * Builds one stat card showing a value above its label; extraClass, when + * given, is appended after the stat-card class. + * NOTE(review): the card markup is not visible in this copy. + * + * @param {unknown} value + * @param {string} label + * @param {string} [extraClass] + * @returns {string} + */ +function statCard(value, label, extraClass = "") { + const klass = extraClass ? ` stat-card ${extraClass}` : " stat-card"; + return [ + `
    `, + `
    ${escapeHtml(String(value ?? 0))}
    `, + `
    ${escapeHtml(label)}
    `, + "
    ", + ].join(""); +} + +/** + * @param {object | null | undefined} patchSummary + */ +function patchTrailSection(patchSummary) { + const root = safeObject(patchSummary); + if (!root) { + return ""; + } + const counts = safeObject(root.counts) || {}; + const declared = counts.declared ?? 0; + const changed = counts.changed ?? 0; + const untouched = counts.untouched_in_declared ?? 0; + const unexpected = counts.unexpected ?? 0; + const forbidden = counts.forbidden_touched ?? 0; + const statusRow = [ + root.scope_check_status + ? `${escapeHtml(trailStatusLabel(root.scope_check_status, "scope"))}` + : "", + root.verification_status + ? `${escapeHtml(trailStatusLabel(root.verification_status, "verification"))}` + : "", + ] + .filter(Boolean) + .join(""); + const countCells = [ + ["Declared", declared, ""], + ["Changed", changed, ""], + ["Untouched", untouched, untouched > 0 ? "patch-count-warn" : ""], + ["Unexpected", unexpected, unexpected > 0 ? "patch-count-bad" : "patch-count-ok"], + ]; + if (forbidden > 0) { + countCells.push(["Forbidden", forbidden, "patch-count-bad"]); + } + const countRow = countCells + .map( + ([label, value, klass]) => + `${escapeHtml(String(label))}${escapeHtml(String(value))}`, + ) + .join(""); + return [ + '
    ', + '
    ', + '

    Patch trail

    ', + statusRow ? `
    ${statusRow}
    ` : "", + "
    ", + `${countRow}
    `, + "
    ", + ].join("\n"); +} + +/** + * @param {object | null | undefined} root + */ +function trajectoryOverviewSection(root) { + if (!root) { + return ""; + } + const intentId = root.intent_id || String(root.workflow_id || "").replace(/^intent:/, ""); + const description = + intentDescriptionFromSteps(root.steps) || + firstSummaryFromMachineSummary(root.summary) || + ""; + const patchSummary = safeObject(root.patch_trail_summary); + const metaRows = [ + intentId + ? `Intent${escapeHtml(String(intentId))}` + : "", + root.workflow_id + ? `Workflow${escapeHtml(String(root.workflow_id))}` + : "", + root.agent_label + ? `Agent${escapeHtml(String(root.agent_label))}` + : "", + root.primary_run_id + ? `Primary run${escapeHtml(String(root.primary_run_id))}` + : "", + root.first_run_id && root.first_run_id !== root.primary_run_id + ? `First run${escapeHtml(String(root.first_run_id))}` + : "", + root.last_run_id && root.last_run_id !== root.primary_run_id + ? `Last run${escapeHtml(String(root.last_run_id))}` + : "", + ] + .filter(Boolean) + .join(""); + const descriptionBlock = description + ? [ + '
    ', + "

    Intent description

    ", + `

    ${escapeHtml(description)}

    `, + "
    ", + ].join("") + : ""; + return [ + passportStrip(root), + patchTrailSection(patchSummary), + trajectoryAnalysisSection(root.quality_contract) || qualityScoreSection(root.quality_score), + '', + metaRows, + "
    ", + descriptionBlock, + ].join("\n"); +} + +/** + * Compact QuickPick line from structured trajectory preview fields. + * + * Parts are joined with a middle dot: event count, incident count, then the + * contract score, agent label and formatted start time when present. + * When incident_count is null or undefined the count is recovered from an + * incidents=N token in the summary text, defaulting to 0. + * + * @param {object | null | undefined} item + * @returns {string} + */ +function formatTrajectoryPickDescription(item) { + const row = safeObject(item); + if (!row) { + return ""; + } + let incidents = row.incident_count; + if (incidents === null || incidents === undefined) { + const match = String(row.summary || "").match(/incidents=(\d+)/); + incidents = match ? match[1] : 0; + } + const parts = [`${row.event_count ?? 0} events`, `${incidents} incidents`]; + if (row.quality_score !== null && row.quality_score !== undefined) { + parts.push(`${row.quality_score}/100 contract`); + } + if (row.agent_label) { + parts.push(String(row.agent_label)); + } + if (row.started_at_utc) { + parts.push(formatUtcTimestamp(row.started_at_utc)); + } + return parts.join(" · "); +} + +/** + * Title for a timeline step: step_label, else event_type, else a question + * mark, with one trailing parenthesised suffix removed. + * + * @param {object | null | undefined} step + * @returns {string} + */ +function timelineStepTitle(step) { + const row = safeObject(step); + if (!row) { + return "?"; + } + const label = String(row.step_label || row.event_type || "?"); + return label.replace(/\s\([^)]+\)\s*$/, ""); +} + +/** + * Classifies a timeline step as ok, warn or bad from its status and + * event_type (both lower-cased). Status is matched exactly against the sets + * below; event_type is matched by substring against the hint lists. + * The bad check runs first, so it wins over warn: an event_type containing + * queue_blocked is bad, while other event types containing blocked are warn. + * A step with no match, or no usable step object, is ok. + * + * @param {object | null | undefined} step + * @returns {"ok"|"warn"|"bad"} + */ +function timelineStepTone(step) { + const row = safeObject(step); + if (!row) { + return "ok"; + } + const status = String(row.status || "").toLowerCase(); + const eventType = String(row.event_type || "").toLowerCase(); + const badStatuses = new Set([ + "violated", + "blocked", + "failed", + "error", + "not_reached", + "rejected", + ]); + const warnStatuses = new Set([ + "unverified", + "needs_attention", + "partial", + "abandoned", + "accepted_with_external_changes", + "warn", + "incident", + "queued", + "expired", + ]); + const badEventHints = [ + "violated", + "abuse", + "conflict", + "failed", + "queue_blocked", + ]; + const warnEventHints = ["expired", "recovered", "blocked"]; + if ( + badStatuses.has(status) || + badEventHints.some((hint) => 
eventType.includes(hint)) + ) { + return "bad"; + } + if ( + warnStatuses.has(status) || + warnEventHints.some((hint) => eventType.includes(hint)) + ) { + return "warn"; + } + return "ok"; +} + +/** + * Trimmed summary text for a timeline step. Returns an empty string when the + * step has no summary, or when the summary only repeats the status in the + * form review receipt: STATUS (compared case-insensitively). + * + * @param {object | null | undefined} step + * @returns {string} + */ +function timelineStepSummary(step) { + const row = safeObject(step); + if (!row || !row.summary) { + return ""; + } + const summary = String(row.summary).trim(); + const status = String(row.status || "").trim().toLowerCase(); + if (status && summary.toLowerCase() === `review receipt: ${status}`) { + return ""; + } + return summary; +} + +/** + * Display form of a step status: underscores replaced by spaces, or an empty + * string when the step has no status. + * + * @param {object | null | undefined} step + * @returns {string} + */ +function timelineStepStatusLabel(step) { + const row = safeObject(step); + if (!row || !row.status) { + return ""; + } + return String(row.status).replace(/_/g, " "); +} + +/** + * Renders the detail document for one trajectory: header pills, labels, the + * overview section and a numbered event timeline whose index badge is toned + * by timelineStepTone. Returns a short placeholder when safeObject() yields + * nothing for the trajectory. + * NOTE(review): the document markup is not visible in this copy, so how + * nonce is applied cannot be confirmed from here. + * + * @param {object | null | undefined} trajectory + * @param {string} workspaceName + * @param {string} nonce + * @returns {string} + */ +function renderTrajectoryDetailHtml(trajectory, workspaceName, nonce) { + const root = safeObject(trajectory); + if (!root) { + return "

    No trajectory detail.

    "; + } + const steps = safeArray(root.steps) + .map((raw, index) => { + const step = safeObject(raw); + if (!step) { + return ""; + } + const tone = timelineStepTone(step); + const title = timelineStepTitle(step); + const statusLabel = timelineStepStatusLabel(step); + const summary = timelineStepSummary(step); + return [ + '
    ', + `
    ${index + 1}
    `, + '
    ', + `
    ${escapeHtml(title)} #${escapeHtml(String(step.audit_sequence ?? "?"))}
    `, + summary ? `
    ${escapeHtml(summary)}
    ` : "", + step.created_at_utc + ? `
    ${escapeHtml(formatUtcTimestamp(step.created_at_utc))}
    ` + : "", + "
    ", + "
    ", + ].join(""); + }) + .join(""); + const labels = safeArray(root.labels) + .map((label) => `${escapeHtml(humanizeLabel(label))}`) + .join(" "); + const extraStyles = [ + ".timeline{display:flex;flex-direction:column;gap:10px;margin-top:16px}", + ".timeline-row{display:grid;grid-template-columns:28px 1fr;gap:10px;align-items:start}", + ".timeline-index{width:28px;height:28px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:.85em;font-weight:600;flex-shrink:0}", + ".timeline-index-ok{background:var(--vscode-testing-iconPassed,#2ea043);color:#fff}", + ".timeline-index-warn{background:var(--vscode-inputValidation-warningBorder,#cca700);color:#000}", + ".timeline-index-bad{background:var(--vscode-inputValidation-errorBorder,#f14c4c);color:#fff}", + ".label-row{display:flex;flex-wrap:wrap;gap:6px;margin:10px 0 4px}", + ".stats-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(96px,1fr));gap:10px;margin:14px 0 4px}", + ".stat-card{border:1px solid var(--vscode-widget-border,#444);border-radius:8px;padding:12px 10px;text-align:center;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.08))}", + ".stat-value{font-size:1.35em;font-weight:700;line-height:1.1;color:var(--vscode-foreground)}", + ".stat-label{font-size:.72em;color:var(--vscode-descriptionForeground);text-transform:uppercase;letter-spacing:.05em;margin-top:6px}", + ".passport-rail{display:grid;grid-template-columns:minmax(88px,1.2fr) minmax(88px,1.2fr) repeat(5,minmax(52px,1fr));gap:0;margin:10px 0 12px;border:1px solid var(--vscode-widget-border,#444);border-radius:6px;overflow:hidden;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.04))}", + ".passport-cell{padding:7px 8px;border-right:1px solid var(--vscode-widget-border,#444);min-width:0}", + ".passport-cell:last-child{border-right:none}", + 
".passport-cell-label{font-size:.62em;font-weight:600;text-transform:uppercase;letter-spacing:.05em;color:var(--vscode-descriptionForeground);margin-bottom:3px;line-height:1.1}", + ".passport-cell-value{font-size:.92em;font-weight:600;font-variant-numeric:tabular-nums;line-height:1.2}", + ".passport-band{display:block;margin-top:2px;font-size:.72em;font-weight:500;color:var(--vscode-descriptionForeground);line-height:1.1}", + ".micro-gauge{display:block;width:100%;height:3px;flex-shrink:0}", + ".micro-gauge-track{fill:var(--vscode-widget-border,#444);opacity:.45}", + ".micro-gauge-fill.quality-high{fill:var(--vscode-testing-iconPassed,#2ea043)}", + ".micro-gauge-fill.quality-mid{fill:var(--vscode-inputValidation-warningBorder,#cca700)}", + ".micro-gauge-fill.quality-low{fill:var(--vscode-inputValidation-errorBorder,#f14c4c)}", + ".micro-gauge-fill.complexity-high{fill:var(--vscode-textLink-foreground,#3794ff)}", + ".micro-gauge-fill.complexity-mid{fill:var(--vscode-foreground);opacity:.55}", + ".micro-gauge-fill.complexity-low{fill:var(--vscode-descriptionForeground);opacity:.45}", + ".analysis-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(240px,1fr));gap:12px;margin:12px 0 4px}", + ".analysis-panel{padding:10px 12px;border:1px solid var(--vscode-widget-border,#444);border-radius:8px;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.04))}", + ".factor-list{list-style:none;margin:8px 0 0;padding:0;display:flex;flex-direction:column;gap:8px}", + ".factor-row{display:grid;grid-template-columns:1fr auto auto;grid-template-rows:auto auto;gap:2px 10px;align-items:center;font-size:.88em}", + ".factor-label{grid-column:1;color:var(--vscode-foreground)}", + ".factor-value{grid-column:2;font-variant-numeric:tabular-nums;color:var(--vscode-descriptionForeground)}", + ".factor-contrib{grid-column:3;font-weight:600;font-variant-numeric:tabular-nums}", + ".factor-row .micro-gauge{grid-column:1/-1;margin-top:2px}", + 
".factor-band,.factor-hint{margin:6px 0 0;font-size:.85em}", + ".patch-trail-table{width:100%;border-collapse:collapse;margin-top:0}", + ".patch-trail-table td{padding:8px 10px;border:1px solid var(--vscode-widget-border,#444);border-radius:6px;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.04))}", + ".patch-trail-table tr{display:grid;grid-template-columns:repeat(auto-fit,minmax(72px,1fr));gap:8px}", + ".patch-count{display:flex;flex-direction:column;gap:4px;text-align:center}", + ".patch-count-label{font-size:.68em;text-transform:uppercase;letter-spacing:.05em;color:var(--vscode-descriptionForeground)}", + ".patch-count-value{font-size:1.05em;font-weight:700;font-variant-numeric:tabular-nums}", + ".patch-count-warn .patch-count-value{color:var(--vscode-inputValidation-warningBorder,#cca700)}", + ".patch-count-bad .patch-count-value{color:var(--vscode-inputValidation-errorBorder,#f14c4c)}", + ".patch-count-ok .patch-count-value{color:var(--vscode-testing-iconPassed,#2ea043)}", + ".overview-table{margin-top:4px}", + ".intent-callout{border-left:3px solid var(--vscode-textLink-foreground,#3794ff);padding:10px 14px;margin:14px 0;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.08));border-radius:0 8px 8px 0}", + ".intent-callout h2,.section-heading,.timeline-heading{font-size:.78em;font-weight:600;text-transform:uppercase;letter-spacing:.06em;color:var(--vscode-descriptionForeground);margin:0 0 8px}", + ".intent-callout p{margin:0;line-height:1.45}", + ".patch-trail-section{margin:12px 0 4px;padding-top:12px;border-top:1px solid var(--vscode-widget-border,#444)}", + ".section-head{display:flex;flex-wrap:wrap;gap:10px 16px;align-items:center;justify-content:space-between;margin-bottom:8px}", + ".section-head .section-heading{margin:0}", + ".section-head .status-row{margin:0}", + ".pill-muted{background:var(--vscode-badge-background);color:var(--vscode-badge-foreground)}", + 
".pill-ok{background:var(--vscode-testing-iconPassed,#2ea043);color:#fff}", + ".pill-bad{background:var(--vscode-inputValidation-errorBorder,#f14c4c);color:#fff}", + ".pill-warn{background:var(--vscode-inputValidation-warningBorder,#cca700);color:#000}", + ".status-row{display:flex;flex-wrap:wrap;gap:8px;align-items:center;margin-top:8px}", + ".quality-score-box{padding:12px 14px;border:1px solid var(--vscode-widget-border,#444);border-radius:8px;background:var(--vscode-editor-inactiveSelectionBackground, rgba(127,127,127,.08))}", + ".quality-score-value{font-size:1.35em;font-weight:700;line-height:1.1}", + ".quality-score-denom{font-size:.55em;font-weight:600;color:var(--vscode-descriptionForeground);margin-left:2px}", + ".quality-checklist{list-style:none;margin:8px 0 0;padding:0;display:flex;flex-direction:column;gap:5px}", + ".quality-checklist li{display:grid;grid-template-columns:1.2em 1fr auto;gap:8px;align-items:baseline;font-size:.88em}", + ".quality-check-pass .quality-check-mark{color:var(--vscode-testing-iconPassed,#2ea043)}", + ".quality-check-fail .quality-check-mark{color:var(--vscode-inputValidation-errorBorder,#f14c4c)}", + ".quality-check-score{font-variant-numeric:tabular-nums;color:var(--vscode-descriptionForeground)}", + ".calculation-details{margin-top:10px}", + ".calculation-details summary{cursor:pointer;font-size:.85em;color:var(--vscode-textLink-foreground,#3794ff)}", + ".calc-formula,.calc-hint{margin:8px 0 0}", + ".calc-table{width:100%;border-collapse:collapse;margin-top:8px;font-size:.85em}", + ".calc-table td{padding:4px 6px;border-top:1px solid var(--vscode-widget-border,#444)}", + ".calc-label{color:var(--vscode-foreground)}", + ".calc-raw{text-align:right;font-variant-numeric:tabular-nums;color:var(--vscode-descriptionForeground)}", + ".calc-score{text-align:right;font-variant-numeric:tabular-nums;width:4.5em}", + ".calc-score.pass{color:var(--vscode-testing-iconPassed,#2ea043)}", + 
".calc-score.fail{color:var(--vscode-inputValidation-errorBorder,#f14c4c)}", + ".calc-flag{font-size:.85em;color:var(--vscode-descriptionForeground)}", + ".calc-row-limit td{background:var(--vscode-list-hoverBackground, rgba(127,127,127,.12))}", + ".calc-row-cap td{background:var(--vscode-list-hoverBackground, rgba(127,127,127,.08))}", + ".calc-row-total td{font-weight:600;border-top:2px solid var(--vscode-widget-border,#444)}", + ".quality-high{color:var(--vscode-testing-iconPassed,#2ea043)}", + ".quality-mid{color:var(--vscode-inputValidation-warningBorder,#cca700)}", + ".quality-low{color:var(--vscode-inputValidation-errorBorder,#f14c4c)}", + ".complexity-high{color:var(--vscode-textLink-foreground,#3794ff)}", + ".complexity-mid{color:var(--vscode-foreground)}", + ".complexity-low{color:var(--vscode-descriptionForeground)}", + ].join(""); + return [ + "", + '', + "", + '', + ``, + ``, + "", + "", + '
    ', + `

    Trajectory ${escapeHtml(String(root.trajectory_id || ""))}

    `, + `

    Workspace: ${escapeHtml(workspaceName)}

    `, + '
    ', + `${escapeHtml(String(root.outcome || "?"))}`, + `${escapeHtml(String(root.quality_tier || "?"))}`, + "
    ", + labels ? `
    ${labels}
    ` : "", + trajectoryOverviewSection(root), + "
    ", + '
    ', + '

    Event timeline

    ', + steps || '

    No steps returned.

    ', + "
    ", + "", + "", + ].join("\n"); +} + +/** + * Builds a Markdown summary of the dashboard payload: a heading, the stored + * trajectory count when status is present, at most 20 agent rows and at most + * 10 anomaly trajectories. + * NOTE(review): agent_label, trajectory_id, outcome and quality_tier are + * interpolated without a fallback, so a missing field prints as undefined. + * + * @param {object | null | undefined} payload + * @returns {string} + */ +function renderTrajectoryDashboardMarkdown(payload) { + const root = safeObject(payload); + const lines = ["# Trajectory dashboard", ""]; + const status = safeObject(root?.status); + if (status) { + lines.push(`- Stored trajectories: ${status.trajectory_count ?? 0}`); + } + const agents = safeObject(root?.agents); + if (agents) { + lines.push("", "## Agents"); + for (const raw of safeArray(agents.agents).slice(0, 20)) { + const row = safeObject(raw); + if (!row) { + continue; + } + lines.push( + `- \`${row.agent_label}\`: ${row.trajectory_count ?? 0} trajectories, ${row.anomaly_count ?? 0} anomaly tags` + ); + } + } + const anomalies = safeObject(root?.anomalies); + if (anomalies) { + lines.push("", "## Anomalies"); + for (const raw of safeArray(anomalies.trajectories).slice(0, 10)) { + const item = safeObject(raw); + if (!item) { + continue; + } + lines.push(`- \`${item.trajectory_id}\` ${item.outcome}/${item.quality_tier}`); + } + } + return lines.join("\n"); +} + +module.exports = { + renderTrajectoryDashboardHtml, + renderTrajectoryDetailHtml, + renderTrajectoryDashboardMarkdown, + formatTrajectoryPickDescription, +}; diff --git a/extensions/vscode-codeclone/src/workspaceInsightsRenderer.js b/extensions/vscode-codeclone/src/workspaceInsightsRenderer.js new file mode 100644 index 00000000..28d26189 --- /dev/null +++ b/extensions/vscode-codeclone/src/workspaceInsightsRenderer.js @@ -0,0 +1,602 @@ +"use strict"; + +const {safeArray, safeObject} = require("./formatters"); + +/** + * Stringifies text and runs it through a chain of global replace calls. + * NOTE(review): presumably this maps HTML metacharacters to entities, but in + * this copy the replacement strings read as the raw characters and one + * replace call looks truncated - likely an extraction artifact. Confirm + * against the real file before relying on this text. + * + * @param {unknown} text + * @returns {string} + */ +function escapeHtml(text) { + return String(text) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +/** + * Audit payload_footprint_to_dict uses calls/tokens; session stats use call_count/total_tokens. 
+ * + * Reads the requested metric from either shape, preferring the audit-style + * field (tokens or calls) when it is not null or undefined. Returns 0 when + * safeObject() yields nothing for wf or when neither field is set. Any kind + * other than tokens is treated as calls. + * + * @param {object | null | undefined} wf + * @param {"tokens"|"calls"} kind + */ +function workflowMetric(wf, kind) { + const item = safeObject(wf); + if (!item) { + return 0; + } + const value = + kind === "tokens" + ? item.tokens ?? item.total_tokens + : item.calls ?? item.call_count; + return value === null || value === undefined ? 0 : value; +} + +/** + * Builds the aggregate footprint banner. Returns an empty string unless + * total_tokens is set and tool_calls is greater than 0. The collected parts + * are total tokens (en-US grouped), tool calls, then encoding, avg, p95 and + * max when present. + * NOTE(review): the returned template literal is empty in this copy, so the + * markup that wraps parts is not visible here. + * + * @param {object | null | undefined} footprint + * @returns {string} + */ +function footprintAggregateBanner(footprint) { + const fp = safeObject(footprint); + if (!fp) { + return ""; + } + const totalTokens = fp.total_tokens; + const toolCalls = fp.tool_calls ?? 0; + if ( + totalTokens === null || + totalTokens === undefined || + Number(toolCalls) <= 0 + ) { + return ""; + } + const parts = [ + `~${Number(totalTokens).toLocaleString("en-US")} total tokens`, + `${toolCalls} tool calls`, + fp.encoding ? String(fp.encoding) : null, + fp.avg_tokens !== null && fp.avg_tokens !== undefined + ? `avg ${fp.avg_tokens}` + : null, + fp.p95_tokens !== null && fp.p95_tokens !== undefined + ? `p95 ${fp.p95_tokens}` + : null, + fp.max_tokens !== null && fp.max_tokens !== undefined + ? 
`max ${fp.max_tokens}` + : null, + ].filter(Boolean); + return ``; +} + +/** + * Formats an age in seconds as a short relative string: seconds below one + * minute, whole minutes below one hour, then hours with the remaining + * minutes when non-zero. Returns the word unknown for null, undefined or + * negative input. Values below 60 are printed as given, without rounding. + * + * @param {number | null | undefined} seconds + * @returns {string} + */ +function formatAgeSeconds(seconds) { + if (seconds === null || seconds === undefined || seconds < 0) { + return "unknown"; + } + if (seconds < 60) { + return `${seconds}s ago`; + } + const minutes = Math.floor(seconds / 60); + if (minutes < 60) { + return `${minutes}m ago`; + } + const hours = Math.floor(minutes / 60); + const remainingMinutes = minutes % 60; + if (remainingMinutes) { + return `${hours}h${remainingMinutes}m ago`; + } + return `${hours}h ago`; +} + +/** + * Formats a duration in seconds as seconds below one minute, otherwise as + * whole minutes plus the remaining seconds when non-zero. Minutes are not + * rolled up into hours. Returns the word expired for any value <= 0, so + * callers that pass a genuinely zero-length duration get expired back. + * + * @param {number} seconds + * @returns {string} + */ +function formatDurationSeconds(seconds) { + if (seconds <= 0) { + return "expired"; + } + if (seconds < 60) { + return `${seconds}s`; + } + const minutes = Math.floor(seconds / 60); + const remainingSeconds = seconds % 60; + if (remainingSeconds) { + return `${minutes}m${remainingSeconds}s`; + } + return `${minutes}m`; +} + +/** + * @param {unknown} bytes + */ +function formatBytes(bytes) { + if (typeof bytes !== "number" || !Number.isFinite(bytes) || bytes < 0) { + return "unknown"; + } + if (bytes < 1024) { + return `${bytes} B`; + } + const units = ["KB", "MB", "GB", "TB"]; + let value = bytes / 1024; + let unitIndex = 0; + while (value >= 1024 && unitIndex < units.length - 1) { + value /= 1024; + unitIndex += 1; + } + const rounded = + value >= 10 ? 
Math.round(value) : Math.round(value * 10) / 10; + return `${rounded} ${units[unitIndex]}`; +} + +/** + * @param {string} health + */ +function workspaceHealthClass(health) { + return { + idle: "health-idle", + clean: "health-clean", + active: "health-active", + contested: "health-contested", + }[health] || "health-active"; +} + +/** + * @param {object} payload + */ +function sessionStatsBody(payload) { + const root = safeObject(payload); + const workspace = safeObject(root.workspace); + const counts = safeObject(root.counts); + const latest = safeObject(root.latest_run); + const audit = safeObject(root.audit); + const footprint = safeObject(root.token_footprint); + const agents = safeArray(root.agents); + const workflows = safeArray(root.top_workflows); + const health = String(workspace.health || "unknown"); + + const summaryRows = [ + ["Workspace health", `${escapeHtml(health)}`], + ["Live agents", escapeHtml(String(counts.live_agents ?? 0))], + ["Active intents", escapeHtml(String(counts.active_intents ?? 0))], + ["Visible intents", escapeHtml(String(counts.visible_intents ?? 0))], + ["Stale / expired / recoverable", `${escapeHtml(String(counts.stale ?? 0))} / ${escapeHtml(String(counts.expired ?? 0))} / ${escapeHtml(String(counts.recoverable ?? 0))}`], + ]; + + if (workspace.intent_registry_backend) { + summaryRows.push([ + "Intent registry", + `${escapeHtml(String(workspace.intent_registry_backend))} (${escapeHtml(String(workspace.intent_registry_storage || "—"))})`, + ]); + } + if (audit.enabled) { + summaryRows.push([ + "Audit storage", + escapeHtml(String(audit.storage || "enabled")), + ]); + } + + let latestHtml = '

    No cached report in .codeclone/report.json.

    '; + if (latest.cache_present && latest.run_id) { + const age = formatAgeSeconds( + typeof latest.age_seconds === "number" ? latest.age_seconds : null + ); + const parts = [ + `${escapeHtml(String(latest.run_id))}`, + escapeHtml(age), + ]; + if (latest.health !== null && latest.health !== undefined) { + parts.push(`health=${escapeHtml(String(latest.health))}`); + } + if (latest.findings !== null && latest.findings !== undefined) { + parts.push(`findings=${escapeHtml(String(latest.findings))}`); + } + if (latest.files !== null && latest.files !== undefined) { + parts.push(`${escapeHtml(String(latest.files))} files indexed`); + } + latestHtml = `

    ${parts.join(" · ")}

    `; + } + + const liveAgents = agents.filter((raw) => safeObject(raw)?.alive); + let agentsHtml = `

    ${liveAgents.length === 0 ? "No live agent processes with visible intents." : ""}

    `; + if (liveAgents.length > 0) { + const rows = liveAgents + .map((raw) => { + const agent = safeObject(raw); + const intents = safeArray(agent.intents); + const intentLines = intents + .map((intentRaw) => { + const intent = safeObject(intentRaw); + const files = safeArray(intent.allowed_files) + .slice(0, 2) + .map((file) => `${escapeHtml(String(file))}`) + .join(", "); + const extra = + safeArray(intent.allowed_files).length > 2 + ? ` (+${safeArray(intent.allowed_files).length - 2} more)` + : ""; + return [ + "", + `${escapeHtml(String(intent.intent_id || ""))}`, + `${escapeHtml(String(intent.status || ""))}`, + `${escapeHtml(String(intent.ownership || ""))}`, + `${escapeHtml(String(intent.scope_file_count ?? 0))}`, + `${escapeHtml(formatDurationSeconds(Number(intent.lease_remaining_seconds ?? 0)))}`, + `${files}${extra}`, + "", + ].join(""); + }) + .join(""); + const label = escapeHtml(String(agent.label || "unknown")); + const started = formatAgeSeconds( + Math.max(0, Math.floor(Date.now() / 1000) - Number(agent.start_epoch || 0)) + ); + return [ + `

    PID ${escapeHtml(String(agent.pid))} · ${label}

    `, + `

    Started ${escapeHtml(started)}

    `, + '', + "", + `${intentLines}
    IntentStatusOwnershipScopeLeaseAllowed files
    `, + ].join(""); + }) + .join(""); + agentsHtml = rows; + } + + let workflowsHtml = ""; + if (workflows.length > 0) { + const wfRows = workflows + .map((raw) => { + const wf = safeObject(raw); + const name = `${wf.workflow_kind || "workflow"}:${wf.workflow_id || "-"}`; + const tokens = workflowMetric(wf, "tokens"); + const calls = workflowMetric(wf, "calls"); + return [ + "", + `${escapeHtml(name)}`, + `~${escapeHtml(String(tokens))}`, + `${escapeHtml(String(calls))}`, + `${escapeHtml(String(wf.agent_label || "—"))}`, + "", + ].join(""); + }) + .join(""); + workflowsHtml = [ + "

    Top workflows (audit footprint)

    ", + '', + "", + `${wfRows}
    WorkflowTokensCallsAgent
    `, + ].join(""); + } + + let footprintHtml = ""; + if (footprint.total_tokens !== null && footprint.total_tokens !== undefined) { + const calls = footprint.tool_calls ?? 0; + if (Number(calls) > 0) { + footprintHtml = ``; + } + } + + const summaryTable = [ + '', + ...summaryRows.map( + ([label, value]) => + `` + ), + "
    ${escapeHtml(label)}${value}
    ", + ].join(""); + + return [ + summaryTable, + footprintHtml, + "

    Latest cached run

    ", + latestHtml, + "

    Live agents and intents

    ", + agentsHtml, + workflowsHtml, + ].join("\n"); +} + +/** + * @param {object} payload + */ +function auditTrailBody(payload) { + const root = safeObject(payload); + const status = String(root.status || "ok"); + const message = root.message ? String(root.message) : ""; + const database = safeObject(root.database); + const counts = safeObject(root.counts); + const timeRange = safeObject(root.time_range); + const tokenSummary = safeObject(root.token_summary); + const footprint = safeObject(root.payload_footprint); + const events = safeArray(root.events); + + let banner = ""; + if (status !== "ok") { + banner = ``; + } + + const metaRows = []; + if (database.path) { + metaRows.push(["Database", `${escapeHtml(String(database.path))}`]); + } + if (database.size_bytes !== null && database.size_bytes !== undefined) { + metaRows.push(["Size", escapeHtml(formatBytes(Number(database.size_bytes)))]); + } + if (database.retention_days !== null && database.retention_days !== undefined) { + metaRows.push(["Retention", `${escapeHtml(String(database.retention_days))} days`]); + } + metaRows.push( + ["Total events", escapeHtml(String(counts.total_events ?? 0))], + [ + "By kind", + `intents ${escapeHtml(String(counts.intent_events ?? 0))} · contracts ${escapeHtml(String(counts.contract_events ?? 0))} · receipts ${escapeHtml(String(counts.receipt_events ?? 0))} · violations ${escapeHtml(String(counts.violation_events ?? 
0))}`, + ] + ); + if (timeRange.oldest_event_utc || timeRange.latest_event_utc) { + metaRows.push([ + "Time range", + `${escapeHtml(String(timeRange.oldest_event_utc || "—"))} → ${escapeHtml(String(timeRange.latest_event_utc || "—"))}`, + ]); + } + if ( + tokenSummary.total_estimated_tokens !== null && + tokenSummary.total_estimated_tokens !== undefined + ) { + metaRows.push([ + "Token estimate", + `~${escapeHtml(String(tokenSummary.total_estimated_tokens))} (${escapeHtml(String(tokenSummary.token_encoding || "unknown"))}, ${escapeHtml(String(tokenSummary.token_event_count ?? 0))} events)`, + ]); + } + + const metaTable = [ + '', + ...metaRows.map( + ([label, value]) => + `` + ), + "
    ${escapeHtml(label)}${value}
    ", + ].join(""); + + let footprintHtml = ""; + if (footprint && Object.keys(footprint).length > 0) { + const aggregate = footprintAggregateBanner(footprint); + const top = safeArray(footprint.top_workflows); + const sections = []; + if (aggregate || top.length > 0) { + sections.push("

    Payload footprint (retention window)

    "); + if (aggregate) { + sections.push(aggregate); + } + } + if (top.length > 0) { + const rows = top + .map((raw) => { + const wf = safeObject(raw); + const tokens = workflowMetric(wf, "tokens"); + const calls = workflowMetric(wf, "calls"); + return [ + "", + `${escapeHtml(`${wf.workflow_kind || "workflow"}:${wf.workflow_id || "-"}`)}`, + `~${escapeHtml(String(tokens))}`, + `${escapeHtml(String(calls))}`, + "", + ].join(""); + }) + .join(""); + sections.push( + '', + `${rows}
    WorkflowTokensCalls
    ` + ); + } + footprintHtml = sections.join("\n"); + } + + let eventsHtml = '

    No recent events in this window.

    '; + if (events.length > 0) { + const rows = events + .map((raw) => { + const event = safeObject(raw); + const summary = escapeHtml( + String(event.summary || event.event_type || "event") + ); + const meta = [ + event.created_at_utc + ? escapeHtml(String(event.created_at_utc)) + : "", + event.severity ? escapeHtml(String(event.severity)) : "", + event.intent_id + ? `${escapeHtml(String(event.intent_id))}` + : "", + event.agent_label ? escapeHtml(String(event.agent_label)) : "", + ] + .filter(Boolean) + .join(" · "); + const tokens = + event.estimated_tokens !== null && + event.estimated_tokens !== undefined + ? ` ~${escapeHtml(String(event.estimated_tokens))} tok` + : ""; + return [ + "", + `${summary}`, + `${escapeHtml(String(event.event_type || ""))}`, + `${meta}${tokens}`, + "", + ].join(""); + }) + .join(""); + eventsHtml = [ + `

    Recent events (${events.length})

    `, + '', + "", + `${rows}
    SummaryTypeDetails
    `, + ].join(""); + } + + return [banner, metaTable, footprintHtml, eventsHtml].join("\n"); +} + +const SHARED_STYLES = [ + "body{font-family:var(--vscode-font-family);color:var(--vscode-editor-foreground);background:var(--vscode-editor-background);padding:16px 22px;line-height:1.5;margin:0}", + ".header{margin-bottom:14px}", + "h1{font-size:1.35em;margin:0 0 4px}", + "h2{font-size:1.05em;margin:20px 0 8px}", + "h3{font-size:.95em;margin:14px 0 6px}", + ".meta{color:var(--vscode-descriptionForeground);font-size:.9em}", + ".banner{margin:12px 0;padding:10px 12px;border-radius:6px;background:var(--vscode-textBlockQuote-background);border-left:3px solid var(--vscode-textLink-foreground)}", + ".banner-warn{border-left-color:var(--vscode-inputValidation-warningBorder);background:var(--vscode-inputValidation-warningBackground)}", + ".summary-table{width:100%;border-collapse:collapse;margin:8px 0 16px}", + ".summary-table th{text-align:left;padding:6px 12px 6px 0;color:var(--vscode-descriptionForeground);font-weight:600;vertical-align:top;white-space:nowrap;width:180px}", + ".summary-table td{padding:6px 0}", + ".data-table{width:100%;border-collapse:collapse;font-size:.92em;margin:8px 0}", + ".data-table th,.data-table td{border-bottom:1px solid var(--vscode-widget-border,#444);padding:6px 8px;text-align:left}", + ".data-table th{color:var(--vscode-descriptionForeground);font-weight:600}", + ".data-table .num{text-align:right;font-variant-numeric:tabular-nums}", + ".meta-cell{color:var(--vscode-descriptionForeground);font-size:.88em}", + ".pill{display:inline-block;padding:2px 10px;border-radius:10px;font-size:.85em;font-weight:600;text-transform:uppercase;letter-spacing:.03em}", + ".health-idle{background:var(--vscode-badge-background);color:var(--vscode-badge-foreground)}", + ".health-clean{background:var(--vscode-testing-iconPassed,#2ea043);color:#fff}", + ".health-active{background:var(--vscode-charts-blue,#1e90ff);color:#fff}", + 
".health-contested{background:var(--vscode-inputValidation-warningBorder,#cca700);color:#000}", + ".muted{color:var(--vscode-descriptionForeground);font-style:italic}", + "code{font-family:var(--vscode-editor-font-family)}", +].join(""); + +/** + * @param {object} payload + * @param {string} workspaceName + * @param {string} nonce + */ +function renderSessionStatsHtml(payload, workspaceName, nonce) { + const root = String(safeObject(payload)?.workspace?.root || ""); + const body = sessionStatsBody(payload); + return [ + "", + '', + "", + '', + '', + ``, + ``, + "", + "", + '
    ', + "

    Workspace Session Stats

    ", + `

    Workspace: ${escapeHtml(workspaceName)} · mirrors codeclone . --session-stats

    `, + root ? `

    ${escapeHtml(root)}

    ` : "", + "
    ", + body, + '

    IDE-only MCP tool — not exposed to agent clients on the default launcher.

    ', + "", + "", + ].join("\n"); +} + +/** + * @param {object} payload + * @param {string} workspaceName + * @param {string} nonce + */ +function renderAuditTrailHtml(payload, workspaceName, nonce) { + const body = auditTrailBody(payload); + return [ + "", + '', + "", + '', + '', + ``, + ``, + "", + "", + '
    ', + "

    Controller Audit Trail

    ", + `

    Workspace: ${escapeHtml(workspaceName)} · mirrors codeclone . --audit

    `, + "
    ", + body, + '

    Requires audit_enabled=true in pyproject.toml. IDE-only MCP tool.

    ', + "", + "", + ].join("\n"); +} + +/** + * @param {object} payload + */ +function renderSessionStatsMarkdown(payload) { + const root = safeObject(payload); + const workspace = safeObject(root.workspace); + const counts = safeObject(root.counts); + const latest = safeObject(root.latest_run); + const lines = [ + "# Workspace session stats", + "", + `- Workspace health: **${workspace.health || "unknown"}**`, + `- Live agents: ${counts.live_agents ?? 0}`, + `- Active intents: ${counts.active_intents ?? 0}`, + `- Visible intents: ${counts.visible_intents ?? 0}`, + `- Stale / expired / recoverable: ${counts.stale ?? 0} / ${counts.expired ?? 0} / ${counts.recoverable ?? 0}`, + ]; + if (latest.cache_present && latest.run_id) { + lines.push( + `- Latest run: \`${latest.run_id}\` (${formatAgeSeconds(latest.age_seconds)})` + ); + } else { + lines.push("- Latest run: none (no cached report)"); + } + const agents = safeArray(root.agents).filter((raw) => safeObject(raw)?.alive); + if (agents.length > 0) { + lines.push("", "## Live agents"); + for (const raw of agents) { + const agent = safeObject(raw); + lines.push(`- PID ${agent.pid} (${agent.label || "unknown"})`); + for (const intentRaw of safeArray(agent.intents)) { + const intent = safeObject(intentRaw); + lines.push( + ` - \`${intent.intent_id}\` ${intent.status} · ${intent.ownership} · lease ${formatDurationSeconds(Number(intent.lease_remaining_seconds ?? 0))}` + ); + } + } + } + return lines.join("\n"); +} + +/** + * @param {object} payload + */ +function renderAuditTrailMarkdown(payload) { + const root = safeObject(payload); + const status = String(root.status || "ok"); + const counts = safeObject(root.counts); + const lines = [ + "# Controller audit trail", + "", + `- Status: **${status}**`, + root.message ? `- Message: ${root.message}` : "", + `- Total events: ${counts.total_events ?? 0}`, + `- Intent / contract / receipt / violation: ${counts.intent_events ?? 0} / ${counts.contract_events ?? 
0} / ${counts.receipt_events ?? 0} / ${counts.violation_events ?? 0}`, + ].filter(Boolean); + const events = safeArray(root.events); + if (events.length > 0) { + lines.push("", "## Recent events"); + for (const raw of events.slice(0, 20)) { + const event = safeObject(raw); + lines.push( + `- ${event.created_at_utc || "?"} **${event.event_type || "event"}** — ${event.summary || ""}` + ); + } + } + return lines.join("\n"); +} + +module.exports = { + SHARED_STYLES, + escapeHtml, + workflowMetric, + footprintAggregateBanner, + formatAgeSeconds, + formatDurationSeconds, + formatBytes, + renderSessionStatsHtml, + renderAuditTrailHtml, + renderSessionStatsMarkdown, + renderAuditTrailMarkdown, + sessionStatsBody, + auditTrailBody, +}; diff --git a/extensions/vscode-codeclone/test/memoryBulkSelection.test.js b/extensions/vscode-codeclone/test/memoryBulkSelection.test.js new file mode 100644 index 00000000..66e176ba --- /dev/null +++ b/extensions/vscode-codeclone/test/memoryBulkSelection.test.js @@ -0,0 +1,136 @@ +"use strict"; + +const test = require("node:test"); +const assert = require("node:assert/strict"); + +const { + buildBulkConfirmDetail, + dedupeGovernanceNodes, + distinctRecordTypes, + formatBulkResultSummary, + recordIdFromGovernanceNode, + recordIdFromTreeItemId, + resolveGovernanceTargets, +} = require("../src/memoryBulkSelection"); + +test("recordIdFromTreeItemId extracts draft and stale ids", () => { + assert.equal(recordIdFromTreeItemId("memory-draft-mem-abc"), "mem-abc"); + assert.equal(recordIdFromTreeItemId("memory-stale-mem-abc"), "mem-abc"); + assert.equal(recordIdFromTreeItemId("memory-status"), ""); +}); + +test("recordIdFromGovernanceNode accepts draft and stale nodes", () => { + assert.equal( + recordIdFromGovernanceNode({ + nodeType: "memoryStale", + record: {id: "mem-stale"}, + }), + "mem-stale" + ); +}); + +test("dedupeGovernanceNodes keeps first occurrence per record id", () => { + const nodes = [ + {nodeType: "memoryDraft", record: {id: 
"mem-1"}}, + {nodeType: "memoryStale", record: {id: "mem-1"}}, + {nodeType: "memoryDraft", record: {id: "mem-2"}}, + ]; + assert.equal(dedupeGovernanceNodes(nodes).length, 2); +}); + +test("resolveGovernanceTargets prefers multi-select tree items", () => { + const targets = resolveGovernanceTargets( + {nodeType: "memoryDraft", record: {id: "mem-primary"}}, + [ + {id: "memory-draft-mem-a"}, + {id: "memory-stale-mem-b"}, + ] + ); + assert.deepEqual( + targets.map((node) => [node.nodeType, node.record.id]), + [ + ["memoryDraft", "mem-a"], + ["memoryStale", "mem-b"], + ] + ); +}); + +test("resolveGovernanceTargets keeps stale primary node", () => { + const targets = resolveGovernanceTargets( + { + nodeType: "memoryStale", + record: {id: "mem-stale", status: "stale"}, + }, + undefined + ); + assert.equal(targets.length, 1); + assert.equal(targets[0].nodeType, "memoryStale"); +}); + +test("buildBulkConfirmDetail previews statements and overflow", () => { + const detail = buildBulkConfirmDetail( + [ + { + nodeType: "memoryDraft", + record: { + id: "mem-1", + type: "change_rationale", + statement: "Anchor drift policy", + }, + }, + { + nodeType: "memoryStale", + record: { + id: "mem-2", + type: "module_role", + statement: "Second stale", + }, + }, + { + nodeType: "memoryDraft", + record: { + id: "mem-3", + type: "module_role", + statement: "Third draft", + }, + }, + { + nodeType: "memoryDraft", + record: { + id: "mem-4", + type: "module_role", + statement: "Fourth draft", + }, + }, + ], + "approve", + 2 + ); + assert.match(detail, /change_rationale/); + assert.match(detail, /…and 2 more/); + assert.match(detail, /Approved records become active engineering memory/); +}); + +test("formatBulkResultSummary reports success and failure counts", () => { + const summary = formatBulkResultSummary( + { + succeeded: ["mem-1", "mem-2"], + failed: [{recordId: "mem-3", message: "already active"}], + }, + "approve" + ); + assert.match(summary, /2 memory record\(s\) approved/); + 
assert.match(summary, /1 failed: mem-3/); +}); + +test("distinctRecordTypes returns sorted unique types", () => { + assert.deepEqual( + distinctRecordTypes([ + {type: "module_role"}, + {type: "change_rationale"}, + {type: "module_role"}, + {type: ""}, + ]), + ["change_rationale", "module_role"] + ); +}); diff --git a/extensions/vscode-codeclone/test/memoryGovernance.test.js b/extensions/vscode-codeclone/test/memoryGovernance.test.js new file mode 100644 index 00000000..5b716c23 --- /dev/null +++ b/extensions/vscode-codeclone/test/memoryGovernance.test.js @@ -0,0 +1,49 @@ +"use strict"; + +const test = require("node:test"); +const assert = require("node:assert/strict"); + +const {computeGovernanceProof} = require("../src/memoryGovernance"); +const {extractToolErrorMessage} = require("../src/mcpClient"); + +test("extractToolErrorMessage parses FastMCP tool errors", () => { + const result = { + isError: true, + content: [ + { + type: "text", + text: "Error executing tool manage_engineering_memory: Cannot approve record in status 'active'", + }, + ], + }; + assert.equal( + extractToolErrorMessage(result, "manage_engineering_memory"), + "Cannot approve record in status 'active'" + ); +}); + +test("computeGovernanceProof is stable for fixed inputs", () => { + const key = "ab".repeat(32); + const proof = computeGovernanceProof(key, { + protocol: 2, + ticketId: "ticket", + recordId: "mem-1", + decision: "approve", + confirmationNonce: "nonce", + projectId: "proj", + statementDigest: "digest", + }); + assert.match(proof, /^[0-9a-f]{64}$/); + assert.equal( + proof, + computeGovernanceProof(key, { + protocol: 2, + ticketId: "ticket", + recordId: "mem-1", + decision: "approve", + confirmationNonce: "nonce", + projectId: "proj", + statementDigest: "digest", + }) + ); +}); diff --git a/extensions/vscode-codeclone/test/memorySearch.test.js b/extensions/vscode-codeclone/test/memorySearch.test.js new file mode 100644 index 00000000..c736f178 --- /dev/null +++ 
b/extensions/vscode-codeclone/test/memorySearch.test.js @@ -0,0 +1,143 @@ +"use strict"; + +const test = require("node:test"); +const assert = require("node:assert/strict"); +const Module = require("node:module"); + +const moduleInternals = /** @type {{_load: Function}} */ ( + /** @type {unknown} */ (Module) +); +const originalLoad = moduleInternals._load; +moduleInternals._load = function patchedLoad(request, parent, isMain) { + if (request === "vscode") { + return { + ThemeIcon: class ThemeIcon {}, + ThemeColor: class ThemeColor {}, + window: {}, + workspace: {getConfiguration: () => ({get: () => undefined})}, + ConfigurationTarget: {WorkspaceFolder: 3}, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; + +const { + sanitizeSearchQuery, + isValidMemoryRecordId, + normalizeMemorySearchPath, + buildSearchToolArgs, + recordToQuickPickItem, +} = require("../src/memorySearch"); +const { + renderMemorySearchHtml, + escapeHtml, +} = require("../src/memorySearchRenderer"); +const {memoryRecordCommandUri} = require("../src/memorySearch"); + +moduleInternals._load = originalLoad; + +test("sanitizeSearchQuery rejects short, long, and control-character queries", () => { + assert.equal(sanitizeSearchQuery("a"), "Enter at least 2 characters."); + assert.equal(sanitizeSearchQuery("ok"), null); + const tooLong = sanitizeSearchQuery("x".repeat(201)); + assert.ok(tooLong); + assert.match(tooLong, /at most 200/); + const control = sanitizeSearchQuery("bad\u0001query"); + assert.ok(control); + assert.match(control, /control characters/); +}); + +test("isValidMemoryRecordId accepts mem- uuid ids only", () => { + assert.equal(isValidMemoryRecordId("mem-30febd83c0b14c0f9f0e2a1b3c4d5e6f"), true); + assert.equal(isValidMemoryRecordId("mem-proposal-abc"), false); + assert.equal(isValidMemoryRecordId("../mem-30febd83c0b14c0f9f0e2a1b3c4d5e6f"), false); +}); + +test("normalizeMemorySearchPath rejects root and traversal paths", () => { + 
assert.equal(normalizeMemorySearchPath("src/a.py"), "src/a.py"); + assert.equal(normalizeMemorySearchPath("."), null); + assert.equal(normalizeMemorySearchPath("../secret"), null); + assert.equal(normalizeMemorySearchPath("/abs.py"), null); +}); + +test("buildSearchToolArgs maps UI options to MCP tool fields", () => { + const args = buildSearchToolArgs("/repo", "blast radius", { + semantic: true, + includeDrafts: true, + includeStale: false, + maxResults: 15, + detailLevel: "full", + }); + assert.deepEqual(args, { + root: "/repo", + mode: "search", + query: "blast radius", + semantic: true, + include_drafts: true, + include_stale: false, + max_results: 15, + detail_level: "full", + }); +}); + +test("recordToQuickPickItem surfaces type, status, and preview", () => { + const item = recordToQuickPickItem( + { + id: "mem-30febd83c0b14c0f9f0e2a1b3c4d5e6f", + type: "risk_note", + status: "active", + statement: "Keep MCP scope explicit.", + subjects: [{subject_key: "codeclone/memory/paths.py", subject_kind: "file"}], + }, + 0 + ); + assert.match(item.label, /risk_note/); + assert.match(item.description, /^mem-/); + assert.match(item.detail, /MCP scope/); +}); + +test("renderMemorySearchHtml escapes query and uses allowlisted command links", () => { + const html = renderMemorySearchHtml({ + query: '', + workspaceName: "demo", + nonce: "abc123", + result: { + response: { + mode: "search", + detail_level: "compact", + semantic: {used: false, reason: "disabled"}, + payload: { + records: [ + { + id: "mem-30febd83c0b14c0f9f0e2a1b3c4d5e6f", + type: "change_rationale", + status: "draft", + confidence: "medium", + statement: "Test ", + }, + ], + record_count: 1, + truncated: false, + retrieval_policy: {drafts_included: false}, + }, + }, + }, + }); + assert.doesNotMatch(html, /.py'], + depth: "direct", + radius_level: "high", + direct_dependents: [], + transitive_dependents: [], + clone_cohort_members: [], + in_dependency_cycle: [], + structural_risk: {}, + do_not_touch: [], + 
review_context: [], + guardrails: [], + }, + "xss-test", + "safenonce" + ); + + assert.doesNotMatch(html, /', + "nonce-abc" + ); + assert.match(html, /style-src 'nonce-nonce-abc'/); + assert.match(html, /Workspace Session Stats/); + assert.doesNotMatch(html, /"), + store.items[1], + ) + rendered = html_report.render_analytics_html( + store=_as_sqlite_store(store), + snapshot=_snapshot(), + run=candidate, + ) + assert profile_banner_message("candidate_only") in rendered + assert 'data-banner-kind="candidate_only"' in rendered + assert "", duration_ms=1.0, status="ok" + ) + op = OperationView( + operation_id="o", + correlation_id="o", + surface="cli", + name="a&b", + started_at_utc="t", + duration_ms=1.0, + status="ok", + spans=(span,), + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="t", + window_ended_at_utc="t", + aggregates=AggregatesView(operation_count=1, slowest=(op,)), + operation_tree=(op,), + ) + html = render_trace_html(trace) + assert "" not in html + assert "<script>" in html + assert "a&b" in html + + +def test_observability_main_writes_json_and_html(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="A", + correlation_id="A", + surface="cli", + name="cli.analyze", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=285.0, + status="ok", + spans=( + SpanRecord( + span_id="s", + operation_id="A", + name="pipeline.analyze", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=188.0, + status="ok", + ), + ), + ), + ) + finally: + conn.close() + json_path = tmp_path / "trace.json" + html_path = tmp_path / "trace.html" + code = observability_main( + [ + "trace", + "--root", + str(tmp_path), + "--json", + str(json_path), + "--html", + str(html_path), + ] + ) + assert code == 0 + payload = json.loads(json_path.read_text(encoding="utf-8")) + assert payload["operation_tree"][0]["name"] == "cli.analyze" + assert "Platform 
Observability" in html_path.read_text(encoding="utf-8") + + +def test_observability_main_no_store( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + code = observability_main(["trace", "--root", str(tmp_path)]) + assert code == 0 + assert "No observability store" in capsys.readouterr().out + + +def test_html_format_helpers_and_semantic_cost_rows() -> None: + from dataclasses import replace + + from codeclone.observability.render_html import _bytes, _mb, _semantic_row, _tokens + + assert _mb(None) == "—" + assert "GB" in _mb(2048.0) + assert "MB" in _mb(512.0) + assert _bytes(None) == "—" + assert "MB" in _bytes(1024 * 1024) + assert "KB" in _bytes(2048) + assert _bytes(12).endswith(" B") + assert _tokens(None) == "—" + assert _tokens(0) == "—" + assert _tokens(1500).endswith("k") + + costly = SpanCostView( + span_id="s1", + name="memory.semantic.rebuild", + surface="memory", + operation_id="op", + operation_name="memory.projection.job", + duration_ms=6000.0, + no_op=True, + reason_kind="schema_version_changed", + ) + costly_html = _semantic_row(costly) + assert "no-op · costly" in costly_html + assert "schema_version_changed" in costly_html + + noop = replace(costly, duration_ms=10.0) + assert "no-op" in _semantic_row(noop) + assert "costly" not in _semantic_row(noop) + + productive = replace(noop, no_op=False, reason_kind=None) + assert "productive" in _semantic_row(productive) + + +def test_rss_text_includes_end_peak_and_peak_delta() -> None: + from codeclone.observability.render_html import _rss_text + + rendered = _rss_text( + 1.0, + end=0.5, + peak=2.0, + peak_delta=0.75, + ) + assert "end" in rendered + assert "peak" in rendered + assert "peakΔ" in rendered + + +def test_render_highlights_process_peak_rss_without_span_consumer() -> None: + from codeclone.observability.render_html import render_trace_html + from codeclone.observability.views import AggregatesView, TraceView + + trace = TraceView( + schema_version="1.0", + 
window_started_at_utc="t0", + window_ended_at_utc="t1", + aggregates=AggregatesView(operation_count=1, max_peak_rss_mb=512.0), + ) + html = render_trace_html(trace) + assert "Process peak RSS" in html + assert "high-water resident set" in html diff --git a/tests/test_observability_runtime.py b/tests/test_observability_runtime.py new file mode 100644 index 00000000..4ba5011c --- /dev/null +++ b/tests/test_observability_runtime.py @@ -0,0 +1,192 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +from collections.abc import Iterator +from pathlib import Path + +import orjson +import pytest + +from codeclone.config.observability import ObservabilityConfig +from codeclone.observability import ( + bootstrap, + is_observability_enabled, + operation, + record_counter, + shutdown, + span, +) +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) + + +@pytest.fixture(autouse=True) +def _reset_runtime() -> Iterator[None]: + yield + shutdown() + + +def test_disabled_is_inert_and_imports_no_store() -> None: + for module in list(sys.modules): + if module.startswith("codeclone.observability.store"): + sys.modules.pop(module, None) + sys.modules.pop("psutil", None) + + bootstrap(ObservabilityConfig(enabled=False)) + assert is_observability_enabled() is False + # The full handle API must be callable (and inert) when disabled. 
+ with operation(name="x", surface="cli") as op: + op.set_request(request_bytes=5, request_tokens=1) + op.set_response(response_bytes=10, response_tokens=2) + with span(name="s", reason_kind="content_changed") as sp: + sp.add_counter("embedded", 3) + sp.set_counter("skipped", 0) + sp.set_reason_kind("model_changed") + + assert not any(m.startswith("codeclone.observability.store") for m in sys.modules) + assert "psutil" not in sys.modules + + +def test_enabled_persists_operation_and_nested_spans(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path, session_id="sess1") + with operation(name="finish", surface="mcp") as op: + op.set_response(response_bytes=820, response_tokens=120) + with span(name="semantic.reindex", reason_kind="schema_version_changed") as sp: + sp.set_counter("embedded", 1423) + with span(name="inner"): + pass + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + op_row = conn.execute( + "SELECT name, surface, session_id, response_bytes FROM platform_operations" + ).fetchall() + assert op_row == [("finish", "mcp", "sess1", 820)] + rows = conn.execute( + "SELECT name, span_id, parent_span_id, reason_kind FROM platform_spans" + ).fetchall() + by_name = {row[0]: row for row in rows} + assert set(by_name) == {"semantic.reindex", "inner"} + assert by_name["semantic.reindex"][3] == "schema_version_changed" + # Nested span links to its parent span. 
+ assert by_name["inner"][2] == by_name["semantic.reindex"][1] + finally: + conn.close() + + +def test_record_counter_attributes_to_active_span(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation(name="search", surface="mcp"), span(name="root"): + record_counter("retrieval.fts_hits", 3) + record_counter("retrieval.fts_hits", 2) # accumulates onto the same key + record_counter("retrieval.vector_memory_hits") # default value 1 + # Outside any span the counter is inert (no raise, nothing recorded). + record_counter("retrieval.fts_hits", 99) + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + (counters_json,) = conn.execute( + "SELECT counters_json FROM platform_spans WHERE name='root'" + ).fetchone() + finally: + conn.close() + counters = orjson.loads(counters_json) + assert counters["retrieval.fts_hits"] == 5 + assert counters["retrieval.vector_memory_hits"] == 1 + + +def test_cross_process_correlation_and_parent(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation(name="A", surface="mcp", correlation_id="corrX") as a: + a_id = a.operation_id + with operation( + name="B", surface="memory", parent_operation_id=a_id, correlation_id="corrX" + ): + pass + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + row = conn.execute( + "SELECT parent_operation_id, correlation_id " + "FROM platform_operations WHERE name='B'" + ).fetchone() + assert row == (a_id, "corrX") + finally: + conn.close() + + +def test_operation_records_error_status(tmp_path: Path) -> None: + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with pytest.raises(ValueError, match="nope"), operation(name="boom", surface="cli"): + raise ValueError("nope") + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + row = conn.execute( + "SELECT status, error_kind FROM 
platform_operations WHERE name='boom'" + ).fetchone() + assert row == ("error", "ValueError") + finally: + conn.close() + + +def test_record_elapsed_span_is_noop_without_active_operation(tmp_path: Path) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import bootstrap, record_elapsed_span, shutdown + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + record_elapsed_span( + "orphan-span", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=1.0, + ) + finally: + shutdown() + + +def test_runtime_optional_payload_root_and_empty_sql_edges(tmp_path: Path) -> None: + from codeclone.observability import runtime + + bootstrap(ObservabilityConfig(enabled=True), session_id="session") + with operation(name="rootless", surface="mcp") as op: + op.set_request(request_bytes=1) + op.set_request(request_tokens=2) + op.set_response(response_bytes=3) + op.set_response(response_tokens=4) + with span(name="db"): + runtime.record_db_query("") + + first_root = tmp_path / "first" + second_root = tmp_path / "second" + runtime.bind_root(first_root) + runtime.bind_root(second_root) + with operation(name="rooted", surface="mcp"): + pass + shutdown() + + assert observability_store_path(first_root).exists() + assert not observability_store_path(second_root).exists() + + bootstrap(ObservabilityConfig(enabled=False)) + runtime.bind_root(tmp_path / "disabled") + + active = runtime._ActiveRuntime( + ObservabilityConfig(enabled=True), + root=tmp_path, + ) + active._conn = object() + active.close() + assert active._conn is None diff --git a/tests/test_observability_store.py b/tests/test_observability_store.py new file mode 100644 index 00000000..29ff7ef4 --- /dev/null +++ b/tests/test_observability_store.py @@ -0,0 +1,252 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION +from codeclone.observability.models import ( + OperationRecord, + ProfileSample, + SpanRecord, +) +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) +from codeclone.observability.store.writer import write_operation + + +def _op( + operation_id: str, + *, + correlation_id: str, + parent_operation_id: str | None = None, + spans: tuple[SpanRecord, ...] = (), +) -> OperationRecord: + return OperationRecord( + operation_id=operation_id, + correlation_id=correlation_id, + surface="mcp", + name="finish_controlled_change", + started_at_utc="2026-06-09T00:00:00Z", + duration_ms=820.0, + status="ok", + parent_operation_id=parent_operation_id, + spans=spans, + ) + + +def _span(span_id: str, **kw: object) -> SpanRecord: + base: dict[str, object] = { + "span_id": span_id, + "operation_id": "A", + "name": "span", + "started_at_utc": "2026-06-09T00:00:00Z", + "duration_ms": 1.0, + "status": "ok", + } + base.update(kw) + return SpanRecord(**base) # type: ignore[arg-type] + + +def test_store_records_schema_version(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + row = conn.execute( + "SELECT value FROM platform_meta WHERE key='schema_version'" + ).fetchone() + assert row[0] == PLATFORM_OBSERVABILITY_SCHEMA_VERSION + finally: + conn.close() + + +def test_write_operation_persists_op_and_spans(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + spans = tuple( + _span( + f"s{i}", + name=f"span{i}", + duration_ms=float(i), + reason_kind="content_changed", + counters={"embedded": i}, + ) + for i in range(5) + ) + write_operation(conn, _op("A", correlation_id="A", spans=spans)) + + assert ( + 
conn.execute("SELECT COUNT(*) FROM platform_operations").fetchone()[0] == 1 + ) + assert ( + conn.execute( + "SELECT COUNT(*) FROM platform_spans WHERE operation_id='A'" + ).fetchone()[0] + == 5 + ) + reason_kind, counters_json = conn.execute( + "SELECT reason_kind, counters_json FROM platform_spans WHERE span_id='s3'" + ).fetchone() + assert reason_kind == "content_changed" + assert '"embedded"' in counters_json + finally: + conn.close() + + +def test_write_operation_records_tree_columns(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation(conn, _op("A", correlation_id="A")) + write_operation(conn, _op("B", correlation_id="A", parent_operation_id="A")) + row = conn.execute( + "SELECT parent_operation_id, correlation_id " + "FROM platform_operations WHERE operation_id='B'" + ).fetchone() + assert row == ("A", "A") + finally: + conn.close() + + +def test_profile_columns_persist(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + span = _span( + "p", + name="memory.semantic.rebuild", + duration_ms=18200.0, + profile=ProfileSample(rss_delta_mb=6144.0), + ) + write_operation(conn, _op("A", correlation_id="A", spans=(span,))) + rss = conn.execute( + "SELECT rss_delta_mb FROM platform_spans WHERE span_id='p'" + ).fetchone()[0] + assert rss == 6144.0 + finally: + conn.close() + + +def test_observability_span_error_and_sql_classification(tmp_path: Path) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import ( + bootstrap, + operation, + record_elapsed_span, + shutdown, + span, + ) + from codeclone.observability.runtime import _classify_sql + from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, + ) + + assert _classify_sql(" ") == "" + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + with operation(name="job", surface="cli"): + 
record_elapsed_span( + "cold-start", + started_at_utc="2026-01-01T00:00:00Z", + duration_ms=12.5, + ) + with pytest.raises(RuntimeError, match="boom"), span(name="failing-stage"): + raise RuntimeError("boom") + shutdown() + + conn = open_observability_store(observability_store_path(tmp_path)) + try: + span_row = conn.execute( + "SELECT status FROM platform_spans WHERE name=?", + ("failing-stage",), + ).fetchone() + elapsed_row = conn.execute( + "SELECT name FROM platform_spans WHERE name=?", + ("cold-start",), + ).fetchone() + finally: + conn.close() + assert span_row is not None + assert str(span_row[0]) == "error" + assert elapsed_row is not None + + +def test_observability_schema_migrates_legacy_span_columns(tmp_path: Path) -> None: + import sqlite3 + + from codeclone.observability.store.schema import create_observability_schema + + db_path = tmp_path / "legacy.sqlite3" + conn = sqlite3.connect(db_path) + try: + conn.executescript( + """ + CREATE TABLE platform_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + CREATE TABLE platform_spans ( + span_id TEXT PRIMARY KEY, + operation_id TEXT NOT NULL, + parent_span_id TEXT, + name TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + duration_ms REAL NOT NULL, + status TEXT NOT NULL, + reason_kind TEXT, + reason TEXT, + dedupe_key TEXT, + counters_json TEXT, + rss_mb REAL, + rss_delta_mb REAL, + cpu_user_ms REAL, + cpu_system_ms REAL, + open_fds INTEGER, + thread_count INTEGER + ); + CREATE TABLE platform_operations ( + operation_id TEXT PRIMARY KEY, + parent_operation_id TEXT, + correlation_id TEXT NOT NULL, + surface TEXT NOT NULL, + name TEXT NOT NULL, + started_at_utc TEXT NOT NULL, + duration_ms REAL NOT NULL, + status TEXT NOT NULL, + error_kind TEXT, + session_id TEXT, + repo_root_digest TEXT, + request_bytes INTEGER, + response_bytes INTEGER, + request_tokens INTEGER, + response_tokens INTEGER, + rss_mb REAL, + rss_delta_mb REAL, + cpu_user_ms REAL, + cpu_system_ms REAL, + open_fds INTEGER, + 
thread_count INTEGER + ); + """ + ) + conn.commit() + create_observability_schema(conn) + span_columns = { + row[1] for row in conn.execute("PRAGMA table_info(platform_spans)") + } + operation_columns = { + row[1] for row in conn.execute("PRAGMA table_info(platform_operations)") + } + assert "db_fingerprints" in span_columns + assert "peak_rss_mb" in span_columns + assert "peak_rss_delta_mb" in span_columns + assert "peak_rss_mb" in operation_columns + assert "peak_rss_delta_mb" in operation_columns + finally: + conn.close() diff --git a/tests/test_observability_worker_chain.py b/tests/test_observability_worker_chain.py new file mode 100644 index 00000000..1a6fc742 --- /dev/null +++ b/tests/test_observability_worker_chain.py @@ -0,0 +1,128 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Iterator +from pathlib import Path +from unittest.mock import patch + +import orjson +import pytest + +from codeclone.config.memory import MemoryConfig, resolve_memory_config +from codeclone.config.observability import ObservabilityConfig +from codeclone.memory.jobs import worker as worker_module +from codeclone.memory.jobs import workflow as workflow_module +from codeclone.memory.jobs.worker import run_projection_job +from codeclone.observability import bootstrap, is_observability_enabled, shutdown +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) + +from .memory_fixtures import cli_memory_repo + + +@pytest.fixture(autouse=True) +def _reset_runtime() -> Iterator[None]: + yield + shutdown() + + +def test_run_projection_job_emits_operation_and_spans(tmp_path: Path) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as 
(root, project, store): + config = resolve_memory_config(root) + bootstrap(ObservabilityConfig(enabled=True), root=root) + try: + with ( + patch.object( + worker_module, + "execute_trajectory_rebuild", + return_value={"status": "ok", "mode": "full", "workflows_seen": 7}, + ) as trajectory_rebuild, + patch.object( + worker_module, + "execute_semantic_index_rebuild", + return_value={ + "status": "ok", + "embedded": 1423, + "skipped_unchanged": 11, + }, + ) as semantic_rebuild, + patch.object( + worker_module, + "execute_experience_distillation", + return_value={"status": "ok", "experiences_distilled": 3}, + ) as experience_distillation, + ): + status, _result, _reason = run_projection_job( + store, + job_id="job-1", + root_path=root, + config=config, + project=project, + stimulus={}, + ) + finally: + shutdown() + + assert status == "done" + assert trajectory_rebuild.call_args.kwargs["store"] is store + assert semantic_rebuild.call_args.kwargs["store"] is store + assert experience_distillation.call_args.kwargs["store"] is store + + obs = open_observability_store(observability_store_path(root)) + try: + op_rows = obs.execute( + "SELECT name, surface, status FROM platform_operations" + ).fetchall() + span_rows = obs.execute( + "SELECT name, reason_kind, counters_json, operation_id " + "FROM platform_spans" + ).fetchall() + finally: + obs.close() + + assert op_rows == [("memory.projection.job", "memory", "ok")] + by_name = {row[0]: row for row in span_rows} + assert set(by_name) == { + "memory.trajectory.rebuild", + "memory.experience.distill", + } + # Semantic spans emit from execute_semantic_index_rebuild (not the worker shell). + # This test mocks that call, so only trajectory and experience spans appear. 
+ assert len({row[3] for row in span_rows}) == 1 + assert by_name["memory.trajectory.rebuild"][1] == "first_index" + assert orjson.loads(by_name["memory.trajectory.rebuild"][2]) == { + "workflows_seen": 7 + } + assert orjson.loads(by_name["memory.experience.distill"][2]) == { + "experiences_distilled": 3 + } + + +def test_worker_bootstraps_observability_before_opening_store( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr( + workflow_module, + "resolve_observability_config", + lambda: ObservabilityConfig(enabled=True), + ) + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + enabled_at_open: list[bool] = [] + real_session = workflow_module._require_memory_store_session + + def _spy(root_path: Path, config: MemoryConfig | None = None) -> object: + # open_memory_db only instruments while observability is enabled. + enabled_at_open.append(is_observability_enabled()) + return real_session(root_path, config=config) + + monkeypatch.setattr(workflow_module, "_require_memory_store_session", _spy) + workflow_module.execute_run_projection_jobs_once(root_path=root) + + assert enabled_at_open == [True] diff --git a/tests/test_options_spec_coverage.py b/tests/test_options_spec_coverage.py index 850d311b..736fec58 100644 --- a/tests/test_options_spec_coverage.py +++ b/tests/test_options_spec_coverage.py @@ -1,3 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import re @@ -28,6 +33,8 @@ def _cli_sample(option: OptionSpec) -> tuple[tuple[str, ...], object]: return ((option.flags[0],), False) if option.value_type is int: return ((option.flags[0], "7"), 7) + if option.nargs == "+": + return ((option.flags[0], "sample-value"), ["sample-value"]) return ((option.flags[0], "sample-value"), "sample-value") @@ -112,8 +119,12 @@ def test_option_specs_have_pyproject_loading_coverage( def test_config_defaults_doc_covers_exact_pyproject_key_set() -> None: - text = Path("docs/book/04-config-and-defaults.md").read_text(encoding="utf-8") - documented = set(re.findall(r"^\| `([a-z0-9_]+)`\s+\|", text, re.MULTILINE)) + text = Path("docs/book/10-config-and-defaults.md").read_text(encoding="utf-8") + # Scope to the core [tool.codeclone] table; the "Engineering Memory (nested + # tables)" section below documents the separate [tool.codeclone.memory*] + # namespace, which the doc itself marks as not part of the root key set. + core_section = text.split("### Engineering Memory (nested tables)", 1)[0] + documented = set(re.findall(r"^\| `([a-z0-9_]+)`\s+\|", core_section, re.MULTILINE)) declared = { option.pyproject_key for option in PYPROJECT_OPTIONS @@ -124,6 +135,6 @@ def test_config_defaults_doc_covers_exact_pyproject_key_set() -> None: def test_config_defaults_doc_explains_coverage_pyproject_to_cli_mapping() -> None: - text = Path("docs/book/04-config-and-defaults.md").read_text(encoding="utf-8") + text = Path("docs/book/10-config-and-defaults.md").read_text(encoding="utf-8") assert "`coverage_xml` is the `[tool.codeclone]` key" in text assert "`--coverage FILE`" in text diff --git a/tests/test_packaging.py b/tests/test_packaging.py new file mode 100644 index 00000000..7f9378b9 --- /dev/null +++ b/tests/test_packaging.py @@ -0,0 +1,53 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from typing import cast + +from codeclone.config.pyproject_loader import _load_toml + + +def _discover_codeclone_packages(repo_root: Path) -> set[str]: + codeclone_root = repo_root / "codeclone" + packages: set[str] = set() + for init_path in codeclone_root.rglob("__init__.py"): + relative = init_path.parent.relative_to(repo_root) + packages.add(".".join(relative.parts)) + return packages + + +def _load_setuptools_packages(repo_root: Path) -> set[str]: + pyproject_path = repo_root / "pyproject.toml" + payload = cast(dict[str, object], _load_toml(pyproject_path)) + tool = cast(dict[str, object], payload["tool"]) + setuptools = cast(dict[str, object], tool["setuptools"]) + packages = setuptools["packages"] + if not isinstance(packages, list): + msg = "tool.setuptools.packages must be a list" + raise AssertionError(msg) + return {str(item) for item in packages} + + +def test_setuptools_packages_match_codeclone_subpackages() -> None: + """Every codeclone package dir must be declared for wheel/sdist builds.""" + + repo_root = Path(__file__).resolve().parents[1] + discovered = _discover_codeclone_packages(repo_root) + declared = _load_setuptools_packages(repo_root) + + missing = sorted(discovered - declared) + assert missing == [], ( + "Add missing subpackages to [tool.setuptools].packages in pyproject.toml: " + + ", ".join(missing) + ) + + orphan = sorted(declared - discovered) + assert orphan == [], ( + "Remove stale setuptools entries (no matching codeclone package dir): " + + ", ".join(orphan) + ) diff --git a/tests/test_patch_trail_compute.py b/tests/test_patch_trail_compute.py new file mode 100644 index 00000000..9a40b4f1 --- /dev/null +++ b/tests/test_patch_trail_compute.py @@ -0,0 +1,105 @@ +# This Source Code Form 
is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.contracts import PATCH_TRAIL_SCHEMA_VERSION +from codeclone.memory.trajectory.dto import ( + BlastRadiusSnapshot, + HygieneSnapshot, + PatchTrailEvidenceInput, + PatchTrailInputs, + VerifySnapshot, +) +from codeclone.memory.trajectory.patch_trail import compute_patch_trail + + +def _inputs( + *, + declared: tuple[str, ...], + changed: tuple[str, ...], + unexpected: tuple[str, ...] = (), + forbidden: tuple[str, ...] = (), + scope_status: str = "clean", +) -> PatchTrailInputs: + return PatchTrailInputs( + intent_id="intent-test", + intent_description="test intent", + declared_files=declared, + declared_related=(), + changed_files=changed, + unexpected_files=unexpected, + forbidden_touched=forbidden, + expanded_related_files=(), + scope_check_status=scope_status, + blast_radius=BlastRadiusSnapshot( + do_not_touch_declared=("codeclone.baseline.json",), + review_context_declared=("codeclone/core/pipeline.py",), + ), + verify=VerifySnapshot( + verification_profile="python_structural", + verification_status="accepted", + verification_skipped=("documentation_only",), + verification_failed=(), + ), + hygiene=HygieneSnapshot( + blocks_finish=False, + finish_block_reason=None, + unacknowledged_dirty_in_scope=(), + dirty_paths_outside_scope=(), + attribution_counts={"in_scope": 0}, + ), + evidence=PatchTrailEvidenceInput( + repo_root_digest="abcd1234", + report_digest="sha256:deadbeef", + scope_check_audit_sequence=10, + patch_verify_audit_sequence=11, + ), + ) + + +def test_untouched_in_declared_derivation() -> None: + trail = compute_patch_trail( + _inputs(declared=("a.py", "b.py", "c.py"), changed=("a.py",)) + ) + assert trail.untouched_in_declared == ("b.py", "c.py") + 
assert trail.scope_check_status == "clean" + + +def test_violated_scope_leaves_no_untouched_when_extra_changed() -> None: + trail = compute_patch_trail( + _inputs( + declared=("a.py",), + changed=("a.py", "b.py"), + unexpected=("b.py",), + scope_status="violated", + ) + ) + assert trail.untouched_in_declared == () + assert trail.unexpected_files == ("b.py",) + + +def test_do_not_touch_held_excludes_changed() -> None: + trail = compute_patch_trail(_inputs(declared=("a.py",), changed=("a.py",))) + assert trail.do_not_touch_declared == ("codeclone.baseline.json",) + assert trail.do_not_touch_held == ("codeclone.baseline.json",) + + +def test_patch_trail_digest_is_stable() -> None: + first = compute_patch_trail(_inputs(declared=("a.py", "b.py"), changed=("a.py",))) + second = compute_patch_trail(_inputs(declared=("a.py", "b.py"), changed=("a.py",))) + assert first.patch_trail_digest == second.patch_trail_digest + assert first.schema_version == PATCH_TRAIL_SCHEMA_VERSION + + +def test_summary_payload_uses_counts() -> None: + trail = compute_patch_trail( + _inputs(declared=("a.py", "b.py", "c.py"), changed=("a.py",)) + ) + summary = trail.to_payload(detail_level="summary") + counts = summary["counts"] + assert isinstance(counts, dict) + assert counts["untouched_in_declared"] == 2 diff --git a/tests/test_patch_trail_rebuild.py b/tests/test_patch_trail_rebuild.py new file mode 100644 index 00000000..8b98cd37 --- /dev/null +++ b/tests/test_patch_trail_rebuild.py @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import json +from pathlib import Path + +from codeclone.audit.events import ( + EVENT_INTENT_CHECKED, + EVENT_INTENT_DECLARED, + EVENT_PATCH_TRAIL_COMPUTED, + EVENT_PATCH_VERIFIED, + AuditEvent, + repo_root_digest, +) +from codeclone.audit.reader import AuditRecord +from codeclone.audit.writer import SqliteAuditWriter +from codeclone.memory.trajectory.patch_trail_projector import ( + project_patch_trail_from_audit, +) +from codeclone.memory.trajectory.store import load_trajectory_patch_trail + +from .memory_fixtures import memory_store + + +def _core(event_type: str, *, status: str = "", **facts: object) -> tuple[str, str]: + payload = { + "core_schema_version": "2", + "event_family": event_type.partition(".")[0], + "event_type": event_type, + "facts": facts, + "status": status, + "truncated": False, + } + text = json.dumps(payload, sort_keys=True, separators=(",", ":")) + return text, hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def _record( + sequence: int, + event_type: str, + *, + status: str | None = None, + summary: str | None = None, + **facts: object, +) -> AuditRecord: + core_json, core_sha = _core(event_type, status=status or "", **facts) + return AuditRecord( + audit_sequence=sequence, + event_id=f"evt-{sequence}", + event_type=event_type, + severity="info", + created_at_utc=f"2026-01-01T00:00:0{sequence}Z", + run_id="run-test", + intent_id="intent-test-001", + report_digest="a" * 64, + workflow_id="intent:intent-test-001", + surface="mcp", + tool_name=None, + event_core_json=core_json, + event_core_sha256=core_sha, + payload_sha256=None, + status=status, + agent_label="agent", + summary=summary, + ) + + +def test_project_patch_trail_from_audit_uses_check_core_paths() -> None: + root_digest = "root-digest" + records = ( + _record( + 1, + EVENT_INTENT_DECLARED, + status="active", + summary="implement patch trail 
rebuild", + scope_paths=["pkg/a.py", "pkg/b.py"], + ), + _record( + 2, + EVENT_INTENT_CHECKED, + status="clean", + declared_scope_paths=["pkg/a.py", "pkg/b.py"], + changed_files=["pkg/a.py"], + unexpected_files_list=[], + forbidden_touched_list=[], + ), + _record( + 3, + EVENT_PATCH_VERIFIED, + status="accepted", + ), + _record( + 4, + EVENT_PATCH_TRAIL_COMPUTED, + status="clean", + patch_trail_digest="ignored", + untouched_in_declared=1, + ), + ) + + trail = project_patch_trail_from_audit( + records=records, + repo_root_digest=root_digest, + ) + + assert trail is not None + assert trail.declared_files == ("pkg/a.py", "pkg/b.py") + assert trail.changed_files == ("pkg/a.py",) + assert trail.untouched_in_declared == ("pkg/b.py",) + assert trail.verification_status == "accepted" + assert trail.evidence["scope_check_audit_sequence"] == 2 + assert trail.evidence["patch_trail_audit_sequence"] == 4 + + +def test_rebuild_trajectories_persist_patch_trail(tmp_path: Path) -> None: + audit_db = tmp_path / "audit.sqlite3" + with memory_store(tmp_path) as (root, project, store, _db_path): + root_digest = repo_root_digest(root.resolve()) + writer = SqliteAuditWriter( + db_path=audit_db, + payloads="compact", + retention_days=30, + ) + try: + writer.emit( + AuditEvent( + event_type=EVENT_INTENT_DECLARED, + severity="info", + repo_root_digest=root_digest, + agent_pid=100, + agent_label="tester", + intent_id="intent-test-001", + run_id="abc12345", + report_digest="1" * 64, + status="active", + payload={ + "intent_description": "implement patch trail storage", + "scope": { + "allowed_files": ["pkg/a.py", "pkg/b.py"], + }, + "workspace_registered": True, + "ttl_seconds": 3600, + "lease_seconds": 600, + }, + ) + ) + writer.emit( + AuditEvent( + event_type=EVENT_INTENT_CHECKED, + severity="info", + repo_root_digest=root_digest, + agent_pid=100, + agent_label="tester", + intent_id="intent-test-001", + run_id="abc12345", + report_digest="1" * 64, + status="clean", + payload={ + "status": 
"clean", + "declared_scope": ["pkg/a.py", "pkg/b.py"], + "actual_changed_files": ["pkg/a.py"], + "unexpected_files": [], + "forbidden_touched": [], + "required_action": None, + "message": "clean", + }, + ) + ) + writer.emit( + AuditEvent( + event_type=EVENT_PATCH_VERIFIED, + severity="info", + repo_root_digest=root_digest, + agent_pid=100, + agent_label="tester", + intent_id="intent-test-001", + run_id="def67890", + report_digest="2" * 64, + status="accepted", + payload={ + "status": "accepted", + "structural_delta": { + "regressions": [], + "improvements": [], + "health_delta": 0, + }, + "contract_violations": [], + "baseline_abuse": {"detected": False}, + }, + ) + ) + finally: + writer.close() + + projection = store.rebuild_trajectories_from_audit( + project=project, + root_path=root, + audit_db_path=audit_db, + ) + trajectory = projection.trajectories[0] + loaded = load_trajectory_patch_trail( + store._conn, + trajectory_id=trajectory.id, + ) + + assert loaded is not None + assert loaded["untouched_in_declared"] == ["pkg/b.py"] + assert loaded["changed_files"] == ["pkg/a.py"] + assert trajectory.trajectory_digest + assert loaded["patch_trail_digest"] diff --git a/tests/test_paths_gitignore.py b/tests/test_paths_gitignore.py new file mode 100644 index 00000000..4a781ffe --- /dev/null +++ b/tests/test_paths_gitignore.py @@ -0,0 +1,87 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.paths.gitignore import ( + gitignore_pattern_covers_codeclone_cache, + normalize_gitignore_pattern, + repo_gitignore_covers_codeclone_cache, +) + + +@pytest.mark.parametrize( + ("pattern", "expected"), + [ + (".cache/", True), + (".cache", True), + ("/.cache/", True), + (".cache/**", True), + (".codeclone/", True), + (".codeclone", True), + (".codeclone/**", True), + (".codeclone/", True), + (".codeclone", True), + (".codeclone/**", True), + ("**/.codeclone/", True), + ("**/.codeclone/**", True), + (".cache/*", False), + ("node_modules/", False), + ("", False), + ("# .cache/", False), + ("!.codeclone/", False), + ], +) +def test_gitignore_pattern_covers_codeclone_cache(pattern: str, expected: bool) -> None: + assert gitignore_pattern_covers_codeclone_cache(pattern) is expected + + +def test_normalize_gitignore_pattern_strips_comments_and_slashes() -> None: + assert normalize_gitignore_pattern(" /.codeclone/ ") == ".codeclone" + assert normalize_gitignore_pattern("# ignore cache") == "" + assert normalize_gitignore_pattern("\\# .codeclone/") == "# .codeclone" + + +def test_repo_gitignore_covers_codeclone_cache_read_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + (tmp_path / ".gitignore").write_text(".codeclone/\n", encoding="utf-8") + + def raise_oserror(self: Path, encoding: str | None = None) -> str: + raise OSError("denied") + + monkeypatch.setattr(Path, "read_text", raise_oserror) + assert repo_gitignore_covers_codeclone_cache(tmp_path) is False + + +def test_repo_gitignore_covers_codeclone_cache(tmp_path: Path) -> None: + assert repo_gitignore_covers_codeclone_cache(tmp_path) is False + + (tmp_path / ".gitignore").write_text("node_modules/\n", encoding="utf-8") + assert repo_gitignore_covers_codeclone_cache(tmp_path) is False + + (tmp_path / 
".gitignore").write_text(".cache/\n", encoding="utf-8") + assert repo_gitignore_covers_codeclone_cache(tmp_path) is True + + (tmp_path / ".gitignore").write_text(".codeclone/\n", encoding="utf-8") + assert repo_gitignore_covers_codeclone_cache(tmp_path) is True + + +def test_gitignore_codeclone_cache_tip_payload_shape() -> None: + from codeclone.paths.gitignore import ( + GITIGNORE_CODECLONE_CACHE_TIP_ID, + gitignore_codeclone_cache_tip_payload, + ) + + payload = gitignore_codeclone_cache_tip_payload() + assert payload["id"] == GITIGNORE_CODECLONE_CACHE_TIP_ID + assert payload["category"] == "workspace_hygiene" + assert payload["suggested_entry"] == ".codeclone/" diff --git a/tests/test_paths_workspace.py b/tests/test_paths_workspace.py new file mode 100644 index 00000000..7d454ee2 --- /dev/null +++ b/tests/test_paths_workspace.py @@ -0,0 +1,164 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.paths.workspace import ( + default_cache_path, + emit_legacy_workspace_warnings, + legacy_home_cache_path, + legacy_repo_workspace_dir, + legacy_repo_workspace_has_artifacts, + repo_workspace_dir, + workspace_glob_patterns, +) +from codeclone.surfaces.cli.console import PlainConsole + + +def test_default_cache_path_under_codeclone_dir(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + assert default_cache_path(root) == root / ".codeclone" / "cache.json" + + +def test_legacy_repo_workspace_has_artifacts_detects_entries(tmp_path: Path) -> None: + root = tmp_path / "repo" + legacy = legacy_repo_workspace_dir(root) + legacy.mkdir(parents=True) + (legacy / "cache.json").write_text("{}", encoding="utf-8") + assert legacy_repo_workspace_has_artifacts(root) is True + + +def test_legacy_repo_workspace_has_artifacts_false_when_missing(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + assert legacy_repo_workspace_has_artifacts(root) is False + + +def test_repo_workspace_dir(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + assert repo_workspace_dir(root) == root / ".codeclone" + + +def test_legacy_repo_workspace_has_artifacts_treats_iterdir_oserror( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + root = tmp_path / "repo" + legacy = legacy_repo_workspace_dir(root) + legacy.mkdir(parents=True) + (legacy / "marker").write_text("x", encoding="utf-8") + real_iterdir = Path.iterdir + + def _iterdir(self: Path) -> object: + if self == legacy: + raise OSError("permission denied") + return real_iterdir(self) + + monkeypatch.setattr(Path, "iterdir", _iterdir) + assert legacy_repo_workspace_has_artifacts(root) is False + + +def test_workspace_glob_patterns_includes_legacy_and_new_globs() -> None: + patterns = workspace_glob_patterns() + 
assert ".codeclone/**" in patterns + assert ".cache/codeclone/**" in patterns + + +def test_legacy_home_cache_path_expands_user() -> None: + path = legacy_home_cache_path() + assert path.name == "cache.json" + assert "codeclone" in path.as_posix() + + +def test_emit_legacy_home_cache_warning_when_paths_differ( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + root = tmp_path / "repo" + root.mkdir() + legacy_home = tmp_path / "legacy-home-cache.json" + legacy_home.write_text("{}", encoding="utf-8") + emit_legacy_workspace_warnings( + root_path=root, + cache_path=default_cache_path(root), + legacy_home_cache_path=legacy_home, + console=PlainConsole(), + ) + out = capsys.readouterr().out + assert "Legacy cache file found at" in out + assert str(legacy_home) in out + + +def test_emit_legacy_home_cache_skipped_when_resolved_matches_project_cache( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + root = tmp_path / "repo" + root.mkdir() + cache_path = default_cache_path(root) + cache_path.parent.mkdir(parents=True, exist_ok=True) + cache_path.write_text("{}", encoding="utf-8") + emit_legacy_workspace_warnings( + root_path=root, + cache_path=cache_path, + legacy_home_cache_path=cache_path, + console=PlainConsole(), + ) + out = capsys.readouterr().out + assert "Legacy cache file found at" not in out + + +def test_emit_legacy_home_cache_resolve_oserror_still_warns( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, +) -> None: + root = tmp_path / "repo" + root.mkdir() + cache_path = default_cache_path(root) + legacy_home = tmp_path / "legacy-cache.json" + legacy_home.write_text("{}", encoding="utf-8") + real_resolve = Path.resolve + + def _resolve(self: Path, strict: bool = False) -> Path: + if self == legacy_home: + raise OSError("nope") + return real_resolve(self, strict=strict) + + monkeypatch.setattr(Path, "resolve", _resolve) + emit_legacy_workspace_warnings( + root_path=root, + 
cache_path=cache_path, + legacy_home_cache_path=legacy_home, + console=PlainConsole(), + ) + out = capsys.readouterr().out + assert "Legacy cache file found at" in out + + +def test_emit_legacy_repo_workspace_warning( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + root = tmp_path / "repo" + root.mkdir() + legacy = legacy_repo_workspace_dir(root) + legacy.mkdir(parents=True) + (legacy / "cache.json").write_text("{}", encoding="utf-8") + emit_legacy_workspace_warnings( + root_path=root, + cache_path=default_cache_path(root), + legacy_home_cache_path=tmp_path / "missing-home-cache.json", + console=PlainConsole(), + ) + captured = capsys.readouterr() + out = captured.out + assert ".cache/codeclone/" in out + assert str(legacy) in out + assert str(repo_workspace_dir(root)) in out diff --git a/tests/test_pipeline_metrics.py b/tests/test_pipeline_metrics.py index 72626cc1..6757796d 100644 --- a/tests/test_pipeline_metrics.py +++ b/tests/test_pipeline_metrics.py @@ -62,7 +62,12 @@ ) from codeclone.core.parallelism import _should_use_parallel from codeclone.core.pipeline import compute_project_metrics -from codeclone.metrics.overloaded_modules import build_overloaded_modules_payload +from codeclone.metrics.overloaded_modules import ( + _percentile_rank, + _score_quantile, + _source_kind, + build_overloaded_modules_payload, +) from codeclone.models import ( ApiBreakingChange, ApiParamSpec, @@ -717,6 +722,49 @@ def test_build_overloaded_modules_payload_flags_project_relative_candidates() -> ] +def test_overloaded_modules_helper_edge_cases() -> None: + assert _source_kind(".", scan_root="") == "other" + assert _source_kind("/repo/tests/fixtures/data.py", scan_root="/repo") == "fixtures" + assert _score_quantile((), 0.5) == 0.0 + assert _score_quantile((0.8,), 0.5) == 0.8 + assert _score_quantile((0.0, 1.0), 0.5) == 0.5 + assert _score_quantile((0.5, 0.5), 0.5) == 0.5 + assert _percentile_rank(1.0, ()) == 0.0 + assert _percentile_rank(1.0, (2.0,)) == 1.0 + + 
+def test_build_overloaded_modules_payload_skips_unknown_units_and_external_deps() -> ( + None +): + payload = build_overloaded_modules_payload( + scan_root="/repo", + source_stats_by_file=[("/repo/pkg/a.py", 10, 1, 0, 0)], + units=[ + { + "filepath": "/repo/missing.py", + "qualname": "pkg.missing:fn", + "cyclomatic_complexity": 5, + } + ], + class_metrics=(), + module_deps=[ + ModuleDep( + source="outside.mod", + target="pkg.a", + import_type="import", + line=1, + ) + ], + ) + + item_obj = payload["items"] + assert isinstance(item_obj, list) and item_obj + item = item_obj[0] + assert isinstance(item, dict) + assert item["complexity_total"] == 0 + assert item["fan_in"] == 0 + + def test_load_cached_metrics_ignores_referenced_names_from_test_files() -> None: entry: CacheEntry = { "stat": {"mtime_ns": 1, "size": 1}, @@ -1168,6 +1216,35 @@ def test_discovery_cache_runtime_reachability_helpers_accept_and_reject( assert helper("broken") is None +def test_discovery_cache_runtime_row_parser_rejects_invalid_enums() -> None: + from codeclone.core.discovery_cache import ( + _runtime_reachability_confidence, + _runtime_reachability_from_cache_row, + _runtime_reachability_target_kind, + ) + + assert _runtime_reachability_confidence("bogus") is None + assert _runtime_reachability_target_kind("bogus") is None + assert ( + _runtime_reachability_from_cache_row( + { + "target_qualname": "pkg.mod:fn", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "target_kind": "bogus", + "framework": "fastapi", + "edge_kind": "registers_handler", + "confidence": "high", + "evidence": "route", + "evidence_symbol": "get", + "source_qualname": "pkg.mod:router", + } + ) + is None + ) + + def test_discovery_cache_parsers_reject_invalid_rows_and_skip_invalid_entries() -> None: assert _api_param_spec_from_cache_dict([]) is None assert ( diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py index 45b4588d..b7dd1b5a 100644 --- a/tests/test_pipeline_process.py +++ 
b/tests/test_pipeline_process.py @@ -37,7 +37,8 @@ ) from codeclone.core.pipeline import analyze from codeclone.core.reporting import report -from codeclone.models import HealthScore, ProjectMetrics +from codeclone.metrics.coverage_join import CoverageJoinParseError +from codeclone.models import DepGraph, HealthScore, ProjectMetrics class _FailExec: @@ -287,6 +288,39 @@ def test_process_small_batch_skips_parallel_executor( assert result.files_skipped == 0 +def test_invoke_process_file_caches_signature_lookup( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + process_file = _stub_process_file(expected_root=str(tmp_path)) + monkeypatch.setattr(core_worker, "process_file", process_file) + core_worker._supported_process_file_kwarg_names.cache_clear() + + for idx in range(2): + filepath = tmp_path / f"cached_{idx}.py" + filepath.write_text("def cached():\n return 1\n", encoding="utf-8") + result = core_worker._invoke_process_file( + str(filepath), + str(tmp_path), + NormalizationConfig(), + 1, + 1, + collect_structural_findings=False, + collect_api_surface=False, + api_include_private_modules=False, + block_min_loc=20, + block_min_stmt=8, + segment_min_loc=20, + segment_min_stmt=10, + ) + assert result.success is True + + cache_info = core_worker._supported_process_file_kwarg_names.cache_info() + assert cache_info.misses == 1 + assert cache_info.hits == 1 + core_worker._supported_process_file_kwarg_names.cache_clear() + + def test_process_parallel_failure_large_batch_invokes_fallback_callback( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -585,3 +619,59 @@ def test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( analysis = analyze(boot=boot, discovery=discovery, processing=processing) assert analysis.project_metrics == project_metrics assert analysis.suppressed_dead_code_items == 0 + + +def test_analyze_coverage_join_parse_error_sets_invalid_status( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + 
boot, discovery, processing, _analysis = _build_report_case(tmp_path) + boot.args.skip_metrics = False + boot.args.skip_dead_code = True + boot.args.skip_dependencies = True + boot.args.coverage_xml = "coverage.xml" + boot.args.coverage_min = 88 + + project_metrics = ProjectMetrics( + complexity_avg=0.0, + complexity_max=0, + high_risk_functions=(), + coupling_avg=0.0, + coupling_max=0, + high_risk_classes=(), + cohesion_avg=0.0, + cohesion_max=0, + low_cohesion_classes=(), + dependency_modules=0, + dependency_edges=0, + dependency_edge_list=(), + dependency_cycles=(), + dependency_max_depth=0, + dependency_longest_chains=(), + dead_code=(), + health=HealthScore(total=100, grade="A", dimensions={"overall": 100}), + ) + monkeypatch.setattr( + core_pipeline, + "compute_project_metrics", + lambda **kwargs: ( + project_metrics, + DepGraph(frozenset(), (), (), 0, 0.0, 0, ()), + (), + ), + ) + monkeypatch.setattr(core_pipeline, "compute_suggestions", lambda **kwargs: ()) + monkeypatch.setattr( + core_pipeline, + "build_metrics_report_payload", + lambda **kwargs: {"health": {"score": 100, "grade": "A", "dimensions": {}}}, + ) + monkeypatch.setattr( + core_pipeline, + "build_coverage_join", + lambda **kwargs: (_ for _ in ()).throw(CoverageJoinParseError("bad xml")), + ) + + result = analyze(boot=boot, discovery=discovery, processing=processing) + assert result.coverage_join is not None + assert result.coverage_join.status == "invalid" diff --git a/tests/test_projection_spawn_guard.py b/tests/test_projection_spawn_guard.py new file mode 100644 index 00000000..9b3b7588 --- /dev/null +++ b/tests/test_projection_spawn_guard.py @@ -0,0 +1,222 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from codeclone.config.memory import MemoryConfig, resolve_memory_config +from codeclone.config.observability import ObservabilityConfig +from codeclone.memory.jobs import compute_projection_stimulus +from codeclone.memory.jobs import workflow as jobs_workflow +from codeclone.memory.jobs.spawn import SpawnWorkerResult +from codeclone.memory.jobs.store import ( + enqueue_projection_job, + has_live_running_job, + worker_claim_token, +) +from codeclone.memory.jobs.workflow import execute_enqueue_projection_rebuild +from codeclone.memory.models import MemoryProject +from codeclone.memory.project import resolve_memory_db_path +from codeclone.memory.schema import open_memory_db +from codeclone.observability import ( + bootstrap, + current_operation_context, + operation, + shutdown, +) +from codeclone.observability.store.schema import ( + observability_store_path, + open_observability_store, +) +from codeclone.report.meta import current_report_timestamp_utc + +from .memory_fixtures import cli_memory_repo + + +def _mark_running( + conn: sqlite3.Connection, + job_id: str, + *, + claimed_by: str, + started_at_utc: str, +) -> None: + conn.execute( + "UPDATE memory_projection_jobs " + "SET status='running', claimed_by=?, started_at_utc=? 
WHERE id=?", + (claimed_by, started_at_utc, job_id), + ) + conn.commit() + + +def _enqueue_one( + conn: sqlite3.Connection, + *, + project: MemoryProject, + root: Path, + config: MemoryConfig, +) -> str: + stimulus = compute_projection_stimulus( + conn=conn, project=project, root_path=root, config=config + ) + return enqueue_projection_job( + conn, project=project, trigger="cli", stimulus=stimulus + ).job_id + + +def test_has_live_running_job_false_when_none(tmp_path: Path) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + conn = open_memory_db(resolve_memory_db_path(root, config)) + try: + assert ( + has_live_running_job( + conn, project_id=project.id, running_timeout_seconds=3600 + ) + is False + ) + finally: + conn.close() + + +def test_has_live_running_job_true_for_live_worker(tmp_path: Path) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + conn = open_memory_db(resolve_memory_db_path(root, config)) + try: + job_id = _enqueue_one(conn, project=project, root=root, config=config) + _mark_running( + conn, + job_id, + claimed_by=worker_claim_token(), + started_at_utc=current_report_timestamp_utc(), + ) + assert ( + has_live_running_job( + conn, project_id=project.id, running_timeout_seconds=3600 + ) + is True + ) + finally: + conn.close() + + +def test_has_live_running_job_reclaims_timed_out_worker(tmp_path: Path) -> None: + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + conn = open_memory_db(resolve_memory_db_path(root, config)) + try: + job_id = _enqueue_one(conn, project=project, root=root, config=config) + # Live PID but a long-past start: the timeout makes it stale. 
+ _mark_running( + conn, + job_id, + claimed_by=worker_claim_token(), + started_at_utc="2020-01-01T00:00:00Z", + ) + assert ( + has_live_running_job( + conn, project_id=project.id, running_timeout_seconds=1 + ) + is False + ) + status = conn.execute( + "SELECT status FROM memory_projection_jobs WHERE id=?", (job_id,) + ).fetchone()[0] + assert status == "failed" # reclaimed + finally: + conn.close() + + +def test_enqueue_skips_spawn_when_worker_running( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(jobs_workflow, "is_ci_environment", lambda: False) + with cli_memory_repo(tmp_path, with_draft=False) as (root, project, _store): + config = resolve_memory_config(root) + conn = open_memory_db(resolve_memory_db_path(root, config)) + try: + job_id = _enqueue_one(conn, project=project, root=root, config=config) + _mark_running( + conn, + job_id, + claimed_by=worker_claim_token(), + started_at_utc=current_report_timestamp_utc(), + ) + finally: + conn.close() + + payload = execute_enqueue_projection_rebuild( + root_path=root, + config=config, + trigger="explicit", + force=True, + spawn_worker=True, + ) + # A worker is already running, so no second process is spawned. + assert payload["spawned"] is False + assert payload["spawn_skipped_reason"] == "worker_already_running" + assert payload["status"] == "enqueued" + + +def test_enqueue_records_spawn_op_b_under_finish( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(jobs_workflow, "is_ci_environment", lambda: False) + captured: dict[str, tuple[str, str] | None] = {} + + def _fake_spawn( + *, root_path: Path, not_before_utc: str | None = None + ) -> SpawnWorkerResult: + # The spawn handoff reads the active operation here; under op B it must + # see B (not the finish op A), so the worker links parent=B. 
+ captured["ctx"] = current_operation_context() + return SpawnWorkerResult(spawned=True, reason=None, pid=4242) + + monkeypatch.setattr(jobs_workflow, "spawn_projection_jobs_worker", _fake_spawn) + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + config = resolve_memory_config(root) + bootstrap(ObservabilityConfig(enabled=True), root=root) + try: + with operation( + name="finish_controlled_change", + surface="mcp", + correlation_id="A-corr", + ) as finish_op: + finish_op_id = finish_op.operation_id + payload = execute_enqueue_projection_rebuild( + root_path=root, + config=config, + trigger="mcp_finish", + force=True, + spawn_worker=True, + ) + finally: + shutdown() + + assert payload["spawned"] is True + ctx = captured["ctx"] + assert ctx is not None + spawn_op_id, spawn_corr = ctx + assert spawn_corr == "A-corr" # B inherits A's correlation + assert spawn_op_id != finish_op_id # B is its own operation, not A + + obs = open_observability_store(observability_store_path(root)) + try: + row = obs.execute( + "SELECT operation_id, parent_operation_id, correlation_id " + "FROM platform_operations WHERE name='memory.projection.spawn'" + ).fetchone() + finally: + obs.close() + # Op B persisted, parented to the finish op (A) with A's correlation. + assert row is not None + assert row[0] == spawn_op_id + assert row[1] == finish_op_id + assert row[2] == "A-corr" diff --git a/tests/test_public_api_surface.py b/tests/test_public_api_surface.py index 4e052dfe..c0e558cc 100644 --- a/tests/test_public_api_surface.py +++ b/tests/test_public_api_surface.py @@ -1,3 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations import inspect diff --git a/tests/test_renderer_isolation.py b/tests/test_renderer_isolation.py index eb72a496..1b6bf29c 100644 --- a/tests/test_renderer_isolation.py +++ b/tests/test_renderer_isolation.py @@ -1,3 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations from pathlib import Path diff --git a/tests/test_repo_paths.py b/tests/test_repo_paths.py new file mode 100644 index 00000000..a23ec2ef --- /dev/null +++ b/tests/test_repo_paths.py @@ -0,0 +1,204 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.utils.repo_paths import ( + PathOutsideRepoError, + RepoPathError, + RepoPathPolicy, + display_repo_path, + resolve_repo_relative_path, + resolve_under_repo_root, +) + + +def test_resolve_repo_relative_path_keeps_paths_under_root(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + + assert resolve_repo_relative_path(root, "nested/cache.json") == ( + root / "nested" / "cache.json" + ).resolve(strict=False) + assert resolve_repo_relative_path(root, "nested/../cache.json") == ( + root / "cache.json" + ).resolve(strict=False) + + +@pytest.mark.parametrize("raw", ["", " "]) +def test_resolve_under_repo_root_rejects_empty_paths( + tmp_path: Path, + raw: str, +) -> None: + root = tmp_path / "repo" + root.mkdir() + + with pytest.raises(RepoPathError, match="must not be empty"): + resolve_under_repo_root(root, raw, policy=RepoPathPolicy()) + + +def test_resolve_under_repo_root_rejects_absolute_without_opt_in( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + + with pytest.raises(PathOutsideRepoError, match="absolute paths"): + resolve_under_repo_root( + root, + root / "cache.json", + policy=RepoPathPolicy(), + ) + + +def test_resolve_under_repo_root_allows_absolute_under_root_with_opt_in( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + target = root / "cache.json" + + assert resolve_under_repo_root( + root, + target, + policy=RepoPathPolicy(allow_absolute=True), + ) == target.resolve(strict=False) + + +def test_resolve_under_repo_root_rejects_absolute_external_by_default( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + external = tmp_path / "external-cache.json" + + with pytest.raises(PathOutsideRepoError, match="absolute paths"): + resolve_under_repo_root(root, external, policy=RepoPathPolicy()) + + with 
pytest.raises(PathOutsideRepoError, match="escapes repository root"): + resolve_under_repo_root( + root, + external, + policy=RepoPathPolicy(allow_absolute=True), + ) + + +def test_resolve_under_repo_root_allows_external_only_with_full_opt_in( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + external = tmp_path / "external-cache.json" + + assert resolve_under_repo_root( + root, + external, + policy=RepoPathPolicy(allow_absolute=True, allow_external=True), + ) == external.resolve(strict=False) + + +@pytest.mark.parametrize("raw", ["../outside.json", "nested/../../outside.json"]) +def test_resolve_under_repo_root_rejects_traversal_escapes( + tmp_path: Path, + raw: str, +) -> None: + root = tmp_path / "repo" + root.mkdir() + + with pytest.raises(PathOutsideRepoError, match="escapes repository root"): + resolve_under_repo_root(root, raw, policy=RepoPathPolicy()) + + +def test_resolve_under_repo_root_rejects_symlink_escapes(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + external = tmp_path / "external" + external.mkdir() + link = root / "link" + try: + link.symlink_to(external, target_is_directory=True) + except (NotImplementedError, OSError) as exc: + pytest.skip(f"symlink unavailable: {exc}") + + with pytest.raises(PathOutsideRepoError, match="escapes repository root"): + resolve_under_repo_root(root, "link/cache.json", policy=RepoPathPolicy()) + + +def test_resolve_under_repo_root_type_policy(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + file_path = root / "state.sqlite3" + file_path.write_text("", encoding="utf-8") + dir_path = root / "state" + dir_path.mkdir() + + assert ( + resolve_under_repo_root( + root, + "state.sqlite3", + policy=RepoPathPolicy(must_exist=True, must_be_file=True), + ) + == file_path.resolve() + ) + assert ( + resolve_under_repo_root( + root, + "state", + policy=RepoPathPolicy(must_exist=True, must_be_dir=True), + ) + == dir_path.resolve() + ) + + with 
pytest.raises(RepoPathError, match="must be a file"): + resolve_under_repo_root( + root, + "state", + policy=RepoPathPolicy(must_exist=True, must_be_file=True), + ) + with pytest.raises(RepoPathError, match="must be a directory"): + resolve_under_repo_root( + root, + "state.sqlite3", + policy=RepoPathPolicy(must_exist=True, must_be_dir=True), + ) + + +def test_resolve_under_repo_root_rejects_missing_when_required( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + + with pytest.raises(RepoPathError, match="cannot resolve path"): + resolve_under_repo_root( + root, + "missing.json", + policy=RepoPathPolicy(must_exist=True), + ) + + +def test_display_repo_path_uses_relative_path_when_possible(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + assert display_repo_path(root, root / "nested" / "x.py") == "nested/x.py" + assert display_repo_path(root, tmp_path / "outside.py") == str( + tmp_path / "outside.py" + ) + + +def test_resolve_under_repo_root_requires_directory_root(tmp_path: Path) -> None: + missing = tmp_path / "missing" + with pytest.raises(RepoPathError, match="cannot resolve repository root"): + resolve_under_repo_root(missing, "x", policy=RepoPathPolicy()) + + file_root = tmp_path / "repo.py" + file_root.write_text("", encoding="utf-8") + with pytest.raises(RepoPathError, match="not a directory"): + resolve_under_repo_root(file_root, "x", policy=RepoPathPolicy()) diff --git a/tests/test_security.py b/tests/test_security.py index 18a74c96..30589a99 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -7,6 +7,7 @@ import os import tempfile from pathlib import Path +from typing import cast from unittest.mock import patch import pytest @@ -17,7 +18,15 @@ from codeclone.core.worker import process_file from codeclone.report.explain import build_block_group_facts from codeclone.report.html import build_html_report -from codeclone.scanner import iter_py_files +from codeclone.report.renderers.markdown import 
render_markdown_report_document +from codeclone.report.renderers.sarif import render_sarif_report_document +from codeclone.scanner import iter_py_files, resolved_path_under_root +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import ( + CachePolicy, + MCPAnalysisRequest, + MCPServiceContractError, +) def test_scanner_path_traversal() -> None: @@ -70,6 +79,29 @@ def _huge_stat(path: str, *args: object, **kwargs: object) -> os.stat_result: os.remove(tmp_path) +def test_process_file_rejects_symlink_target_outside_root(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + outside = tmp_path / "outside" + workspace.mkdir() + outside.mkdir() + cfg = NormalizationConfig() + + module = workspace / "module.py" + module.write_text("x = 1\n", encoding="utf-8") + assert process_file(str(module), str(workspace), cfg, 0, 0).success is True + + outside_target = outside / "secret.py" + outside_target.write_text("y = 2\n", encoding="utf-8") + module.unlink() + module.symlink_to(outside_target) + + result = process_file(str(module), str(workspace), cfg, 0, 0) + assert result.success is False + assert result.error_kind == "source_read_error" + assert result.error is not None + assert "outside repository root" in result.error + + def test_html_report_escapes_user_content(tmp_path: Path) -> None: bad_path = tmp_path / 'x" onmouseover="alert(1).py' good_path = tmp_path / "y.py" @@ -105,3 +137,151 @@ def test_html_report_escapes_user_content(tmp_path: Path) -> None: assert 'onmouseover="alert(1)' not in html assert 'data-qualname="<script>alert(1)</script>"' in html assert "" onmouseover="alert(1).py" in html + + +def test_html_report_escapes_title_and_does_not_emit_raw_script(tmp_path: Path) -> None: + module = tmp_path / "mod.py" + module.write_text("def f():\n return 1\n", encoding="utf-8") + payload = "" + html = build_html_report( + func_groups={ + "k": [ + { + "qualname": payload, + "filepath": str(module), + 
"start_line": 1, + "end_line": 2, + "loc": 2, + } + ] + }, + block_groups={}, + segment_groups={}, + block_group_facts=build_block_group_facts({}), + title=payload, + ) + assert payload not in html + assert "<img src=x onerror=alert(1)>" in html + + +def test_markdown_and_sarif_projections_do_not_emit_raw_html_tags( + tmp_path: Path, +) -> None: + report_payload: dict[str, object] = { + "report_schema_version": "2.11", + "meta": {"generator": {"name": "codeclone", "version": "2.1.0"}}, + "inventory": {"files": 0, "lines": 0, "functions": 0, "classes": 0}, + "findings": { + "groups": { + "clones": {"functions": [], "blocks": [], "segments": []}, + "structural": [], + "design": [], + } + }, + "summary": {}, + "metrics": {}, + } + markdown = render_markdown_report_document(report_payload) + sarif = render_sarif_report_document(report_payload) + assert "" + html = build_html_report( + func_groups={ + "k": [ + { + "qualname": payload, + "filepath": str(module), + "start_line": 1, + "end_line": 2, + "loc": 2, + } + ] + }, + block_groups={}, + segment_groups={}, + block_group_facts=build_block_group_facts({}), + ) + assert payload not in html + assert "", "<script>alert(1)</script>"), + ('" onclick="alert(1)', "" onclick="alert(1)"), + ("`backtick`", "`backtick`"), + ("\u2028line sep", "
line sep"), + ("\u2029para sep", "
para sep"), + (None, ""), + ], +) +def test_escape_html_neutralizes_html_metacharacters( + raw: object, expected_fragment: str +) -> None: + escaped = _escape_html(raw) + assert expected_fragment in escaped + if isinstance(raw, str) and "<" in raw: + assert "<" not in escaped + + +def test_html_report_js_avoids_dataset_innerhtml_regression() -> None: + """Regression guard for DOM XSS pattern in clone metrics modal.""" + source = _HTML_JS_PATH.read_text(encoding="utf-8") + assert "dlg.querySelector('#modal-body').innerHTML=items" not in source + assert "body.innerHTML=tpl.innerHTML" not in source + assert "document.importNode(tpl.content" in source + assert "list.className='info-dl'" in source + + +# ── cache integrity (checksum contract; not secret-keyed) ──────────── + + +def test_cache_signature_verification_uses_constant_time_compare() -> None: + payload: dict[str, object] = {"version": "test", "files": {}} + signature = sign_cache_payload(payload) + assert verify_cache_payload_signature(payload, signature) is True + assert verify_cache_payload_signature(payload, "0" * len(signature)) is False + + +def test_cache_signature_is_stable_for_canonical_payload() -> None: + payload: dict[str, object] = {"b": 2, "a": 1, "files": {}} + first = sign_cache_payload(payload) + second = sign_cache_payload({"a": 1, "b": 2, "files": {}}) + assert first == second + + +# ── suppressions: malformed input must not crash extraction ───────── + + +@pytest.mark.parametrize( + "source", + [ + "# codeclone: ignore[dead-code, unknown-rule]\n", + "# codeclone: ignore[not a rule!]\n", + "# codeclone ignore[dead-code]\n", + '"""\n# codeclone: ignore[dead-code]\n', + ], +) +def test_extract_suppression_directives_ignores_malformed_or_unknown_rules( + source: str, +) -> None: + directives = extract_suppression_directives(source) + rule_ids = {rule for directive in directives for rule in directive.rules} + assert rule_ids.issubset(SUPPORTED_RULE_IDS) + + +def 
test_extract_suppression_directives_accepts_supported_rule_ids() -> None: + source = "# codeclone: ignore[dead-code]\ndef keep():\n return 1\n" + directives = extract_suppression_directives(source) + assert len(directives) == 1 + assert directives[0].rules == ("dead-code",) + + +# ── scanner file-count cap (DoS guard) ─────────────────────────────── + + +def test_iter_py_files_rejects_excessive_file_count(tmp_path: Path) -> None: + for index in range(5): + (tmp_path / f"mod_{index}.py").write_text("x = 1\n", encoding="utf-8") + + assert len(list(iter_py_files(str(tmp_path), max_files=10))) == 5 + + with pytest.raises(ValidationError, match="File count exceeds limit"): + list(iter_py_files(str(tmp_path), max_files=3)) + + +# ── baseline integrity tamper detection ────────────────────────────── + + +def test_baseline_verify_integrity_rejects_tampered_clone_payload( + tmp_path: Path, +) -> None: + """Trusted baseline comparison must fail closed on payload tampering.""" + import json + + import codeclone.baseline as baseline_mod + import codeclone.baseline.clone_baseline as clone_baseline_mod + from codeclone.baseline import Baseline + from codeclone.contracts.errors import BaselineValidationError + + func_id = f"{'a' * 40}|0-19" + block_id = "|".join(["a" * 40, "b" * 40, "c" * 40, "d" * 40]) + payload = clone_baseline_mod._baseline_payload( + functions={func_id}, + blocks={block_id}, + generator="codeclone", + schema_version="2.1", + fingerprint_version="1", + python_tag=baseline_mod.current_python_tag(), + generator_version="2.1.0", + created_at="2026-02-08T11:43:16Z", + ) + baseline_path = tmp_path / "codeclone.baseline.json" + baseline_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), "utf-8") + + baseline = Baseline(baseline_path) + baseline.load() + baseline.verify_integrity() + + clones = payload["clones"] + assert isinstance(clones, dict) + clones["functions"] = [func_id, f"{'b' * 40}|20-39"] + baseline_path.write_text(json.dumps(payload, 
ensure_ascii=False, indent=2), "utf-8") + tampered = Baseline(baseline_path) + tampered.load() + with pytest.raises(BaselineValidationError, match="payload_sha256 mismatch") as exc: + tampered.verify_integrity() + assert exc.value.status == "integrity_failed" + + +# ── workspace intent registry path safety ──────────────────────────── + + +def test_workspace_intent_path_helper_rejects_escape_attempts( + tmp_path: Path, +) -> None: + from codeclone.surfaces.mcp._workspace_intents import ( + _is_safe_intent_path, + intent_path, + registry_dir, + ) + + registry = registry_dir(tmp_path) + registry.mkdir(parents=True, exist_ok=True) + valid = intent_path( + root=tmp_path, + pid=123, + start_epoch=456, + intent_id="intent-aaa-001", + ) + assert _is_safe_intent_path(valid, registry) is True + + assert ( + _is_safe_intent_path( + Path("../outside/123-456-intent-aaa-001.json"), + registry, + ) + is False + ) + + outside = tmp_path / "outside.json" + outside.write_text("{}", encoding="utf-8") + symlink = registry / "123-456-intent-aaa-001.json" + _symlink_or_skip(symlink, outside) + assert _is_safe_intent_path(symlink, registry) is False + + +# ── git diff ref: control characters and injection payloads ────────── + + +@pytest.mark.parametrize( + "ref", + [ + "HEAD\x00", + "main\r\n", + "refs/heads/main;id", + "$(curl attacker)", + "HEAD && git status", + ], +) +def test_validate_git_diff_ref_rejects_control_and_shell_metacharacters( + ref: str, +) -> None: + with pytest.raises(ValueError, match="Invalid git diff ref"): + validate_git_diff_ref(ref) diff --git a/tests/test_semantic_chunking.py b/tests/test_semantic_chunking.py new file mode 100644 index 00000000..cb053fa0 --- /dev/null +++ b/tests/test_semantic_chunking.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.memory.semantic.chunking import ( + IdentityPassageChunker, + collapse_trajectory_hits, + expand_projection, + trajectory_chunk_row_id, +) +from codeclone.memory.semantic.models import SemanticHit, SemanticProjection +from codeclone.memory.semantic.projection import text_hash + + +class _FixedChunker: + def __init__(self, chunks: tuple[str, ...]) -> None: + self._chunks = chunks + + def chunk_text(self, text: str) -> tuple[str, ...]: + return self._chunks + + +def _trajectory_projection( + text: str, *, source_id: str = "traj-1" +) -> SemanticProjection: + return SemanticProjection( + source="trajectory", + source_id=source_id, + kind="trajectory", + text=text, + text_hash=text_hash(text), + ) + + +def _memory_projection(text: str) -> SemanticProjection: + return SemanticProjection( + source="memory", + source_id="mem-1", + kind="contract_note", + text=text, + text_hash=text_hash(text), + ) + + +def test_trajectory_chunk_row_id_is_deterministic() -> None: + assert trajectory_chunk_row_id("abc", 0) == "trajectory:abc:chunk:000" + assert trajectory_chunk_row_id("abc", 12) == "trajectory:abc:chunk:012" + + +def test_expand_projection_keeps_memory_as_single_row() -> None: + projection = _memory_projection("short note") + (unit,) = expand_projection(projection, IdentityPassageChunker()) + assert unit.row_id == "mem-1" + assert unit.parent_id is None + assert unit.chunk_index is None + + +def test_expand_projection_splits_trajectory_into_chunk_rows() -> None: + projection = _trajectory_projection("full trajectory text") + chunker = _FixedChunker(("part-a", "part-b", "part-c")) + units = expand_projection(projection, chunker) + assert len(units) == 3 + assert [unit.row_id for unit in units] == [ + "trajectory:traj-1:chunk:000", + "trajectory:traj-1:chunk:001", + "trajectory:traj-1:chunk:002", + ] + assert all(unit.parent_id == "traj-1" for unit 
in units) + assert [unit.chunk_index for unit in units] == [0, 1, 2] + assert all(unit.chunk_count == 3 for unit in units) + assert [unit.text_hash for unit in units] == [ + text_hash("part-a"), + text_hash("part-b"), + text_hash("part-c"), + ] + + +def test_expand_projection_single_trajectory_chunk_uses_parent_row_id() -> None: + projection = _trajectory_projection("fits in one chunk") + (unit,) = expand_projection(projection, IdentityPassageChunker()) + assert unit.row_id == "traj-1" + assert unit.parent_id is None + + +def test_collapse_trajectory_hits_keeps_best_score_per_parent() -> None: + hits = [ + SemanticHit( + source_id="trajectory:t1:chunk:000", + source="trajectory", + score=0.4, + parent_id="t1", + chunk_index=0, + chunk_count=3, + ), + SemanticHit( + source_id="trajectory:t1:chunk:001", + source="trajectory", + score=0.9, + parent_id="t1", + chunk_index=1, + chunk_count=3, + ), + SemanticHit( + source_id="trajectory:t2:chunk:000", + source="trajectory", + score=0.7, + parent_id="t2", + chunk_index=0, + chunk_count=1, + ), + ] + collapsed = collapse_trajectory_hits(hits, k=2) + assert len(collapsed) == 2 + assert collapsed[0].parent_id == "t1" + assert collapsed[0].chunk_index == 1 + assert collapsed[0].score == 0.9 + assert collapsed[1].parent_id == "t2" + + +def test_chunking_public_exports() -> None: + from codeclone.memory.semantic import chunking as chunking_mod + + assert chunking_mod.TRAJECTORY_SEARCH_OVERSAMPLE == 4 diff --git a/tests/test_semantic_determinism_gate.py b/tests/test_semantic_determinism_gate.py new file mode 100644 index 00000000..2109c709 --- /dev/null +++ b/tests/test_semantic_determinism_gate.py @@ -0,0 +1,87 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import MCPAnalysisRequest, MCPGateRequest +from tests.memory_fixtures import cli_memory_repo + + +def _write_semantic_config(root: Path, *, enabled: bool) -> None: + value = "true" if enabled else "false" + (root / "pyproject.toml").write_text( + f"[tool.codeclone.memory.semantic]\nenabled = {value}\ndimension = 64\n", + encoding="utf-8", + ) + + +def _write_python_source(root: Path) -> None: + package = root / "pkg" + package.mkdir(exist_ok=True) + (package / "mod.py").write_text( + "\n".join( + [ + "def compute(value: int) -> int:", + " total = value + 1", + " total += 2", + " total += 3", + " total += 4", + " total += 5", + " return total", + "", + ] + ), + encoding="utf-8", + ) + + +def _deterministic_snapshot( + service: CodeCloneMCPService, root: Path +) -> dict[str, object]: + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(root), + respect_pyproject=True, + cache_policy="off", + ) + ) + run_id = str(summary["run_id"]) + return { + "run_id": run_id, + "summary": summary, + "gates": service.evaluate_gates(MCPGateRequest(run_id=run_id)), + "receipt": service.create_review_receipt(run_id=run_id, format="json"), + "memory": service.get_relevant_memory( + root=str(root), + scope=["pkg/mod.py"], + max_records=5, + detail_level="compact", + ), + } + + +def test_semantic_enabled_does_not_change_deterministic_outputs( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr( + "codeclone.surfaces.mcp.session._current_report_timestamp_utc", + lambda: "2026-06-03T00:00:00Z", + ) + with cli_memory_repo(tmp_path, with_draft=False) as (root, _project, _store): + _write_python_source(root) + service = CodeCloneMCPService(history_limit=4) + + _write_semantic_config(root, 
enabled=False) + disabled = _deterministic_snapshot(service, root) + + _write_semantic_config(root, enabled=True) + enabled = _deterministic_snapshot(service, root) + + assert enabled == disabled diff --git a/tests/test_semantic_embed_batching.py b/tests/test_semantic_embed_batching.py new file mode 100644 index 00000000..136f36b4 --- /dev/null +++ b/tests/test_semantic_embed_batching.py @@ -0,0 +1,100 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.memory.embedding.batching import ( + EmbedBatchLimits, + pack_adaptive_batches, + score_lengths, +) +from codeclone.memory.semantic.models import SemanticProjection +from codeclone.memory.semantic.projection import text_hash + + +def _projection( + source_id: str, text: str, *, source: str = "trajectory" +) -> SemanticProjection: + return SemanticProjection( + source=source, # type: ignore[arg-type] + source_id=source_id, + kind="test", + text=text, + text_hash=text_hash(text), + ) + + +def test_adaptive_batching_splits_on_padded_token_volume() -> None: + short = _projection("short", "a" * 100) + long = _projection("long", "b" * 4000) + scored = score_lengths( + [short, long], + char_counts=(100, 4000), + token_counts=(25, 1000), + source_kinds=("trajectory", "trajectory"), + source_ids=("short", "long"), + ) + batches = pack_adaptive_batches( + scored, + limits=EmbedBatchLimits(max_documents=64, max_padded_tokens=1500), + ) + assert len(batches) == 2 + assert {batch.items[0].source_id for batch in batches} == {"short", "long"} + + +def test_length_bucketing_is_deterministic() -> None: + items = [ + _projection("b", "bb"), + _projection("a", "aaaa"), + _projection("c", "c"), + ] + scored = score_lengths( + items, + char_counts=(2, 4, 1), 
+ token_counts=(10, 20, 10), + source_kinds=("memory", "memory", "memory"), + source_ids=("b", "a", "c"), + ) + assert [item.source_id for item in scored] == ["b", "c", "a"] + + +def test_adaptive_batching_keeps_similar_lengths_together() -> None: + projections = [ + _projection("p1", "x" * 200), + _projection("p2", "y" * 220), + _projection("p3", "z" * 5000), + ] + scored = score_lengths( + projections, + char_counts=(200, 220, 5000), + token_counts=(50, 55, 1250), + source_kinds=("trajectory", "trajectory", "trajectory"), + source_ids=("p1", "p2", "p3"), + ) + batches = pack_adaptive_batches( + scored, + limits=EmbedBatchLimits(max_documents=64, max_padded_tokens=2000), + ) + assert len(batches) == 2 + assert {item.source_id for item in batches[0].items} == {"p1", "p2"} + assert batches[1].items[0].source_id == "p3" + + +def test_padding_amplification_metric() -> None: + projections = [_projection("only", "x" * 400)] + scored = score_lengths( + projections, + char_counts=(400,), + token_counts=(100,), + source_kinds=("audit",), + source_ids=("only",), + ) + (batch,) = pack_adaptive_batches( + scored, + limits=EmbedBatchLimits(max_documents=64, max_padded_tokens=8192), + ) + assert batch.padded_tokens == batch.max_tokens + assert batch.padding_amplification_permille == 1000 diff --git a/tests/test_semantic_embedding.py b/tests/test_semantic_embedding.py new file mode 100644 index 00000000..9cd74e96 --- /dev/null +++ b/tests/test_semantic_embedding.py @@ -0,0 +1,881 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import math +from collections.abc import Sequence +from pathlib import Path +from types import SimpleNamespace +from typing import Any, ClassVar + +import pytest + +from codeclone.config.memory import SemanticConfig +from codeclone.memory.embedding import ( + DeterministicHashEmbeddingProvider, + embed_documents, + embed_query, + resolve_embedding_provider, +) +from codeclone.memory.exceptions import MemorySemanticUnavailableError + + +class _FakeTextEmbedding: + def __init__( + self, + *, + model_name: str, + cache_dir: str, + local_files_only: bool, + vector_value: float = 1.0, + vectors: list[object] | None = None, + raise_on_embed: bool = False, + inner_model: object | None = None, + ) -> None: + self.model_name = model_name + self.cache_dir = cache_dir + self.local_files_only = local_files_only + self.vector_value = vector_value + self.vectors = vectors + self.raise_on_embed = raise_on_embed + self.inputs: list[str] = [] + self.model = inner_model or SimpleNamespace(tokenizer=None) + + def embed(self, texts: list[str]) -> list[object]: + if self.raise_on_embed: + raise RuntimeError("embed failed") + self.inputs.extend(texts) + if self.vectors is not None: + return self.vectors + return [[self.vector_value] * 384 for _ in texts] + + +def _install_fake_fastembed( + monkeypatch: pytest.MonkeyPatch, + *, + vector_value: float = 1.0, + vectors: list[object] | None = None, + raise_on_init: bool = False, + raise_on_embed: bool = False, + expose_text_embedding: bool = True, + inner_model: object | None = None, +) -> list[_FakeTextEmbedding]: + import importlib + + created: list[_FakeTextEmbedding] = [] + original_import_module = importlib.import_module + + def _fake_import_module(name: str, package: str | None = None) -> Any: + if name != "fastembed": + return original_import_module(name, package) + if not expose_text_embedding: + return SimpleNamespace() + 
+ class _ConfiguredFakeTextEmbedding(_FakeTextEmbedding): + def __init__( + self, + *, + model_name: str, + cache_dir: str, + local_files_only: bool, + ) -> None: + if raise_on_init: + raise RuntimeError("model unavailable") + super().__init__( + model_name=model_name, + cache_dir=cache_dir, + local_files_only=local_files_only, + vector_value=vector_value, + vectors=vectors, + raise_on_embed=raise_on_embed, + inner_model=inner_model, + ) + created.append(self) + + return SimpleNamespace(TextEmbedding=_ConfiguredFakeTextEmbedding) + + monkeypatch.setattr(importlib, "import_module", _fake_import_module) + return created + + +def _resolve_fastembed_provider( + monkeypatch: pytest.MonkeyPatch, + *, + inner_model: object | None = None, + vector_value: float = 1.0, + vectors: list[object] | None = None, + raise_on_init: bool = False, + raise_on_embed: bool = False, + expose_text_embedding: bool = True, +) -> tuple[Any, list[_FakeTextEmbedding]]: + from codeclone.memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider + + created = _install_fake_fastembed( + monkeypatch, + vector_value=vector_value, + vectors=vectors, + raise_on_init=raise_on_init, + raise_on_embed=raise_on_embed, + expose_text_embedding=expose_text_embedding, + inner_model=inner_model, + ) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + assert isinstance(provider, FastEmbedEmbeddingProvider) + return provider, created + + +def test_deterministic_embedding_is_stable_and_correct_dimension() -> None: + provider = DeterministicHashEmbeddingProvider(dimension=64) + first = provider.embed(["recover after MCP restart"]) + second = provider.embed(["recover after MCP restart"]) + assert first == second # same text -> same vector + assert len(first) == 1 + assert len(first[0]) == 64 + + +def test_deterministic_embedding_is_l2_normalized() -> None: + provider = DeterministicHashEmbeddingProvider(dimension=128) + (vector,) = 
provider.embed(["checkpoint degrades to scope_only"]) + norm = math.sqrt(sum(value * value for value in vector)) + assert math.isclose(norm, 1.0, abs_tol=1e-9) + + +def test_deterministic_embedding_distinguishes_texts() -> None: + provider = DeterministicHashEmbeddingProvider(dimension=64) + vectors = provider.embed(["alpha", "beta"]) + assert vectors[0] != vectors[1] + + +def test_deterministic_query_and_document_helpers_use_provider_methods() -> None: + provider = DeterministicHashEmbeddingProvider(dimension=7) + assert embed_query(provider, "alpha") == provider.embed_query("alpha") + assert embed_documents(provider, ["beta"]) == provider.embed_documents(["beta"]) + + +def test_deterministic_embedding_rejects_nonpositive_dimension() -> None: + with pytest.raises(ValueError, match="dimension must be positive"): + DeterministicHashEmbeddingProvider(dimension=0) + + +def test_resolve_diagnostic_provider() -> None: + config = SemanticConfig(embedding_provider="diagnostic", dimension=256) + provider = resolve_embedding_provider(config) + assert provider.model_id == "diagnostic-hash-v1" + assert provider.dimension == 256 + + +def test_resolve_local_model_provider_fails_clear() -> None: + config = SemanticConfig(embedding_provider="local_model") + with pytest.raises(MemorySemanticUnavailableError, match="local_model"): + resolve_embedding_provider(config) + + +def test_resolve_api_provider_fails_clear() -> None: + config = SemanticConfig(embedding_provider="api") + with pytest.raises(MemorySemanticUnavailableError, match="api"): + resolve_embedding_provider(config) + + +class _LegacyEmbeddingProvider: + model_id = "legacy" + dimension = 2 + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + return [[float(len(text)), 0.0] for text in texts] + + +def test_embedding_helpers_fall_back_to_embed() -> None: + provider = _LegacyEmbeddingProvider() + assert embed_query(provider, "abc") == [3.0, 0.0] + assert embed_documents(provider, ["a", "abcd"]) == [[1.0, 0.0], 
[4.0, 0.0]] + + +def test_fastembed_provider_uses_local_model_cache_and_prefixes( + monkeypatch: pytest.MonkeyPatch, +) -> None: + created = _install_fake_fastembed(monkeypatch) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + + assert provider.model_id == "fastembed:BAAI/bge-small-en-v1.5" + assert provider.dimension == 384 + assert embed_query(provider, "recover after restart") == [1.0] * 384 + assert embed_documents(provider, ["scope-aware hygiene"]) == [[1.0] * 384] + assert created[0].local_files_only is True + assert created[0].inputs == [ + "query: recover after restart", + "passage: scope-aware hygiene", + ] + assert provider.embed(["legacy call"]) == [[1.0] * 384] + + +def test_fastembed_provider_defers_model_load_until_first_embed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + created = _install_fake_fastembed(monkeypatch) + config = SemanticConfig(embedding_provider="fastembed") + + provider = resolve_embedding_provider(config) + # Construction verifies the package but must NOT load the ONNX model yet. + assert created == [] + assert provider.model_id == "fastembed:BAAI/bge-small-en-v1.5" + + embed_query(provider, "first call loads the model") + assert len(created) == 1 + # A second embed reuses the cached model instead of reloading it. + embed_documents(provider, ["reuse the model"]) + assert len(created) == 1 + + +def test_fastembed_provider_honors_download_opt_in( + monkeypatch: pytest.MonkeyPatch, +) -> None: + created = _install_fake_fastembed(monkeypatch, vector_value=0.0) + config = SemanticConfig( + embedding_provider="fastembed", + allow_model_download=True, + ) + provider = resolve_embedding_provider(config) + + assert provider.dimension == 384 + # The download flag is passed when the model loads — i.e. at first embed. 
+ embed_query(provider, "trigger lazy model load") + assert [item.local_files_only for item in created] == [False] + + +def test_fastembed_provider_fails_clear_without_text_embedding( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _install_fake_fastembed(monkeypatch, expose_text_embedding=False) + config = SemanticConfig(embedding_provider="fastembed") + + with pytest.raises(MemorySemanticUnavailableError, match="TextEmbedding"): + resolve_embedding_provider(config) + + +@pytest.mark.parametrize( + ("allow_model_download", "message"), + [(False, "download disabled"), (True, "download allowed")], +) +def test_fastembed_provider_fails_clear_when_model_unavailable( + monkeypatch: pytest.MonkeyPatch, + *, + allow_model_download: bool, + message: str, +) -> None: + _install_fake_fastembed(monkeypatch, raise_on_init=True) + config = SemanticConfig( + embedding_provider="fastembed", + allow_model_download=allow_model_download, + ) + + # Resolve succeeds (cheap package check); the model load — and its failure — + # is deferred to the first embed. 
+ provider = resolve_embedding_provider(config) + with pytest.raises(MemorySemanticUnavailableError, match=message): + embed_query(provider, "boom") + + +def test_fastembed_provider_fails_clear_when_embedding_call_fails( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _install_fake_fastembed(monkeypatch, raise_on_embed=True) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + + with pytest.raises(MemorySemanticUnavailableError, match="embedding failed"): + embed_query(provider, "boom") + + +def test_fastembed_provider_fails_clear_on_dimension_mismatch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _install_fake_fastembed(monkeypatch, vectors=[[1.0, 2.0]]) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + + with pytest.raises(MemorySemanticUnavailableError, match="dimension mismatch"): + embed_query(provider, "short vector") + + +def test_fastembed_provider_fails_clear_on_non_iterable_vector( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _install_fake_fastembed(monkeypatch, vectors=[object()]) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + + with pytest.raises(MemorySemanticUnavailableError, match="non-iterable"): + embed_query(provider, "bad vector") + + +def test_fastembed_provider_fails_clear_on_string_vector( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _install_fake_fastembed(monkeypatch, vectors=["bad"]) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + + with pytest.raises(MemorySemanticUnavailableError, match="non-iterable"): + embed_query(provider, "bad vector") + + +def test_fastembed_provider_fails_clear_when_extra_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import importlib + + original_import_module = importlib.import_module + + def _fake_import_module(name: str, package: str | None = None) -> Any: + 
if name == "fastembed": + raise ImportError(name) + return original_import_module(name, package) + + monkeypatch.setattr(importlib, "import_module", _fake_import_module) + + config = SemanticConfig(embedding_provider="fastembed") + with pytest.raises(MemorySemanticUnavailableError, match="semantic-fastembed"): + resolve_embedding_provider(config) + + +def test_fastembed_estimate_tokens_without_model_load( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider + + created = _install_fake_fastembed(monkeypatch, vector_value=0.0) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + assert isinstance(provider, FastEmbedEmbeddingProvider) + + (count,) = provider.estimate_token_counts(["hello"]) + assert count == 4 # ceil(len("passage: hello") / 4) + assert created == [] + + +def test_fastembed_max_sequence_tokens_without_model_load( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider + + _install_fake_fastembed(monkeypatch) + config = SemanticConfig(embedding_provider="fastembed") + provider = resolve_embedding_provider(config) + assert isinstance(provider, FastEmbedEmbeddingProvider) + + assert provider.max_sequence_tokens() == 512 + + +class _FakeEncoding: + def __init__(self, length: int) -> None: + self.ids = list(range(length)) + + +class _FakeTokenizer: + def __init__(self) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=512) + + def no_truncation(self) -> None: + self._truncated = False + + def enable_truncation(self, *, max_length: int) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=max_length) + + def encode(self, text: str, *, add_special_tokens: bool) -> _FakeEncoding: + return self.encode_batch([text])[0] + + def decode(self, ids: list[int]) -> str: + return "x" * len(ids) + + def 
encode_batch(self, texts: list[str]) -> list[_FakeEncoding]: + length = 512 if self._truncated else 1000 + return [_FakeEncoding(length) for _ in texts] + + +class _FakeInnerModel: + def __init__(self) -> None: + self.tokenizer = _FakeTokenizer() + + def tokenize(self, documents: list[str]) -> list[_FakeEncoding]: + return self.tokenizer.encode_batch(documents) + + +def test_fastembed_probe_passage_token_counts_reports_raw_and_effective( + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider, created = _resolve_fastembed_provider( + monkeypatch, + inner_model=_FakeInnerModel(), + ) + assert provider.estimator_label == "fastembed_tokenizer" + + (counts,) = provider.probe_passage_token_counts(["x" * 4000]) + assert counts.raw == 1000 + assert counts.effective == 512 + assert created + + +def test_fastembed_chunk_text_splits_long_document( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _WindowEncoding: + def __init__(self, length: int) -> None: + self.ids = list(range(length)) + + class _ChunkTokenizer: + _SPECIAL_TOKENS = 2 + + def __init__(self) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=512) + + def no_truncation(self) -> None: + self._truncated = False + + def enable_truncation(self, *, max_length: int) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=max_length) + + def encode(self, text: str, *, add_special_tokens: bool) -> _WindowEncoding: + content_tokens = len(text) + if add_special_tokens: + return _WindowEncoding(content_tokens + self._SPECIAL_TOKENS) + return _WindowEncoding(content_tokens) + + def decode(self, ids: list[int]) -> str: + return f"chunk-{len(ids)}" + + class _ChunkInnerModel: + def __init__(self) -> None: + self.tokenizer = _ChunkTokenizer() + + def tokenize(self, documents: list[str]) -> list[_WindowEncoding]: + return [ + self.tokenizer.encode(document, add_special_tokens=True) + for document in documents + ] + + provider, created = 
_resolve_fastembed_provider( + monkeypatch, + inner_model=_ChunkInnerModel(), + ) + chunks = provider.chunk_text("x" * 4000) + assert len(chunks) == 8 + assert chunks[0] == "chunk-501" + assert chunks[-1] == "chunk-493" + assert sum(int(chunk.removeprefix("chunk-")) for chunk in chunks) == 4000 + assert created + + +class _PassageBoundaryTokenizer: + SPECIAL_TOKENS = 2 + + def __init__(self) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=512) + + def no_truncation(self) -> None: + self._truncated = False + + def enable_truncation(self, *, max_length: int) -> None: + self._truncated = True + self.truncation = SimpleNamespace(max_length=max_length) + + def encode(self, text: str, *, add_special_tokens: bool) -> _FakeEncoding: + content_tokens = len(text.encode("utf-8")) + if add_special_tokens: + return _FakeEncoding(content_tokens + self.SPECIAL_TOKENS) + return _FakeEncoding(content_tokens) + + def decode(self, ids: list[int]) -> str: + return "a" * len(ids) + + +def _boundary_fastembed_provider( + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Any, list[_FakeTextEmbedding]]: + class _BoundaryInnerModel: + def __init__(self) -> None: + self.tokenizer = _PassageBoundaryTokenizer() + + def tokenize(self, documents: list[str]) -> list[_FakeEncoding]: + return [ + self.tokenizer.encode(document, add_special_tokens=True) + for document in documents + ] + + return _resolve_fastembed_provider( + monkeypatch, + inner_model=_BoundaryInnerModel(), + ) + + +@pytest.mark.parametrize( + ("content_len", "expected_chunks"), + [ + (499, 1), + (500, 1), + (501, 1), + (502, 2), + (503, 2), + ], +) +def test_passage_chunk_boundary_token_counts( + monkeypatch: pytest.MonkeyPatch, + content_len: int, + expected_chunks: int, +) -> None: + provider, _created = _boundary_fastembed_provider(monkeypatch) + text = "a" * content_len + chunks = provider.chunk_text(text) + assert len(chunks) == expected_chunks + tokenizer = _PassageBoundaryTokenizer() + for chunk 
in chunks: + raw = len(tokenizer.encode(f"passage: {chunk}", add_special_tokens=True).ids) + assert raw <= 512 + + +def test_passage_chunks_cover_every_source_token_once( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic.chunking import expand_projection + from codeclone.memory.semantic.models import SemanticProjection + from codeclone.memory.semantic.projection import text_hash + + provider, _created = _boundary_fastembed_provider(monkeypatch) + text = "/repo/codeclone/memory/semantic/chunking.py - " + ("x" * 1200) + tokenizer = _PassageBoundaryTokenizer() + source_token_count = len(tokenizer.encode(text, add_special_tokens=False).ids) + chunks = provider.chunk_text(text) + chunk_token_counts = [ + len(tokenizer.encode(chunk, add_special_tokens=False).ids) for chunk in chunks + ] + assert sum(chunk_token_counts) == source_token_count + assert all(count <= 501 for count in chunk_token_counts) + projection = SemanticProjection( + source="trajectory", + source_id="traj-1", + kind="trajectory", + text=text, + text_hash=text_hash(text), + ) + units = expand_projection(projection, provider) + assert len(units) > 1 + assert all( + len(tokenizer.encode(f"passage: {unit.text}", add_special_tokens=True).ids) + <= 512 + for unit in units + ) + + +def test_passage_chunk_boundaries_are_deterministic( + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider, _created = _boundary_fastembed_provider(monkeypatch) + text = "deterministic " * 400 + assert provider.chunk_text(text) == provider.chunk_text(text) + + +def test_known_model_max_tokens_defaults_to_512() -> None: + from codeclone.memory.embedding.fastembed_provider import known_model_max_tokens + + assert known_model_max_tokens("BAAI/bge-small-en-v1.5") == 512 + assert known_model_max_tokens("unknown-model") == 512 + + +def test_fastembed_max_sequence_tokens_uses_loaded_tokenizer( + monkeypatch: pytest.MonkeyPatch, +) -> None: + + class _Tokenizer: + truncation = 
SimpleNamespace(max_length=256) + + class _Inner: + tokenizer = _Tokenizer() + + provider, created = _resolve_fastembed_provider( + monkeypatch, + inner_model=_Inner(), + ) + provider.embed_documents(["warmup"]) + assert provider.max_sequence_tokens() == 256 + assert created + + +def test_fastembed_probe_passage_token_counts_without_encode_batch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _Tokenizer: + truncation = SimpleNamespace(max_length=512) + + def encode(self, text: str, *, add_special_tokens: bool) -> _FakeEncoding: + return _FakeEncoding(len(text)) + + class _Inner: + def __init__(self) -> None: + self.tokenizer = _Tokenizer() + + def tokenize(self, documents: list[str]) -> list[_FakeEncoding]: + return [ + self.tokenizer.encode(doc, add_special_tokens=True) for doc in documents + ] + + provider, _created = _resolve_fastembed_provider( + monkeypatch, + inner_model=_Inner(), + ) + provider.embed_documents(["warmup"]) + (counts,) = provider.probe_passage_token_counts(["hello"]) + assert counts.raw == counts.effective + + +def test_fastembed_chunk_text_without_tokenizer_returns_original() -> None: + from codeclone.memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider + + provider = FastEmbedEmbeddingProvider( + model_name="BAAI/bge-small-en-v1.5", + dimension=384, + cache_dir=Path("/tmp/fastembed-cache"), + allow_model_download=False, + ) + provider._model = SimpleNamespace(model=SimpleNamespace(tokenizer=None)) + assert provider.chunk_text("short text") == ("short text",) + + +def test_fastembed_estimate_token_counts_uses_tokenize_when_model_loaded( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _Inner: + def tokenize(self, documents: list[str]) -> list[_FakeEncoding]: + return [_FakeEncoding(len(document)) for document in documents] + + provider, _created = _resolve_fastembed_provider( + monkeypatch, + inner_model=_Inner(), + ) + provider.embed_documents(["warmup"]) + assert provider.estimate_token_counts(["ab", "abcd"]) 
== (11, 13) + + +def test_fastembed_chunk_text_without_encode_ops_returns_original( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.embedding import fastembed_provider as provider_mod + from codeclone.memory.embedding.fastembed_provider import FastEmbedEmbeddingProvider + + class _Tokenizer: + truncation = SimpleNamespace(max_length=512) + + provider = FastEmbedEmbeddingProvider( + model_name="BAAI/bge-small-en-v1.5", + dimension=384, + cache_dir=Path("/tmp/fastembed-cache"), + allow_model_download=False, + ) + provider._model = SimpleNamespace(model=SimpleNamespace(tokenizer=_Tokenizer())) + monkeypatch.setattr(provider_mod, "_tokenizer_encode_ops", lambda _tokenizer: None) + assert provider.chunk_text("hello world") == ("hello world",) + + +def test_embed_documents_records_observability_counter( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.memory.embedding import embed_documents, embed_query + from codeclone.observability import bootstrap, shutdown + + provider = DeterministicHashEmbeddingProvider(dimension=8) + bootstrap(ObservabilityConfig(enabled=True, profile=False), root=tmp_path) + try: + vectors = embed_documents(provider, ["one", "two"]) + query = embed_query(provider, "query") + finally: + shutdown() + assert len(vectors) == 2 + assert len(query) == 8 + + +def test_embed_batching_validation_and_empty_inputs() -> None: + from codeclone.memory.embedding.batching import ( + EmbedBatchLimits, + LengthScoredItem, + pack_adaptive_batches, + score_lengths, + ) + + with pytest.raises(ValueError, match="length score inputs must align"): + score_lengths( + ["a"], + char_counts=[1], + token_counts=[1, 2], + source_kinds=["kind"], + source_ids=["id"], + ) + with pytest.raises(ValueError, match="embed batch limits must be positive"): + pack_adaptive_batches( + [], limits=EmbedBatchLimits(max_documents=0, max_padded_tokens=8) + ) + assert ( + 
pack_adaptive_batches( + [], limits=EmbedBatchLimits(max_documents=4, max_padded_tokens=32) + ) + == [] + ) + (batch,) = pack_adaptive_batches( + ( + LengthScoredItem( + item="doc", + char_count=3, + token_count=2, + source_kind="memory", + source_id="id", + ), + ), + limits=EmbedBatchLimits(max_documents=4, max_padded_tokens=32), + ) + assert batch.items[0].item == "doc" + + +def test_deterministic_embedding_max_sequence_tokens_is_none() -> None: + provider = DeterministicHashEmbeddingProvider(dimension=16) + assert provider.max_sequence_tokens() is None + + +def test_fastembed_embed_records_infer_counters_when_observability_enabled( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.config.observability import ObservabilityConfig + from codeclone.observability import bootstrap, shutdown + + provider, _created = _resolve_fastembed_provider(monkeypatch) + bootstrap(ObservabilityConfig(enabled=True, profile=False), root=tmp_path) + try: + vectors = provider.embed_documents( + ["one", "two"], + infer_counters={"lane_memory": 2}, + ) + finally: + shutdown() + assert len(vectors) == 2 + + +def test_fastembed_tokenizer_helper_edge_paths() -> None: + from codeclone.memory.embedding import fastembed_provider as provider_mod + + class _Truncation: + max_length = 128 + + class _TokenizerWithTruncation: + truncation = _Truncation() + + assert provider_mod._tokenizer_max_length(_TokenizerWithTruncation()) == 128 + assert provider_mod._tokenizer_max_length(object()) is None + + class _Encoding: + ids: ClassVar[list[int]] = [1, 2, 3] + + assert provider_mod._encoding_length(_Encoding()) == 3 + assert provider_mod._encoding_length(object()) == 0 + + +def test_fastembed_tokenizer_max_length_rejects_non_positive() -> None: + from codeclone.memory.embedding import fastembed_provider as provider_mod + + class _Truncation: + max_length = 0 + + class _Tokenizer: + truncation = _Truncation() + + assert provider_mod._tokenizer_max_length(_Tokenizer()) is 
None + + +def test_fastembed_verify_chunk_passage_input_raises_on_overflow( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.embedding import fastembed_provider as provider_mod + from codeclone.memory.exceptions import SemanticChunkingInvariantError + + def _encode(_text: str, *, add_special_tokens: bool = True) -> object: + class _Encoding: + ids: ClassVar[list[int]] = list(range(600)) + + return _Encoding() + + with pytest.raises( + SemanticChunkingInvariantError, match="exceeds model token window" + ): + provider_mod._verify_chunk_passage_input( + _encode, + "too-long", + model_max_tokens=128, + ) + + +def test_fastembed_probe_without_tokenizer_uses_char_estimate( + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider, _created = _resolve_fastembed_provider( + monkeypatch, + inner_model=SimpleNamespace(tokenizer=None, tokenize=lambda _docs: []), + vector_value=0.0, + ) + counts = provider.probe_passage_token_counts(["hello"]) + assert counts[0].raw == counts[0].effective + assert counts[0].raw > 0 + + +def test_fastembed_chunk_text_raises_when_chunk_cannot_fit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.exceptions import SemanticChunkingInvariantError + + class _Encoding: + def __init__(self, ids: list[int]) -> None: + self.ids = ids + + class _Tokenizer: + truncation = type("T", (), {"max_length": 8})() + + def encode(self, text: str, *, add_special_tokens: bool = False) -> _Encoding: + return _Encoding([1] * 32) + + def decode(self, ids: list[int]) -> str: + return "x" * len(ids) + + def no_truncation(self) -> None: + return None + + def enable_truncation(self, *, max_length: int) -> None: + return None + + provider, _created = _resolve_fastembed_provider( + monkeypatch, + inner_model=SimpleNamespace(tokenizer=_Tokenizer()), + ) + with pytest.raises( + SemanticChunkingInvariantError, match="unable to fit passage chunk" + ): + provider.chunk_text("overflow") + + +def 
test_fastembed_estimate_token_counts_without_tokenize( + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider, _created = _resolve_fastembed_provider( + monkeypatch, + inner_model=SimpleNamespace(tokenizer=object()), + vector_value=0.0, + ) + provider._get_model() + counts = provider.estimate_token_counts(["hello"]) + assert counts == (4,) diff --git a/tests/test_semantic_index_null.py b/tests/test_semantic_index_null.py new file mode 100644 index 00000000..248992e4 --- /dev/null +++ b/tests/test_semantic_index_null.py @@ -0,0 +1,96 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import importlib +from pathlib import Path +from types import ModuleType + +import pytest + +from codeclone.config.memory import SemanticConfig +from codeclone.memory.semantic import ( + NullSemanticIndex, + UnavailableSemanticIndex, + resolve_semantic_index, + resolve_semantic_index_writer, +) + + +@pytest.mark.parametrize( + ("index", "expected_type", "expected_reason"), + [ + ( + resolve_semantic_index(SemanticConfig(enabled=False)), + NullSemanticIndex, + "disabled", + ), + ( + UnavailableSemanticIndex(reason="lancedb_not_installed"), + UnavailableSemanticIndex, + "lancedb_not_installed", + ), + ], +) +def test_degraded_index_is_empty_and_reports_reason( + index: NullSemanticIndex | UnavailableSemanticIndex, + expected_type: type[NullSemanticIndex | UnavailableSemanticIndex], + expected_reason: str, +) -> None: + assert isinstance(index, expected_type) + assert index.search([0.0, 1.0], k=5) == [] + status = index.status() + assert status.available is False + assert status.reason == expected_reason + + +def test_resolve_semantic_index_reports_lancedb_not_installed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, 
+) -> None: + index_dir = tmp_path / "semantic_index.lance" + index_dir.mkdir() + config = SemanticConfig(enabled=True, index_path=str(index_dir)) + monkeypatch.setattr( + "codeclone.memory.semantic._resolve_backend", + lambda *_args, **_kwargs: None, + ) + index = resolve_semantic_index(config) + assert isinstance(index, UnavailableSemanticIndex) + assert index.status().reason == "lancedb_not_installed" + + +def test_resolve_semantic_index_writer_returns_none_when_backend_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + config = SemanticConfig(enabled=True, index_path="/tmp/unused.lance") + monkeypatch.setattr( + "codeclone.memory.semantic._resolve_backend", + lambda *_args, **_kwargs: None, + ) + assert resolve_semantic_index_writer(config) is None + + +def test_resolve_backend_degrades_when_optional_package_missing_in_constructor( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + index_dir = tmp_path / "semantic_index.lance" + index_dir.mkdir() + config = SemanticConfig(enabled=True, index_path=str(index_dir)) + real_import_module = importlib.import_module + + def _import_module(name: str, package: str | None = None) -> ModuleType: + if name == "lancedb": + raise ModuleNotFoundError("No module named 'lancedb'") + return real_import_module(name, package) + + monkeypatch.setattr(importlib, "import_module", _import_module) + + index = resolve_semantic_index(config) + assert isinstance(index, UnavailableSemanticIndex) + assert index.status().reason == "lancedb_not_installed" + assert resolve_semantic_index_writer(config) is None diff --git a/tests/test_semantic_lancedb_backend.py b/tests/test_semantic_lancedb_backend.py new file mode 100644 index 00000000..c9e20369 --- /dev/null +++ b/tests/test_semantic_lancedb_backend.py @@ -0,0 +1,241 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.config.memory import SemanticConfig +from codeclone.memory.embedding import resolve_embedding_provider +from codeclone.memory.semantic import ( + SemanticIndexWriter, + resolve_semantic_index, + resolve_semantic_index_writer, +) +from codeclone.memory.semantic.models import SemanticRow + +# The backend is the optional `semantic-lancedb` extra; skip when absent. +pytest.importorskip("lancedb") + +from codeclone.memory.semantic.lancedb_backend import LanceDbSemanticIndex + + +def _config(tmp_path: Path, *, dimension: int) -> SemanticConfig: + return SemanticConfig( + enabled=True, + index_path=str(tmp_path / "semantic_index.lance"), + dimension=dimension, + ) + + +def _row(record_id: str, vector: list[float]) -> SemanticRow: + return SemanticRow( + id=record_id, + source="memory", + kind="contract_note", + text_hash=f"h-{record_id}", + embedding_model="diagnostic-hash-v1", + vector=tuple(vector), + ) + + +def _writer_and_vector( + tmp_path: Path, + *, + dimension: int, + text: str, +) -> tuple[SemanticIndexWriter, list[float], SemanticConfig]: + config = _config(tmp_path, dimension=dimension) + writer = resolve_semantic_index_writer(config) + assert writer is not None + provider = resolve_embedding_provider(config) + (vector,) = provider.embed([text]) + return writer, vector, config + + +def test_lancedb_backend_round_trip(tmp_path: Path) -> None: + writer, vec_a, config = _writer_and_vector( + tmp_path, dimension=4, text="alpha alpha" + ) + provider = resolve_embedding_provider(config) + (vec_b,) = provider.embed(["beta beta beta"]) + + writer.upsert([_row("a", vec_a), _row("b", vec_b)]) + assert writer.known_ids() == {"a", "b"} + + status = writer.status() + assert status.available is True + assert status.backend == "lancedb" + assert status.dimension == 4 + assert status.indexed_count == 2 + + 
hits = writer.search(vec_a, k=2) + assert hits[0].source_id == "a" # closest to its own embedding + assert hits[0].score >= hits[-1].score + + writer.delete(["a"]) + assert writer.known_ids() == {"b"} + + +def test_lancedb_backend_row_fingerprints(tmp_path: Path) -> None: + writer, vec_a, config = _writer_and_vector(tmp_path, dimension=4, text="alpha") + provider = resolve_embedding_provider(config) + (vec_b,) = provider.embed(["beta beta"]) + writer.upsert([_row("a", vec_a), _row("b", vec_b)]) + + fingerprints = writer.row_fingerprints(["a", "absent"]) + assert set(fingerprints) == {"a"} # missing ids omitted + assert fingerprints["a"].text_hash == "h-a" + assert fingerprints["a"].embedding_model == "diagnostic-hash-v1" + # Empty request never touches the table. + assert writer.row_fingerprints([]) == {} + + +def test_lancedb_backend_resolves_as_read_index(tmp_path: Path) -> None: + config = _config(tmp_path, dimension=8) + index_path = tmp_path / "semantic_index.lance" + assert not index_path.exists() + + index = resolve_semantic_index(config) + status = index.status() + assert status.available is False + assert status.backend is None + assert status.reason == "not_built" + assert index.search([0.0] * 8, k=3) == [] + assert not index_path.exists() + + writer = resolve_semantic_index_writer(config) + assert writer is not None + writer.upsert([_row("built", [0.0] * 8)]) + + built = resolve_semantic_index(config) + status = built.status() + assert status.available is True + assert status.backend == "lancedb" + assert status.indexed_count == 1 + + +def test_lancedb_backend_reopens_existing_table(tmp_path: Path) -> None: + # Regression: a second backend on the same path must OPEN the existing + # table, not crash with "Table already exists". 
+ config = _config(tmp_path, dimension=4) + first = resolve_semantic_index_writer(config) + assert first is not None + provider = resolve_embedding_provider(config) + (vec,) = provider.embed(["persisted"]) + first.upsert([_row("keep", vec)]) + + second = resolve_semantic_index_writer(config) + assert second is not None + assert second.known_ids() == {"keep"} + assert second.status().indexed_count == 1 + + +def test_lancedb_backend_read_reports_schema_mismatch( + tmp_path: Path, +) -> None: + writer, vec, config = _writer_and_vector( + tmp_path, dimension=4, text="old dimension" + ) + writer.upsert([_row("old", vec)]) + + read_mismatch = LanceDbSemanticIndex( + path=Path(config.index_path), + dimension=8, + create=False, + ) + + status = read_mismatch.status() + assert status.available is False + assert status.reason == "schema_mismatch" + assert read_mismatch.search([0.0] * 8, k=3) == [] + + original = LanceDbSemanticIndex( + path=Path(config.index_path), + dimension=4, + create=False, + ) + assert original.known_ids() == {"old"} + + +def test_lancedb_backend_writer_recreates_schema_mismatch( + tmp_path: Path, +) -> None: + old_writer, vec, old_config = _writer_and_vector( + tmp_path, dimension=4, text="old dimension" + ) + old_writer.upsert([_row("old", vec)]) + + new_writer = LanceDbSemanticIndex( + path=Path(old_config.index_path), + dimension=8, + create=True, + ) + + assert new_writer.status().available is True + assert new_writer.status().dimension == 8 + assert new_writer.known_ids() == set() + new_writer.upsert([_row("new", [0.0] * 8)]) + assert new_writer.known_ids() == {"new"} + + +def test_lancedb_backend_upsert_is_idempotent_by_id(tmp_path: Path) -> None: + config = _config(tmp_path, dimension=4) + writer = resolve_semantic_index_writer(config) + assert writer is not None + provider = resolve_embedding_provider(config) + (vec,) = provider.embed(["one"]) + + writer.upsert([_row("dup", vec)]) + writer.upsert([_row("dup", vec)]) # same id -> merge, not 
duplicate + assert writer.known_ids() == {"dup"} + assert writer.status().indexed_count == 1 + + +def test_lancedb_backend_no_table_search_and_delete_are_noops(tmp_path: Path) -> None: + index = LanceDbSemanticIndex( + path=tmp_path / "empty.lance", dimension=4, create=False + ) + assert index.status().available is False + assert index.status().reason == "not_built" + assert index.search([0.0, 0.0, 0.0, 0.0], k=3) == [] + assert index.known_ids() == set() + index.delete([]) + index.delete(["missing-table"]) + index.upsert([]) + + +def test_lancedb_backend_upsert_creates_table_and_delete_nonempty( + tmp_path: Path, +) -> None: + config = _config(tmp_path, dimension=4) + writer = LanceDbSemanticIndex( + path=Path(config.index_path), dimension=config.dimension, create=False + ) + assert writer._table is None + provider = resolve_embedding_provider(config) + (vec,) = provider.embed(["create on first upsert"]) + writer.upsert([_row("first", vec)]) + assert writer.status().indexed_count == 1 + writer.delete(["first"]) + assert writer.known_ids() == set() + + +def test_lancedb_backend_open_table_propagates_unexpected_value_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + index = LanceDbSemanticIndex( + path=tmp_path / "broken.lance", dimension=4, create=False + ) + + def _boom(_name: str) -> object: + raise ValueError("unexpected lancedb failure") + + monkeypatch.setattr(index._db, "open_table", _boom) + with pytest.raises(ValueError, match="unexpected lancedb failure"): + index._open_table(create=False) diff --git a/tests/test_semantic_projection.py b/tests/test_semantic_projection.py new file mode 100644 index 00000000..56c3c9c0 --- /dev/null +++ b/tests/test_semantic_projection.py @@ -0,0 +1,108 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from codeclone.memory.models import MemoryRecord +from codeclone.memory.semantic.projection import ( + is_indexed_audit_event, + is_indexed_memory_type, + project_audit_event, + project_memory_record, + text_hash, +) + + +def _record(*, statement: str, summary: str | None = None) -> MemoryRecord: + return MemoryRecord( + id="mem-1", + project_id="proj-1", + identity_key="key-1", + type="contract_note", + status="active", + confidence="verified", + origin="system", + ingest_source="contract", + statement=statement, + summary=summary, + payload=None, + created_at_utc="2026-01-01T00:00:00Z", + updated_at_utc="2026-01-01T00:00:00Z", + last_verified_at_utc=None, + expires_at_utc=None, + created_by="test", + verified_by=None, + approved_by=None, + approved_at_utc=None, + report_digest=None, + code_fingerprint=None, + stale_reason=None, + created_on_branch=None, + created_at_commit=None, + verified_on_branch=None, + verified_at_commit=None, + ) + + +def test_text_hash_is_idempotent_and_distinguishing() -> None: + assert text_hash("same") == text_hash("same") + assert text_hash("a") != text_hash("b") + + +def test_indexed_type_predicates() -> None: + assert is_indexed_memory_type("contract_note") is True + assert is_indexed_memory_type("change_rationale") is True + # structural records are not semantically indexed + assert is_indexed_memory_type("module_role") is False + assert is_indexed_memory_type("test_anchor") is False + assert is_indexed_audit_event("intent.declared") is True + assert is_indexed_audit_event("workspace.gc_completed") is False + + +def test_project_memory_record_is_deterministic() -> None: + record = _record(statement="recover keeps the checkpoint as before-run") + first = project_memory_record(record, subject_path="codeclone/x.py") + second = project_memory_record(record, subject_path="codeclone/x.py") + assert first == second + assert 
first.text_hash == second.text_hash + assert first.source == "memory" + assert first.source_id == "mem-1" + assert first.kind == "contract_note" + assert first.subject_path == "codeclone/x.py" + assert first.status == "active" + assert "contract_note" in first.text + assert "codeclone/x.py" in first.text + assert "recover keeps the checkpoint" in first.text + + +def test_project_memory_record_text_hash_tracks_content() -> None: + base = project_memory_record(_record(statement="alpha")) + changed_statement = project_memory_record(_record(statement="beta")) + assert base.text_hash != changed_statement.text_hash + # summary participates in the projected text + with_summary = project_memory_record( + _record(statement="alpha", summary="one-line essence") + ) + assert with_summary.text_hash != base.text_hash + assert "one-line essence" in with_summary.text + + +def test_project_audit_event_is_deterministic() -> None: + first = project_audit_event( + event_id="evt_1", + event_type="intent.declared", + summary="Fix audit gap: capture intent description", + ) + second = project_audit_event( + event_id="evt_1", + event_type="intent.declared", + summary="Fix audit gap: capture intent description", + ) + assert first == second + assert first.source == "audit" + assert first.kind == "intent.declared" + assert first.subject_path is None + assert "intent.declared" in first.text + assert "Fix audit gap" in first.text diff --git a/tests/test_semantic_projection_probe.py b/tests/test_semantic_projection_probe.py new file mode 100644 index 00000000..4a70718c --- /dev/null +++ b/tests/test_semantic_projection_probe.py @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Iterator, Sequence + +from codeclone.memory.embedding.length import ( + PlanningTextTokenEstimator, + truncation_stats, +) +from codeclone.memory.semantic.models import SemanticProjection +from codeclone.memory.semantic.projection import text_hash +from codeclone.memory.semantic.projection_probe import probe_semantic_projections +from codeclone.memory.semantic.sources import IndexSource + + +class _FakeSource(IndexSource): + def __init__(self, name: str, projections: Sequence[SemanticProjection]) -> None: + self._name = name + self._projections = projections + + def name(self) -> str: + return self._name + + def available(self) -> bool: + return True + + def iter_projections(self) -> Iterator[SemanticProjection]: + yield from self._projections + + +def _projection(source: str, source_id: str, text: str) -> SemanticProjection: + return SemanticProjection( + source=source, # type: ignore[arg-type] + source_id=source_id, + kind="test", + text=text, + text_hash=text_hash(text), + ) + + +def test_length_distribution_percentiles() -> None: + from codeclone.memory.embedding.length import length_distribution + + dist = length_distribution([10, 20, 30, 40, 100]) + assert dist.min == 10 + assert dist.p50 == 30 + assert dist.max == 100 + + +def test_truncation_stats_counts_dropped_tokens() -> None: + stats = truncation_stats([600, 100], [512, 100]) + assert stats.documents == 1 + assert stats.max_dropped_tokens == 88 + + +def test_probe_semantic_projections_reports_lane_stats() -> None: + estimator = PlanningTextTokenEstimator( + mode="chars_approx", + model_max_tokens=512, + ) + payload = probe_semantic_projections( + sources=[ + _FakeSource( + "memory", + [_projection("memory", "m1", "short memory note")], + ), + _FakeSource( + "trajectory", + [_projection("trajectory", "t1", "x" * 4000)], + ), + ], + token_prober=estimator, + ) + 
assert payload["action"] == "probe_semantic_projections" + assert payload["estimator"] == "chars_approx" + assert payload["model_max_tokens"] == 512 + assert payload["lanes"]["memory"]["documents"] == 1 + assert payload["lanes"]["trajectory"]["documents"] == 1 + assert payload["lanes"]["trajectory"]["chars"]["max"] == 4000 + assert payload["lanes"]["trajectory"]["tokens"]["raw"]["max"] == 1000 + assert payload["lanes"]["trajectory"]["tokens"]["effective"]["max"] == 1000 + assert payload["lanes"]["trajectory"]["truncation"]["documents"] == 0 + assert payload["lanes"]["trajectory"]["token_overflow"]["over_model_limit"] == 1 + assert ( + payload["lanes"]["trajectory"]["token_overflow"]["max_overflow_tokens"] == 488 + ) + + +def test_probe_trajectory_with_chunker_measures_index_units() -> None: + from typing import cast + + from codeclone.memory.embedding.length import ( + PassageTokenCounts, + ProjectionTokenProber, + ) + + class _SplitChunker: + def chunk_text(self, text: str) -> tuple[str, ...]: + midpoint = max(1, len(text) // 2) + return (text[:midpoint], text[midpoint:]) + + class _TruncatingProber: + estimator_label = "test_tokenizer" + + def max_sequence_tokens(self) -> int | None: + return 512 + + def probe_passage_token_counts( + self, + texts: Sequence[str], + ) -> tuple[PassageTokenCounts, ...]: + counts: list[PassageTokenCounts] = [] + for text in texts: + raw = len(text) // 4 + effective = min(raw, 512) + counts.append(PassageTokenCounts(raw=raw, effective=effective)) + return tuple(counts) + + payload = probe_semantic_projections( + sources=[ + _FakeSource( + "trajectory", + [_projection("trajectory", "t1", "x" * 4000)], + ), + ], + token_prober=cast(ProjectionTokenProber, _TruncatingProber()), + passage_chunker=_SplitChunker(), + ) + trajectory = payload["lanes"]["trajectory"] + assert trajectory["chunking"] == { + "source_documents": 1, + "index_units": 2, + "multi_chunk_sources": 1, + } + assert trajectory["documents"] == 2 + assert 
trajectory["truncation"]["documents"] == 0 + assert trajectory["token_overflow"]["over_model_limit"] == 0 + + +def test_probe_trajectory_without_chunker_keeps_source_projection_stats() -> None: + estimator = PlanningTextTokenEstimator( + mode="chars_approx", + model_max_tokens=512, + ) + payload = probe_semantic_projections( + sources=[ + _FakeSource( + "trajectory", + [_projection("trajectory", "t1", "x" * 4000)], + ), + ], + token_prober=estimator, + ) + trajectory = payload["lanes"]["trajectory"] + assert trajectory["documents"] == 1 + assert "chunking" not in trajectory + assert trajectory["token_overflow"]["over_model_limit"] == 1 + + +def test_probe_collects_overflow_examples_for_trajectory_units() -> None: + from typing import cast + + from codeclone.memory.embedding.length import ( + PassageTokenCounts, + ProjectionTokenProber, + ) + + class _OverflowProber: + estimator_label = "test_tokenizer" + + def max_sequence_tokens(self) -> int | None: + return 512 + + def probe_passage_token_counts( + self, + texts: Sequence[str], + ) -> tuple[PassageTokenCounts, ...]: + return tuple(PassageTokenCounts(raw=600, effective=512) for _ in texts) + + class _SingleChunker: + def chunk_text(self, text: str) -> tuple[str, ...]: + return ("fits", "overflow") + + payload = probe_semantic_projections( + sources=[ + _FakeSource( + "trajectory", + [_projection("trajectory", "t1", "long trajectory")], + ), + ], + token_prober=cast(ProjectionTokenProber, _OverflowProber()), + passage_chunker=_SingleChunker(), + ) + trajectory = payload["lanes"]["trajectory"] + examples = trajectory["overflow_examples"] + assert len(examples) == 2 + assert all(example["raw_tokens"] == 600 for example in examples) + assert all(example["overflow_tokens"] == 88 for example in examples) + assert examples[0]["parent_id"] == "t1" + assert examples[0]["chunk_index"] == 0 + assert examples[1]["chunk_index"] == 1 + + +def test_probe_defaults_to_planning_estimator_without_exact_tokens() -> None: + from 
codeclone.config.memory import SemanticConfig + from codeclone.memory.semantic.rebuild_workflow import ( + _resolve_projection_token_prober, + ) + + config = SemanticConfig(embedding_provider="fastembed") + prober = _resolve_projection_token_prober(config, exact_tokens=False) + assert prober.estimator_label == "chars_approx" diff --git a/tests/test_semantic_ranking.py b/tests/test_semantic_ranking.py new file mode 100644 index 00000000..d9d9ad9b --- /dev/null +++ b/tests/test_semantic_ranking.py @@ -0,0 +1,89 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from codeclone.memory.models import MemorySubject +from codeclone.memory.retrieval.ranking import ( + RankingContext, + reciprocal_rank_fusion, + relevance_score, +) +from tests.memory_fixtures import make_module_record + + +def _ctx( + *, scope: tuple[str, ...] = (), symbols: tuple[str, ...] = () +) -> RankingContext: + return RankingContext.from_scope( + scope_paths=scope, symbols=symbols, blast_dependents=() + ) + + +def test_semantic_proximity_is_additive() -> None: + record = make_module_record("proj", "codeclone/x.py") + base = relevance_score(record=record, subjects=[], context=_ctx(), evidence_count=0) + boosted = relevance_score( + record=record, + subjects=[], + context=_ctx(), + evidence_count=0, + semantic_proximity=1.0, + ) + # Small additive weight (0.3); it re-ranks, it does not dominate. 
+ assert round(boosted - base, 4) == 0.3 + + +def test_default_proximity_matches_explicit_zero() -> None: + record = make_module_record("proj", "codeclone/x.py") + implicit = relevance_score( + record=record, subjects=[], context=_ctx(), evidence_count=0 + ) + explicit = relevance_score( + record=record, + subjects=[], + context=_ctx(), + evidence_count=0, + semantic_proximity=0.0, + ) + assert implicit == explicit + + +def test_rrf_rewards_membership_in_both_lists() -> None: + # A record matched by both engines outranks one matched by a single engine, + # even when the single-engine match sits at the very top (rank 0). + both = reciprocal_rank_fusion(lexical_rank=2, vector_rank=2) + assert both > reciprocal_rank_fusion(lexical_rank=0) + assert both > reciprocal_rank_fusion(vector_rank=0) + + +def test_rrf_rewards_a_better_rank() -> None: + assert reciprocal_rank_fusion(lexical_rank=0) > reciprocal_rank_fusion( + lexical_rank=5 + ) + + +def test_rrf_absent_from_both_lists_is_zero() -> None: + assert reciprocal_rank_fusion() == 0.0 + + +def test_scoped_shortcircuit_beats_semantic() -> None: + # A scoped query with no contextual subject match must return 0.0 even + # with maximal proximity: semantic cannot inject out-of-scope records. + record = make_module_record("proj", "codeclone/x.py") + subject = MemorySubject( + id="s1", + memory_id=record.id, + subject_kind="path", + subject_key="codeclone/unrelated.py", + ) + score = relevance_score( + record=record, + subjects=[subject], + context=_ctx(scope=("codeclone/other.py",)), + evidence_count=0, + semantic_proximity=1.0, + ) + assert score == 0.0 diff --git a/tests/test_semantic_rebuild.py b/tests/test_semantic_rebuild.py new file mode 100644 index 00000000..0a0a8dee --- /dev/null +++ b/tests/test_semantic_rebuild.py @@ -0,0 +1,200 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from collections.abc import Iterator, Sequence + +import pytest + +from codeclone.memory.embedding import DeterministicHashEmbeddingProvider +from codeclone.memory.semantic import RebuildReport, rebuild_semantic_index +from codeclone.memory.semantic.models import ( + SemanticHit, + SemanticIndexStatus, + SemanticProjection, + SemanticRow, + SemanticRowFingerprint, +) +from codeclone.memory.semantic.projection import text_hash + + +class _FakeWriter: + def __init__(self) -> None: + self.rows: list[SemanticRow] = [] + + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + rows = ( + self.rows + if source is None + else [row for row in self.rows if row.source == source] + ) + return [ + SemanticHit(source_id=row.id, source=row.source, score=0.0) + for row in rows[:k] + ] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus( + available=True, backend="fake", indexed_count=len(self.rows) + ) + + def upsert(self, rows: Sequence[SemanticRow]) -> None: + self.rows.extend(rows) + + def delete(self, ids: Sequence[str]) -> None: + drop = set(ids) + self.rows = [row for row in self.rows if row.id not in drop] + + def known_ids(self) -> set[str]: + return {row.id for row in self.rows} + + def row_fingerprints(self, ids: Sequence[str]) -> dict[str, SemanticRowFingerprint]: + by_id = {row.id: row for row in self.rows} + return { + row_id: SemanticRowFingerprint( + id=row_id, + text_hash=by_id[row_id].text_hash, + embedding_model=by_id[row_id].embedding_model, + ) + for row_id in ids + if row_id in by_id + } + + +class _FakeSource: + def __init__( + self, + name: str, + projections: list[SemanticProjection], + *, + available: bool = True, + ) -> None: + self._name = name + 
self._projections = projections + self._available = available + + def name(self) -> str: + return self._name + + def available(self) -> bool: + return self._available + + def iter_projections(self) -> Iterator[SemanticProjection]: + yield from self._projections + + +def _projection(source_id: str, text: str) -> SemanticProjection: + return SemanticProjection( + source="memory", + source_id=source_id, + kind="contract_note", + text=text, + text_hash=text_hash(text), + ) + + +def test_rebuild_embeds_and_upserts() -> None: + writer = _FakeWriter() + provider = DeterministicHashEmbeddingProvider(dimension=32) + source = _FakeSource( + "memory", + [_projection("mem-1", "alpha beta"), _projection("mem-2", "gamma delta")], + ) + + report = rebuild_semantic_index(writer=writer, provider=provider, sources=[source]) + + assert isinstance(report, RebuildReport) + assert report.indexed == 2 + assert report.by_source == {"memory": 2} + assert {row.id for row in writer.rows} == {"mem-1", "mem-2"} + assert all(len(row.vector) == 32 for row in writer.rows) + assert all(row.embedding_model == "diagnostic-hash-v1" for row in writer.rows) + + +def test_rebuild_skips_unavailable_sources() -> None: + writer = _FakeWriter() + provider = DeterministicHashEmbeddingProvider(dimension=16) + source = _FakeSource("audit", [_projection("evt-1", "x y")], available=False) + + report = rebuild_semantic_index(writer=writer, provider=provider, sources=[source]) + + assert report.indexed == 0 + assert report.by_source == {} + assert writer.rows == [] + + +def test_rebuild_prunes_stale_ids() -> None: + writer = _FakeWriter() + writer.rows = [ + SemanticRow( + id="old", + source="memory", + kind="contract_note", + text_hash="h", + embedding_model="diagnostic-hash-v1", + vector=(0.0, 1.0), + ) + ] + provider = DeterministicHashEmbeddingProvider(dimension=8) + source = _FakeSource("memory", [_projection("new", "fresh note")]) + + report = rebuild_semantic_index(writer=writer, provider=provider, 
sources=[source]) + + assert report.indexed == 1 + assert report.deleted == 1 + assert {row.id for row in writer.rows} == {"new"} + + +def test_rebuild_available_source_with_empty_projections() -> None: + writer = _FakeWriter() + provider = DeterministicHashEmbeddingProvider(dimension=8) + source = _FakeSource("memory", [], available=True) + + report = rebuild_semantic_index(writer=writer, provider=provider, sources=[source]) + + assert report.indexed == 0 + assert report.by_source == {} + assert writer.rows == [] + + +class _FixedChunker: + def __init__(self, chunks: tuple[str, ...]) -> None: + self._chunks = chunks + + def chunk_text(self, text: str) -> tuple[str, ...]: + return self._chunks + + +def test_rebuild_indexes_trajectory_chunks(monkeypatch: pytest.MonkeyPatch) -> None: + import codeclone.memory.semantic.rebuild as rebuild_mod + + monkeypatch.setattr( + rebuild_mod, + "resolve_passage_chunker", + lambda _provider: _FixedChunker(("chunk-a", "chunk-b")), + ) + writer = _FakeWriter() + provider = DeterministicHashEmbeddingProvider(dimension=16) + projection = SemanticProjection( + source="trajectory", + source_id="traj-1", + kind="trajectory", + text="long trajectory", + text_hash=text_hash("long trajectory"), + ) + source = _FakeSource("trajectory", [projection]) + + report = rebuild_semantic_index(writer=writer, provider=provider, sources=[source]) + + assert report.indexed == 2 + assert report.by_source == {"trajectory": 1} + assert {row.id for row in writer.rows} == { + "trajectory:traj-1:chunk:000", + "trajectory:traj-1:chunk:001", + } + assert all(row.parent_id == "traj-1" for row in writer.rows) diff --git a/tests/test_semantic_rebuild_incremental.py b/tests/test_semantic_rebuild_incremental.py new file mode 100644 index 00000000..f61642a4 --- /dev/null +++ b/tests/test_semantic_rebuild_incremental.py @@ -0,0 +1,207 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Iterator, Sequence + +from codeclone.memory.embedding.batching import EmbedBatchLimits +from codeclone.memory.semantic.models import ( + SemanticHit, + SemanticIndexStatus, + SemanticProjection, + SemanticRow, + SemanticRowFingerprint, +) +from codeclone.memory.semantic.projection import text_hash +from codeclone.memory.semantic.rebuild import rebuild_semantic_index + + +class _InMemoryWriter: + """Faithful in-memory SemanticIndexWriter: stores rows, answers fingerprint + lookups from the stored text_hash/model — never returns vectors to check + freshness.""" + + def __init__(self) -> None: + self.rows: dict[str, SemanticRow] = {} + + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + return [] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True) + + def upsert(self, rows: Sequence[SemanticRow]) -> None: + for row in rows: + self.rows[row.id] = row + + def delete(self, ids: Sequence[str]) -> None: + for row_id in ids: + self.rows.pop(row_id, None) + + def known_ids(self) -> set[str]: + return set(self.rows) + + def row_fingerprints(self, ids: Sequence[str]) -> dict[str, SemanticRowFingerprint]: + out: dict[str, SemanticRowFingerprint] = {} + for row_id in ids: + row = self.rows.get(row_id) + if row is not None: + out[row_id] = SemanticRowFingerprint( + id=row_id, + text_hash=row.text_hash, + embedding_model=row.embedding_model, + ) + return out + + +class _CountingProvider: + def __init__(self, model_id: str = "test-model") -> None: + self.model_id = model_id + self.dimension = 3 + self.embed_calls = 0 + self.embedded_texts = 0 + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + batch = list(texts) + 
self.embed_calls += 1 + self.embedded_texts += len(batch) + return [[float(len(text)), 0.0, 0.0] for text in batch] + + def embed_documents( + self, + texts: Sequence[str], + *, + infer_counters: object | None = None, + ) -> list[list[float]]: + return self.embed(texts) + + def estimate_token_counts(self, texts: Sequence[str]) -> tuple[int, ...]: + return tuple(max(1, len(text) // 4) for text in texts) + + def max_sequence_tokens(self) -> int | None: + return None + + +class _FakeSource: + def __init__(self, projections: Sequence[SemanticProjection]) -> None: + self._projections = list(projections) + + def name(self) -> str: + return "memory" + + def available(self) -> bool: + return True + + def iter_projections(self) -> Iterator[SemanticProjection]: + yield from self._projections + + +def _projection(source_id: str, text: str) -> SemanticProjection: + return SemanticProjection( + source="memory", + source_id=source_id, + kind="memory", + text=text, + text_hash=text_hash(text), + ) + + +def _corpus(count: int) -> list[SemanticProjection]: + return [_projection(f"id-{i}", f"text {i}") for i in range(count)] + + +def test_first_run_embeds_all() -> None: + writer, provider = _InMemoryWriter(), _CountingProvider() + report = rebuild_semantic_index( + writer=writer, provider=provider, sources=[_FakeSource(_corpus(100))] + ) + assert report.indexed == 100 + assert report.embedded == 100 + assert report.skipped_unchanged == 0 + assert provider.embedded_texts == 100 + + +def test_unchanged_corpus_embeds_nothing_on_second_run() -> None: + writer = _InMemoryWriter() + corpus = _corpus(100) + rebuild_semantic_index( + writer=writer, provider=_CountingProvider(), sources=[_FakeSource(corpus)] + ) + provider = _CountingProvider() + report = rebuild_semantic_index( + writer=writer, provider=provider, sources=[_FakeSource(corpus)] + ) + assert report.indexed == 100 + assert report.embedded == 0 + assert report.skipped_unchanged == 100 + # The model is never asked to embed — this 
is what bounds RSS. + assert provider.embed_calls == 0 + assert provider.embedded_texts == 0 + + +def test_changed_text_re_embeds_only_that_row() -> None: + writer = _InMemoryWriter() + corpus = _corpus(10) + rebuild_semantic_index( + writer=writer, provider=_CountingProvider(), sources=[_FakeSource(corpus)] + ) + changed = list(corpus) + changed[3] = _projection("id-3", "text 3 rewritten") + provider = _CountingProvider() + report = rebuild_semantic_index( + writer=writer, provider=provider, sources=[_FakeSource(changed)] + ) + assert report.embedded == 1 + assert report.skipped_unchanged == 9 + assert provider.embedded_texts == 1 + + +def test_embedding_model_change_re_embeds_all() -> None: + writer = _InMemoryWriter() + corpus = _corpus(5) + rebuild_semantic_index( + writer=writer, + provider=_CountingProvider(model_id="model-a"), + sources=[_FakeSource(corpus)], + ) + provider = _CountingProvider(model_id="model-b") + report = rebuild_semantic_index( + writer=writer, provider=provider, sources=[_FakeSource(corpus)] + ) + assert report.embedded == 5 + assert report.skipped_unchanged == 0 + + +def test_removed_source_id_is_reconciled() -> None: + writer = _InMemoryWriter() + corpus = _corpus(5) + rebuild_semantic_index( + writer=writer, provider=_CountingProvider(), sources=[_FakeSource(corpus)] + ) + report = rebuild_semantic_index( + writer=writer, + provider=_CountingProvider(), + sources=[_FakeSource(corpus[:4])], + ) + assert report.deleted == 1 + assert "id-4" not in writer.known_ids() + + +def test_changed_rows_embedded_in_bounded_batches() -> None: + writer = _InMemoryWriter() + provider = _CountingProvider() + rebuild_semantic_index( + writer=writer, + provider=provider, + sources=[_FakeSource(_corpus(10))], + embed_batch_limits=EmbedBatchLimits(max_documents=3, max_padded_tokens=100_000), + ) + # 10 new rows, batch size 3 -> 4 embed calls (3+3+3+1); peak RAM bounded. 
+ assert provider.embed_calls == 4 + assert provider.embedded_texts == 10 diff --git a/tests/test_semantic_rebuild_workflow.py b/tests/test_semantic_rebuild_workflow.py new file mode 100644 index 00000000..9107ac2a --- /dev/null +++ b/tests/test_semantic_rebuild_workflow.py @@ -0,0 +1,523 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import dataclasses +from collections.abc import Iterator +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import pytest + +from codeclone.config.memory import resolve_memory_config +from codeclone.memory.exceptions import ( + MemoryContractError, + MemorySemanticUnavailableError, +) +from codeclone.memory.semantic.rebuild import RebuildReport +from codeclone.memory.semantic.rebuild_workflow import execute_semantic_index_rebuild + +from .memory_fixtures import memory_store + + +def test_execute_semantic_rebuild_skipped_when_disabled(tmp_path: Path) -> None: + config = resolve_memory_config(tmp_path) + payload = execute_semantic_index_rebuild(root_path=tmp_path, config=config) + assert payload["action"] == "rebuild_semantic_index" + assert payload["status"] == "skipped" + assert payload["reason"] == "disabled" + assert payload["indexed"] == 0 + + +def test_execute_semantic_rebuild_unavailable_without_lancedb( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\nenabled = true\n", + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + import codeclone.memory.semantic as semantic_pkg + + monkeypatch.setattr(semantic_pkg, "resolve_semantic_index_writer", lambda _c: None) + payload = 
execute_semantic_index_rebuild(root_path=tmp_path, config=config) + assert payload["status"] == "unavailable" + assert payload["reason"] == "lancedb_not_installed" + + +def test_execute_semantic_rebuild_requires_memory_db_when_enabled( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\nenabled = true\n", + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + + class _Writer: + closed = False + + def known_ids(self) -> set[str]: + return set() + + def delete(self, ids: object) -> None: + return None + + def upsert(self, rows: object) -> None: + return None + + def close(self) -> None: + self.closed = True + + import codeclone.memory.semantic as semantic_pkg + + writer = _Writer() + monkeypatch.setattr( + semantic_pkg, + "resolve_semantic_index_writer", + lambda _config: writer, + ) + with pytest.raises(MemoryContractError, match="database not found"): + execute_semantic_index_rebuild(root_path=tmp_path, config=config) + assert writer.closed is True + + +def test_execute_semantic_rebuild_unavailable_when_model_fails_at_embed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + base = resolve_memory_config(tmp_path) + config = dataclasses.replace( + base, + semantic=base.semantic.model_copy( + update={"enabled": True, "embedding_provider": "diagnostic"} + ), + ) + + class _Writer: + def known_ids(self) -> set[str]: + return set() + + def delete(self, ids: object) -> None: + return None + + def upsert(self, rows: object) -> None: + return None + + def close(self) -> None: + return None + + import codeclone.memory.semantic as semantic_pkg + from codeclone.memory.semantic import rebuild_workflow + + monkeypatch.setattr( + semantic_pkg, "resolve_semantic_index_writer", lambda _config: _Writer() + ) + monkeypatch.setattr( + rebuild_workflow, "build_semantic_index_sources", lambda **_kwargs: [] + ) + + def _raise_unavailable(**_kwargs: object) -> 
object: + # Provider resolves fine (lazy); the model only fails when the rebuild + # actually embeds. + raise MemorySemanticUnavailableError("model unavailable (download disabled)") + + monkeypatch.setattr(rebuild_workflow, "rebuild_semantic_index", _raise_unavailable) + + with memory_store(tmp_path) as (root, project, store, _db_path): + payload = execute_semantic_index_rebuild( + root_path=root, config=config, store=store, project=project + ) + assert payload["status"] == "unavailable" + assert "model unavailable" in str(payload["reason"]) + + +def test_rebuild_reason_kind_warm_skip_is_manual_rebuild() -> None: + from codeclone.memory.semantic import rebuild_workflow as wf + + report = RebuildReport( + indexed=1502, + embedded=0, + skipped_unchanged=1502, + by_source={"memory": 238, "audit": 673, "trajectory": 591}, + ) + assert wf._rebuild_reason_kind(report) == "manual_rebuild" + + +def test_rebuild_reason_kind_embed_or_prune_is_content_changed() -> None: + from codeclone.memory.semantic import rebuild_workflow as wf + + assert ( + wf._rebuild_reason_kind( + RebuildReport(indexed=10, embedded=3, skipped_unchanged=7) + ) + == "content_changed" + ) + assert ( + wf._rebuild_reason_kind( + RebuildReport(indexed=10, embedded=0, deleted=2, skipped_unchanged=8) + ) + == "content_changed" + ) + + +def test_rebuild_reason_kind_first_index_when_empty() -> None: + from codeclone.memory.semantic import rebuild_workflow as wf + + assert ( + wf._rebuild_reason_kind( + RebuildReport(indexed=0, embedded=0, skipped_unchanged=0) + ) + == "first_index" + ) + + +def test_rebuild_reason_kind_manual_without_embed_or_skip() -> None: + from codeclone.memory.semantic import rebuild_workflow as wf + + assert ( + wf._rebuild_reason_kind( + RebuildReport(indexed=4, embedded=0, deleted=0, skipped_unchanged=0) + ) + == "manual_rebuild" + ) + + +def test_apply_rebuild_counters_records_lane_totals( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import 
rebuild_workflow as wf + + counters: dict[str, int] = {} + + class _Span: + def set_counter(self, key: str, value: int) -> None: + counters[key] = value + + monkeypatch.setattr(wf, "is_observability_enabled", lambda: True) + report = RebuildReport( + indexed=5, + embedded=2, + deleted=1, + skipped_unchanged=2, + by_source={"memory": 3, "audit": 2}, + ) + wf._apply_rebuild_counters( + cast(Any, _Span()), + report, + dimensions=384, + batch_size=8, + max_padded_tokens=4096, + ) + assert counters["indexed"] == 5 + assert counters["lane_audit"] == 2 + assert counters["lane_memory"] == 3 + + +def test_execute_semantic_index_rebuild_ok_returns_model_and_counts( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + base = resolve_memory_config(tmp_path) + config = dataclasses.replace( + base, + semantic=base.semantic.model_copy(update={"enabled": True}), + ) + + class _Writer: + closed = False + + def known_ids(self) -> set[str]: + return set() + + def delete(self, ids: object) -> None: + return None + + def upsert(self, rows: object) -> None: + return None + + def close(self) -> None: + self.closed = True + + import codeclone.memory.semantic as semantic_pkg + from codeclone.memory.semantic import rebuild_workflow + + writer = _Writer() + monkeypatch.setattr( + semantic_pkg, "resolve_semantic_index_writer", lambda _config: writer + ) + monkeypatch.setattr( + rebuild_workflow, + "rebuild_semantic_index", + lambda **_kwargs: RebuildReport( + indexed=4, + embedded=2, + deleted=1, + skipped_unchanged=1, + by_source={"trajectory": 2, "memory": 2}, + ), + ) + + with memory_store(tmp_path) as (root, project, store, _db_path): + payload = execute_semantic_index_rebuild( + root_path=root, config=config, store=store, project=project + ) + assert payload["status"] == "ok" + assert payload["embedding_model"] == "diagnostic-hash-v1" + assert payload["by_source"] == {"memory": 2, "trajectory": 2} + assert writer.closed is True + + +def 
test_execute_semantic_projection_probe_skipped_when_disabled( + tmp_path: Path, +) -> None: + from codeclone.memory.semantic.rebuild_workflow import ( + execute_semantic_projection_probe, + ) + + config = resolve_memory_config(tmp_path) + payload = execute_semantic_projection_probe(root_path=tmp_path, config=config) + skipped = cast(dict[str, str], payload) + assert skipped["status"] == "skipped" + assert skipped["reason"] == "disabled" + + +def test_execute_semantic_projection_probe_reports_lane_stats( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic.rebuild_workflow import ( + build_semantic_index_sources, + execute_semantic_projection_probe, + ) + + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\nenabled = true\n", + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + with memory_store(tmp_path) as (root, project, store, _db_path): + sources = build_semantic_index_sources( + root_path=root, + config=config, + store=store, + project=project, + ) + assert {source.name() for source in sources} == { + "memory", + "audit", + "trajectory", + } + payload = execute_semantic_projection_probe( + root_path=root, + config=config, + store=store, + project=project, + ) + assert payload["action"] == "probe_semantic_projections" + assert "lanes" in payload + + +def test_resolve_projection_helpers_use_fastembed_when_exact( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import rebuild_workflow as wf + + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\n" + "enabled = true\n" + 'embedding_provider = "fastembed"\n', + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + + class _Provider: + model_id = "fastembed:test" + + @property + def estimator_label(self) -> str: + return "fastembed_tokenizer" + + def max_sequence_tokens(self) -> int | None: + return 512 + + def 
probe_passage_token_counts(self, texts: object) -> object: + return texts + + def chunk_text(self, text: str) -> tuple[str, ...]: + return (text,) + + provider = _Provider() + monkeypatch.setattr(wf, "resolve_embedding_provider", lambda _cfg: provider) + token_prober = wf._resolve_projection_token_prober( + config.semantic, + exact_tokens=True, + ) + chunker = wf._resolve_projection_passage_chunker( + config.semantic, + exact_tokens=True, + ) + assert cast(Any, token_prober) is provider + assert chunker is not None + assert wf._resolve_projection_passage_chunker(config.semantic) is None + + +def test_execute_semantic_projection_probe_requires_memory_db( + tmp_path: Path, +) -> None: + from codeclone.memory.semantic.rebuild_workflow import ( + execute_semantic_projection_probe, + ) + + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\nenabled = true\n", + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + with pytest.raises(MemoryContractError, match="database not found"): + execute_semantic_projection_probe(root_path=tmp_path, config=config) + + +def test_execute_semantic_projection_probe_opens_and_closes_store( + tmp_path: Path, +) -> None: + from codeclone.memory.semantic.rebuild_workflow import ( + execute_semantic_projection_probe, + ) + + (tmp_path / "pyproject.toml").write_text( + "[tool.codeclone.memory.semantic]\nenabled = true\n", + encoding="utf-8", + ) + config = resolve_memory_config(tmp_path) + with memory_store(tmp_path) as (root, project, store, _db_path): + payload = execute_semantic_projection_probe( + root_path=root, + config=config, + project=project, + store=store, + ) + assert payload["action"] == "probe_semantic_projections" + + +def test_rebuild_semantic_index_records_observability_counters( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import rebuild as rebuild_mod + + counters: dict[str, int] = {} + + class _Span: + def __enter__(self) -> _Span: 
+ return self + + def __exit__(self, *_args: object) -> None: + return None + + def set_counter(self, key: str, value: int) -> None: + counters[key] = value + + class _Writer: + def known_ids(self) -> set[str]: + return {"stale-1", "stale-2"} + + def delete(self, row_ids: list[str]) -> None: + assert row_ids == ["stale-1", "stale-2"] + + def row_fingerprints(self, row_ids: list[str]) -> dict[str, object]: + return {} + + def upsert(self, rows: list[object]) -> None: + return None + + class _Source: + def name(self) -> str: + return "memory" + + def available(self) -> bool: + return True + + def iter_projections(self) -> Iterator[object]: + return iter(()) + + monkeypatch.setattr(rebuild_mod, "is_observability_enabled", lambda: True) + monkeypatch.setattr(rebuild_mod, "span", lambda **_kwargs: _Span()) + monkeypatch.setattr( + rebuild_mod, "resolve_passage_chunker", lambda _provider: SimpleNamespace() + ) + monkeypatch.setattr( + rebuild_mod, + "_index_source", + lambda *_args, **_kwargs: rebuild_mod._SourceIndexStats( + seen_ids=set(), + embedded=0, + skipped_unchanged=0, + ), + ) + report = rebuild_mod.rebuild_semantic_index( + sources=[cast(Any, _Source())], + writer=_Writer(), # type: ignore[arg-type] + provider=SimpleNamespace(model_id="test-model"), + ) + assert report.deleted == 2 + assert counters["deleted"] == 2 + + +def test_embed_and_upsert_records_observability_counters( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.embedding import DeterministicHashEmbeddingProvider + from codeclone.memory.embedding.batching import EmbedBatchLimits + from codeclone.memory.semantic import rebuild as rebuild_mod + from codeclone.memory.semantic.chunking import IndexedSemanticUnit + + counters: dict[str, int] = {} + + class _Span: + def __enter__(self) -> _Span: + return self + + def __exit__(self, *_args: object) -> None: + return None + + def set_counter(self, key: str, value: int) -> None: + counters[key] = value + + class _Writer: + def 
upsert(self, rows: list[object]) -> None: + assert rows + + unit = IndexedSemanticUnit( + row_id="row-1", + parent_id="parent-1", + chunk_index=0, + chunk_count=1, + source="memory", + project_id="project", + subject_path="pkg/mod.py", + kind="record", + status="approved", + text="semantic text", + text_hash="hash", + ) + provider = DeterministicHashEmbeddingProvider(dimension=8) + monkeypatch.setattr(rebuild_mod, "is_observability_enabled", lambda: True) + monkeypatch.setattr(rebuild_mod, "span", lambda **_kwargs: _Span()) + embedded = rebuild_mod._embed_and_upsert( + (unit,), + writer=_Writer(), # type: ignore[arg-type] + provider=provider, + embed_batch_limits=EmbedBatchLimits(max_documents=4, max_padded_tokens=32), + ) + assert embedded == 1 + assert counters["pending"] == 1 + assert counters["embedded"] == 1 + assert counters["batches"] == 1 diff --git a/tests/test_semantic_retrieval.py b/tests/test_semantic_retrieval.py new file mode 100644 index 00000000..95232a2d --- /dev/null +++ b/tests/test_semantic_retrieval.py @@ -0,0 +1,172 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import dataclasses +from collections.abc import Sequence +from pathlib import Path + +from codeclone.memory.embedding import DeterministicHashEmbeddingProvider +from codeclone.memory.models import MemoryRecord, MemorySubject +from codeclone.memory.retrieval.semantic import audit_event_row, semantic_search +from codeclone.memory.semantic.models import ( + SemanticHit, + SemanticIndexStatus, + SemanticSearchResult, +) +from tests.memory_fixtures import insert_audit_event, make_module_record + +_PROVIDER = DeterministicHashEmbeddingProvider(dimension=8) +_NO_AUDIT = Path("does-not-exist.sqlite3") + + +class _FakeIndex: + def __init__(self, hits: list[SemanticHit]) -> None: + self._hits = hits + + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = ( + self._hits + if source is None + else [hit for hit in self._hits if hit.source == source] + ) + return hits[:k] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True, indexed_count=len(self._hits)) + + +class _FakeStore: + def __init__( + self, + records: dict[str, MemoryRecord], + subjects: dict[str, list[MemorySubject]], + ) -> None: + self._records = records + self._subjects = subjects + + def find_record(self, record_id: str) -> MemoryRecord | None: + return self._records.get(record_id) + + def list_subjects_for_memory(self, memory_id: str) -> list[MemorySubject]: + return self._subjects.get(memory_id, []) + + +def _record(*, statement: str) -> MemoryRecord: + return dataclasses.replace( + make_module_record("proj-1", "codeclone/x.py"), + id="mem-1", + type="contract_note", + statement=statement, + ) + + +def _search( + index: _FakeIndex, store: _FakeStore | None, *, audit: Path = _NO_AUDIT +) -> list[SemanticSearchResult]: + return semantic_search( + index=index, + provider=_PROVIDER, + store=store, + 
audit_db_path=audit, + query="recover after restart", + limit=10, + preview_chars=160, + ) + + +def test_hydrates_memory_hit() -> None: + record = _record(statement="recover after MCP restart uses the checkpoint") + store = _FakeStore( + {record.id: record}, + { + record.id: [ + MemorySubject( + id="s1", + memory_id=record.id, + subject_kind="path", + subject_key="codeclone/x.py", + ) + ] + }, + ) + index = _FakeIndex([SemanticHit(source_id=record.id, source="memory", score=0.9)]) + + (result,) = _search(index, store) + assert (result.source, result.kind, result.status, result.confidence) == ( + "memory", + "contract_note", + "active", + "supported", + ) + assert result.subject_path == "codeclone/x.py" + assert "recover after MCP restart" in result.preview + + +def test_stale_memory_hit_is_skipped() -> None: + index = _FakeIndex([SemanticHit(source_id="gone", source="memory", score=0.9)]) + assert _search(index, _FakeStore({}, {})) == [] + + +def test_memory_hits_skipped_without_store() -> None: + index = _FakeIndex([SemanticHit(source_id="mem-1", source="memory", score=0.9)]) + assert _search(index, None) == [] + + +def test_preview_is_bounded() -> None: + record = _record(statement="x " * 500) + store = _FakeStore({record.id: record}, {}) + index = _FakeIndex([SemanticHit(source_id=record.id, source="memory", score=0.5)]) + + (result,) = _search(index, store) + assert len(result.preview) <= 160 + + +def test_hydrates_audit_hit_from_summary(tmp_path: Path) -> None: + # Unit boundary: hydration reads event_type/status/summary from a + # controller_events row. The writer's summary population (Bug B) is + # covered separately in test_semantic_sources.py, so a controlled row + # via the real schema keeps this test focused on the retrieval mapping. 
+ audit_db = tmp_path / "audit.sqlite3" + insert_audit_event( + audit_db, + event_id="evt-1", + event_type="intent.declared", + status="active", + summary="recover after MCP restart", + ) + index = _FakeIndex([SemanticHit(source_id="evt-1", source="audit", score=0.8)]) + (result,) = _search(index, None, audit=audit_db) + assert result.source == "audit" + assert result.kind == "intent.declared" + assert result.status == "active" + assert result.subject_path is None + assert "recover after MCP restart" in result.preview + + +def test_audit_event_row_missing_db_returns_none(tmp_path: Path) -> None: + assert audit_event_row(tmp_path / "missing.sqlite3", "evt-1") is None + + +def test_audit_event_row_skips_blank_summary(tmp_path: Path) -> None: + audit_db = tmp_path / "audit.sqlite3" + insert_audit_event( + audit_db, + event_id="evt-blank", + event_type="intent.declared", + status="active", + summary=" ", + ) + assert audit_event_row(audit_db, "evt-blank") is None + + +def test_stale_audit_hit_is_skipped(tmp_path: Path) -> None: + index = _FakeIndex( + [SemanticHit(source_id="missing-evt", source="audit", score=0.7)] + ) + assert _search(index, None, audit=tmp_path / "audit.sqlite3") == [] diff --git a/tests/test_semantic_search_service.py b/tests/test_semantic_search_service.py new file mode 100644 index 00000000..2e35c5c7 --- /dev/null +++ b/tests/test_semantic_search_service.py @@ -0,0 +1,383 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import dataclasses +from collections.abc import Mapping, Sequence +from pathlib import Path + +import pytest + +from codeclone.memory.embedding import EmbeddingProvider +from codeclone.memory.exceptions import MemorySemanticUnavailableError +from codeclone.memory.retrieval import query_engineering_memory +from codeclone.memory.semantic.models import SemanticHit, SemanticIndexStatus +from codeclone.memory.sqlite_store import SqliteEngineeringMemoryStore +from tests.memory_fixtures import ( + insert_audit_event, + make_module_record, + memory_store, + seed_document_link, + seed_module_role, +) + + +class _FakeProvider: + model_id = "diagnostic-hash-v1" + dimension = 8 + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + return [[0.0] * self.dimension for _ in texts] + + +class _FakeIndex: + def __init__( + self, + hits: list[SemanticHit], + *, + available: bool = True, + reason: str | None = None, + ) -> None: + self._hits = hits + self._available = available + self._reason = reason + + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + hits = ( + self._hits + if source is None + else [hit for hit in self._hits if hit.source == source] + ) + return hits[:k] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus( + available=self._available, + backend="fake", + reason=self._reason, + indexed_count=len(self._hits), + ) + + +def _search( + store: SqliteEngineeringMemoryStore, + *, + root: Path, + project_id: str, + db_path: Path, + query: str, + index: _FakeIndex, + audit: Path | None = None, + filters: Mapping[str, object] | None = None, + provider: EmbeddingProvider | None = None, +) -> dict[str, object]: + return query_engineering_memory( + store, + project_id=project_id, + root_path=root, + backend="sqlite", + db_path=db_path, + mode="search", + query=query, + 
semantic=True, + semantic_index=index, + embedding_provider=provider or _FakeProvider(), + provider_label="diagnostic", + audit_db_path=audit, + filters=filters, + ) + + +def _record_ids(result: dict[str, object]) -> list[str]: + payload = result["payload"] + assert isinstance(payload, dict) + records = payload["records"] + assert isinstance(records, list) + return [item["id"] for item in records if isinstance(item, dict)] + + +def test_hybrid_merges_semantic_only_record(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + fts = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha beta gamma", + ) + semantic_only = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/b.py", + statement="delta epsilon zeta", + ) + index = _FakeIndex( + [SemanticHit(source_id=semantic_only.id, source="memory", score=0.9)] + ) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + ) + block = result["semantic"] + assert isinstance(block, dict) + assert block["used"] is True + assert block["provider"] == "diagnostic" + assert block["model"] == "diagnostic-hash-v1" + assert block["backend"] == "fake" + ids = _record_ids(result) + # FTS hit and the semantic-only record are merged into one ranked list. 
+ assert fts.id in ids + assert semantic_only.id in ids + + +def test_semantic_only_record_respects_type_filter(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + kept = seed_document_link( + store, + project_id=project.id, + doc_file="codeclone/a.py", + ref_path="codeclone/a.py", + statement="alpha beta gamma", + ) + filtered = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/b.py", + statement="delta epsilon zeta", + ) + index = _FakeIndex( + [SemanticHit(source_id=filtered.id, source="memory", score=0.9)] + ) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + filters={"types": ["document_link"]}, + ) + ids = _record_ids(result) + # FTS hit kept; the semantic-only module_role no longer bypasses the filter. + assert kept.id in ids + assert filtered.id not in ids + + +def test_semantic_hits_searches_each_source_with_its_own_budget() -> None: + from codeclone.memory.retrieval import service as retrieval_service + + captured: list[tuple[str | None, int]] = [] + + class _RecordingIndex: + def search( + self, vector: Sequence[float], *, k: int, source: str | None = None + ) -> list[SemanticHit]: + captured.append((source, k)) + return [] + + def status(self) -> SemanticIndexStatus: + return SemanticIndexStatus(available=True) + + proximity, audit_hits, trajectory_hits = retrieval_service._semantic_hits( + index=_RecordingIndex(), + provider=_FakeProvider(), + query="alpha", + k=7, + ) + + # Each lane is searched independently. Trajectory oversamples before + # parent collapse so chunk rows do not under-fill the lane budget. 
+ assert len(captured) == 3 + assert dict(captured) == {"memory": 7, "audit": 7, "trajectory": 28} + assert proximity == {} + assert audit_hits == [] + assert trajectory_hits == [] + + +def test_rank_records_lets_rrf_lead_over_metadata(tmp_path: Path) -> None: + from codeclone.memory.retrieval import service as retrieval_service + from codeclone.memory.retrieval.ranking import RankingContext + + with memory_store(tmp_path) as (_root, project, store, _db_path): + # module_role carries a smaller type boost than document_link, so under + # the old metadata-led ordering doc_link would sort first. + lexical_top = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha", + ) + meta_rich = seed_document_link( + store, + project_id=project.id, + doc_file="codeclone/b.py", + ref_path="codeclone/b.py", + statement="beta", + ) + context = RankingContext.from_scope( + scope_paths=(), symbols=(), blast_dependents=() + ) + payload, _truncated = retrieval_service._rank_records( + store, + project_id=project.id, + candidates=[lexical_top, meta_rich], + context=context, + max_records=10, + detail_level="compact", + lexical_ranks={lexical_top.id: 0, meta_rich.id: 1}, + vector_ranks={}, + ) + + ids = [item["id"] for item in payload] + # RRF leads: the BM25 rank-0 match wins even though document_link has the + # higher metadata boost. Metadata only breaks ties. 
+ assert ids == [lexical_top.id, meta_rich.id] + + +class _FailingProvider: + model_id = "diagnostic-hash-v1" + dimension = 8 + + def embed(self, texts: Sequence[str]) -> list[list[float]]: + raise MemorySemanticUnavailableError("model unavailable (download disabled)") + + +def test_semantic_search_degrades_to_fts_when_model_unavailable(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + fts = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha beta gamma", + ) + index = _FakeIndex([SemanticHit(source_id=fts.id, source="memory", score=0.9)]) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + provider=_FailingProvider(), + ) + block = result["semantic"] + assert isinstance(block, dict) + # The lazy model load fails at embed; the query degrades to FTS-only and + # surfaces the reason instead of raising. + assert block["used"] is False + assert "model unavailable" in str(block["reason"]) + assert fts.id in _record_ids(result) + + +def test_record_search_telemetry_emits_lane_counters( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.retrieval import service as service_module + + captured: dict[str, int] = {} + monkeypatch.setattr( + service_module, + "record_counter", + lambda key, value=1: captured.__setitem__(key, value), + ) + record_a = dataclasses.replace(make_module_record("proj", "codeclone/a.py"), id="a") + record_b = dataclasses.replace(make_module_record("proj", "codeclone/b.py"), id="b") + service_module._record_search_telemetry( + fts_records=[record_a, record_b], + proximity={"b": 0.9, "c": 0.8}, # b overlaps FTS; c is vector-only + audit_hits=[SemanticHit(source_id="e1", source="audit", score=0.5)], + trajectory_hits=[], + candidates=[record_a, record_b], # c was dropped by the filter + ) + assert captured == { + "retrieval.fts_hits": 2, + "retrieval.vector_memory_hits": 2, + 
"retrieval.vector_audit_hits": 1, + "retrieval.vector_trajectory_hits": 0, + "retrieval.fts_vector_overlap": 1, + "retrieval.semantic_filtered": 1, + } + + +def test_unavailable_index_falls_back_to_fts(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, db_path): + fts = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha beta gamma", + ) + index = _FakeIndex([], available=False, reason="lancedb_not_installed") + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + ) + block = result["semantic"] + assert isinstance(block, dict) + assert block["used"] is False + assert block["reason"] == "lancedb_not_installed" + assert fts.id in _record_ids(result) + + +def _audit_events(result: dict[str, object]) -> list[dict[str, object]]: + payload = result["payload"] + assert isinstance(payload, dict) + events = payload["audit_events"] + assert isinstance(events, list) + return [item for item in events if isinstance(item, dict)] + + +def test_audit_events_typed_separate(tmp_path: Path) -> None: + audit_db = tmp_path / "audit.sqlite3" + insert_audit_event( + audit_db, + event_id="evt-1", + event_type="patch_contract.violated", + status="violated", + summary="patch contract violated: 1 regression(s); structural_regressions", + ) + with memory_store(tmp_path) as (root, project, store, db_path): + record = seed_module_role( + store, + project_id=project.id, + file_path="codeclone/a.py", + statement="alpha beta", + ) + index = _FakeIndex( + [ + SemanticHit(source_id=record.id, source="memory", score=0.9), + SemanticHit(source_id="evt-1", source="audit", score=0.8), + ] + ) + result = _search( + store, + root=root, + project_id=project.id, + db_path=db_path, + query="alpha", + index=index, + audit=audit_db, + ) + # The memory record stays in payload.records; the audit incident is + # returned typed-separate in payload.audit_events (never 
co-ranked). + assert record.id in _record_ids(result) + assert "evt-1" not in _record_ids(result) + events = _audit_events(result) + assert len(events) == 1 + event = events[0] + assert event["event_id"] == "evt-1" + assert event["event_type"] == "patch_contract.violated" + assert event["status"] == "violated" + assert "patch contract violated" in str(event["summary"]) diff --git a/tests/test_semantic_sources.py b/tests/test_semantic_sources.py new file mode 100644 index 00000000..1481ea95 --- /dev/null +++ b/tests/test_semantic_sources.py @@ -0,0 +1,506 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import dataclasses +import sqlite3 +from collections.abc import Sequence +from dataclasses import replace +from pathlib import Path +from typing import Any, Literal, cast + +import pytest + +from codeclone.audit.events import EVENT_INTENT_DECLARED, AuditEvent, repo_root_digest +from codeclone.audit.writer import SqliteAuditWriter +from codeclone.memory.enums import MemoryStatus +from codeclone.memory.models import ( + MemoryQuery, + MemoryRecord, + MemorySubject, + generate_memory_id, +) +from codeclone.memory.retrieval.semantic import audit_event_row +from codeclone.memory.semantic.sources import ( + AuditIndexSource, + MemoryIndexSource, + TrajectoryIndexSource, +) +from codeclone.memory.trajectory.models import ( + Trajectory, + TrajectoryEvidence, + TrajectoryListItem, + TrajectoryStep, + TrajectorySubject, +) +from tests.memory_fixtures import make_module_record + + +class _FakeStore: + def __init__( + self, + records: list[MemoryRecord], + subjects: dict[str, list[MemorySubject]], + ) -> None: + self._records = records + self._subjects = subjects + + def query_records(self, query: MemoryQuery) -> 
Sequence[MemoryRecord]: + return self._records[query.offset : query.offset + query.limit] + + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: + return { + memory_id: self._subjects.get(memory_id, []) for memory_id in memory_ids + } + + +class _FakeTrajectoryStore: + def __init__(self, trajectories: list[Trajectory]) -> None: + self._trajectories = {trajectory.id: trajectory for trajectory in trajectories} + + def list_trajectories( + self, + *, + project_id: str, + limit: int = 20, + ) -> list[TrajectoryListItem]: + items = [ + TrajectoryListItem( + id=trajectory.id, + workflow_id=trajectory.workflow_id, + outcome=trajectory.outcome, + quality_tier=trajectory.quality_tier, + quality_score=trajectory.quality_score, + event_count=trajectory.event_count, + started_at_utc=trajectory.started_at_utc, + finished_at_utc=trajectory.finished_at_utc, + summary=trajectory.summary, + ) + for trajectory in self._trajectories.values() + if trajectory.project_id == project_id + ] + return items[:limit] + + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + return [ + self._trajectories[tid] + for tid in trajectory_ids + if tid in self._trajectories + ] + + +def _prose( + project_id: str, + *, + statement: str, + status: MemoryStatus = "active", +) -> MemoryRecord: + # Reuse the fixture builder + replace -> avoids a duplicated 25-field literal. 
+ base = make_module_record(project_id, "codeclone/sample.py") + return dataclasses.replace( + base, + id=generate_memory_id(), + type="contract_note", + status=status, + statement=statement, + ) + + +def test_memory_index_source_filters_type_and_status() -> None: + project_id = "proj-1" + indexed = _prose(project_id, statement="recover keeps the checkpoint") + rejected = _prose(project_id, statement="rejected note", status="rejected") + structural = make_module_record(project_id, "codeclone/mod.py") # module_role + subjects = { + indexed.id: [ + MemorySubject( + id="s1", + memory_id=indexed.id, + subject_kind="path", + subject_key="codeclone/sample.py", + ) + ] + } + store = _FakeStore([structural, indexed, rejected], subjects) + source = MemoryIndexSource(store, project_id=project_id) + + assert source.available() is True + projections = list(source.iter_projections()) + assert len(projections) == 1 # module_role + rejected skipped + assert projections[0].source_id == indexed.id + assert projections[0].kind == "contract_note" + assert projections[0].subject_path == "codeclone/sample.py" + + +def test_audit_index_source_gating(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + # Disabled -> unavailable regardless of file presence. + assert AuditIndexSource(enabled=False, db_path=db_path).available() is False + # Enabled but file absent -> unavailable, and iteration is empty (no raise). + absent = AuditIndexSource(enabled=True, db_path=db_path) + assert absent.available() is False + assert list(absent.iter_projections()) == [] + + +def test_audit_index_source_projects_summary_column(tmp_path: Path) -> None: + root = tmp_path / "repo" + root.mkdir() + db_path = tmp_path / "audit.sqlite3" + # payloads="off" still populates the summary column (Bug B). 
+ writer = SqliteAuditWriter(db_path=db_path, payloads="off", retention_days=30) + try: + writer.emit( + AuditEvent( + event_type=EVENT_INTENT_DECLARED, + severity="info", + repo_root_digest=repo_root_digest(root), + agent_pid=1, + agent_label="test", + payload={"intent_description": "recover after MCP restart"}, + ) + ) + finally: + writer.close() + + source = AuditIndexSource(enabled=True, db_path=db_path) + assert source.available() is True + projections = list(source.iter_projections()) + assert len(projections) == 1 + assert projections[0].source == "audit" + assert projections[0].kind == "intent.declared" + assert "recover after MCP restart" in projections[0].text + + +def test_audit_index_source_skips_whitespace_only_summary(tmp_path: Path) -> None: + from codeclone.audit.schema import ensure_schema + + db_path = tmp_path / "audit.sqlite3" + conn = sqlite3.connect(str(db_path)) + try: + ensure_schema(conn) + conn.execute( + "INSERT INTO controller_events " + "(event_id, event_type, severity, created_at_utc, " + "repo_root_digest, agent_label, agent_pid, status, summary) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + "evt-ws", + EVENT_INTENT_DECLARED, + "info", + "2026-06-02T10:00:00Z", + "digest", + "agent", + 1, + "active", + " ", + ), + ) + conn.commit() + finally: + conn.close() + source = AuditIndexSource(enabled=True, db_path=db_path) + assert list(source.iter_projections()) == [] + + +def test_audit_index_source_tolerates_sqlite_errors(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + db_path.write_text("not sqlite", encoding="utf-8") + source = AuditIndexSource(enabled=True, db_path=db_path) + assert source.available() is True + assert list(source.iter_projections()) == [] + + +def test_memory_index_source_paginates_records() -> None: + project_id = "proj-page" + records = [ + _prose(project_id, statement=f"recover note {index}") for index in range(250) + ] + subjects = { + record.id: [ + MemorySubject( + id=f"s-{record.id}", + 
memory_id=record.id, + subject_kind="path", + subject_key=f"codeclone/p{record.id}.py", + ) + ] + for record in records + } + store = _FakeStore(records, subjects) + projections = list( + MemoryIndexSource(store, project_id=project_id).iter_projections() + ) + assert len(projections) == 250 + + +def test_trajectory_index_source_projects_bounded_text() -> None: + trajectory = _trajectory("proj-traj") + source = TrajectoryIndexSource( + _FakeTrajectoryStore([trajectory]), + project_id="proj-traj", + ) + + projections = list(source.iter_projections()) + + assert len(projections) == 1 + projection = projections[0] + assert projection.source == "trajectory" + assert projection.source_id == trajectory.id + assert projection.subject_path == "pkg/service.py" + assert "recover stale intent" in projection.text + assert "pkg/service.py" in projection.text + assert "intent.declared" in projection.text + assert "event_core_json" not in projection.text + assert '{"secret"' not in projection.text + + +def test_trajectory_source_name_missing_record_and_pagination() -> None: + trajectory = _trajectory("proj-traj") + source = TrajectoryIndexSource( + _FakeTrajectoryStore([trajectory]), + project_id="proj-traj", + ) + assert source.name() == "trajectory" + + class _MissingTrajectoryStore(_FakeTrajectoryStore): + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + return [] + + assert ( + list( + TrajectoryIndexSource( + _MissingTrajectoryStore([trajectory]), + project_id="proj-traj", + ).iter_projections() + ) + == [] + ) + + trajectories = [ + dataclasses.replace(trajectory, id=f"traj-{index}") for index in range(201) + ] + projections = list( + TrajectoryIndexSource( + _FakeTrajectoryStore(trajectories), + project_id="proj-traj", + ).iter_projections() + ) + assert len(projections) == 201 + + +def test_audit_source_tolerates_query_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.memory.semantic import sources + 
+ db_path = tmp_path / "audit.sqlite3" + db_path.touch() + + class _BrokenConnection: + closed = False + + def execute(self, _sql: str, _params: object) -> object: + raise sqlite3.OperationalError("broken query") + + def close(self) -> None: + self.closed = True + + connection = _BrokenConnection() + monkeypatch.setattr( + sources, + "open_audit_db_readonly", + lambda _path: connection, + ) + assert ( + list(AuditIndexSource(enabled=True, db_path=db_path).iter_projections()) == [] + ) + assert connection.closed is True + + +def test_memory_index_source_batches_subjects_per_page() -> None: + project_id = "proj-batch" + records = [_prose(project_id, statement=f"note {index}") for index in range(250)] + + class _CountingStore(_FakeStore): + batch_calls = 0 + + def list_subjects_for_memories( + self, memory_ids: Sequence[str] + ) -> dict[str, list[MemorySubject]]: + self.batch_calls += 1 + return super().list_subjects_for_memories(memory_ids) + + store = _CountingStore(records, {}) + projections = list( + MemoryIndexSource(store, project_id=project_id).iter_projections() + ) + assert len(projections) == 250 + # 250 records over 2 pages (200 + 50) -> 2 batched subject loads, not 250. + assert store.batch_calls == 2 + + +def test_trajectory_index_source_batches_hydration_per_page() -> None: + base = _trajectory("proj-traj") + trajectories = [ + dataclasses.replace(base, id=f"traj-{index}") for index in range(250) + ] + + class _CountingTrajectoryStore(_FakeTrajectoryStore): + batch_calls = 0 + + def find_trajectories(self, trajectory_ids: Sequence[str]) -> list[Trajectory]: + self.batch_calls += 1 + return super().find_trajectories(trajectory_ids) + + store = _CountingTrajectoryStore(trajectories) + projections = list( + TrajectoryIndexSource(store, project_id="proj-traj").iter_projections() + ) + assert len(projections) == 250 + # 2 pages (200 + 50) -> 2 batched hydrations, not 250 find_trajectory calls. 
+ assert store.batch_calls == 2 + + +def _trajectory(project_id: str) -> Trajectory: + return Trajectory( + id="traj-1", + project_id=project_id, + repo_root_digest="root", + workflow_id="intent:intent-1", + intent_id="intent-1", + primary_run_id="run-after", + first_run_id="run-before", + last_run_id="run-after", + report_digest="sha256:" + "a" * 64, + outcome="accepted", + quality_tier="verified", + quality_score=95, + labels=("recovered",), + summary="recover stale intent before editing service", + trajectory_digest="d" * 64, + source_event_stream_digest="e" * 64, + projection_version="trajectory-v1", + event_count=2, + step_count=2, + incident_count=0, + started_at_utc="2026-01-01T00:00:00Z", + finished_at_utc="2026-01-01T00:00:01Z", + projected_at_utc="2026-01-01T00:00:02Z", + updated_at_utc="2026-01-01T00:00:02Z", + steps=( + TrajectoryStep( + step_index=0, + audit_sequence=1, + event_id="evt-1", + event_type="intent.declared", + status="active", + run_id="run-before", + report_digest=None, + event_core_sha256="1" * 64, + event_core_json='{"secret":"not indexed"}', + summary="recover stale intent", + created_at_utc="2026-01-01T00:00:00Z", + ), + ), + subjects=( + TrajectorySubject( + subject_kind="path", + subject_key="pkg/service.py", + relation="about", + ), + ), + evidence=( + TrajectoryEvidence( + evidence_kind="audit_event_stream", + ref="intent:intent-1", + locator="1", + digest="e" * 64, + created_at_utc="2026-01-01T00:00:02Z", + ), + ), + ) + + +def test_memory_index_source_without_path_subject() -> None: + project_id = "proj-no-path" + record = replace( + make_module_record(project_id, "pkg.mod"), + type="contract_note", + statement="recover semantic index without path subject", + ) + subjects = [ + MemorySubject( + id="s-test", + memory_id=record.id, + subject_kind="test", + subject_key="tests/test_mod.py", + relation="about", + ) + ] + store = type( + "_Store", + (), + { + "query_records": lambda self, query: [record][ + query.offset : query.offset 
+ 250 + ], + "list_subjects_for_memories": lambda self, memory_ids: dict.fromkeys( + memory_ids, subjects + ), + }, + )() + source = MemoryIndexSource(cast(Any, store), project_id=project_id) + projections = list(source.iter_projections()) + assert len(projections) == 1 + assert projections[0].subject_path is None + + +def test_audit_index_source_connect_error(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + db_path.mkdir() + source = AuditIndexSource(enabled=True, db_path=db_path) + assert source.name() == "audit" + assert list(source.iter_projections()) == [] + + +def test_audit_event_row_connect_and_query_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + audit_db = tmp_path / "audit.sqlite3" + audit_db.mkdir() + assert audit_event_row(audit_db, "evt-1") is None + + audit_file = tmp_path / "audit2.sqlite3" + audit_file.write_text("not sqlite", encoding="utf-8") + + real_connect = sqlite3.connect + + def _fail_connect( + database: str, + timeout: float = 5.0, + detect_types: int = 0, + isolation_level: Literal["DEFERRED", "EXCLUSIVE", "IMMEDIATE"] | None = None, + check_same_thread: bool = True, + cached_statements: int = 128, + uri: bool = False, + ) -> sqlite3.Connection: + if database == str(audit_file): + raise sqlite3.Error("connect failed") + return real_connect( + database, + timeout=timeout, + detect_types=detect_types, + isolation_level=isolation_level, + check_same_thread=check_same_thread, + cached_statements=cached_statements, + uri=uri, + ) + + monkeypatch.setattr(sqlite3, "connect", _fail_connect) + assert audit_event_row(audit_file, "evt-1") is None diff --git a/tests/test_sqlite_readonly_openers.py b/tests/test_sqlite_readonly_openers.py new file mode 100644 index 00000000..3a36d623 --- /dev/null +++ b/tests/test_sqlite_readonly_openers.py @@ -0,0 +1,250 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +import sys +from pathlib import Path + +import pytest + +from codeclone.analytics.exceptions import AnalyticsStoreError +from codeclone.analytics.schema import open_analytics_db, open_analytics_db_readonly +from codeclone.audit.schema import ( + open_audit_db, + open_audit_db_readonly, +) +from codeclone.audit.validation import AuditSchemaError +from codeclone.contracts import CORPUS_ANALYTICS_STORE_SCHEMA_VERSION +from codeclone.memory.schema import open_memory_db, open_memory_db_readonly +from codeclone.surfaces.mcp._workspace_intent_schema import ( + IntentRegistrySchemaError, + open_intent_registry_db, + open_intent_registry_db_readonly, +) +from codeclone.utils.sqlite_store import open_sqlite_db_readonly + + +def test_generic_readonly_opener_does_not_create_missing_database( + tmp_path: Path, +) -> None: + db_path = tmp_path / "missing.sqlite3" + + with pytest.raises(FileNotFoundError): + open_sqlite_db_readonly(db_path, validate_schema=lambda _conn: None) + + assert not db_path.exists() + + +def test_audit_readonly_opener_rejects_writes(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + writable.close() + + conn = open_audit_db_readonly(db_path) + try: + assert conn.execute("SELECT COUNT(*) FROM controller_events").fetchone() == (0,) + with pytest.raises(sqlite3.OperationalError, match="readonly"): + conn.execute("DELETE FROM controller_events") + finally: + conn.close() + + +def test_audit_readonly_opener_accepts_migratable_schema_without_migration( + tmp_path: Path, +) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + try: + writable.execute("UPDATE audit_meta SET value='3' WHERE key='schema_version'") + writable.commit() + finally: 
+ writable.close() + + readonly = open_audit_db_readonly(db_path) + readonly.close() + + raw = sqlite3.connect(db_path) + try: + assert raw.execute( + "SELECT value FROM audit_meta WHERE key='schema_version'" + ).fetchone() == ("3",) + finally: + raw.close() + + +def test_audit_readonly_opener_rejects_unsupported_schema(tmp_path: Path) -> None: + db_path = tmp_path / "audit.sqlite3" + writable = open_audit_db(db_path) + try: + writable.execute("UPDATE audit_meta SET value='999' WHERE key='schema_version'") + writable.commit() + finally: + writable.close() + + with pytest.raises(AuditSchemaError, match="Unsupported audit schema"): + open_audit_db_readonly(db_path) + + +def test_intent_readonly_opener_rejects_stale_schema_without_migration( + tmp_path: Path, +) -> None: + db_path = tmp_path / "intents.sqlite3" + writable = open_intent_registry_db(db_path) + try: + writable.execute( + "UPDATE intent_registry_meta SET value='1' WHERE key='schema_version'" + ) + writable.commit() + finally: + writable.close() + + with pytest.raises(IntentRegistrySchemaError, match="requires writable"): + open_intent_registry_db_readonly(db_path) + + raw = sqlite3.connect(db_path) + try: + assert raw.execute( + "SELECT value FROM intent_registry_meta WHERE key='schema_version'" + ).fetchone() == ("1",) + finally: + raw.close() + + +def test_analytics_readonly_opener_requires_writable_migration( + tmp_path: Path, +) -> None: + db_path = tmp_path / "analytics.sqlite3" + writable = open_analytics_db(db_path) + try: + writable.execute( + "UPDATE analytics_meta SET value='1.0' WHERE key='schema_version'" + ) + writable.commit() + finally: + writable.close() + + with pytest.raises(AnalyticsStoreError, match="requires writable migration"): + open_analytics_db_readonly(db_path) + + migrated = open_analytics_db(db_path) + try: + assert migrated.execute( + "SELECT value FROM analytics_meta WHERE key='schema_version'" + ).fetchone() == (CORPUS_ANALYTICS_STORE_SCHEMA_VERSION,) + triggers = { + 
row[0] + for row in migrated.execute( + "SELECT name FROM sqlite_master WHERE type='trigger'" + ) + } + assert "analytics_generation_delete_guard" in triggers + assert "analytics_assignment_update_guard" in triggers + finally: + migrated.close() + + +def test_analytics_schema_rejects_orphan_embedding_metadata(tmp_path: Path) -> None: + conn = open_analytics_db(tmp_path / "analytics.sqlite3") + try: + with pytest.raises(sqlite3.IntegrityError, match="unknown"): + conn.execute( + """ + INSERT INTO embedding_items ( + embedding_generation_id, snapshot_item_id, + vector_row_key, vector_digest, dimensions + ) VALUES ('missing-generation', 'missing-item', 'row', 'digest', 1) + """ + ) + finally: + conn.close() + + +@pytest.mark.parametrize( + "module_name", + [ + "codeclone.audit.schema", + "codeclone.memory.schema", + "codeclone.analytics.schema", + "codeclone.surfaces.mcp._workspace_intent_schema", + ], +) +def test_schema_module_import_does_not_load_observability(module_name: str) -> None: + for name in list(sys.modules): + if name == "codeclone.observability" or name.startswith( + "codeclone.observability." 
+ ): + sys.modules.pop(name, None) + + import importlib + + importlib.import_module(module_name) + + assert not any( + name == "codeclone.observability" or name.startswith("codeclone.observability.") + for name in sys.modules + ) + + +@pytest.mark.parametrize( + "opener", + [ + open_audit_db, + open_audit_db_readonly, + open_intent_registry_db, + open_memory_db, + open_memory_db_readonly, + ], +) +def test_domain_openers_attach_observability( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + opener: object, +) -> None: + audit_path = tmp_path / "audit.sqlite3" + intent_path = tmp_path / "intents.sqlite3" + memory_path = tmp_path / "memory.sqlite3" + open_audit_db(audit_path).close() + open_memory_db(memory_path).close() + calls: list[sqlite3.Connection] = [] + monkeypatch.setattr( + "codeclone.observability.runtime.instrument_db_connection", + calls.append, + ) + + selected = opener + assert callable(selected) + if selected is open_intent_registry_db: + path = intent_path + elif selected in (open_memory_db, open_memory_db_readonly): + path = memory_path + else: + path = audit_path + conn = selected(path) + try: + assert calls == [conn] + finally: + conn.close() + + +def test_intent_readonly_opener_attaches_observability( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + db_path = tmp_path / "intents.sqlite3" + open_intent_registry_db(db_path).close() + calls: list[sqlite3.Connection] = [] + monkeypatch.setattr( + "codeclone.observability.runtime.instrument_db_connection", + calls.append, + ) + + conn = open_intent_registry_db_readonly(db_path) + try: + assert calls == [conn] + finally: + conn.close() diff --git a/tests/test_sync_integrations.py b/tests/test_sync_integrations.py new file mode 100644 index 00000000..7312d4ac --- /dev/null +++ b/tests/test_sync_integrations.py @@ -0,0 +1,515 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import json +import shutil +import subprocess +from dataclasses import replace +from pathlib import Path + +import pytest + +from scripts.sync_integrations import ( + GLOBAL_DENYLIST, + INTEGRATION_DIST, + SYNC_TARGETS, + SyncTarget, + SyncValidationError, + _dist_file, + _resolve_source_pairs, + main, + sync_target, + validate_source, + validate_target, +) + + +def _write(path: Path, text: str = "content\n") -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def _git(path: Path, *args: str) -> None: + subprocess.run( + ("git", *args), + cwd=path, + check=True, + capture_output=True, + text=True, + ) + + +def _init_git(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + _git(path, "init") + _git(path, "config", "user.email", "tests@example.invalid") + _git(path, "config", "user.name", "CodeClone Tests") + + +def _commit_all(path: Path) -> None: + _git(path, "add", ".") + _git(path, "commit", "-m", "fixture") + + +def _seed_integration_dist(source: Path) -> None: + monorepo_dist = Path(__file__).resolve().parents[1] / "scripts/integration_dist" + destination = source / "scripts/integration_dist" + if destination.exists(): + shutil.rmtree(destination) + shutil.copytree(monorepo_dist, destination) + + +def _expected_copy_count( + *, + source_root: Path, + target_root: Path, + target: SyncTarget, +) -> int: + denylist = GLOBAL_DENYLIST + target.denylist + return len( + _resolve_source_pairs( + source_root=source_root, + target_root=target_root, + target=target, + denylist=denylist, + ) + ) + + +def _make_source(tmp_path: Path) -> Path: + source = tmp_path / "source" + _init_git(source) + _write( + source / "pyproject.toml", + '[project]\nname = "codeclone"\nversion = "9.8.7"\n', + ) 
+ _write(source / "plugins" / "codeclone" / "README.md", "# Codex\n") + _write(source / "plugins" / "codeclone" / "skills" / "review" / "SKILL.md") + _write( + source / "plugins" / "codeclone" / "scripts" / "launch_mcp.py", + "def resolve_launch_target():\n return None\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / ".claude-plugin" / "plugin.json", + "{}\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / ".mcp.json", + "{}\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / "scripts" / "launch_mcp.py", + "import runpy\n", + ) + _write( + source / "plugins" / "claude-code-codeclone" / "skills" / "review" / "SKILL.md" + ) + _seed_integration_dist(source) + _write( + source / "extensions" / "claude-desktop-codeclone" / "manifest.json", + "{}\n", + ) + _write(source / "extensions" / "vscode-codeclone" / "package.json", "{}\n") + _write(source / "extensions" / "vscode-codeclone" / "src" / "extension.js") + _write( + source / "plugins" / "cursor-codeclone" / ".cursor-plugin" / "plugin.json", + "{}\n", + ) + _write(source / "plugins" / "cursor-codeclone" / "rules" / "workflow.mdc") + _write( + source / "plugins" / "cursor-codeclone" / "scripts" / "launch_mcp.py", + "import runpy\n", + ) + _commit_all(source) + return source + + +def _make_target(tmp_path: Path, name: str) -> Path: + target = tmp_path / f"codeclone-{name}" + _init_git(target) + return target + + +def _load_manifest(target: Path) -> dict[str, object]: + payload = json.loads((target / "SYNC_MANIFEST.json").read_text(encoding="utf-8")) + assert isinstance(payload, dict) + return payload + + +def test_sync_copies_files_and_writes_manifest(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + target_def = SYNC_TARGETS["codex"] + result = sync_target( + source_root=source, + target_root=target, + target=target_def, + allow_dirty=False, + dry_run=False, + ) + + assert result.files_copied == _expected_copy_count( 
+ source_root=source, + target_root=target, + target=target_def, + ) + assert result.files_deleted == 0 + assert (target / "plugins" / "codeclone" / "README.md").is_file() + assert (target / ".agents" / "plugins" / "marketplace.json").is_file() + manifest = _load_manifest(target) + assert { + "source_repository": manifest["source_repository"], + "source_dirty": manifest["source_dirty"], + "codeclone_version": manifest["codeclone_version"], + "target": manifest["target"], + "files_copied": manifest["files_copied"], + "files_deleted": manifest["files_deleted"], + } == { + "source_repository": "orenlab/codeclone", + "source_dirty": False, + "codeclone_version": "9.8.7", + "target": "codex", + "files_copied": result.files_copied, + "files_deleted": 0, + } + + +def test_sync_deletes_only_allowlisted_paths(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + _write(target / "plugins" / "codeclone" / "stale.txt") + _write(target / ".github" / "workflows" / "ci.yml") + _write(target / "KEEP.md") + _write(target / "SYNC_MANIFEST.json", "{}\n") + + result = sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert result.files_deleted == 2 + assert not (target / "plugins" / "codeclone" / "stale.txt").exists() + assert (target / ".github" / "workflows" / "ci.yml").is_file() + assert (target / "KEEP.md").is_file() + + +def test_sync_respects_global_denylist(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "plugins" / "codeclone" / "__pycache__" / "x.pyc") + _write(source / "plugins" / "codeclone" / ".DS_Store") + _write(source / "plugins" / "codeclone" / "node_modules" / "pkg" / "index.js") + _commit_all(source) + target = _make_target(tmp_path, "codex") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert not (target / 
"plugins" / "codeclone" / "__pycache__").exists() + assert not (target / "plugins" / "codeclone" / ".DS_Store").exists() + assert not (target / "plugins" / "codeclone" / "node_modules").exists() + + +def test_sync_respects_per_target_denylist(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "extensions" / "vscode-codeclone" / "secret" / "token.txt") + _commit_all(source) + target = _make_target(tmp_path, "vscode") + target_def = replace(SYNC_TARGETS["vscode"], denylist=("secret/**",)) + + sync_target( + source_root=source, + target_root=target, + target=target_def, + allow_dirty=False, + dry_run=False, + ) + + assert (target / "package.json").is_file() + assert not (target / "secret").exists() + + +def test_sync_dry_run_does_not_write(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + result = sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=True, + ) + + target_def = SYNC_TARGETS["codex"] + assert result.dry_run is True + assert result.files_copied == _expected_copy_count( + source_root=source, + target_root=target, + target=target_def, + ) + assert not (target / "plugins").exists() + assert not (target / "SYNC_MANIFEST.json").exists() + + +def test_sync_rejects_dirty_source_without_flag(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "dirty.txt") + + with pytest.raises(SyncValidationError, match="source tree is dirty"): + validate_source(source, allow_dirty=False) + + +def test_sync_allows_dirty_source_with_flag(tmp_path: Path) -> None: + source = _make_source(tmp_path) + _write(source / "dirty.txt") + + source_info = validate_source(source, allow_dirty=True) + + assert source_info.dirty is True + + +def test_sync_rejects_missing_target(tmp_path: Path) -> None: + with pytest.raises(SyncValidationError, match="does not exist"): + validate_target(tmp_path / "codeclone-codex", "codex") 
+ + +def test_sync_rejects_non_git_target(tmp_path: Path) -> None: + target = tmp_path / "codeclone-codex" + target.mkdir() + + with pytest.raises(SyncValidationError, match="not a git repo"): + validate_target(target, "codex") + + +def test_sync_rejects_path_traversal(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "bad") + bad_target = SyncTarget( + name="bad", + copies=(("plugins/codeclone", "../outside"),), + generated=("SYNC_MANIFEST.json",), + ) + + with pytest.raises(SyncValidationError, match="path traversal"): + sync_target( + source_root=source, + target_root=target, + target=bad_target, + allow_dirty=False, + dry_run=False, + ) + + +def test_sync_all_targets(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + source = _make_source(tmp_path) + base_dir = tmp_path / "targets" + for name in SYNC_TARGETS: + _make_target(base_dir, name) + monkeypatch.chdir(source) + + exit_code = main(["--all", "--base-dir", str(base_dir)]) + + assert exit_code == 0 + for name in SYNC_TARGETS: + assert (base_dir / f"codeclone-{name}" / "SYNC_MANIFEST.json").is_file() + + +def test_manifest_fields(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "cursor") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["cursor"], + allow_dirty=False, + dry_run=False, + ) + + manifest = _load_manifest(target) + assert set(manifest) == { + "codeclone_version", + "files_copied", + "files_deleted", + "source_commit", + "source_commit_full", + "source_dirty", + "source_paths", + "source_repository", + "synced_at_utc", + "target", + } + assert isinstance(manifest["source_commit"], str) + assert isinstance(manifest["source_commit_full"], str) + assert isinstance(manifest["source_dirty"], bool) + assert isinstance(manifest["source_paths"], list) + assert isinstance(manifest["files_copied"], int) + assert isinstance(manifest["files_deleted"], int) + + +def 
test_flat_layout_copies_to_root(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "vscode") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["vscode"], + allow_dirty=False, + dry_run=False, + ) + + assert (target / "package.json").is_file() + assert (target / "src" / "extension.js").is_file() + assert not (target / "extensions").exists() + + +def test_nested_layout_preserves_structure(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + assert (target / "plugins" / "codeclone" / "README.md").is_file() + assert (target / "README.md").is_file() + root_readme = (target / "README.md").read_text(encoding="utf-8") + plugin_readme = (target / "plugins" / "codeclone" / "README.md").read_text( + encoding="utf-8" + ) + assert "distribution repository" in root_readme + assert root_readme != plugin_readme + + +def test_codex_sync_writes_public_marketplace(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "codex") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["codex"], + allow_dirty=False, + dry_run=False, + ) + + marketplace = json.loads( + (target / ".agents/plugins/marketplace.json").read_text(encoding="utf-8") + ) + assert marketplace["name"] == "orenlab-codeclone" + assert marketplace["interface"]["displayName"] == "CodeClone" + assert marketplace["plugins"][0]["source"]["path"] == "./plugins/codeclone" + + +def test_claude_code_sync_writes_public_marketplace(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "claude-code") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["claude-code"], + allow_dirty=False, + dry_run=False, + ) + + marketplace = json.loads( + 
(target / ".claude-plugin/marketplace.json").read_text(encoding="utf-8") + ) + assert marketplace["name"] == "orenlab-codeclone" + assert marketplace["plugins"][0]["name"] == "codeclone" + assert marketplace["plugins"][0]["source"] == "./plugins/codeclone" + assert ( + target / "plugins" / "codeclone" / ".claude-plugin" / "plugin.json" + ).is_file() + + +def test_sync_writes_gitignore_for_all_targets(tmp_path: Path) -> None: + source = _make_source(tmp_path) + for name in SYNC_TARGETS: + target = _make_target(tmp_path, name) + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS[name], + allow_dirty=False, + dry_run=False, + ) + gitignore = (target / ".gitignore").read_text(encoding="utf-8") + assert ".idea/" in gitignore + assert ".DS_Store" in gitignore + vscode_gitignore = (tmp_path / "codeclone-vscode" / ".gitignore").read_text( + encoding="utf-8" + ) + assert "node_modules/" in vscode_gitignore + + +def test_dist_file_helper_builds_integration_dist_paths() -> None: + assert _dist_file("gitignore.codex", ".gitignore") == ( + f"{INTEGRATION_DIST}/gitignore.codex", + ".gitignore", + ) + + +def test_sync_source_paths_exist() -> None: + root = Path(__file__).resolve().parents[1] + for target in SYNC_TARGETS.values(): + for source_rel, _destination_rel in target.copies: + source_path = root / source_rel + assert source_path.exists(), ( + f"missing sync source {source_rel} for target {target.name}" + ) + + +def test_cursor_sync_ships_standalone_launcher(tmp_path: Path) -> None: + source = _make_source(tmp_path) + target = _make_target(tmp_path, "cursor") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["cursor"], + allow_dirty=False, + dry_run=False, + ) + + launcher = (target / "scripts" / "launch_mcp.py").read_text(encoding="utf-8") + assert "resolve_launch_target" in launcher + assert "runpy" not in launcher + + +def test_claude_code_sync_ships_standalone_launcher(tmp_path: Path) -> None: + source = 
_make_source(tmp_path) + target = _make_target(tmp_path, "claude-code") + + sync_target( + source_root=source, + target_root=target, + target=SYNC_TARGETS["claude-code"], + allow_dirty=False, + dry_run=False, + ) + + launcher = ( + target / "plugins" / "codeclone" / "scripts" / "launch_mcp.py" + ).read_text(encoding="utf-8") + assert "resolve_launch_target" in launcher + assert "runpy" not in launcher diff --git a/tests/test_token_estimator.py b/tests/test_token_estimator.py new file mode 100644 index 00000000..1c2fc323 --- /dev/null +++ b/tests/test_token_estimator.py @@ -0,0 +1,163 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import builtins +from typing import Any +from unittest.mock import patch + +import pytest + +from codeclone.budget.estimator import ( + approx_tokens_from_chars, + estimate_payload, + estimate_text_token, + estimate_texts_token_counts, +) + + +def test_estimate_payload_defaults_to_chars_approx() -> None: + """Default estimation is cheap and does not import tiktoken.""" + payload = {"key": "value", "number": 42} + result = estimate_payload(payload) + assert result.method == "chars_approx" + assert result.encoding == "chars_approx" + assert result.tokens == -(-result.characters // 4) + + +def test_estimate_payload_default_does_not_import_tiktoken() -> None: + """Long-lived MCP paths must not import tiktoken by default.""" + real_import = builtins.__import__ + + def guarded_import( + name: str, + globals_: dict[str, Any] | None = None, + locals_: dict[str, Any] | None = None, + fromlist: tuple[str, ...] 
= (), + level: int = 0, + ) -> Any: + if name == "tiktoken": + raise AssertionError("default estimator must not import tiktoken") + return real_import(name, globals_, locals_, fromlist, level) + + with patch.object(builtins, "__import__", guarded_import): + result = estimate_payload({"key": "value"}) + + assert result.method == "chars_approx" + + +def test_estimate_payload_with_tiktoken_opt_in() -> None: + """Exact BPE estimation when explicitly requested and available.""" + payload = { + "status": "accepted", + "health": 90, + "findings": {"total": 5, "new": 2}, + "message": "Patch contract accepted.", + } + result = estimate_payload(payload, estimator="tiktoken") + assert result.method == "tiktoken" + assert result.encoding == "o200k_base" + assert result.tokens > 0 + assert result.characters > 0 + assert result.tokens < result.characters + + +def test_estimate_payload_without_tiktoken() -> None: + """Explicit tiktoken mode falls back when tiktoken import fails.""" + payload = {"key": "value", "number": 42} + with patch.dict("sys.modules", {"tiktoken": None}): + result = estimate_payload(payload, estimator="tiktoken") + assert result.method == "chars_approx" + assert result.encoding == "chars_approx" + assert result.tokens == -(-result.characters // 4) + + +def test_estimate_payload_canonical_json_determinism() -> None: + """Same content in different insertion order -> identical estimates.""" + payload_a = {"z_last": 1, "a_first": 2, "m_middle": 3} + payload_b = {"a_first": 2, "m_middle": 3, "z_last": 1} + result_a = estimate_payload(payload_a) + result_b = estimate_payload(payload_b) + assert result_a.tokens == result_b.tokens + assert result_a.characters == result_b.characters + + +def test_estimate_empty_payload() -> None: + """Empty dict produces minimal token count.""" + result = estimate_payload({}) + assert result.characters == 2 # "{}" + assert result.tokens >= 1 + + +def test_estimate_payload_custom_encoding() -> None: + """Custom encoding parameter is 
passed through.""" + result = estimate_payload( + {"key": "value"}, + encoding="cl100k_base", + estimator="tiktoken", + ) + assert result.encoding == "cl100k_base" + assert result.method == "tiktoken" + assert result.tokens > 0 + + +def test_token_estimate_is_frozen() -> None: + """TokenEstimate is immutable.""" + result = estimate_payload({"x": 1}) + with pytest.raises(AttributeError): + result.tokens = 999 # type: ignore[misc] + + +def test_estimate_payload_with_nested_structures() -> None: + """Complex nested payloads produce reasonable estimates.""" + payload = { + "scope": { + "allowed_files": [f"pkg/module_{i}.py" for i in range(20)], + "forbidden": [".cache/**", "*.baseline.json"], + }, + "blast_radius": { + "level": "high", + "dependents": [ + {"path": f"dep_{i}.py", "reason": "import"} for i in range(5) + ], + }, + "gate_preview": {"would_fail": False, "reasons": []}, + } + result = estimate_payload(payload) + assert result.tokens > 50 + assert result.characters > 200 + + +def test_estimate_payload_with_unicode() -> None: + """Unicode content is handled correctly (ensure_ascii=False).""" + payload = {"message": "Результат: чистый", "emoji": "✅"} + result = estimate_payload(payload) + assert result.tokens > 0 + assert result.characters > 0 + + +def test_estimate_payload_rejects_unknown_estimator() -> None: + with pytest.raises(ValueError, match="token estimator"): + estimate_payload({"x": 1}, estimator="bad") # type: ignore[arg-type] + + +def test_estimate_text_token_defaults_to_chars_approx() -> None: + result = estimate_text_token("hello world") + assert result.method == "chars_approx" + assert result.tokens == approx_tokens_from_chars(len("hello world")) + + +def test_estimate_texts_token_counts_batch() -> None: + counts = estimate_texts_token_counts(["ab", "abcd"]) + assert counts == ( + approx_tokens_from_chars(2), + approx_tokens_from_chars(4), + ) + + +def test_estimate_text_token_rejects_unknown_estimator() -> None: + with pytest.raises(ValueError, 
match="token estimator"): + estimate_text_token("x", estimator="bad") # type: ignore[arg-type] diff --git a/tests/test_trajectory_rebuild_incremental.py b/tests/test_trajectory_rebuild_incremental.py new file mode 100644 index 00000000..4a77e181 --- /dev/null +++ b/tests/test_trajectory_rebuild_incremental.py @@ -0,0 +1,229 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +import codeclone.memory.jobs.staleness as staleness_mod +from codeclone.audit.events import AuditEvent, repo_root_digest +from codeclone.audit.reader import list_workflow_ids_with_events_after +from codeclone.audit.writer import SqliteAuditWriter +from codeclone.memory.jobs.worker import _trajectory_incremental_watermark +from codeclone.memory.trajectory.models import TRAJECTORY_PROJECTION_VERSION + +from .memory_fixtures import memory_store + + +def _emit_workflow(root: Path, audit_db: Path, *, intent_id: str) -> None: + """Emit a minimal two-event accepted workflow for ``intent_id``.""" + root_digest = repo_root_digest(root.resolve()) + writer = SqliteAuditWriter(db_path=audit_db, payloads="compact", retention_days=30) + try: + writer.emit( + AuditEvent( + event_type="intent.declared", + severity="info", + repo_root_digest=root_digest, + agent_pid=100, + agent_label="tester", + intent_id=intent_id, + run_id="aaaa1111", + report_digest="1" * 64, + status="active", + payload={ + "intent_description": f"work on {intent_id}", + "scope": {"allowed_files": ["pkg/a.py"]}, + "workspace_registered": True, + "ttl_seconds": 3600, + "lease_seconds": 600, + }, + ) + ) + writer.emit( + AuditEvent( + event_type="patch_contract.verified", + severity="info", + 
repo_root_digest=root_digest, + agent_pid=100, + agent_label="tester", + intent_id=intent_id, + run_id="bbbb2222", + report_digest="2" * 64, + status="accepted", + payload={ + "status": "accepted", + "structural_delta": { + "regressions": [], + "improvements": [], + "health_delta": 0, + }, + "contract_violations": [], + "baseline_abuse": {"detected": False}, + }, + ) + ) + finally: + writer.close() + + +def _max_event_id(audit_db: Path) -> int: + conn = sqlite3.connect(str(audit_db)) + try: + row = conn.execute("SELECT MAX(id) FROM controller_events").fetchone() + finally: + conn.close() + return int(row[0]) if row and row[0] is not None else 0 + + +def test_list_workflow_ids_with_events_after(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, _project, _store, _db): + audit_db = tmp_path / "audit.sqlite3" + _emit_workflow(root, audit_db, intent_id="intent-a") + after_a = _max_event_id(audit_db) + _emit_workflow(root, audit_db, intent_id="intent-b") + digest = repo_root_digest(root.resolve()) + + assert list_workflow_ids_with_events_after( + db_path=audit_db, repo_root_digest=digest, after_id=after_a + ) == ("intent:intent-b",) + assert set( + list_workflow_ids_with_events_after( + db_path=audit_db, repo_root_digest=digest, after_id=0 + ) + ) == {"intent:intent-a", "intent:intent-b"} + + +def test_list_workflow_ids_after_missing_audit_db_is_empty(tmp_path: Path) -> None: + assert ( + list_workflow_ids_with_events_after( + db_path=tmp_path / "absent.sqlite3", + repo_root_digest="digest", + after_id=0, + ) + == () + ) + + +def test_incremental_reprojects_only_changed_workflows(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db): + audit_db = tmp_path / "audit.sqlite3" + _emit_workflow(root, audit_db, intent_id="intent-a") + _emit_workflow(root, audit_db, intent_id="intent-b") + full = store.rebuild_trajectories_from_audit( + project=project, root_path=root, audit_db_path=audit_db + ) + assert 
full.run.trajectories_created == 2 + + watermark = _max_event_id(audit_db) + _emit_workflow(root, audit_db, intent_id="intent-c") + + incremental = store.rebuild_trajectories_incremental( + project=project, + root_path=root, + audit_db_path=audit_db, + after_event_core_id=watermark, + ) + # Only the workflow with events after the watermark is re-projected. + assert incremental.run.workflows_seen == 1 + assert incremental.run.trajectories_created == 1 + assert store.count_trajectories(project_id=project.id) == 3 + + +def test_incremental_after_current_max_is_noop(tmp_path: Path) -> None: + with memory_store(tmp_path) as (root, project, store, _db): + audit_db = tmp_path / "audit.sqlite3" + _emit_workflow(root, audit_db, intent_id="intent-a") + store.rebuild_trajectories_from_audit( + project=project, root_path=root, audit_db_path=audit_db + ) + watermark = _max_event_id(audit_db) + + incremental = store.rebuild_trajectories_incremental( + project=project, + root_path=root, + audit_db_path=audit_db, + after_event_core_id=watermark, + ) + assert incremental.run.workflows_seen == 0 + assert incremental.run.trajectories_created == 0 + assert incremental.run.trajectories_updated == 0 + + +def test_worker_watermark_decision(monkeypatch: pytest.MonkeyPatch) -> None: + conn = sqlite3.connect(":memory:") + try: + # No prior done job -> full rebuild. + monkeypatch.setattr( + staleness_mod, "last_applied_stimulus", lambda c, *, project_id: None + ) + assert _trajectory_incremental_watermark(conn, project_id="p") is None + + # Projection-version change -> full rebuild (re-derive everything). + monkeypatch.setattr( + staleness_mod, + "last_applied_stimulus", + lambda c, *, project_id: { + "trajectory_projection_version": "trajectory-v0", + "event_core_max_id": 42, + }, + ) + assert _trajectory_incremental_watermark(conn, project_id="p") is None + + # Same version + watermark -> incremental after the watermark. 
+ monkeypatch.setattr( + staleness_mod, + "last_applied_stimulus", + lambda c, *, project_id: { + "trajectory_projection_version": TRAJECTORY_PROJECTION_VERSION, + "event_core_max_id": 42, + }, + ) + assert _trajectory_incremental_watermark(conn, project_id="p") == 42 + + # Missing watermark -> full rebuild. + monkeypatch.setattr( + staleness_mod, + "last_applied_stimulus", + lambda c, *, project_id: { + "trajectory_projection_version": TRAJECTORY_PROJECTION_VERSION + }, + ) + assert _trajectory_incremental_watermark(conn, project_id="p") is None + finally: + conn.close() + + +def test_execute_rebuild_reports_full_and_incremental_modes(tmp_path: Path) -> None: + from codeclone.config.memory import resolve_memory_config + from codeclone.memory.trajectory.rebuild_workflow import execute_trajectory_rebuild + + from .memory_fixtures import seed_trajectory_audit_workflow + + with memory_store(tmp_path) as (root, project, store, _db_path): + audit_db = root / ".codeclone" / "db" / "audit.sqlite3" + seed_trajectory_audit_workflow(root=root, audit_db=audit_db) + config = resolve_memory_config(root) + full = execute_trajectory_rebuild( + root_path=root, + config=config, + store=store, + project=project, + ) + assert full["status"] == "ok" + assert full["mode"] == "full" + incremental = execute_trajectory_rebuild( + root_path=root, + config=config, + store=store, + project=project, + incremental_after_event_core_id=1, + ) + assert incremental["status"] == "ok" + assert incremental["mode"] == "incremental" diff --git a/tests/test_ui_messages_formatters.py b/tests/test_ui_messages_formatters.py new file mode 100644 index 00000000..c63a4a90 --- /dev/null +++ b/tests/test_ui_messages_formatters.py @@ -0,0 +1,168 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.ui_messages import formatters + + +def test_fmt_summary_compact_coverage_join_ok_with_scope_gaps() -> None: + text = formatters.fmt_summary_compact_coverage_join( + status="ok", + overall_permille=950, + coverage_hotspots=2, + scope_gap_hotspots=3, + threshold_percent=80, + source_label="cobertura.xml", + ) + assert "status=ok" in text + assert "scope_gaps=3" in text + assert "source=cobertura.xml" in text + + +def test_fmt_metrics_coverage_join_ok_with_scope_gaps() -> None: + text = formatters.fmt_metrics_coverage_join( + status="ok", + overall_permille=920, + coverage_hotspots=1, + scope_gap_hotspots=2, + threshold_percent=75, + source_label="external", + ) + assert "scope gaps" in text + assert "external" in text + + +def test_fmt_metrics_coverage_join_unavailable_with_source() -> None: + text = formatters.fmt_metrics_coverage_join( + status="missing", + overall_permille=0, + coverage_hotspots=0, + scope_gap_hotspots=0, + threshold_percent=80, + source_label="none", + ) + assert "join unavailable" in text + assert "none" in text + + +def test_fmt_metrics_coverage_join_unavailable_without_source() -> None: + text = formatters.fmt_metrics_coverage_join( + status="missing", + overall_permille=0, + coverage_hotspots=0, + scope_gap_hotspots=0, + threshold_percent=80, + source_label="", + ) + assert "join unavailable" in text + assert " · " not in text.split("join unavailable", 1)[-1] + + +def test_fmt_summary_parsed_returns_none_when_all_zero() -> None: + assert ( + formatters.fmt_summary_parsed( + lines=0, + functions=0, + methods=0, + classes=0, + ) + is None + ) + + +def test_fmt_summary_parsed_includes_callables_and_classes() -> None: + text = formatters.fmt_summary_parsed( + lines=100, + functions=3, + methods=2, + classes=1, + ) + assert text is not None + assert "callables" in text + assert "classes" in text + + +def 
test_fmt_summary_compact_coverage_non_ok_status() -> None: + text = formatters.fmt_summary_compact_coverage_join( + status="missing", + overall_permille=0, + coverage_hotspots=0, + scope_gap_hotspots=0, + threshold_percent=80, + source_label="", + ) + assert "status=missing" in text + assert "overall=" not in text + + +def test_fmt_summary_compact_coverage_ok_without_scope_gaps() -> None: + text = formatters.fmt_summary_compact_coverage_join( + status="ok", + overall_permille=990, + coverage_hotspots=0, + scope_gap_hotspots=0, + threshold_percent=80, + source_label="", + ) + assert "scope_gaps" not in text + + +def test_fmt_summary_parsed_classes_only_without_callables() -> None: + text = formatters.fmt_summary_parsed( + lines=10, + functions=0, + methods=0, + classes=2, + ) + assert text is not None + assert "classes" in text + assert "callables" not in text + + +def test_fmt_metrics_api_surface_includes_breaking_and_added() -> None: + text = formatters.fmt_metrics_api_surface( + public_symbols=5, + modules=2, + added=3, + breaking=1, + ) + assert "breaking" in text + assert "added" in text + + +def test_fmt_metrics_api_surface_without_delta() -> None: + text = formatters.fmt_metrics_api_surface( + public_symbols=10, + modules=3, + added=0, + breaking=0, + ) + assert "breaking" not in text + + +def test_fmt_metrics_adoption_line_formats_permille_fields() -> None: + text = formatters.fmt_metrics_adoption( + param_permille=800, + return_permille=700, + docstring_permille=600, + any_annotation_count=2, + ) + assert "params" in text + assert "returns" in text + assert "docstrings" in text + + +def test_fmt_metrics_coverage_join_ok_without_scope_gaps_or_source() -> None: + text = formatters.fmt_metrics_coverage_join( + status="ok", + overall_permille=990, + coverage_hotspots=0, + scope_gap_hotspots=0, + threshold_percent=80, + source_label="", + ) + assert "scope gaps" not in text diff --git a/tests/test_utc_timestamps.py b/tests/test_utc_timestamps.py new file mode 
100644 index 00000000..937ead97 --- /dev/null +++ b/tests/test_utc_timestamps.py @@ -0,0 +1,42 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +from datetime import datetime, timedelta, timezone + +from codeclone.utils.utc_timestamps import age_seconds_since_utc_timestamp + + +def test_age_seconds_since_utc_timestamp_none_and_blank() -> None: + assert age_seconds_since_utc_timestamp(None) is None + assert age_seconds_since_utc_timestamp("") is None + assert age_seconds_since_utc_timestamp(" ") is None + + +def test_age_seconds_since_utc_timestamp_invalid() -> None: + assert age_seconds_since_utc_timestamp("not-a-timestamp") is None + + +def test_age_seconds_since_utc_timestamp_z_suffix() -> None: + past = datetime.now(timezone.utc) - timedelta(seconds=30) + text = past.strftime("%Y-%m-%dT%H:%M:%SZ") + age = age_seconds_since_utc_timestamp(text) + assert age is not None + assert 25 <= age <= 35 + + +def test_age_seconds_since_utc_timestamp_naive_treated_as_utc() -> None: + past = datetime.now(timezone.utc) - timedelta(seconds=10) + text = past.replace(tzinfo=None).isoformat() + age = age_seconds_since_utc_timestamp(text) + assert age is not None + assert 5 <= age <= 20 + + +def test_age_seconds_since_utc_timestamp_never_negative() -> None: + future = datetime.now(timezone.utc) + timedelta(seconds=60) + text = future.strftime("%Y-%m-%dT%H:%M:%S+00:00") + assert age_seconds_since_utc_timestamp(text) == 0 diff --git a/tests/test_verification_profile.py b/tests/test_verification_profile.py new file mode 100644 index 00000000..3a07636c --- /dev/null +++ b/tests/test_verification_profile.py @@ -0,0 +1,524 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +"""Tests for the verification profile classifier. + +Covers the 10 acceptance criteria from the design: +1. Profile is computed only from actual_changed_files. +2. State artifact has highest priority. +3. Any .py/.pyi forces python_structural. +4. Governance config requires after_run; without it → unverified. +5. Documentation-only can verify without after_run. +6. Scope and forbidden checks always run before any fast return. +7. Payload includes profile, reason, performed checks, not-applicable checks. +8. Receipts use "not applicable", never "passed", for skipped structural. +9. Claim Guard allows only narrow docs-only claims. +10. Empty diff behavior is deterministic and tested. +""" + +from __future__ import annotations + +import pytest + +from codeclone.surfaces.mcp._verification_profile import ( + CHECK_FORBIDDEN, + CHECK_GATE_COMPARISON, + CHECK_PROFILE_CLASSIFICATION, + CHECK_SCOPE, + CHECK_STRUCTURAL_DELTA, + CHECK_WORSENED_SYMBOLS, + VerificationProfile, + check_matrix, + classify_patch, + profile_accepted_message, + profile_limitations, + profile_unverified_message, +) + +# ═══════════════════════════════════════════════════════════════════ +# Single-file classification +# ═══════════════════════════════════════════════════════════════════ + + +def test_single_python_file() -> None: + result = classify_patch(["src/main.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + assert result.python_source_touched is True + + +def test_single_pyi_file() -> None: + result = classify_patch(["codeclone/types.pyi"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + assert result.python_source_touched is True + + +def test_single_markdown_file() -> None: + result = classify_patch(["README.md"]) + assert result.profile == 
VerificationProfile.DOCUMENTATION_ONLY + assert result.python_source_touched is False + + +def test_single_rst_file() -> None: + result = classify_patch(["docs/guide.rst"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_single_txt_file() -> None: + result = classify_patch(["notes.txt"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_single_adoc_file() -> None: + result = classify_patch(["guide.adoc"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_single_textile_file() -> None: + result = classify_patch(["notes.textile"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_single_pyproject_toml() -> None: + result = classify_patch(["pyproject.toml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + assert result.governance_config_touched is True + + +def test_single_pre_commit_config() -> None: + result = classify_patch([".pre-commit-config.yaml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_single_github_workflow() -> None: + result = classify_patch([".github/workflows/tests.yml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_single_github_action() -> None: + result = classify_patch([".github/actions/codeclone/action.yml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_single_dockerfile() -> None: + result = classify_patch(["Dockerfile"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_single_docker_compose() -> None: + result = classify_patch(["docker-compose.yml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_single_baseline_json() -> None: + result = classify_patch(["codeclone.baseline.json"]) + assert result.profile == VerificationProfile.STATE_ARTIFACT_CHANGE + assert result.state_artifact_touched is True + + +def test_single_cache_file() -> None: + 
result = classify_patch([".codeclone/report.json"]) + assert result.profile == VerificationProfile.STATE_ARTIFACT_CHANGE + + +def test_single_unknown_file() -> None: + result = classify_patch(["data/fixtures.json"]) + assert result.profile == VerificationProfile.NON_PYTHON_PATCH + assert result.python_source_touched is False + + +def test_single_image_file() -> None: + result = classify_patch(["assets/logo.png"]) + assert result.profile == VerificationProfile.NON_PYTHON_PATCH + + +def test_py_typed_marker() -> None: + result = classify_patch(["py.typed"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_ruff_toml() -> None: + result = classify_patch(["ruff.toml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_mypy_ini() -> None: + result = classify_patch(["mypy.ini"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_makefile() -> None: + result = classify_patch(["Makefile"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_setup_cfg() -> None: + result = classify_patch(["setup.cfg"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_coveragerc() -> None: + result = classify_patch([".coveragerc"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +# ═══════════════════════════════════════════════════════════════════ +# Priority chain — mixed files +# ═══════════════════════════════════════════════════════════════════ + + +def test_state_artifact_overrides_python() -> None: + """State artifact has highest priority, even with Python files.""" + result = classify_patch(["src/main.py", "codeclone.baseline.json"]) + assert result.profile == VerificationProfile.STATE_ARTIFACT_CHANGE + assert result.state_artifact_touched is True + assert result.python_source_touched is True + + +def test_state_artifact_overrides_docs() -> None: + result = classify_patch(["README.md", ".codeclone/report.json"]) + assert 
result.profile == VerificationProfile.STATE_ARTIFACT_CHANGE + + +def test_python_overrides_governance() -> None: + result = classify_patch(["src/engine.py", "pyproject.toml"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def test_python_overrides_docs() -> None: + result = classify_patch(["src/main.py", "README.md"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def test_governance_overrides_docs() -> None: + result = classify_patch(["README.md", "pyproject.toml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_governance_overrides_non_python() -> None: + result = classify_patch(["data/config.json", ".pre-commit-config.yaml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_docs_only_multiple_doc_files() -> None: + result = classify_patch(["README.md", "CHANGELOG.md", "docs/guide.rst"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_docs_plus_unknown_is_non_python() -> None: + """Docs + unknown file is not docs-only.""" + result = classify_patch(["README.md", "data/fixtures.json"]) + assert result.profile == VerificationProfile.NON_PYTHON_PATCH + + +def test_setup_py_is_python_structural() -> None: + """setup.py is a Python file, so it triggers python_structural.""" + result = classify_patch(["setup.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def test_noxfile_is_python_structural() -> None: + """noxfile.py is Python source, not governance config.""" + result = classify_patch(["noxfile.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def test_conftest_is_python_structural() -> None: + result = classify_patch(["tests/conftest.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +# ═══════════════════════════════════════════════════════════════════ +# Edge cases +# ═══════════════════════════════════════════════════════════════════ + + +def 
test_empty_changed_files() -> None: + """Empty list → deterministic non_python_patch with dedicated reason.""" + result = classify_patch([]) + assert result.profile == VerificationProfile.NON_PYTHON_PATCH + assert result.reason == "no changed files detected" + assert result.python_source_touched is False + assert result.state_artifact_touched is False + assert result.governance_config_touched is False + + +def test_docs_nested_in_docs_dir() -> None: + result = classify_patch(["docs/book/chapter.md"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_license_is_docs() -> None: + result = classify_patch(["LICENSE"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_contributing_is_docs() -> None: + result = classify_patch(["CONTRIBUTING.md"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_changelog_without_extension() -> None: + result = classify_patch(["CHANGELOG"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_doc_dir_is_docs() -> None: + result = classify_patch(["doc/tutorial.html"]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +@pytest.mark.parametrize( + "filename", + [ + "CHANGES", + "CHANGES.md", + "HISTORY.rst", + "NEWS", + "LICENCE", + "COPYING", + "NOTICE", + "CONTRIBUTORS.md", + "CREDITS", + "MAINTAINERS", + "THANKS", + "SECURITY.md", + "CODE_OF_CONDUCT.md", + ], + ids=lambda f: f.replace(".", "_"), +) +def test_expanded_documentation_patterns(filename: str) -> None: + result = classify_patch([filename]) + assert result.profile == VerificationProfile.DOCUMENTATION_ONLY + + +def test_windows_path_normalization() -> None: + result = classify_patch(["src\\main.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def test_dotslash_prefix_normalization() -> None: + result = classify_patch(["./src/main.py"]) + assert result.profile == VerificationProfile.PYTHON_STRUCTURAL + + +def 
test_deeply_nested_workflow() -> None: + result = classify_patch([".github/workflows/deploy/staging.yml"]) + assert result.profile == VerificationProfile.GOVERNANCE_CONFIG + + +def test_deeply_nested_cache() -> None: + result = classify_patch([".codeclone/intents/some-intent.json"]) + assert result.profile == VerificationProfile.STATE_ARTIFACT_CHANGE + + +# ═══════════════════════════════════════════════════════════════════ +# ClassificationResult payload +# ═══════════════════════════════════════════════════════════════════ + + +def test_payload_has_required_fields() -> None: + result = classify_patch(["README.md"]) + payload = result.to_payload() + assert payload["verification_profile"] == "documentation_only" + assert isinstance(payload["profile_reason"], str) + assert payload["python_source_touched"] is False + assert isinstance(payload["after_run_required"], bool) + assert isinstance(payload["checks_performed"], list) + assert isinstance(payload["checks_not_applicable"], list) + + +def test_python_structural_payload() -> None: + result = classify_patch(["src/main.py"]) + payload = result.to_payload() + assert payload["verification_profile"] == "python_structural" + assert payload["after_run_required"] is True + performed = payload["checks_performed"] + assert isinstance(performed, list) + assert CHECK_STRUCTURAL_DELTA in performed + assert CHECK_GATE_COMPARISON in performed + assert CHECK_WORSENED_SYMBOLS in performed + assert payload["checks_not_applicable"] == [] + + +def test_docs_only_payload_structural_not_applicable() -> None: + result = classify_patch(["docs/guide.md"]) + payload = result.to_payload() + assert payload["verification_profile"] == "documentation_only" + assert payload["after_run_required"] is False + not_applicable = payload["checks_not_applicable"] + assert isinstance(not_applicable, list) + assert CHECK_STRUCTURAL_DELTA in not_applicable + assert CHECK_GATE_COMPARISON in not_applicable + assert CHECK_WORSENED_SYMBOLS in not_applicable + 
+ +def test_governance_config_requires_after_run() -> None: + result = classify_patch(["pyproject.toml"]) + payload = result.to_payload() + assert payload["after_run_required"] is True + not_applicable = payload["checks_not_applicable"] + assert isinstance(not_applicable, list) + assert CHECK_STRUCTURAL_DELTA in not_applicable + + +def test_profile_classification_always_in_checks_performed() -> None: + """verification_profile_classification is in checks_performed.""" + for files, expected_profile in ( + (["src/main.py"], VerificationProfile.PYTHON_STRUCTURAL), + (["README.md"], VerificationProfile.DOCUMENTATION_ONLY), + (["pyproject.toml"], VerificationProfile.GOVERNANCE_CONFIG), + (["data.json"], VerificationProfile.NON_PYTHON_PATCH), + ): + result = classify_patch(files) + assert result.profile == expected_profile + performed = result.to_payload()["checks_performed"] + assert isinstance(performed, list) + assert CHECK_PROFILE_CLASSIFICATION in performed + assert CHECK_SCOPE in performed + assert CHECK_FORBIDDEN in performed + + +# ═══════════════════════════════════════════════════════════════════ +# CheckMatrix +# ═══════════════════════════════════════════════════════════════════ + + +@pytest.mark.parametrize( + "profile, after_run_required, structural", + [ + (VerificationProfile.PYTHON_STRUCTURAL, True, True), + (VerificationProfile.GOVERNANCE_CONFIG, True, False), + (VerificationProfile.DOCUMENTATION_ONLY, False, False), + (VerificationProfile.NON_PYTHON_PATCH, False, False), + (VerificationProfile.STATE_ARTIFACT_CHANGE, False, False), + ], + ids=[ + "python_structural", + "governance_config", + "documentation_only", + "non_python_patch", + "state_artifact_change", + ], +) +def test_check_matrix_exhaustive( + profile: VerificationProfile, + after_run_required: bool, + structural: bool, +) -> None: + matrix = check_matrix(profile) + assert matrix.profile == profile + assert matrix.after_run_required == after_run_required + assert 
matrix.structural_checks_applicable == structural + + +def test_check_matrix_structural_includes_all_checks() -> None: + matrix = check_matrix(VerificationProfile.PYTHON_STRUCTURAL) + performed = matrix.checks_performed + assert CHECK_STRUCTURAL_DELTA in performed + assert CHECK_GATE_COMPARISON in performed + assert CHECK_WORSENED_SYMBOLS in performed + assert matrix.checks_not_applicable == () + + +def test_check_matrix_docs_excludes_structural() -> None: + matrix = check_matrix(VerificationProfile.DOCUMENTATION_ONLY) + not_applicable = matrix.checks_not_applicable + assert CHECK_STRUCTURAL_DELTA in not_applicable + assert CHECK_GATE_COMPARISON in not_applicable + assert CHECK_WORSENED_SYMBOLS in not_applicable + + +# ═══════════════════════════════════════════════════════════════════ +# Limitations and messages +# ═══════════════════════════════════════════════════════════════════ + + +def test_non_python_patch_has_limitations() -> None: + limitations = profile_limitations(VerificationProfile.NON_PYTHON_PATCH) + assert len(limitations) == 2 + assert any("Python structural" in lim for lim in limitations) + assert any("documentation-only" in lim for lim in limitations) + + +def test_documentation_only_no_limitations() -> None: + assert profile_limitations(VerificationProfile.DOCUMENTATION_ONLY) == () + + +def test_accepted_message_docs() -> None: + msg = profile_accepted_message(VerificationProfile.DOCUMENTATION_ONLY) + assert "not applicable" in msg + + +def test_accepted_message_non_python() -> None: + msg = profile_accepted_message(VerificationProfile.NON_PYTHON_PATCH) + assert "limitations" in msg + + +def test_unverified_message_python() -> None: + msg = profile_unverified_message(VerificationProfile.PYTHON_STRUCTURAL) + assert "after_run_id" in msg + + +def test_unverified_message_governance() -> None: + msg = profile_unverified_message(VerificationProfile.GOVERNANCE_CONFIG) + assert "after_run_id" in msg + assert "configuration" in msg.lower() or "CI" in msg 
+ + +# ═══════════════════════════════════════════════════════════════════ +# Determinism +# ═══════════════════════════════════════════════════════════════════ + + +def test_classify_is_pure() -> None: + """Same input always yields the same result.""" + files = ["src/main.py", "README.md", "pyproject.toml"] + result_a = classify_patch(files) + result_b = classify_patch(files) + assert result_a.profile == result_b.profile + assert result_a.reason == result_b.reason + assert result_a.python_source_touched == result_b.python_source_touched + + +def test_classify_order_independent() -> None: + """File order does not change the result.""" + files_a = ["README.md", "src/main.py"] + files_b = ["src/main.py", "README.md"] + assert classify_patch(files_a).profile == classify_patch(files_b).profile + + +# ═══════════════════════════════════════════════════════════════════ +# Profile reason stability +# ═══════════════════════════════════════════════════════════════════ + + +def test_reason_stable_for_docs() -> None: + result = classify_patch(["README.md", "docs/guide.rst"]) + assert result.reason == "all changed files match documentation patterns" + + +def test_reason_stable_for_governance() -> None: + result = classify_patch(["pyproject.toml"]) + assert "governance or analysis configuration" in result.reason + + +def test_reason_stable_for_python() -> None: + result = classify_patch(["src/main.py"]) + assert "Python source" in result.reason + + +def test_reason_stable_for_state_artifact() -> None: + result = classify_patch(["codeclone.baseline.json"]) + assert "state artifacts" in result.reason + + +def test_reason_stable_for_empty() -> None: + result = classify_patch([]) + assert result.reason == "no changed files detected" + + +def test_reason_stable_for_non_python() -> None: + result = classify_patch(["assets/logo.png"]) + assert "documentation" in result.reason or "outside" in result.reason diff --git a/tests/test_workspace_hygiene.py b/tests/test_workspace_hygiene.py 
new file mode 100644 index 00000000..c7aaf14d --- /dev/null +++ b/tests/test_workspace_hygiene.py @@ -0,0 +1,680 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import subprocess +from collections.abc import Iterator +from contextlib import contextmanager +from dataclasses import replace +from pathlib import Path +from typing import cast +from unittest.mock import patch + +import pytest + +from codeclone.surfaces.mcp import _workspace_hygiene as hygiene_mod +from codeclone.surfaces.mcp._workspace_hygiene import ( + DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + STRICT_FINISH_ENV, + DirtyAttribution, + DirtySnapshot, + DirtySnapshotEntry, + ForeignDirtyOverlap, + WorkspaceHygieneResult, + collect_dirty_paths, + collect_dirty_snapshot, + dirty_snapshot_from_payload, + evaluate_scoped_hygiene, + finish_hygiene_check, + workspace_dirty_summary, +) +from codeclone.surfaces.mcp._workspace_intent_registry_lock import ( + WorkspaceRegistryLockError, + workspace_registry_lock, +) +from codeclone.surfaces.mcp._workspace_intent_store import get_workspace_intent_store +from codeclone.surfaces.mcp._workspace_intents import ( + write_workspace_intent, +) +from tests.test_workspace_intents import _record + +_GIT_RUN = "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run" + + +@contextmanager +def _mock_git_porcelain( + porcelain: str, + *, + git_available: bool = True, + git_side_effect: BaseException | None = None, +) -> Iterator[None]: + git_run_patch = ( + patch(_GIT_RUN, side_effect=git_side_effect) + if git_side_effect is not None + else patch( + _GIT_RUN, + return_value=subprocess.CompletedProcess( + args=["git"], + returncode=0, + stdout=porcelain, + stderr="", + ), + ) + ) + with ( + 
patch.object(hygiene_mod, "_git_available", return_value=git_available), + git_run_patch, + ): + yield + + +def test_foreign_dirty_overlap_to_payload() -> None: + overlap = ForeignDirtyOverlap( + path="pkg/a.py", + foreign_intent_id="intent-foreign-001", + foreign_persisted_status="active", + foreign_ownership="foreign_active", + foreign_agent_label="other", + message="overlap", + ) + payload = overlap.to_payload() + assert payload["foreign_intent_id"] == "intent-foreign-001" + assert payload["path"] == "pkg/a.py" + + +def test_workspace_hygiene_result_to_payload_includes_finish_fields() -> None: + hygiene = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=True, + unacknowledged_dirty_in_scope=("pkg/a.py",), + blocks_finish=True, + ) + payload = hygiene.to_payload() + counts = cast("dict[str, int]", payload["counts"]) + assert payload["blocks_finish"] is True + assert payload["unacknowledged_dirty_in_scope"] == ["pkg/a.py"] + assert counts["missing_evidence"] == 1 + + +def test_workspace_hygiene_to_payload_summary_omits_per_path_detail() -> None: + # Summary (default) carries counts + blocking subset only; the derived + # classification arrays and full per-path attribution are detail=full. 
+ hygiene = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py", "other/x.py"), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=("other/x.py",), + foreign_dirty_overlaps=(), + blocks_edit=True, + new_unattributed_unscoped_dirty=("other/x.py",), + dirty_attribution=( + DirtyAttribution( + path="other/x.py", + scope_relation="outside", + evidence="absent", + start_state="absent", + intent_attribution="none", + classification="new_unattributed_unscoped_dirty", + blocking=False, + ), + ), + ) + summary = hygiene.to_payload() + summary_counts = cast("dict[str, int]", summary["counts"]) + assert summary_counts["outside_scope"] == 1 + assert summary_counts["new_unattributed_unscoped"] == 1 + assert "dirty_attribution" not in summary + assert "new_unattributed_unscoped_dirty" not in summary + assert "dirty_paths_outside_scope" not in summary + + full = hygiene.to_payload(detail_level="full") + assert full["new_unattributed_unscoped_dirty"] == ["other/x.py"] + assert full["dirty_paths_outside_scope"] == ["other/x.py"] + assert isinstance(full["dirty_attribution"], list) + assert len(full["dirty_attribution"]) == 1 + + +def test_collect_dirty_paths_when_git_unavailable(tmp_path: Path) -> None: + with patch.object(hygiene_mod, "_git_available", return_value=False): + result = collect_dirty_paths(tmp_path) + assert result.git_available is False + assert result.dirty_paths == () + + +def test_collect_dirty_paths_git_failure_returns_unavailable(tmp_path: Path) -> None: + with _mock_git_porcelain( + "", + git_side_effect=subprocess.TimeoutExpired("git", 30), + ): + result = collect_dirty_paths(tmp_path) + assert result.git_available is False + assert result.dirty_paths == () + + +def test_collect_dirty_paths_scoped_filter() -> None: + with _mock_git_porcelain(" M pkg/a.py\n M pkg/b.py\n"): + result = collect_dirty_paths( + Path("/tmp/root"), + scoped_paths=["pkg/a.py"], + ) + assert result.git_available is True + assert result.dirty_paths == 
("pkg/a.py",) + + +def test_collect_dirty_snapshot_roundtrip() -> None: + with _mock_git_porcelain(" M pkg/a.py\n"): + snapshot = collect_dirty_snapshot(Path("/tmp/root")) + assert snapshot.git_available is True + assert snapshot.paths == ("pkg/a.py",) + assert dirty_snapshot_from_payload(snapshot.to_payload()) == snapshot + + +def test_workspace_dirty_summary_without_git(tmp_path: Path) -> None: + with patch.object(hygiene_mod, "_git_available", return_value=False): + summary = workspace_dirty_summary(root=tmp_path) + assert summary["git_available"] is False + assert summary["dirty_paths_count"] == 0 + + +def test_workspace_dirty_summary_truncates_sample() -> None: + dirty_paths = tuple(f"pkg/file_{index}.py" for index in range(12)) + with patch.object( + hygiene_mod, + "collect_dirty_paths", + return_value=hygiene_mod.DirtyPathsResult( + git_available=True, + dirty_paths=dirty_paths, + ), + ): + summary = workspace_dirty_summary(root=Path("/tmp/root")) + assert summary["dirty_paths_count"] == 12 + assert summary["sample_truncated"] is True + assert len(cast(list[str], summary["dirty_paths_sample"])) == 10 + + +def test_evaluate_scoped_hygiene_without_git(tmp_path: Path) -> None: + store = get_workspace_intent_store(tmp_path) + with patch.object(hygiene_mod, "_git_available", return_value=False): + hygiene = evaluate_scoped_hygiene( + root=tmp_path, + allowed_files=["pkg/a.py"], + store=store, + own_pid=os.getpid(), + own_start_epoch=100, + ) + assert hygiene.git_available is False + assert hygiene.blocks_edit is False + + +def test_finish_hygiene_check_blocks_unacknowledged_dirty(tmp_path: Path) -> None: + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/b.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=["pkg/b.py"], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert 
hygiene.unacknowledged_dirty_in_scope == ("pkg/b.py",) + assert hygiene.blocks_finish is True + assert hygiene.finish_block_reason == "missing_evidence" + + +def test_finish_hygiene_check_allows_preexisting_unscoped_dirty( + tmp_path: Path, +) -> None: + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + snapshot = collect_dirty_snapshot(tmp_path) + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + start_dirty_snapshot=snapshot, + ) + assert hygiene.preexisting_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.unattributed_unscoped_dirty == () + assert hygiene.blocks_finish is False + + +def test_finish_hygiene_check_treats_new_unattributed_as_advisory( + tmp_path: Path, +) -> None: + # Scope-aware finish hygiene: out-of-scope dirt with no foreign-intent + # attribution is classified for diagnostics but does NOT block finish. 
+ store = get_workspace_intent_store(tmp_path) + start_snapshot = DirtySnapshot( + git_available=True, + captured_at_utc="2026-01-01T00:00:00Z", + entries=( + DirtySnapshotEntry( + path="pkg/a.py", + status_xy=" M", + digest="start-a", + digest_status="ok", + ), + ), + ) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + start_dirty_snapshot=start_snapshot, + ) + assert hygiene.new_unattributed_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.dirty_paths_outside_scope == ("pkg/extra.py",) + assert hygiene.blocks_finish is False + assert hygiene.finish_block_reason is None + + +def test_finish_hygiene_check_blocks_unattributed_when_strict( + tmp_path: Path, +) -> None: + store = get_workspace_intent_store(tmp_path) + start_snapshot = DirtySnapshot( + git_available=True, + captured_at_utc="2026-01-01T00:00:00Z", + entries=( + DirtySnapshotEntry( + path="pkg/a.py", + status_xy=" M", + digest="start-a", + digest_status="ok", + ), + ), + ) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + start_dirty_snapshot=start_snapshot, + strict_finish=True, + ) + assert hygiene.new_unattributed_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.blocks_finish is True + assert hygiene.finish_block_reason == "own_unscoped_dirty" + + +def test_finish_hygiene_check_legacy_snapshot_treats_unknown_as_advisory( + tmp_path: Path, +) -> None: + # Undigestible/legacy 'unknown' start state (e.g. directories) must never + # block finish — authorship cannot be proven, so it is advisory only. 
+ store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.unknown_unattributed_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.dirty_snapshot_status == "missing_legacy_conservative" + assert hygiene.blocks_finish is False + assert hygiene.finish_block_reason is None + + +def test_finish_hygiene_check_strict_finish_env_blocks_unknown_unattributed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv(STRICT_FINISH_ENV, "1") + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.unknown_unattributed_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.blocks_finish is True + assert hygiene.finish_block_reason == "own_unscoped_dirty" + + +def test_finish_hygiene_check_treats_modified_unattributed_as_advisory( + tmp_path: Path, +) -> None: + # An out-of-scope path that changed since start, with no foreign-intent + # attribution, is advisory — a peer's concurrent edit must not block the + # innocent finisher whose own declared scope is clean. 
+ store = get_workspace_intent_store(tmp_path) + start_snapshot = DirtySnapshot( + git_available=True, + captured_at_utc="2026-01-01T00:00:00Z", + entries=( + DirtySnapshotEntry( + path="pkg/extra.py", + status_xy=" M", + digest="old-digest", + digest_status="ok", + ), + ), + ) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/extra.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + start_dirty_snapshot=start_snapshot, + ) + assert hygiene.modified_unattributed_unscoped_dirty == ("pkg/extra.py",) + assert hygiene.blocks_finish is False + assert hygiene.finish_block_reason is None + + +def test_finish_hygiene_check_ignores_foreign_unscoped_dirty( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_pid = 33333 + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", + lambda pid: pid == live_pid, + ) + foreign = _record( + intent_id="intent-foreign-other-001", + pid=live_pid, + start_epoch=300, + scope={ + "allowed_files": ["pkg/foreign.py"], + "allowed_related": [], + "forbidden": [], + }, + ) + assert write_workspace_intent(root=tmp_path, record=foreign) + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n M pkg/foreign.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.foreign_attributed_outside_scope == ("pkg/foreign.py",) + assert hygiene.own_unscoped_dirty == () + assert hygiene.blocks_finish is False + + +def test_finish_hygiene_check_blocks_unacknowledged_dirty_legacy( + tmp_path: Path, +) -> None: + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n"): + 
hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=[], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.unacknowledged_dirty_in_scope == ("pkg/a.py",) + assert hygiene.blocks_finish is True + + +def test_finish_hygiene_check_blocks_on_foreign_overlap( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_pid = 33333 + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", + lambda pid: pid == live_pid, + ) + foreign = _record( + intent_id="intent-foreign-dirty-001", + pid=live_pid, + start_epoch=300, + ) + assert write_workspace_intent(root=tmp_path, record=foreign) + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n"): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.blocks_finish is True + assert len(hygiene.foreign_dirty_overlaps) == 1 + + +def test_dirty_paths_from_porcelain_skips_short_and_parses_renames() -> None: + dirty = hygiene_mod._dirty_paths_from_porcelain( + "ab\n \n M pkg/a.py\nR pkg/old.py -> pkg/new.py\n" + ) + assert dirty == ("pkg/a.py", "pkg/new.py", "pkg/old.py") + + +def test_normalize_path_rejects_traversal() -> None: + with pytest.raises(ValueError, match="path traversal"): + hygiene_mod._normalize_path("../etc/passwd") + + +def test_normalize_path_strips_dot_slash_prefix() -> None: + assert hygiene_mod._normalize_path("./pkg/a.py") == "pkg/a.py" + + +def test_normalize_path_dot_returns_empty() -> None: + assert hygiene_mod._normalize_path(".") == "" + + +@pytest.mark.parametrize( + ("intent_id", "own_pid", "own_start_epoch", "own_intent_id"), + [ + ("intent-own-001", 99999, 999, "intent-own-001"), + ("intent-own-002", 11111, 100, 
None), + ], +) +def test_foreign_dirty_overlaps_skip_own_identity( + tmp_path: Path, + intent_id: str, + own_pid: int, + own_start_epoch: int, + own_intent_id: str | None, +) -> None: + own = _record(intent_id=intent_id, pid=11111, start_epoch=100) + assert write_workspace_intent(root=tmp_path, record=own) + store = get_workspace_intent_store(tmp_path) + overlaps = hygiene_mod._foreign_dirty_overlaps( + dirty_paths=["pkg/a.py"], + store=store, + own_pid=own_pid, + own_start_epoch=own_start_epoch, + own_intent_id=own_intent_id, + ) + assert overlaps == () + + +def test_registry_lock_retries_until_acquired( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + lock_path = tmp_path / ".codeclone" / "intents" / ".lock" + attempts = iter([BlockingIOError(), None]) + + def _acquire_once(handle: object) -> None: + result = next(attempts, None) + if result is not None: + raise result + + times = iter([0.0, 0.05, 0.1]) + monkeypatch.setattr( + "codeclone.utils.file_lock.time.monotonic", + lambda: next(times), + ) + monkeypatch.setattr( + "codeclone.utils.file_lock._acquire_exclusive_lock", + _acquire_once, + ) + with workspace_registry_lock(lock_path, timeout_seconds=1.0): + assert lock_path.is_file() + + +def test_registry_lock_timeout_raises( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + lock_path = tmp_path / ".codeclone" / "intents" / ".lock" + times = iter([0.0, 10.0]) + + def _always_busy(handle: object) -> None: + raise BlockingIOError + + monkeypatch.setattr( + "codeclone.utils.file_lock.time.monotonic", + lambda: next(times), + ) + monkeypatch.setattr( + "codeclone.utils.file_lock._acquire_exclusive_lock", + _always_busy, + ) + with ( + pytest.raises(WorkspaceRegistryLockError, match="Timed out"), + workspace_registry_lock(lock_path, timeout_seconds=1.0), + ): + pass + + +def test_evaluate_scoped_hygiene_includes_related_scope(tmp_path: Path) -> None: + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M 
tests/test_a.py\n"): + hygiene = evaluate_scoped_hygiene( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=["tests/test_a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + ) + assert hygiene.dirty_paths == ("tests/test_a.py",) + assert hygiene.dirty_paths_outside_scope == ("tests/test_a.py",) + assert hygiene.blocks_edit is False + + +def test_foreign_dirty_overlaps_skip_terminal_foreign( + tmp_path: Path, +) -> None: + terminal = replace(_record(intent_id="intent-clean-001"), status="clean") + assert write_workspace_intent(root=tmp_path, record=terminal) + store = get_workspace_intent_store(tmp_path) + overlaps = hygiene_mod._foreign_dirty_overlaps( + dirty_paths=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id=None, + ) + assert overlaps == () + + +def test_evaluate_scoped_hygiene_marks_dirty_in_blocking_scope(tmp_path: Path) -> None: + store = get_workspace_intent_store(tmp_path) + with _mock_git_porcelain(" M pkg/a.py\n"): + hygiene = evaluate_scoped_hygiene( + root=tmp_path, + allowed_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + ) + assert hygiene.dirty_paths_in_scope == ("pkg/a.py",) + assert hygiene.dirty_paths_outside_scope == () + assert hygiene.blocks_edit is True + + +def test_finish_hygiene_check_returns_early_when_git_unavailable( + tmp_path: Path, +) -> None: + store = get_workspace_intent_store(tmp_path) + with patch.object(hygiene_mod, "_git_available", return_value=False): + hygiene = finish_hygiene_check( + root=tmp_path, + allowed_files=["pkg/a.py"], + allowed_related=[], + resolved_files=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id="intent-own-001", + ) + assert hygiene.git_available is False + assert hygiene.blocks_finish is False + + +def test_registry_lock_acquire_and_release(tmp_path: Path) -> None: + lock_path = tmp_path / ".codeclone" / "intents" / ".lock" + with workspace_registry_lock(lock_path): + 
assert lock_path.is_file() + + +def test_continue_own_wip_policy_allows_own_dirty_without_foreign() -> None: + hygiene = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=True, + ) + assert ( + hygiene_mod.hygiene_blocks_start_edit( + hygiene, + dirty_scope_policy=DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + ) + is False + ) diff --git a/tests/test_workspace_hygiene_digest.py b/tests/test_workspace_hygiene_digest.py new file mode 100644 index 00000000..d3bc719a --- /dev/null +++ b/tests/test_workspace_hygiene_digest.py @@ -0,0 +1,373 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +from codeclone.surfaces.mcp._workspace_hygiene import ( + DirtyAttribution, + DirtySnapshot, + DirtySnapshotEntry, + WorkspaceHygieneResult, + _dirty_entries_from_porcelain, + _dirty_entry_digest, + _dirty_start_state, + _git_diff_bytes, + _scope_relation, + _snapshot_status, + _untracked_file_digest, + collect_dirty_paths, + collect_dirty_snapshot, + dirty_snapshot_from_payload, + finish_hygiene_check, +) +from codeclone.surfaces.mcp._workspace_intent_store import get_workspace_intent_store + + +def test_dirty_snapshot_to_payload_sorts_entries() -> None: + snapshot = DirtySnapshot( + git_available=True, + captured_at_utc="2026-01-01T00:00:00Z", + entries=( + DirtySnapshotEntry( + path="b.py", + status_xy=" M", + digest_status="ok", + digest="aa", + ), + DirtySnapshotEntry( + path="a.py", + status_xy=" M", + digest_status="ok", + digest="bb", + ), + ), + ) + payload = 
snapshot.to_payload() + entries_obj = payload["entries"] + assert isinstance(entries_obj, dict) + assert list(entries_obj.keys()) == ["a.py", "b.py"] + + +def test_untracked_file_digest_reads_file_and_rejects_traversal( + tmp_path: Path, +) -> None: + root = tmp_path / "repo" + root.mkdir() + target = root / "pkg" + target.mkdir() + sample = target / "mod.py" + sample.write_text("print('ok')\n", encoding="utf-8") + + digest, status = _untracked_file_digest(root, "pkg/mod.py") + assert status == "ok" + assert digest is not None + assert len(digest) == 64 + + outside, outside_status = _untracked_file_digest(root, "../escape.py") + assert outside is None + assert outside_status == "unavailable" + + +def test_git_diff_bytes_returns_none_on_failure(tmp_path: Path) -> None: + from codeclone.surfaces.mcp import _workspace_hygiene as hygiene_mod + + with patch( + "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run", + side_effect=OSError("git missing"), + ): + result = hygiene_mod._git_diff_bytes(tmp_path, ["diff", "--", "a.py"]) + assert result is None + + +def test_workspace_hygiene_payload_detail_and_snapshot_status() -> None: + result = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=("tmp.log",), + foreign_dirty_overlaps=(), + blocks_edit=True, + dirty_attribution=( + DirtyAttribution( + path="tmp.log", + scope_relation="outside", + evidence="absent", + start_state="unknown", + intent_attribution="none", + classification="unknown_unattributed_unscoped_dirty", + blocking=False, + ), + ), + dirty_snapshot=DirtySnapshot( + git_available=False, + captured_at_utc="2026-01-01T00:00:00Z", + entries=(), + ), + dirty_snapshot_status="git_unavailable", + blocks_finish=True, + finish_block_reason="missing_evidence", + ) + payload = result.to_payload(detail_level="full") + assert payload["blocks_finish"] is True + assert payload["finish_block_reason"] == "missing_evidence" + assert 
payload["dirty_snapshot_status"] == "git_unavailable" + assert _snapshot_status(None) == "missing_legacy_conservative" + assert _snapshot_status(result.dirty_snapshot) == "git_unavailable" + + +def test_dirty_entries_from_porcelain_handles_rename_and_blank_rows() -> None: + output = "\n".join( + [ + "?? pkg/new.py", + "R pkg/old.py -> pkg/newer.py", + " M ", + "x", + ] + ) + entries = _dirty_entries_from_porcelain(output) + assert ("pkg/new.py", "??") in entries + assert ("pkg/old.py", "R ") in entries + assert ("pkg/newer.py", "R ") in entries + + +def test_dirty_entry_digest_and_git_diff_bytes_edge_branches( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene._git_diff_bytes", + lambda _root, args: b"cached" if "--cached" in args else None, + ) + digest, status = _dirty_entry_digest(tmp_path, "pkg/a.py", " M") + assert digest is None + assert status == "unavailable" + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run", + lambda *args, **kwargs: SimpleNamespace(stdout="text-diff"), + ) + assert _git_diff_bytes(tmp_path, ["diff", "--", "a.py"]) == b"text-diff" + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run", + lambda *args, **kwargs: SimpleNamespace(stdout=object()), + ) + assert _git_diff_bytes(tmp_path, ["diff", "--", "a.py"]) is None + + +def test_untracked_digest_handles_missing_and_open_errors( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + root = tmp_path / "repo" + root.mkdir() + missing_digest, missing_status = _untracked_file_digest(root, "pkg") + assert missing_digest is None + assert missing_status == "unavailable" + + target = root / "pkg" / "broken.py" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("x = 1\n", encoding="utf-8") + + def _boom_open(*_args: object, **_kwargs: object) -> object: + raise OSError("read failed") + + monkeypatch.setattr(Path, "open", 
_boom_open) + digest, status = _untracked_file_digest(root, "pkg/broken.py") + assert digest is None + assert status == "unavailable" + + +def test_scope_relation_declared_branch() -> None: + relation = _scope_relation( + "docs/guide.md", + blocking_scope={"pkg/"}, + related_scope={"tests/"}, + declared_scope={"docs/guide.md"}, + ) + assert relation == "declared" + + +def test_workspace_hygiene_snapshot_and_payload_edge_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene._git_available", + lambda _root: True, + ) + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run", + lambda *args, **kwargs: (_ for _ in ()).throw(OSError("git failed")), + ) + snapshot = collect_dirty_snapshot(tmp_path) + assert snapshot.git_available is False + + dirty = collect_dirty_paths(tmp_path, scoped_paths=("pkg/a.py",)) + assert dirty.git_available is False + + payload = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=False, + dirty_attribution=( + DirtyAttribution( + path="pkg/a.py", + scope_relation="own_allowed", + evidence="present", + start_state="present_same", + intent_attribution="none", + classification="declared_scope_dirty", + blocking=False, + ), + ), + files_for_scope_check=("pkg/a.py",), + ).to_payload(detail_level="full") + assert "dirty_attribution" in payload + assert "files_for_scope_check" in payload + + +def test_dirty_snapshot_from_payload_invalid_shapes() -> None: + assert dirty_snapshot_from_payload("bad") is None + assert dirty_snapshot_from_payload({"git_available": True}) is None + assert ( + dirty_snapshot_from_payload( + { + "git_available": True, + "captured_at_utc": "x", + "entries": {"a.py": {"status_xy": 1, "digest_status": "ok"}}, + } + ) + is None + ) + assert ( + dirty_snapshot_from_payload( + { + 
"git_available": True, + "captured_at_utc": "x", + "entries": [], + } + ) + is None + ) + assert ( + dirty_snapshot_from_payload( + { + "git_available": True, + "captured_at_utc": "x", + "entries": {1: {}}, + } + ) + is None + ) + assert ( + dirty_snapshot_from_payload( + { + "git_available": True, + "captured_at_utc": "x", + "entries": {"../a.py": {"status_xy": " M", "digest_status": "ok"}}, + } + ) + is None + ) + assert ( + dirty_snapshot_from_payload( + { + "git_available": True, + "captured_at_utc": "x", + "entries": { + "a.py": {"status_xy": " M", "digest": 1, "digest_status": "ok"} + }, + } + ) + is None + ) + + +def test_workspace_hygiene_state_helpers_and_finish_short_circuit( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + payload = WorkspaceHygieneResult( + git_available=True, + dirty_paths=(), + dirty_paths_in_scope=(), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=False, + dirty_attribution=(), + files_for_scope_check=("pkg/a.py",), + ).to_payload(detail_level="full") + assert "files_for_scope_check" in payload + + current = DirtySnapshotEntry( + path="pkg/a.py", + status_xy=" M", + digest="a" * 64, + digest_status="ok", + ) + start = DirtySnapshotEntry( + path="pkg/a.py", + status_xy=" M", + digest="a" * 64, + digest_status="unavailable", + ) + snapshot = DirtySnapshot(git_available=True, captured_at_utc="x", entries=()) + assert _dirty_start_state(None, start, snapshot=snapshot) == "cleaned" + assert _dirty_start_state(current, start, snapshot=snapshot) == "unknown" + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene._untracked_file_digest", + lambda _root, _path: ("u", "ok"), + ) + assert _dirty_entry_digest(tmp_path, "pkg/new.py", "??") == ("u", "ok") + + class _Completed: + stdout = b"bin" + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.subprocess.run", + lambda *args, **kwargs: _Completed(), + ) + assert _git_diff_bytes(tmp_path, ["diff"]) == b"bin" + + 
monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.evaluate_scoped_hygiene", + lambda **kwargs: WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=False, + ), + ) + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_hygiene.collect_dirty_snapshot", + lambda _root: DirtySnapshot( + git_available=False, captured_at_utc="x", entries=() + ), + ) + result = finish_hygiene_check( + root=tmp_path, + allowed_files=("pkg/a.py",), + allowed_related=(), + resolved_files=("pkg/a.py",), + store=get_workspace_intent_store(tmp_path), + own_pid=1, + own_start_epoch=1, + own_intent_id="intent-a", + ) + assert result.git_available is True diff --git a/tests/test_workspace_intent_gate.py b/tests/test_workspace_intent_gate.py new file mode 100644 index 00000000..bb9cf75c --- /dev/null +++ b/tests/test_workspace_intent_gate.py @@ -0,0 +1,445 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import os +from dataclasses import replace +from datetime import timedelta +from pathlib import Path + +import pytest + +from codeclone.config.intent_registry import DEFAULT_INTENT_REGISTRY_DB_PATH +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intent_store import ( + clear_workspace_intent_store_cache, +) +from codeclone.workspace_intent.gate import ( + HOOK_AUTHORIZE_FOREIGN_ENV, + UnclosedWorkspaceIntent, + evaluate_workspace_edit_gate, + has_authorized_workspace_intent, + has_blocking_workspace_intent, + list_unclosed_workspace_intents, + list_unclosed_workspace_intents_for_hook_cleanup, +) +from tests.test_workspace_intents import _record +from tests.workspace_intent_gate_helpers import ( + assert_gate_denied, + codex_foreign_record, + cursor_vscode_record, + write_workspace_record, +) + +_PID_ALIVE = "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive" +_PID_LIVENESS = "codeclone.surfaces.mcp._workspace_intent_pid.agent_pid_liveness" + + +def _write_record(root: Path, record: workspace_intents.WorkspaceIntentRecord) -> None: + write_workspace_record(root, record) + + +def test_gate_allows_active_file_registry_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, _record(pid=os.getpid() + 1000)) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is True + assert decision.reason == "active_intent" + assert decision.status == "active" + assert decision.ownership == "foreign_active" + assert decision.registry_backend == "file" + assert has_authorized_workspace_intent(tmp_path) is True + assert has_blocking_workspace_intent(tmp_path) is True + + +def test_gate_denies_foreign_active_when_hook_env_disables_it( + tmp_path: Path, + monkeypatch: 
pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "0") + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, _record(pid=os.getpid() + 1000)) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_denies_unknown_pid_liveness( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + _PID_LIVENESS, + lambda _pid: workspace_intents.PidLiveness.UNKNOWN, + ) + _write_record(tmp_path, _record(pid=os.getpid() + 1000)) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_allows_active_sqlite_registry_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("CODECLONE_INTENT_REGISTRY_BACKEND", "sqlite") + monkeypatch.setenv( + "CODECLONE_INTENT_REGISTRY_PATH", + DEFAULT_INTENT_REGISTRY_DB_PATH, + ) + clear_workspace_intent_store_cache() + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, _record(pid=os.getpid() + 1000)) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is True + assert decision.reason == "active_intent" + assert decision.registry_backend == "sqlite" + assert decision.registry_path == DEFAULT_INTENT_REGISTRY_DB_PATH + + +def test_gate_denies_queued_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + queued = replace(_record(), status="queued") + _write_record(tmp_path, queued) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "queued_intent_not_editable" + assert decision.intent_id == queued.intent_id + + +def test_gate_denies_expired_ttl( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + 
monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, _record(expires_delta=timedelta(seconds=-1))) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_denies_orphaned_active_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: False) + _write_record(tmp_path, _record(pid=999999)) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_denies_live_foreign_stale_lease( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + stale_live = _record( + pid=os.getpid() + 1000, + lease_renewed_delta=timedelta(minutes=-20), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + _write_record(tmp_path, stale_live) + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_does_not_create_missing_sqlite_registry( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("CODECLONE_INTENT_REGISTRY_BACKEND", "sqlite") + monkeypatch.setenv( + "CODECLONE_INTENT_REGISTRY_PATH", + DEFAULT_INTENT_REGISTRY_DB_PATH, + ) + clear_workspace_intent_store_cache() + db_path = tmp_path / DEFAULT_INTENT_REGISTRY_DB_PATH + + decision = evaluate_workspace_edit_gate(tmp_path) + + assert decision.allowed is False + assert decision.reason == "no_active_intent" + assert not db_path.exists() + + +def test_gate_closes_sqlite_read_connection( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("CODECLONE_INTENT_REGISTRY_BACKEND", "sqlite") + monkeypatch.setenv( + "CODECLONE_INTENT_REGISTRY_PATH", + DEFAULT_INTENT_REGISTRY_DB_PATH, + ) + clear_workspace_intent_store_cache() + db_path = 
tmp_path / DEFAULT_INTENT_REGISTRY_DB_PATH + db_path.parent.mkdir(parents=True) + db_path.touch() + seen: dict[str, object] = {} + + class _FakeConnection: + closed = False + + def execute(self, sql: str) -> _FakeConnection: + seen["sql"] = sql + return self + + def fetchall(self) -> list[tuple[str]]: + return [] + + def close(self) -> None: + self.closed = True + + fake = _FakeConnection() + + def _open_readonly(database: Path) -> _FakeConnection: + seen["database"] = database + return fake + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.open_intent_registry_db_readonly", + _open_readonly, + ) + + assert_gate_denied(tmp_path, reason="no_active_intent") + assert fake.closed is True + assert seen["database"] == db_path + assert "SELECT payload_json" in str(seen["sql"]) + + +def test_list_unclosed_workspace_intents_returns_active_and_queued( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + active = _record(intent_id="intent-active-001", status="active") + queued = _record(intent_id="intent-queued-001", status="queued") + _write_record(tmp_path, active) + _write_record(tmp_path, queued) + + unclosed = list_unclosed_workspace_intents(tmp_path) + + assert unclosed == ( + UnclosedWorkspaceIntent("intent-active-001", "active"), + UnclosedWorkspaceIntent("intent-queued-001", "queued"), + ) + + +def test_list_unclosed_workspace_intents_ignores_terminal_records( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, _record(intent_id="intent-active-001", status="active")) + _write_record(tmp_path, _record(intent_id="intent-clean-001", status="clean")) + _write_record(tmp_path, _record(intent_id="intent-expired-001", status="expired")) + _write_record(tmp_path, _record(intent_id="intent-orphan-001", status="orphaned")) + + unclosed = list_unclosed_workspace_intents(tmp_path) + + assert unclosed == 
(UnclosedWorkspaceIntent("intent-active-001", "active"),) + + +def test_hook_cleanup_excludes_foreign_active_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + _write_record(tmp_path, codex_foreign_record()) + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + assert unclosed == () + + +def test_hook_cleanup_includes_own_active_intent( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + own_pid = os.getpid() + own = cursor_vscode_record( + intent_id="intent-own-001", + pid=own_pid, + start_epoch=100, + ) + _write_record(tmp_path, own) + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup( + tmp_path, + own_pid=own_pid, + own_start_epoch=100, + ) + + assert unclosed == (UnclosedWorkspaceIntent("intent-own-001", "active"),) + + +def test_hook_cleanup_includes_recoverable_cursor_intent_only( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: False) + cursor_recoverable = cursor_vscode_record( + intent_id="intent-cursor-dead-001", + pid=os.getpid() + 9000, + ) + codex_recoverable = codex_foreign_record( + intent_id="intent-codex-dead-001", + pid=os.getpid() + 9001, + ) + _write_record(tmp_path, cursor_recoverable) + _write_record(tmp_path, codex_recoverable) + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + assert unclosed == (UnclosedWorkspaceIntent("intent-cursor-dead-001", "active"),) + + +def test_hook_cleanup_resolves_owner_identity_from_environment( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + import os + from dataclasses import replace + + from codeclone.workspace_intent.gate import ( + list_unclosed_workspace_intents_for_hook_cleanup, + ) + from tests.test_workspace_intents import _record + from tests.workspace_intent_gate_helpers import write_workspace_record + + 
monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive", + lambda _pid: True, + ) + own_pid = os.getpid() + own = replace( + _record(intent_id="intent-own-env-001", status="active"), + agent_pid=own_pid, + agent_start_epoch=42, + ) + write_workspace_record(tmp_path, own) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_PID", str(own_pid)) + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_START_EPOCH", "42") + + unclosed = list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + assert len(unclosed) == 1 + assert unclosed[0].intent_id == "intent-own-env-001" + + +def test_hook_cleanup_record_filter_handles_recoverable_agents() -> None: + import os + from dataclasses import replace + + from codeclone.surfaces.mcp._workspace_intent_lifecycle import utc_now + from codeclone.workspace_intent.gate import _include_record_in_hook_cleanup + from tests.test_workspace_intents import _record + + recoverable = replace( + _record(intent_id="intent-rec-001", status="active"), + agent_pid=os.getpid() + 5000, + agent_label="cursor-vscode/dead", + ) + now = utc_now() + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix=None, + include_foreign=False, + now=now, + ) + is False + ) + assert ( + _include_record_in_hook_cleanup( + recoverable, + own_pid=os.getpid(), + own_start_epoch=1, + recoverable_agent_label_prefix="cursor-vscode/", + include_foreign=False, + now=now, + ) + is True + ) + + +def test_hook_authorizes_foreign_active_environment_values( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + HOOK_AUTHORIZE_FOREIGN_ENV, + _hook_authorizes_foreign_active, + ) + + monkeypatch.delenv(HOOK_AUTHORIZE_FOREIGN_ENV, raising=False) + assert _hook_authorizes_foreign_active() is True + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "maybe") + assert _hook_authorizes_foreign_active() is False + 
monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "off") + assert _hook_authorizes_foreign_active() is False + monkeypatch.setenv(HOOK_AUTHORIZE_FOREIGN_ENV, "yes") + assert _hook_authorizes_foreign_active() is True + + +def test_workspace_ownership_honors_foreign_active_authorization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intents as workspace_intents + from codeclone.workspace_intent import gate as gate_mod + + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: True) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is True + ) + monkeypatch.setattr(gate_mod, "_hook_authorizes_foreign_active", lambda: False) + assert ( + gate_mod._ownership_authorizes_hook( + workspace_intents.IntentOwnership.FOREIGN_ACTIVE, + liveness=workspace_intents.PidLiveness.ALIVE, + ) + is False + ) + + +def test_agent_pid_liveness_honors_boolean_probe( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp import _workspace_intent_pid as pid_mod + from codeclone.surfaces.mcp._workspace_intent_lifecycle import PidLiveness + + monkeypatch.setattr(pid_mod, "is_agent_pid_alive", lambda _pid: False) + assert pid_mod.agent_pid_liveness(123) is PidLiveness.DEAD diff --git a/tests/test_workspace_intent_gate_errors.py b/tests/test_workspace_intent_gate_errors.py new file mode 100644 index 00000000..5d53d2b4 --- /dev/null +++ b/tests/test_workspace_intent_gate_errors.py @@ -0,0 +1,168 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path +from typing import cast + +import pytest + +from codeclone.config.intent_registry import IntentRegistryConfig +from codeclone.workspace_intent import gate as gate_mod +from tests.test_workspace_intents import _record +from tests.workspace_intent_gate_helpers import assert_gate_denied + + +def test_gate_registry_config_error_is_fail_closed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _boom(_root: Path) -> object: + raise ValueError("bad registry config") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + _boom, + ) + decision = assert_gate_denied(tmp_path, reason="registry_error") + assert decision.registry_backend is None + + +def test_gate_sqlite_load_error_is_fail_closed( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _Config: + backend = "sqlite" + storage_path = Path(".codeclone/db/intents.sqlite3") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + lambda _root: _Config(), + ) + + def _load_fail(*_args: object, **_kwargs: object) -> object: + raise OSError("cannot read sqlite") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate._load_registry_records_read_only", + _load_fail, + ) + decision = assert_gate_denied(tmp_path, reason="registry_error") + assert decision.registry_backend == "sqlite" + + +def test_gate_unsupported_backend_and_payload_type_edges() -> None: + class _Config: + backend = "unknown" + storage_path = Path("x") + + with pytest.raises(ValueError, match="Unsupported intent registry backend"): + gate_mod._load_registry_records_read_only( + Path("."), + cast(IntentRegistryConfig, _Config()), + ) + + assert gate_mod._record_from_payload(123) is None + assert gate_mod._record_from_payload('{"version": 99}') is None + + +def 
test_gate_decision_ignores_terminal_and_non_active_records() -> None: + records = [ + _record(status="accepted"), + _record(status="blocked"), + ] + decision = gate_mod._decision_from_records( + records, + registry_backend="file", + registry_path=".codeclone/intents", + ) + assert decision.allowed is False + assert decision.reason == "no_active_intent" + + +def test_gate_decision_queued_reports_ignored_history() -> None: + records = [ + _record(status="accepted"), + _record(status="queued"), + ] + decision = gate_mod._decision_from_records( + records, + registry_backend="file", + registry_path=".codeclone/intents", + ) + assert decision.allowed is False + assert decision.reason == "queued_intent_not_editable" + assert decision.details.get("ignored_records") == 1 + + +def test_gate_file_registry_skips_invalid_payload_files(tmp_path: Path) -> None: + intents_dir = tmp_path / ".codeclone" / "intents" + intents_dir.mkdir(parents=True) + (intents_dir / "bad.json").write_text("not-json", encoding="utf-8") + records = gate_mod._load_file_records(tmp_path) + assert records == () + + +def test_gate_load_registry_records_file_backend(tmp_path: Path) -> None: + from codeclone.config.intent_registry import IntentRegistryConfig + + config = IntentRegistryConfig( + backend="file", + storage_path=tmp_path / ".codeclone/intents", + ) + records = gate_mod._load_registry_records_read_only(tmp_path, config) + assert records == () + + +def test_hook_cleanup_reports_registry_configuration_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + WorkspaceIntentRegistryUnavailable, + list_unclosed_workspace_intents_for_hook_cleanup, + ) + + def _boom(_root: Path) -> object: + raise ValueError("broken registry") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + _boom, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="broken registry"): + 
list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) + + +def test_hook_cleanup_reports_sqlite_load_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.workspace_intent.gate import ( + WorkspaceIntentRegistryUnavailable, + list_unclosed_workspace_intents_for_hook_cleanup, + ) + + class _Config: + backend = "sqlite" + storage_path = Path(".codeclone/db/intents.sqlite3") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate.resolve_intent_registry_config", + lambda _root: _Config(), + ) + + def _load_fail(*_args: object, **_kwargs: object) -> object: + raise OSError("cannot read sqlite") + + monkeypatch.setattr( + "codeclone.workspace_intent.gate._load_registry_records_read_only", + _load_fail, + ) + with pytest.raises(WorkspaceIntentRegistryUnavailable, match="cannot read sqlite"): + list_unclosed_workspace_intents_for_hook_cleanup(tmp_path) diff --git a/tests/test_workspace_intent_models.py b/tests/test_workspace_intent_models.py new file mode 100644 index 00000000..6d7943c0 --- /dev/null +++ b/tests/test_workspace_intent_models.py @@ -0,0 +1,297 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from dataclasses import replace + +import pytest +from pydantic import ValidationError + +from codeclone.surfaces.mcp import _workspace_intent_models as workspace_intent_models +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intent_models import ( + IntentIntegrityModel, + IntentScopeModel, + WorkspaceIntentRowModel, + document_to_record_fields, + parse_workspace_document, + parse_workspace_document_json, + record_from_document, + signed_payload_dict_from_record, + signed_payload_json_from_record, +) + + +def test_intent_scope_model_normalizes_paths() -> None: + scope = IntentScopeModel.model_validate( + { + "allowed_files": ["pkg/a.py", "pkg/a.py", "./pkg/b.py"], + "allowed_related": [], + "forbidden": [".codeclone/**"], + } + ) + assert scope.allowed_files == ["./pkg/b.py", "pkg/a.py"] + + +def test_intent_scope_model_rejects_traversal() -> None: + with pytest.raises(ValidationError): + IntentScopeModel.model_validate( + {"allowed_files": ["../outside.py"], "allowed_related": [], "forbidden": []} + ) + + +def test_intent_scope_model_rejects_empty_allowed_files() -> None: + with pytest.raises(ValidationError, match="allowed_files must not be empty"): + IntentScopeModel.model_validate( + {"allowed_files": ["", " "], "allowed_related": [], "forbidden": []} + ) + + +def test_intent_scope_model_skips_blank_entries() -> None: + scope = IntentScopeModel.model_validate( + { + "allowed_files": ["pkg/a.py", "", " "], + "allowed_related": [], + "forbidden": [], + } + ) + assert scope.allowed_files == ["pkg/a.py"] + + +def test_intent_integrity_model_rejects_invalid_digest() -> None: + with pytest.raises(ValidationError, match="64-char hex digest"): + IntentIntegrityModel.model_validate({"payload_sha256": "not-a-digest"}) + + +def 
test_parse_workspace_document_json_rejects_invalid_payload() -> None: + assert parse_workspace_document_json("{not-json") is None + assert parse_workspace_document_json('{"registry_version":"2"}') is None + + +def test_workspace_intent_document_rejects_tampered_integrity() -> None: + record = workspace_intents.WorkspaceIntentRecord( + intent_id="intent-abcdef12-001", + agent_pid=1000, + agent_start_epoch=100, + agent_label="agent", + run_id="run1234567890", + declared_at_utc="2026-05-29T20:00:00Z", + expires_at_utc="2026-05-29T21:00:00Z", + ttl_seconds=3600, + status="active", + intent="edit pkg", + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [".codeclone/**"], + }, + scope_digest=workspace_intents.compute_scope_digest( + { + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [".codeclone/**"], + } + ), + blast_radius_summary={"radius_level": "medium"}, + lease_renewed_at_utc="2026-05-29T20:00:00Z", + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + report_digest="digest-a", + ) + payload = signed_payload_dict_from_record(record) + payload["intent"] = "tampered" + assert parse_workspace_document(payload) is None + + +def test_workspace_intent_document_rejects_naive_timestamp() -> None: + from tests.test_workspace_intents import _record + + record = _record() + payload = signed_payload_dict_from_record(record) + payload["declared_at_utc"] = "2026-05-29T20:00:00" + assert parse_workspace_document(payload) is None + + +def test_workspace_intent_document_rejects_invalid_dirty_snapshot() -> None: + from tests.test_workspace_intents import _record + + record = replace( + _record(), + dirty_snapshot={ + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": { + "../outside.py": { + "status_xy": " M", + "digest": "a" * 64, + "digest_status": "ok", + } + }, + }, + ) + assert parse_workspace_document(signed_payload_dict_from_record(record)) is None + + +@pytest.mark.parametrize( + 
"dirty_snapshot", + ( + ( + { + "git_available": "yes", + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": {}, + } + ), + ( + { + "git_available": True, + "captured_at_utc": "not-utc", + "entries": {}, + } + ), + ( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": {"pkg/a.py": {"status_xy": "M", "digest_status": "ok"}}, + } + ), + ( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": {"pkg/a.py": {"status_xy": " M", "digest_status": "ok"}}, + } + ), + ( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": { + "pkg/a.py": { + "status_xy": " M", + "digest": "a" * 64, + "digest_status": "bad", + } + }, + } + ), + ), +) +def test_workspace_intent_document_dirty_snapshot_validation_messages( + dirty_snapshot: dict[str, object], +) -> None: + from tests.test_workspace_intents import _record + + record = replace(_record(), dirty_snapshot=dirty_snapshot) + assert parse_workspace_document(signed_payload_dict_from_record(record)) is None + + +def test_signed_payload_json_roundtrip_via_pydantic() -> None: + from tests.test_workspace_intents import _record + + record = replace( + _record(), + dirty_snapshot={ + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": { + "pkg/a.py": { + "status_xy": " M", + "digest": "a" * 64, + "digest_status": "ok", + } + }, + }, + ) + payload_json = signed_payload_json_from_record(record) + document = parse_workspace_document(json.loads(payload_json)) + assert document is not None + roundtrip = record_from_document(document) + assert roundtrip == record + + +def test_workspace_intent_row_model_validates_payload_json() -> None: + from tests.test_workspace_intents import _record + + record = _record() + row = WorkspaceIntentRowModel.from_record_fields( + agent_pid=record.agent_pid, + agent_start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + declared_at_utc=record.declared_at_utc, + 
payload_json=signed_payload_json_from_record(record), + updated_at_utc=record.declared_at_utc, + ) + assert row.intent_id == record.intent_id + + with pytest.raises(ValidationError): + WorkspaceIntentRowModel.from_record_fields( + agent_pid=record.agent_pid, + agent_start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + declared_at_utc=record.declared_at_utc, + payload_json="{not-json", + updated_at_utc=record.declared_at_utc, + ) + + +def test_workspace_intent_row_model_rejects_unsafe_intent_id() -> None: + from tests.test_workspace_intents import _record + + record = _record() + with pytest.raises(ValidationError): + WorkspaceIntentRowModel.from_record_fields( + agent_pid=record.agent_pid, + agent_start_epoch=record.agent_start_epoch, + intent_id="../evil", + declared_at_utc=record.declared_at_utc, + payload_json=signed_payload_json_from_record(record), + updated_at_utc=record.declared_at_utc, + ) + + +def test_workspace_intent_document_to_record_fields_includes_lease_values() -> None: + from tests.test_workspace_intents import _record + + payload = signed_payload_dict_from_record(_record()) + document = parse_workspace_document(payload) + assert document is not None + fields = document_to_record_fields(document) + assert fields["lease_renewed_at_utc"] == document.lease_renewed_at_utc + assert fields["lease_seconds"] == document.lease_seconds + assert fields["report_digest"] == document.report_digest + + +def test_validate_dirty_snapshot_payload_private_edges() -> None: + validate = workspace_intent_models._validate_dirty_snapshot_payload + + assert validate(None) is None + with pytest.raises(ValueError, match=r"dirty_snapshot\.entries must be an object"): + validate( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": [], + } + ) + with pytest.raises(ValueError, match="entry path must be a non-empty string"): + validate( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": {"": 
{"status_xy": " M", "digest_status": "unavailable"}}, + } + ) + with pytest.raises(ValueError, match="entry must be an object"): + validate( + { + "git_available": True, + "captured_at_utc": "2026-05-29T20:00:00Z", + "entries": {"pkg/a.py": "bad"}, + } + ) diff --git a/tests/test_workspace_intent_schema.py b/tests/test_workspace_intent_schema.py new file mode 100644 index 00000000..07380f50 --- /dev/null +++ b/tests/test_workspace_intent_schema.py @@ -0,0 +1,121 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from codeclone.surfaces.mcp._workspace_intent_schema import ( + INTENT_REGISTRY_SCHEMA_VERSION, + IntentRegistrySchemaError, + create_schema_v1, + ensure_schema, + get_meta, + open_intent_registry_db, +) + +_V1_CREATE_INTENTS_SQL = """ +CREATE TABLE workspace_intents ( + agent_pid INTEGER NOT NULL, + agent_start_epoch INTEGER NOT NULL, + intent_id TEXT NOT NULL, + declared_at_utc TEXT NOT NULL, + payload_json TEXT NOT NULL, + PRIMARY KEY (agent_pid, agent_start_epoch, intent_id) +) +""" + +_V1_CREATE_META_SQL = """ +CREATE TABLE intent_registry_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) +""" + + +def _connect_v1_db(db_path: Path) -> sqlite3.Connection: + conn = sqlite3.connect(db_path) + conn.execute(_V1_CREATE_META_SQL) + conn.execute(_V1_CREATE_INTENTS_SQL) + conn.execute( + "INSERT INTO intent_registry_meta(key, value) VALUES ('schema_version', '1')" + ) + conn.commit() + return conn + + +def test_open_intent_registry_db_creates_v2_schema(tmp_path: Path) -> None: + db_path = tmp_path / "intents.sqlite3" + conn = open_intent_registry_db(db_path) + try: + assert get_meta(conn, "schema_version") == 
INTENT_REGISTRY_SCHEMA_VERSION + columns = { + row[1] for row in conn.execute("PRAGMA table_info(workspace_intents)") + } + assert {"closed_at_utc", "updated_at_utc"}.issubset(columns) + finally: + conn.close() + + +def test_create_schema_v1_seeds_meta(tmp_path: Path) -> None: + conn = sqlite3.connect(tmp_path / "fresh.sqlite3") + try: + create_schema_v1(conn) + assert get_meta(conn, "schema_version") == INTENT_REGISTRY_SCHEMA_VERSION + assert get_meta(conn, "generator") == "codeclone" + finally: + conn.close() + + +def test_ensure_schema_migrates_v1_database(tmp_path: Path) -> None: + db_path = tmp_path / "legacy.sqlite3" + conn = _connect_v1_db(db_path) + try: + ensure_schema(conn) + assert get_meta(conn, "schema_version") == INTENT_REGISTRY_SCHEMA_VERSION + columns = { + row[1] for row in conn.execute("PRAGMA table_info(workspace_intents)") + } + assert {"closed_at_utc", "updated_at_utc"}.issubset(columns) + finally: + conn.close() + + +def test_ensure_schema_migrates_partial_v1_columns(tmp_path: Path) -> None: + db_path = tmp_path / "partial.sqlite3" + conn = _connect_v1_db(db_path) + try: + conn.execute("ALTER TABLE workspace_intents ADD COLUMN closed_at_utc TEXT") + conn.commit() + ensure_schema(conn) + columns = { + row[1] for row in conn.execute("PRAGMA table_info(workspace_intents)") + } + assert "updated_at_utc" in columns + assert get_meta(conn, "schema_version") == INTENT_REGISTRY_SCHEMA_VERSION + finally: + conn.close() + + +def test_ensure_schema_rejects_unknown_version(tmp_path: Path) -> None: + db_path = tmp_path / "unknown.sqlite3" + conn = sqlite3.connect(db_path) + try: + conn.execute(_V1_CREATE_META_SQL) + conn.execute( + "INSERT INTO intent_registry_meta(key, value) " + "VALUES ('schema_version', '999')" + ) + conn.commit() + with pytest.raises( + IntentRegistrySchemaError, match="Unsupported intent registry" + ): + ensure_schema(conn) + finally: + conn.close() diff --git a/tests/test_workspace_intent_sqlite_store.py 
b/tests/test_workspace_intent_sqlite_store.py new file mode 100644 index 00000000..6df2e8a9 --- /dev/null +++ b/tests/test_workspace_intent_sqlite_store.py @@ -0,0 +1,541 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from collections.abc import Iterator +from contextlib import contextmanager +from dataclasses import replace +from datetime import timedelta +from pathlib import Path + +import pytest + +from codeclone.config.intent_registry import ( + DEFAULT_INTENT_REGISTRY_BACKEND, + DEFAULT_INTENT_REGISTRY_DB_PATH, + DEFAULT_INTENT_REGISTRY_RETENTION_DAYS, + IntentRegistryConfig, + IntentRegistryConfigError, + intent_registry_summary, + resolve_intent_registry_backend, + resolve_intent_registry_config, + resolve_intent_registry_db_path, + resolve_intent_registry_retention_days, +) +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intent_lifecycle import gc_status_for_reason +from codeclone.surfaces.mcp._workspace_intent_models import ( + signed_payload_json_from_record, +) +from codeclone.surfaces.mcp._workspace_intent_paths import intent_path +from codeclone.surfaces.mcp._workspace_intent_schema import ( + INTENT_REGISTRY_SCHEMA_VERSION, + get_meta, + open_intent_registry_db, +) +from codeclone.surfaces.mcp._workspace_intent_staleness import is_stale, stale_reason +from codeclone.surfaces.mcp._workspace_intent_store import ( + FileWorkspaceIntentStore, + SqliteWorkspaceIntentStore, + _record_from_json, + clear_workspace_intent_store_cache, + get_workspace_intent_store, + lazy_close_eligible_records, +) +from tests.test_workspace_intents import _record + + +@contextmanager +def _open_sqlite_store( + sqlite_root: Path, 
+ *, + retention_days: int = 7, +) -> Iterator[SqliteWorkspaceIntentStore]: + db_path = sqlite_root / DEFAULT_INTENT_REGISTRY_DB_PATH + store = SqliteWorkspaceIntentStore(db_path=db_path, retention_days=retention_days) + try: + yield store + finally: + store.close() + + +@pytest.fixture +def sqlite_root(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + monkeypatch.setenv("CODECLONE_INTENT_REGISTRY_BACKEND", "sqlite") + monkeypatch.setenv( + "CODECLONE_INTENT_REGISTRY_PATH", + DEFAULT_INTENT_REGISTRY_DB_PATH, + ) + clear_workspace_intent_store_cache() + return tmp_path + + +def test_resolve_intent_registry_backend_defaults() -> None: + assert resolve_intent_registry_backend(None) == DEFAULT_INTENT_REGISTRY_BACKEND + assert resolve_intent_registry_backend("sqlite") == "sqlite" + with pytest.raises(IntentRegistryConfigError): + resolve_intent_registry_backend("postgres") + + +def test_resolve_intent_registry_retention_days_defaults_and_bounds() -> None: + assert ( + resolve_intent_registry_retention_days(None) + == DEFAULT_INTENT_REGISTRY_RETENTION_DAYS + ) + assert resolve_intent_registry_retention_days(7) == 7 + assert resolve_intent_registry_retention_days(14) == 14 + with pytest.raises(IntentRegistryConfigError): + resolve_intent_registry_retention_days(0) + # No edition ceiling: any value at or above the minimum is accepted. 
+ assert resolve_intent_registry_retention_days(30) == 30 + assert resolve_intent_registry_retention_days(365) == 365 + + +def test_resolve_intent_registry_db_path_rejects_unsafe_values( + tmp_path: Path, +) -> None: + with pytest.raises(IntentRegistryConfigError): + resolve_intent_registry_db_path( + root_path=tmp_path, + value="/tmp/intents.sqlite3", + ) + with pytest.raises(IntentRegistryConfigError): + resolve_intent_registry_db_path( + root_path=tmp_path, + value="../outside.sqlite3", + ) + with pytest.raises(IntentRegistryConfigError, match="must be a string"): + resolve_intent_registry_db_path(root_path=tmp_path, value=123) + with pytest.raises(IntentRegistryConfigError, match="must not be empty"): + resolve_intent_registry_db_path(root_path=tmp_path, value=" ") + with pytest.raises(IntentRegistryConfigError, match="must end with"): + resolve_intent_registry_db_path( + root_path=tmp_path, + value=".cache/intents.txt", + ) + + +@pytest.mark.parametrize("value", [True, "7"]) +def test_resolve_intent_registry_retention_days_rejects_non_int( + value: object, +) -> None: + with pytest.raises(IntentRegistryConfigError, match="must be an integer"): + resolve_intent_registry_retention_days(value) + + +def test_resolve_intent_registry_backend_rejects_non_string() -> None: + with pytest.raises(IntentRegistryConfigError, match="must be a string"): + resolve_intent_registry_backend(123) + + +def test_resolve_intent_registry_config_ignores_invalid_pyproject( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_BACKEND", raising=False) + (tmp_path / "pyproject.toml").write_bytes(b"not-valid-toml{") + config = resolve_intent_registry_config(tmp_path) + assert config.backend == DEFAULT_INTENT_REGISTRY_BACKEND + + +def test_intent_registry_summary_falls_back_to_absolute_storage( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + outside = Path("/tmp/codeclone-intent-registry-outside.sqlite3") + + def 
_outside_config(_root: Path) -> IntentRegistryConfig: + return IntentRegistryConfig( + backend="sqlite", + storage_path=outside, + retention_days=DEFAULT_INTENT_REGISTRY_RETENTION_DAYS, + ) + + monkeypatch.setattr( + "codeclone.config.intent_registry.resolve_intent_registry_config", + _outside_config, + ) + summary = intent_registry_summary(tmp_path) + assert summary["registry_backend"] == "sqlite" + assert summary["registry_storage"] == str(outside) + + +def test_sqlite_store_write_list_find_update_close(sqlite_root: Path) -> None: + record = _record() + store = get_workspace_intent_store(sqlite_root) + assert store.backend == "sqlite" + assert store.write(record) + listed = store.list_records() + assert listed == (record,) + assert store.find(record.intent_id) == record + + assert store.remove( + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + assert store.find(record.intent_id) is None + assert isinstance(store, SqliteWorkspaceIntentStore) + archived = store._fetch_record_unlocked( + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + assert archived is not None + assert archived.status == "clean" + + closed_via_status = _record(intent_id="intent-close-002", start_epoch=101) + assert store.write(closed_via_status) + assert workspace_intents.update_workspace_intent_status( + root=sqlite_root, + pid=closed_via_status.agent_pid, + start_epoch=closed_via_status.agent_start_epoch, + intent_id=closed_via_status.intent_id, + new_status="clean", + ) + assert store.find(closed_via_status.intent_id) is None + + +def test_sqlite_store_list_records_for_hygiene_and_find_raw( + sqlite_root: Path, +) -> None: + record = _record(intent_id="intent-hygiene-001") + store = get_workspace_intent_store(sqlite_root) + assert store.write(record) + hygiene_records = store.list_records_for_hygiene() + assert record in hygiene_records + assert store.find_raw(record.intent_id) == record + 
raw_records = store.list_records_raw() + assert record in raw_records + + +def test_sqlite_store_active_rewrite_clears_closed_at(sqlite_root: Path) -> None: + with _open_sqlite_store(sqlite_root) as store: + record = _record(intent_id="intent-reopen-001") + assert store.write(replace(record, status="clean")) + closed_row = store._conn.execute( + "SELECT closed_at_utc FROM workspace_intents WHERE intent_id = ?", + (record.intent_id,), + ).fetchone() + assert closed_row is not None + assert closed_row[0] is not None + + assert store.write(record) + reopened_row = store._conn.execute( + "SELECT closed_at_utc FROM workspace_intents WHERE intent_id = ?", + (record.intent_id,), + ).fetchone() + assert reopened_row is not None + assert reopened_row[0] is None + assert store.find(record.intent_id) == record + + +def test_sqlite_store_remove_rejects_invalid_intent_id(sqlite_root: Path) -> None: + store = get_workspace_intent_store(sqlite_root) + assert store.remove(pid=1, start_epoch=1, intent_id="not-an-intent") is False + + +def test_sqlite_store_lazy_close_eligible_records(sqlite_root: Path) -> None: + stale = replace( + _record(intent_id="intent-lazy-close-001"), + expires_at_utc=workspace_intents.format_utc( + workspace_intents.utc_now() - timedelta(hours=1) + ), + ) + store = get_workspace_intent_store(sqlite_root) + assert store.write(stale) + result = lazy_close_eligible_records(store) + assert stale.intent_id in result.closed_ids + + +def test_sqlite_store_gc_closes_corrupted_and_stale(sqlite_root: Path) -> None: + with _open_sqlite_store(sqlite_root, retention_days=14) as store: + record = _record(intent_id="intent-stale-001") + stale = replace( + _record( + intent_id="intent-expired-002", + pid=record.agent_pid + 1, + start_epoch=101, + ), + expires_at_utc=workspace_intents.format_utc( + workspace_intents.utc_now() - timedelta(hours=1) + ), + ) + assert store.write(record) + assert store.write(stale) + with store._lock: + store._conn.execute( + """ + INSERT INTO 
workspace_intents( + agent_pid, + agent_start_epoch, + intent_id, + declared_at_utc, + payload_json, + closed_at_utc, + updated_at_utc + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + 999, + 999, + "intent-bad-003", + "2026-01-01T00:00:00Z", + "{not-json", + None, + "2026-01-01T00:00:00Z", + ), + ) + store._conn.commit() + + result = store.gc() + removed = result["removed"] + corrupted_removed = result["corrupted_removed"] + assert isinstance(removed, int) and removed >= 1 + assert isinstance(corrupted_removed, int) and corrupted_removed >= 1 + assert store.find("intent-stale-001") is not None + assert store.find("intent-expired-002") is None + archived = store._fetch_record_unlocked( + pid=stale.agent_pid, + start_epoch=stale.agent_start_epoch, + intent_id=stale.intent_id, + ) + assert archived is not None + assert archived.status == "expired" + + +def test_sqlite_store_retention_purges_closed_rows(sqlite_root: Path) -> None: + with _open_sqlite_store(sqlite_root) as store: + closed = replace(_record(intent_id="intent-old-001"), status="clean") + assert store.write(closed) + old_closed_at = workspace_intents.format_utc( + workspace_intents.utc_now() - timedelta(days=30) + ) + with store._lock: + store._conn.execute( + """ + UPDATE workspace_intents + SET closed_at_utc = ? + WHERE intent_id = ? 
+ """, + (old_closed_at, closed.intent_id), + ) + store._conn.commit() + + result = store.gc() + assert result["retention_purged"] == 1 + assert ( + store._fetch_record_unlocked( + pid=closed.agent_pid, + start_epoch=closed.agent_start_epoch, + intent_id=closed.intent_id, + ) + is None + ) + + +def test_sqlite_store_exposes_storage_path_and_closes(sqlite_root: Path) -> None: + db_path = sqlite_root / DEFAULT_INTENT_REGISTRY_DB_PATH + with _open_sqlite_store(sqlite_root) as store: + assert store.storage_path == db_path + + +def test_sqlite_store_write_returns_false_for_invalid_record( + sqlite_root: Path, +) -> None: + with _open_sqlite_store(sqlite_root) as store: + assert store.write(object()) is False # type: ignore[arg-type] + + +def test_sqlite_store_gc_returns_empty_result_on_query_failure( + sqlite_root: Path, +) -> None: + with _open_sqlite_store(sqlite_root) as store: + store.close() + result = store.gc() + assert result["removed"] == 0 + assert result["remaining"] == 0 + + +def test_record_from_json_accepts_dict_payload() -> None: + record = _record() + payload = json.loads(signed_payload_json_from_record(record)) + parsed = _record_from_json(payload) + assert parsed == record + + +def test_record_from_json_rejects_non_mapping_payload() -> None: + assert _record_from_json(123) is None + + +def test_file_store_backend_metadata( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_BACKEND", raising=False) + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_PATH", raising=False) + clear_workspace_intent_store_cache() + store = get_workspace_intent_store(tmp_path) + assert isinstance(store, FileWorkspaceIntentStore) + assert store.backend == "file" + assert store.storage_path == tmp_path / ".codeclone" / "intents" + + +def test_file_store_write_returns_false_on_os_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_BACKEND", raising=False) + 
clear_workspace_intent_store_cache() + store = get_workspace_intent_store(tmp_path) + record = _record() + + def _fail(**_kwargs: object) -> None: + raise OSError("read-only registry") + + import codeclone.surfaces.mcp._workspace_intent_store as intent_store_mod + + monkeypatch.setattr(intent_store_mod, "write_json_document_atomically", _fail) + assert store.write(record) is False + + +def test_file_store_gc_removes_corrupted_payload( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_BACKEND", raising=False) + clear_workspace_intent_store_cache() + store = get_workspace_intent_store(tmp_path) + record = _record(intent_id="intent-corrupt-001") + bad_path = intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + bad_path.parent.mkdir(parents=True, exist_ok=True) + bad_path.write_text("{not-json", encoding="utf-8") + result = store.gc() + assert result["corrupted_removed"] == 1 + + +def test_resolve_intent_registry_config_honors_env_overrides( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_RETENTION_DAYS", raising=False) + monkeypatch.setenv("CODECLONE_INTENT_REGISTRY_BACKEND", "sqlite") + monkeypatch.setenv( + "CODECLONE_INTENT_REGISTRY_PATH", + DEFAULT_INTENT_REGISTRY_DB_PATH, + ) + config = resolve_intent_registry_config(tmp_path) + assert config.backend == "sqlite" + assert config.storage_path == tmp_path / DEFAULT_INTENT_REGISTRY_DB_PATH + + +def test_intent_registry_summary_file_backend(tmp_path: Path) -> None: + summary = intent_registry_summary(tmp_path) + assert summary["registry_backend"] == "file" + assert summary["registry_storage"] == ".codeclone/intents" + assert summary["registry_retention_days"] == str( + DEFAULT_INTENT_REGISTRY_RETENTION_DAYS + ) + + +def test_intent_registry_summary_sqlite_backend(sqlite_root: Path) -> None: + summary = 
intent_registry_summary(sqlite_root) + assert summary["registry_backend"] == "sqlite" + assert summary["registry_storage"] == DEFAULT_INTENT_REGISTRY_DB_PATH + assert summary["registry_retention_days"] == str( + DEFAULT_INTENT_REGISTRY_RETENTION_DAYS + ) + + +def test_pyproject_sqlite_backend( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_BACKEND", raising=False) + monkeypatch.delenv("CODECLONE_INTENT_REGISTRY_PATH", raising=False) + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + "[tool.codeclone]\n" + 'intent_registry_backend = "sqlite"\n' + f'intent_registry_path = "{DEFAULT_INTENT_REGISTRY_DB_PATH}"\n' + "intent_registry_retention_days = 14\n", + encoding="utf-8", + ) + config = resolve_intent_registry_config(tmp_path) + assert config.backend == "sqlite" + assert config.storage_path == tmp_path / DEFAULT_INTENT_REGISTRY_DB_PATH + assert config.retention_days == 14 + + +def test_sqlite_schema_v2_has_audit_columns(tmp_path: Path) -> None: + db_path = tmp_path / DEFAULT_INTENT_REGISTRY_DB_PATH + conn = open_intent_registry_db(db_path) + try: + assert get_meta(conn, "schema_version") == INTENT_REGISTRY_SCHEMA_VERSION + columns = { + row[1] for row in conn.execute("PRAGMA table_info(workspace_intents)") + } + assert {"closed_at_utc", "updated_at_utc"}.issubset(columns) + finally: + conn.close() + + +def test_sqlite_payload_roundtrip_preserves_integrity(sqlite_root: Path) -> None: + record = _record() + assert workspace_intents.write_workspace_intent(root=sqlite_root, record=record) + loaded = workspace_intents.list_workspace_intents(root=sqlite_root)[0] + assert workspace_intents.verify_intent_integrity( + workspace_intents.signed_payload(loaded) + ) + payload = json.loads( + json.dumps( + workspace_intents.signed_payload(loaded), + sort_keys=True, + separators=(",", ":"), + ) + ) + assert payload["integrity"][ + "payload_sha256" + ] == workspace_intents.compute_intent_digest( + {k: v 
for k, v in payload.items() if k != "integrity"} + ) + + +def test_gc_status_for_reason_maps_orphaned_status() -> None: + assert gc_status_for_reason("orphaned") == "orphaned" + assert gc_status_for_reason("expired") == "expired" + + +def test_stale_reason_honors_terminal_status_fields() -> None: + expired = replace(_record(), status="expired") + orphaned = replace(_record(), status="orphaned") + assert stale_reason(expired) == "expired" + assert stale_reason(orphaned) == "orphaned" + + +def test_is_stale_reports_expired_ttl() -> None: + stale = replace( + _record(intent_id="intent-stale-ttl-001"), + expires_at_utc=workspace_intents.format_utc( + workspace_intents.utc_now() - timedelta(hours=1) + ), + ) + assert is_stale(stale) is True + + +def test_sqlite_store_write_returns_false_on_closed_connection( + sqlite_root: Path, +) -> None: + store = get_workspace_intent_store(sqlite_root) + assert isinstance(store, SqliteWorkspaceIntentStore) + record = _record(intent_id="intent-write-fail-001") + store._conn.close() + assert store.write(record) is False diff --git a/tests/test_workspace_intents.py b/tests/test_workspace_intents.py new file mode 100644 index 00000000..797f6d5b --- /dev/null +++ b/tests/test_workspace_intents.py @@ -0,0 +1,1535 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy +from __future__ import annotations + +import os +from dataclasses import replace +from datetime import timedelta +from pathlib import Path + +import pytest +from pydantic import ValidationError + +from codeclone.surfaces.mcp import _workspace_intent_paths as intent_paths +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intent_models import IntentScopeModel +from codeclone.surfaces.mcp._workspace_intents import WorkspaceIntentRecord +from codeclone.utils.json_io import read_json_object, write_json_document_atomically + +_PID_ALIVE = "codeclone.surfaces.mcp._workspace_intent_pid.is_agent_pid_alive" +_PID_LIVENESS = "codeclone.surfaces.mcp._workspace_intent_pid.agent_pid_liveness" + + +def _record( + *, + intent_id: str = "intent-abcdef12-001", + pid: int | None = None, + start_epoch: int = 100, + status: str = "active", + scope: dict[str, object] | None = None, + expires_delta: timedelta = timedelta(hours=1), + lease_renewed_delta: timedelta = timedelta(), + lease_seconds: int = workspace_intents.DEFAULT_LEASE_SECONDS, + report_digest: str = "digest-a", +) -> WorkspaceIntentRecord: + declared_at = workspace_intents.utc_now() + scope_payload = scope or { + "allowed_files": ["pkg/a.py"], + "allowed_related": ["tests/test_a.py"], + "forbidden": [ + ".cache/codeclone/**", + ".codeclone/**", + "codeclone.baseline.json", + ], + } + return WorkspaceIntentRecord( + intent_id=intent_id, + agent_pid=pid or os.getpid(), + agent_start_epoch=start_epoch, + agent_label="agent-a", + run_id="abcdef1234567890", + declared_at_utc=workspace_intents.format_utc(declared_at), + expires_at_utc=workspace_intents.format_utc(declared_at + expires_delta), + ttl_seconds=3600, + status=status, + intent="edit pkg.a", + scope=scope_payload, + scope_digest=workspace_intents.compute_scope_digest(scope_payload), + blast_radius_summary={"radius_level": 
"medium"}, + lease_renewed_at_utc=workspace_intents.format_utc( + declared_at + lease_renewed_delta + ), + lease_seconds=lease_seconds, + report_digest=report_digest, + ) + + +def _signed_payload_with( + record: WorkspaceIntentRecord, + **updates: object, +) -> dict[str, object]: + payload = record.unsigned_payload() + payload.update(updates) + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + return payload + + +def _signed_payload_without( + record: WorkspaceIntentRecord, + *keys: str, +) -> dict[str, object]: + payload = record.unsigned_payload() + for key in keys: + payload.pop(key, None) + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + return payload + + +def test_workspace_intent_write_validate_update_and_remove(tmp_path: Path) -> None: + record = _record() + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + records = workspace_intents.list_workspace_intents(root=tmp_path) + assert records == (record,) + found = workspace_intents.find_workspace_intent( + root=tmp_path, + intent_id=record.intent_id, + ) + assert found == record + + assert workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + from codeclone.surfaces.mcp._workspace_intents import ( + list_workspace_intent_records_raw, + ) + + updated = list_workspace_intent_records_raw(root=tmp_path)[0] + assert updated.status == "clean" + assert workspace_intents.verify_intent_integrity( + workspace_intents.signed_payload(updated) + ) + + assert workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + + +@pytest.mark.parametrize( + "payload", + [ + None, + {1: 
"not-a-string-key"}, + {"integrity": {"payload_sha256": "0" * 64}}, + _signed_payload_with(_record(), registry_version="9"), + _signed_payload_without(_record(), "intent_id"), + _signed_payload_with(_record(), agent_pid=True), + _signed_payload_with(_record(), agent_start_epoch=0), + _signed_payload_with(_record(), status="finished"), + _signed_payload_with(_record(), scope_digest="not-a-digest"), + _signed_payload_with(_record(), declared_at_utc="not-a-date"), + _signed_payload_with(_record(), lease_renewed_at_utc="not-a-date"), + _signed_payload_without(_record(), "lease_renewed_at_utc"), + _signed_payload_with(_record(), lease_seconds=1), + _signed_payload_with(_record(), lease_seconds=True), + _signed_payload_without(_record(), "report_digest"), + _signed_payload_with(_record(), blast_radius_summary=[]), + _signed_payload_with(_record(), scope=[]), + _signed_payload_with(_record(), scope={"allowed_files": []}), + _signed_payload_with( + _record(), + scope={"allowed_files": ["pkg/a.py"], "allowed_related": "tests/a.py"}, + ), + _signed_payload_with( + _record(), + scope={"allowed_files": ["pkg/a.py"], "forbidden": [1]}, + ), + ], +) +def test_workspace_intent_validation_rejects_malformed_payloads( + payload: object, +) -> None: + assert workspace_intents.validate_workspace_record(payload) is None + + +def test_workspace_intent_validation_rejects_scope_digest_mismatch() -> None: + payload = _signed_payload_with(_record(), scope_digest="0" * 64) + + assert workspace_intents.validate_workspace_record(payload) is None + + +def test_workspace_intent_validation_rejects_tampered_and_invalid_paths( + tmp_path: Path, +) -> None: + record = _record() + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + path = workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + payload = read_json_object(path) + payload["intent"] = "tampered" + 
write_json_document_atomically(path, payload, sort_keys=True) + + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + assert not path.exists() + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["corrupted_removed"] == 0 + + invalid_scope: dict[str, object] = { + "allowed_files": [str(tmp_path / "abs.py")], + "allowed_related": [], + "forbidden": [], + } + invalid = _record(scope=invalid_scope) + signed = workspace_intents.signed_payload(invalid) + assert workspace_intents.validate_workspace_record(signed) is None + + traversal_scope: dict[str, object] = { + "allowed_files": ["../outside.py"], + "allowed_related": [], + "forbidden": [], + } + traversal = _record(scope=traversal_scope) + assert ( + workspace_intents.validate_workspace_record( + workspace_intents.signed_payload(traversal) + ) + is None + ) + + +def test_workspace_intent_stale_orphan_and_gc( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + expired = _record( + intent_id="intent-expired-001", + expires_delta=timedelta(seconds=-1), + ) + orphaned = _record( + intent_id="intent-orphaned-001", + pid=999999, + start_epoch=101, + ) + active = _record(intent_id="intent-active-001", start_epoch=102) + for record in (expired, orphaned, active): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + monkeypatch.setattr( + _PID_ALIVE, + lambda pid: pid != orphaned.agent_pid, + ) + + assert workspace_intents.stale_reason(expired) == "expired" + assert workspace_intents.stale_reason(orphaned) == "orphaned" + assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) + + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["removed"] == 1 + assert gc_payload["remaining"] == 1 + assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) + + +def test_workspace_intent_lease_expiry_is_recoverable_not_gc( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> 
None: + record = _record( + intent_id="intent-lease-expired-001", + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + + assert workspace_intents.stale_reason(record) == "lease_expired" + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + assert workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) == (record,) + + gc_payload = workspace_intents.gc_workspace(root=tmp_path) + assert gc_payload["removed"] == 0 + assert workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) == (record,) + + +def test_workspace_intent_unknown_pid_liveness_is_not_orphaned( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + active = _record(intent_id="intent-unknown-active-001", pid=44444) + lease_expired = _record( + intent_id="intent-unknown-lease-expired-001", + pid=44444, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + for record in (active, lease_expired): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr( + _PID_LIVENESS, + lambda _pid: workspace_intents.PidLiveness.UNKNOWN, + ) + + assert workspace_intents.stale_reason(active) is None + assert workspace_intents.stale_reason(lease_expired) == "lease_expired" + counts = workspace_intents.workspace_status_counts(root=tmp_path) + assert counts["orphaned_count"] == 0 + + +def test_workspace_intent_io_failure_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record() + + assert ( + workspace_intents.find_workspace_intent( + root=tmp_path, + intent_id=record.intent_id, + ) + is None + ) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + 
start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + is False + ) + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid + 1, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="clean", + ) + is False + ) + + expired = _record( + intent_id="intent-expired-lease", + expires_delta=timedelta(days=-1), + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=expired) + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=expired.agent_pid, + start_epoch=expired.agent_start_epoch, + intent_id=expired.intent_id, + ) + is False + ) + + def fail_write(_self: object, _record: WorkspaceIntentRecord) -> bool: + return False + + from codeclone.surfaces.mcp._workspace_intent_store import FileWorkspaceIntentStore + + monkeypatch.setattr(FileWorkspaceIntentStore, "write_unlocked", fail_write) + assert ( + workspace_intents.write_workspace_intent(root=tmp_path, record=_record()) + is False + ) + assert ( + workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="violated", + ) + is False + ) + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + def raise_oserror(*args: object, **kwargs: object) -> None: + raise OSError("boom") + + monkeypatch.setattr(Path, "unlink", raise_oserror) + assert ( + workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=record.agent_pid, + 
start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + +def test_workspace_intent_payload_and_helper_edge_cases(tmp_path: Path) -> None: + record = _record() + now = workspace_intents.utc_now() + + own_payload = workspace_intents.workspace_intent_to_payload( + record, + own_pid=record.agent_pid, + own_start_epoch=record.agent_start_epoch, + now=now, + ) + assert own_payload["ownership"] == "own_active" + assert own_payload["is_own"] is True + assert isinstance(own_payload["lease_expires_in_seconds"], int) + + invalid_lease = replace(record, lease_renewed_at_utc="not-a-date") + invalid_payload = workspace_intents.workspace_intent_to_payload( + invalid_lease, + own_pid=record.agent_pid, + own_start_epoch=record.agent_start_epoch, + now=now, + ) + assert invalid_payload["ownership"] == "own_stale" + assert "lease_expires_in_seconds" not in invalid_payload + + assert workspace_intents.resolved_ttl_seconds(True) == ( + workspace_intents.DEFAULT_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("bad") == ( + workspace_intents.DEFAULT_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("1") == ( + workspace_intents.MIN_TTL_SECONDS + ) + assert workspace_intents.resolved_ttl_seconds("999999") == ( + workspace_intents.MAX_TTL_SECONDS + ) + assert workspace_intents.resolved_lease_seconds("1") == ( + workspace_intents.MIN_LEASE_SECONDS + ) + assert workspace_intents.resolved_lease_seconds("999999") == ( + workspace_intents.MAX_LEASE_SECONDS + ) + assert workspace_intents.verify_intent_integrity({}) is False + assert ( + workspace_intents.safe_remove_own_intent( + root=Path("relative"), + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert workspace_intents.safe_remove_own_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + 
intent_id=record.intent_id, + ) + assert not workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ).exists() + + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + assert workspace_intents.remove_workspace_record(root=tmp_path, record=record) + + +def test_workspace_intent_private_edge_helpers( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + expired_status = _record( + status=workspace_intents.WorkspaceIntentStatus.EXPIRED.value + ) + orphaned_status = _record( + status=workspace_intents.WorkspaceIntentStatus.ORPHANED.value + ) + + assert workspace_intents.is_stale(expired_status) + assert workspace_intents.stale_reason(expired_status) == "expired" + assert workspace_intents.stale_reason(orphaned_status) == "orphaned" + assert workspace_intents._is_pid_alive(0) is False + assert workspace_intents._valid_path_list( + ["", "pkg/a.py"], + required=True, + ) == ["pkg/a.py"] + with pytest.raises(ValidationError): + IntentScopeModel.model_validate( + {"allowed_files": [], "allowed_related": [], "forbidden": []} + ) + with pytest.raises(ValidationError): + IntentScopeModel.model_validate( + { + "allowed_files": ["pkg/a.py"], + "allowed_related": "tests/a.py", + "forbidden": [], + } + ) + with pytest.raises(ValidationError): + IntentScopeModel.model_validate( + {"allowed_files": ["pkg/a.py"], "allowed_related": [], "forbidden": [1]} + ) + tampered = workspace_intents.signed_payload(_record()) + tampered["integrity"] = {"payload_sha256": "not-a-valid-digest"} + assert workspace_intents.verify_intent_integrity(tampered) is False + tampered["integrity"] = {"payload_sha256": "0" * 64} + assert workspace_intents.verify_intent_integrity(tampered) is False + assert workspace_intents._valid_path_list("pkg/a.py", required=True) is None + assert workspace_intents._valid_path_list([1], required=True) is None + assert 
workspace_intents._valid_path_list(["/abs.py"], required=True) is None + assert workspace_intents._valid_path_list(["../abs.py"], required=True) is None + assert workspace_intents._valid_path_list(["pkg/a.py/"], required=True) == [ + "pkg/a.py" + ] + assert workspace_intents._scope_all_sets({"allowed_files": "pkg/a.py"}) == ( + set(), + set(), + (), + ) + assert workspace_intents._parse_utc("2026-01-01T00:00:00") is None + assert workspace_intents._sort_agent_pid(True) == 0 + assert workspace_intents._sort_agent_pid("123") == 0 + assert workspace_intents._sort_agent_pid(123) == 123 + assert workspace_intents._overlap_type(hard=False, soft=True) == "soft" + + def raise_permission_error(pid: int, signal: int) -> None: + raise PermissionError + + def raise_oserror(pid: int, signal: int) -> None: + raise OSError + + def raise_process_lookup(pid: int, signal: int) -> None: + raise ProcessLookupError + + monkeypatch.setattr(os, "kill", raise_permission_error) + assert workspace_intents._pid_liveness(123) == workspace_intents.PidLiveness.UNKNOWN + assert workspace_intents._is_pid_alive(123) is False + monkeypatch.setattr(os, "kill", raise_oserror) + assert workspace_intents._is_pid_alive(123) is True + monkeypatch.setattr(os, "kill", raise_process_lookup) + assert workspace_intents._is_pid_alive(123) is False + + from codeclone.surfaces.mcp._workspace_intent_lifecycle import is_orphaned + + monkeypatch.setattr( + "codeclone.surfaces.mcp._workspace_intent_lifecycle.pid_liveness", + lambda _pid: workspace_intents.PidLiveness.DEAD, + ) + assert is_orphaned(_record()) is True + + from codeclone.surfaces.mcp._workspace_intent_lifecycle import ( + is_lease_expired, + lease_expiry, + ) + + broken_lease = replace(_record(), lease_renewed_at_utc="not-a-timestamp") + assert lease_expiry(broken_lease) is None + assert is_lease_expired(broken_lease) is True + + path = tmp_path / "intent.json" + path.write_text("{}", "utf-8") + + def raise_unlink_oserror(self: Path, missing_ok: bool = 
False) -> None: + raise OSError("unlink failed") + + monkeypatch.setattr(Path, "unlink", raise_unlink_oserror) + assert workspace_intents._unlink(path) is False + + def raise_resolve_oserror(self: Path, strict: bool = False) -> Path: + raise OSError("resolve failed") + + monkeypatch.setattr(Path, "resolve", raise_resolve_oserror) + assert ( + workspace_intents._is_safe_intent_path( + tmp_path / "intent.json", + workspace_intents.registry_dir(tmp_path), + ) + is False + ) + + +def test_workspace_intent_safe_path_edge_helpers(tmp_path: Path) -> None: + registry = workspace_intents.registry_dir(tmp_path) + registry.mkdir(parents=True) + good = registry / "123-456-intent-good.json" + good.write_text("{}", encoding="utf-8") + + assert workspace_intents._is_safe_intent_path(good, registry) + assert ( + workspace_intents._is_safe_intent_path(Path("relative.json"), registry) is False + ) + assert ( + workspace_intents._is_safe_intent_path(tmp_path / "outside.json", registry) + is False + ) + assert ( + workspace_intents._is_safe_intent_path(registry / "bad.json", registry) is False + ) + + directory_target = registry / "123-456-intent-dir.json" + directory_target.mkdir() + assert workspace_intents._is_safe_intent_path(directory_target, registry) is False + + non_normalized = registry / ".." 
/ "123-456-intent-other.json" + assert workspace_intents._is_safe_intent_path(non_normalized, registry) is False + + +def test_workspace_intent_registry_defensive_failure_edges( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record( + intent_id="intent-expired-cleanup", expires_delta=timedelta(days=-1) + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + path = workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + + corrupted = workspace_intents.registry_dir(tmp_path) / "123-456-intent-bad.json" + corrupted.write_text("{", encoding="utf-8") + cleanup = workspace_intents.gc_workspace(root=tmp_path) + assert cleanup["corrupted_filenames"] == ["123-456-intent-bad.json"] + + monkeypatch.setattr(intent_paths, "unlink", lambda item: False) + assert workspace_intents.gc_workspace(root=tmp_path)["removed"] == 0 + + def raise_read_error(item: Path) -> dict[str, object]: + raise ValueError("bad json") + + monkeypatch.setattr(intent_paths, "read_json_object", raise_read_error) + assert workspace_intents._read_payload(path) is None + + def raise_glob_error(self: Path, pattern: str) -> tuple[Path, ...]: + raise OSError("glob failed") + + monkeypatch.setattr(Path, "glob", raise_glob_error) + assert workspace_intents.list_workspace_intents(root=tmp_path) == () + with pytest.raises(ValidationError): + IntentScopeModel.model_validate( + {"allowed_files": ["../outside.py"], "allowed_related": [], "forbidden": []} + ) + + def raise_safety_error(expected: Path, registry: Path) -> bool: + raise RuntimeError("safety check failed") + + monkeypatch.setattr(intent_paths, "is_safe_intent_path", raise_safety_error) + assert ( + workspace_intents.safe_remove_own_intent( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + + +def 
test_workspace_intent_foreign_stale_conflict_and_counts( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + active = _record(intent_id="intent-active-001", pid=111, start_epoch=100) + foreign_stale = _record( + intent_id="intent-stale-001", + pid=222, + start_epoch=200, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + orphaned = _record(intent_id="intent-orphaned-001", pid=333, start_epoch=300) + for record in (active, foreign_stale, orphaned): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + monkeypatch.setattr(_PID_ALIVE, lambda pid: pid != 333) + + counts = workspace_intents.workspace_status_counts(root=tmp_path) + assert counts == {"stale_count": 2, "orphaned_count": 1, "total_agents": 3} + + payload = workspace_intents.workspace_intent_to_payload( + foreign_stale, + own_pid=111, + own_start_epoch=100, + now=workspace_intents.utc_now(), + ) + assert payload["ownership"] == workspace_intents.IntentOwnership.FOREIGN_STALE.value + assert "owner may still be working" in str(payload["escalation_hint"]) + assert workspace_intents.stale_reason(foreign_stale) == "lease_expired" + assert workspace_intents._ttl_expired(foreign_stale) is False + + conflicts = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign_stale,), + own_pid=111, + own_start_epoch=100, + ) + assert conflicts == [ + { + "intent_id": foreign_stale.intent_id, + "agent_pid": 222, + "agent_start_epoch": 200, + "agent_label": "agent-a", + "intent": "edit pkg.a", + "ownership": workspace_intents.IntentOwnership.FOREIGN_STALE.value, + "severity": "stale", + "recommended_action": "coordinate_or_recover", + "overlap_type": "hard", + "hard_overlap": ["pkg/a.py"], + "soft_overlap": [], + "declared_at_utc": foreign_stale.declared_at_utc, + "expires_at_utc": foreign_stale.expires_at_utc, + } + ] + + assert ( + 
workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/other.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign_stale,), + own_pid=111, + own_start_epoch=100, + ) + == [] + ) + + +def test_workspace_intent_ownership_classification( + monkeypatch: pytest.MonkeyPatch, +) -> None: + now = workspace_intents.utc_now() + own = _record(pid=111, start_epoch=100) + own_stale = _record( + pid=111, + start_epoch=100, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + foreign = _record(pid=222, start_epoch=200) + foreign_stale = _record( + pid=222, + start_epoch=200, + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + expired = _record(expires_delta=timedelta(seconds=-1)) + + monkeypatch.setattr(_PID_ALIVE, lambda pid: pid != 333) + + assert ( + workspace_intents.classify_intent_ownership( + own, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.OWN_ACTIVE + ) + assert ( + workspace_intents.classify_intent_ownership( + own_stale, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.OWN_STALE + ) + assert ( + workspace_intents.classify_intent_ownership( + foreign, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_ACTIVE + ) + assert ( + workspace_intents.classify_intent_ownership( + foreign_stale, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_STALE + ) + dead_pid = _record(pid=333, start_epoch=300) + assert ( + workspace_intents.classify_intent_ownership( + dead_pid, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.RECOVERABLE + ) + monkeypatch.setattr( + _PID_LIVENESS, lambda pid: workspace_intents.PidLiveness.UNKNOWN + ) + assert ( + workspace_intents.classify_intent_ownership( + foreign, + own_pid=111, 
+ own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_ACTIVE + ) + assert ( + workspace_intents.classify_intent_ownership( + foreign_stale, + own_pid=111, + own_start_epoch=100, + now=now, + ) + == workspace_intents.IntentOwnership.FOREIGN_STALE + ) + assert ( + workspace_intents.classify_intent_ownership( + expired, + own_pid=expired.agent_pid, + own_start_epoch=expired.agent_start_epoch, + now=now, + ) + == workspace_intents.IntentOwnership.EXPIRED + ) + + +def test_workspace_intent_renew_lease_updates_timestamp(tmp_path: Path) -> None: + record = _record( + lease_renewed_delta=timedelta(minutes=-2), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + assert workspace_intents.verify_intent_integrity( + workspace_intents.signed_payload(updated) + ) + + +def test_workspace_intent_update_status_can_extend_ttl(tmp_path: Path) -> None: + record = _record(lease_renewed_delta=timedelta(minutes=-2)) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.update_workspace_intent_status( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + new_status="active", + ttl_seconds=workspace_intents.MIN_TTL_SECONDS, + ) + + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.ttl_seconds == workspace_intents.MIN_TTL_SECONDS + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + assert updated.lease_renewed_at_utc == updated.declared_at_utc + + +def 
test_workspace_intent_renew_lease_rejects_foreign_owner(tmp_path: Path) -> None: + record = _record() + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert ( + workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid + 1, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ) + is False + ) + assert workspace_intents.list_workspace_intents(root=tmp_path)[0] == record + + +def test_workspace_intent_v1_record_defaults_lease_fields() -> None: + record = _record() + payload = { + "registry_version": workspace_intents.LEGACY_REGISTRY_VERSION, + "intent_id": record.intent_id, + "agent_pid": record.agent_pid, + "agent_start_epoch": record.agent_start_epoch, + "agent_label": record.agent_label, + "run_id": record.run_id, + "declared_at_utc": record.declared_at_utc, + "expires_at_utc": record.expires_at_utc, + "ttl_seconds": record.ttl_seconds, + "status": record.status, + "intent": record.intent, + "scope": record.scope, + "scope_digest": record.scope_digest, + "blast_radius_summary": record.blast_radius_summary, + } + payload["integrity"] = { + "payload_sha256": workspace_intents.compute_intent_digest(payload) + } + + validated = workspace_intents.validate_workspace_record(payload) + + assert validated is not None + assert validated.lease_renewed_at_utc == record.declared_at_utc + assert validated.lease_seconds == workspace_intents.DEFAULT_LEASE_SECONDS + assert validated.report_digest == "" + + +def test_workspace_intent_conflict_detection() -> None: + existing = _record() + + hard = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + assert hard[0]["overlap_type"] == "hard" + assert hard[0]["hard_overlap"] == ["pkg/a.py"] + + soft = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["tests/test_a.py"], + 
"allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + assert soft[0]["overlap_type"] == "soft" + assert soft[0]["soft_overlap"] == ["tests/test_a.py"] + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=existing.agent_pid, + own_start_epoch=existing.agent_start_epoch, + ) + == [] + ) + + both = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": ["pkg/a.py"], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + assert both[0]["overlap_type"] == "both" + + +def test_workspace_intent_workspace_relations_forbidden_patterns( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + foreign = _record( + intent_id="intent-foreign-docs", + pid=111, + start_epoch=100, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + ) + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + == [] + ) + assert relations == [ + { + "intent_id": "intent-foreign-docs", + "agent_pid": 111, + "agent_start_epoch": 100, + "agent_label": "agent-a", + "intent": "edit pkg.a", + "ownership": "foreign_active", + "relation": "foreign_excludes_target", + "severity": "info", + "matching_patterns": ["docs/**"], + "message": "Foreign agent explicitly excludes files in current scope.", + "declared_at_utc": foreign.declared_at_utc, + 
"expires_at_utc": foreign.expires_at_utc, + } + ] + + +def test_workspace_intent_workspace_relations_target_excludes_foreign( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + foreign = _record( + intent_id="intent-foreign-docs", + pid=111, + start_epoch=100, + scope={ + "allowed_files": ["docs/readme.md"], + "allowed_related": [], + "forbidden": [], + }, + ) + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + + assert ( + workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": ["docs/**"], + }, + existing=(foreign,), + own_pid=222, + own_start_epoch=200, + ) + == [] + ) + assert relations[0]["relation"] == "target_excludes_foreign" + assert relations[0]["matching_patterns"] == ["docs/**"] + + +def test_workspace_intent_workspace_relations_include_edit_overlap() -> None: + existing = _record() + + relations = workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + + assert relations[0]["relation"] == "edit_overlap" + assert relations[0]["hard_overlap"] == ["pkg/a.py"] + + +def test_workspace_intent_workspace_relations_omit_disjoint_scope() -> None: + existing = _record() + + assert ( + workspace_intents.detect_workspace_relations( + new_scope={ + "allowed_files": ["pkg/other.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(existing,), + own_pid=123456, + own_start_epoch=999, + ) + == [] + ) + + +def test_workspace_intent_regression_stale_lease_silent_overlap( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Regression: expired lease + alive PID must produce conflict, not silence. 
+ + Timeline from real incident: + T0: PID A declares intent over files X, lease=300s + T0+301s: PID A lease expires, PID A still alive + T0+352s: PID B declares intent over same files X + Expected: PID B sees concurrent_intents with ownership=foreign_stale + """ + agent_a = _record( + intent_id="intent-a-001", + pid=1000, + start_epoch=100, + scope={ + "allowed_files": ["src/shared.py"], + "allowed_related": [], + "forbidden": [], + }, + lease_renewed_delta=timedelta(minutes=-6), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + + conflicts = workspace_intents.detect_conflicts( + new_scope={ + "allowed_files": ["src/shared.py"], + "allowed_related": [], + "forbidden": [], + }, + existing=(agent_a,), + own_pid=2000, + own_start_epoch=200, + ) + + conflict = conflicts[0] + assert len(conflicts) == 1 + for key, expected in ( + ("ownership", "foreign_stale"), + ("severity", "stale"), + ("recommended_action", "coordinate_or_recover"), + ("hard_overlap", ["src/shared.py"]), + ): + assert conflict[key] == expected, f"{key}: {conflict[key]!r} != {expected!r}" + + +def test_workspace_intent_renew_lease_with_custom_seconds(tmp_path: Path) -> None: + """Explicit lease_seconds on renew updates the workspace record.""" + record = _record( + lease_renewed_delta=timedelta(minutes=-2), + lease_seconds=workspace_intents.DEFAULT_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + assert workspace_intents.renew_workspace_intent_lease( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + lease_seconds=workspace_intents.MAX_LEASE_SECONDS, + ) + updated = workspace_intents.list_workspace_intents(root=tmp_path)[0] + assert updated.lease_seconds == workspace_intents.MAX_LEASE_SECONDS + assert updated.lease_renewed_at_utc != record.lease_renewed_at_utc + + +def test_workspace_intent_max_lease_seconds_ceiling() -> 
None: + """MAX_LEASE_SECONDS is 600 (10 minutes), not 3600.""" + assert workspace_intents.MAX_LEASE_SECONDS == 600 + assert workspace_intents.resolved_lease_seconds(9999) == 600 + assert workspace_intents.resolved_lease_seconds(60) == 60 + + +# ── intent_id format validation (path traversal hardening) ── + + +class TestSafeIntentId: + """_is_safe_intent_id rejects path traversal and control characters.""" + + @pytest.mark.parametrize( + "value", + [ + "intent-abcdef12-001", + "intent-run12345-003", + "simple", + "a", + "with.dot", + "with_underscore", + "A123-B456", + ], + ) + def test_accepts_safe_ids(self, value: str) -> None: + assert workspace_intents._is_safe_intent_id(value) is True + + @pytest.mark.parametrize( + ("value", "reason"), + [ + ("../../etc/passwd", "path traversal with ../"), + ("../target", "single-level traversal"), + ("foo/bar", "forward slash"), + ("foo\\bar", "backslash"), + ("", "empty string"), + ("-starts-with-dash", "leading dash"), + (".starts-with-dot", "leading dot"), + ("has\x00null", "NUL byte"), + ("has\nnewline", "newline"), + ("has space", "space"), + (None, "None value"), + (42, "integer"), + ("x" * 129, "too long (129 chars)"), + ], + ) + def test_rejects_unsafe_ids(self, value: object, reason: str) -> None: + assert workspace_intents._is_safe_intent_id(value) is False, reason + + def test_max_length_boundary(self) -> None: + assert workspace_intents._is_safe_intent_id("a" * 128) is True + assert workspace_intents._is_safe_intent_id("a" * 129) is False + + +def test_validate_workspace_record_rejects_traversal_intent_id() -> None: + """validate_workspace_record rejects intent_id with path separators.""" + malicious = _record(intent_id="../../etc/passwd") + payload = workspace_intents.signed_payload(malicious) + assert workspace_intents.validate_workspace_record(payload) is None + + +def test_remove_workspace_intent_rejects_traversal(tmp_path: Path) -> None: + """remove_workspace_intent returns False for traversal intent_id. 
+ + The function delegates to safe_remove_own_intent which validates + path containment. A crafted intent_id must not cause deletion + outside the registry directory. + """ + # Create a sentinel file that a traversal would target + sentinel = tmp_path / "do_not_delete.json" + sentinel.write_text("{}") + + result = workspace_intents.remove_workspace_intent( + root=tmp_path, + pid=1, + start_epoch=100, + intent_id="../../do_not_delete", + ) + assert result is False + assert sentinel.exists(), "sentinel file must survive traversal attempt" + + +def test_lazy_close_removes_ttl_expired_on_list( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + expired = _record( + intent_id="intent-expired-lazy-001", + expires_delta=timedelta(seconds=-1), + ) + active = _record(intent_id="intent-active-lazy-001", start_epoch=102) + for record in (expired, active): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + + assert workspace_intents.list_workspace_intents(root=tmp_path) == (active,) + from codeclone.surfaces.mcp._workspace_intent_paths import registry_files + + assert len(registry_files(tmp_path)) == 1 + + +def test_lazy_close_keeps_lease_only_stale( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record( + intent_id="intent-lease-only-001", + lease_renewed_delta=timedelta(minutes=-10), + lease_seconds=workspace_intents.MIN_LEASE_SECONDS, + ) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + + listed = workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) + assert listed == (record,) + + +def test_registry_lock_file_ignored_by_listing(tmp_path: Path) -> None: + record = _record(intent_id="intent-lock-001") + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + from codeclone.surfaces.mcp._workspace_intent_paths 
import registry_dir + + lock_path = registry_dir(tmp_path) / ".registry.lock" + lock_path.write_bytes(b"\0") + + assert workspace_intents.list_workspace_intents(root=tmp_path) == (record,) + + +def test_registry_files_skips_unsafe_entries(tmp_path: Path) -> None: + record = _record(intent_id="intent-safe-list-001") + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + from codeclone.surfaces.mcp._workspace_intent_paths import ( + registry_dir, + registry_files, + ) + + directory = registry_dir(tmp_path) + (directory / "unsafe.json").write_text("{}", encoding="utf-8") + symlink_path = directory / "999-100-intent-symlink-001.json" + symlink_path.symlink_to(tmp_path / "outside.json") + + assert registry_files(tmp_path) == ( + workspace_intents.intent_path( + root=tmp_path, + pid=record.agent_pid, + start_epoch=record.agent_start_epoch, + intent_id=record.intent_id, + ), + ) + + +def test_write_workspace_intent_with_existing_snapshots_before_write( + tmp_path: Path, +) -> None: + existing = _record(intent_id="intent-existing-001", start_epoch=100) + new = _record(intent_id="intent-new-001", start_epoch=101) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=existing) + + seen, registered = workspace_intents.write_workspace_intent_with_existing( + root=tmp_path, + record=new, + ) + + assert registered is True + assert seen == (existing,) + assert workspace_intents.list_workspace_intents(root=tmp_path) == ( + existing, + new, + ) + + +def test_gc_removal_reason_keeps_orphaned_for_recovery( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp._workspace_intent_staleness import gc_removal_reason + + record = _record(intent_id="intent-orphan-recover-001", pid=33333) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(_PID_ALIVE, lambda pid: False) + + assert workspace_intents.stale_reason(record) == "orphaned" + assert 
gc_removal_reason(record) == "orphaned" + assert gc_removal_reason(record, for_lazy_close=True) is None + + +def test_lazy_close_keeps_dead_pid_for_recovery( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + record = _record(intent_id="intent-orphan-lazy-001", pid=33333) + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + monkeypatch.setattr(_PID_ALIVE, lambda pid: False) + + assert workspace_intents.list_workspace_intents( + root=tmp_path, + exclude_stale=False, + ) == (record,) + from codeclone.surfaces.mcp._workspace_intent_paths import registry_files + + assert len(registry_files(tmp_path)) == 1 + + +def test_foreign_dirty_overlaps_only_live_foreign_agents( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp._workspace_hygiene import _foreign_dirty_overlaps + from codeclone.surfaces.mcp._workspace_intent_store import ( + get_workspace_intent_store, + ) + + monkeypatch.setattr(_PID_ALIVE, lambda pid: pid == 11111) + live_foreign = _record( + intent_id="intent-live-foreign-001", + pid=11111, + start_epoch=100, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + ) + recoverable_foreign = _record( + intent_id="intent-dead-foreign-001", + pid=99999, + start_epoch=200, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + ) + terminal_foreign = replace( + _record( + intent_id="intent-clean-foreign-001", + pid=44444, + start_epoch=300, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + ), + status="clean", + ) + for record in (live_foreign, recoverable_foreign, terminal_foreign): + assert workspace_intents.write_workspace_intent(root=tmp_path, record=record) + + store = get_workspace_intent_store(tmp_path) + overlaps = _foreign_dirty_overlaps( + dirty_paths=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id=None, + ) + 
assert len(overlaps) == 1 + assert overlaps[0].foreign_intent_id == live_foreign.intent_id + assert overlaps[0].foreign_ownership == "foreign_active" + + +def test_foreign_dirty_overlaps_skip_queued_foreign( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from codeclone.surfaces.mcp._workspace_hygiene import _foreign_dirty_overlaps + from codeclone.surfaces.mcp._workspace_intent_store import ( + get_workspace_intent_store, + ) + + monkeypatch.setattr(_PID_ALIVE, lambda pid: True) + queued_foreign = replace( + _record( + intent_id="intent-queued-foreign-001", + pid=11111, + start_epoch=100, + scope={ + "allowed_files": ["pkg/a.py"], + "allowed_related": [], + "forbidden": [], + }, + ), + status="queued", + ) + assert workspace_intents.write_workspace_intent( + root=tmp_path, + record=queued_foreign, + ) + store = get_workspace_intent_store(tmp_path) + overlaps = _foreign_dirty_overlaps( + dirty_paths=["pkg/a.py"], + store=store, + own_pid=22222, + own_start_epoch=400, + own_intent_id=None, + ) + assert overlaps == () + + +def test_hygiene_blocks_start_edit_continue_own_wip() -> None: + from codeclone.surfaces.mcp._workspace_hygiene import ( + DIRTY_SCOPE_POLICY_BLOCK, + DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + ForeignDirtyOverlap, + WorkspaceHygieneResult, + hygiene_blocks_start_edit, + ) + + dirty = WorkspaceHygieneResult( + git_available=True, + dirty_paths=("pkg/a.py",), + dirty_paths_in_scope=("pkg/a.py",), + dirty_paths_outside_scope=(), + foreign_dirty_overlaps=(), + blocks_edit=True, + ) + assert hygiene_blocks_start_edit( + dirty, + dirty_scope_policy=DIRTY_SCOPE_POLICY_BLOCK, + ) + assert not hygiene_blocks_start_edit( + dirty, + dirty_scope_policy=DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + ) + foreign = replace( + dirty, + foreign_dirty_overlaps=( + ForeignDirtyOverlap( + path="pkg/a.py", + foreign_intent_id="intent-foreign-001", + foreign_persisted_status="active", + foreign_ownership="foreign_active", + foreign_agent_label="other", + 
message="overlap", + ), + ), + ) + assert hygiene_blocks_start_edit( + foreign, + dirty_scope_policy=DIRTY_SCOPE_POLICY_CONTINUE_OWN_WIP, + ) diff --git a/tests/workspace_intent_gate_helpers.py b/tests/workspace_intent_gate_helpers.py new file mode 100644 index 00000000..16dc4b16 --- /dev/null +++ b/tests/workspace_intent_gate_helpers.py @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from dataclasses import replace +from pathlib import Path + +import pytest + +from codeclone.surfaces.mcp import _workspace_intents as workspace_intents +from codeclone.surfaces.mcp._workspace_intents import WorkspaceIntentRecord +from codeclone.workspace_intent.gate import ( + WorkspaceEditGateDecision, + evaluate_workspace_edit_gate, +) +from tests.test_workspace_intents import _record + +CODEX_AGENT_LABEL = "codex-mcp-client/0.137.0-alpha.4" +CURSOR_AGENT_LABEL = "cursor-vscode/1.0.0" + + +def assert_gate_denied( + root: Path, + *, + reason: str, +) -> WorkspaceEditGateDecision: + decision = evaluate_workspace_edit_gate(root) + assert decision.allowed is False + assert decision.reason == reason + return decision + + +def codex_foreign_record( + *, + intent_id: str = "intent-foreign-001", + pid: int | None = None, + start_epoch: int = 100, + status: str = "active", +) -> WorkspaceIntentRecord: + return replace( + _record( + intent_id=intent_id, + pid=pid or (os.getpid() + 5000), + start_epoch=start_epoch, + status=status, + ), + agent_label=CODEX_AGENT_LABEL, + ) + + +def cursor_vscode_record( + *, + intent_id: str = "intent-abcdef12-001", + pid: int | None = None, + start_epoch: int = 100, + status: str = "active", +) -> WorkspaceIntentRecord: + return replace( + _record( + 
intent_id=intent_id, + pid=pid, + start_epoch=start_epoch, + status=status, + ), + agent_label=CURSOR_AGENT_LABEL, + ) + + +def write_workspace_record(root: Path, record: WorkspaceIntentRecord) -> None: + assert workspace_intents.write_workspace_intent(root=root, record=record) + + +def bind_hook_own_agent_env( + monkeypatch: pytest.MonkeyPatch, + record: WorkspaceIntentRecord, +) -> None: + monkeypatch.setenv("CODECLONE_HOOK_OWN_AGENT_PID", str(record.agent_pid)) + monkeypatch.setenv( + "CODECLONE_HOOK_OWN_AGENT_START_EPOCH", + str(record.agent_start_epoch), + ) diff --git a/uv.lock b/uv.lock index 7421c364..9eb6c78f 100644 --- a/uv.lock +++ b/uv.lock @@ -3,7 +3,20 @@ revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.15'", - "python_full_version < '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", + "python_full_version < '3.11'", +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] [[package]] @@ -17,16 +30,16 @@ wheels = [ [[package]] name = "anyio" -version = "4.13.0" +version = "4.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "idna" }, { name = "typing-extensions", 
marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/b5/001890774a9552aff22502b8da382593109ce0c95314abaebbb116567545/anyio-4.14.0.tar.gz", hash = "sha256:b47c1f9ccf73e67021df785332508f99379c68fa7d0684e8e3492cb1d4b23f89", size = 253586, upload-time = "2026-06-15T22:00:49.021Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, + { url = "https://files.pythonhosted.org/packages/ba/16/9826f089383c593cdfc4a6e5aca94d9e91ae1692c57af82c3b2aa5e810f7/anyio-4.14.0-py3-none-any.whl", hash = "sha256:dd9b7a2a9799ed6552fde617b2c5df02b7fdd7d88392fc48101e51bae46164d9", size = 123506, upload-time = "2026-06-15T22:00:47.595Z" }, ] [[package]] @@ -105,11 +118,11 @@ wheels = [ [[package]] name = "certifi" -version = "2026.4.22" +version = "2026.6.17" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077, upload-time = "2026-04-22T11:26:11.191Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/c7/424b75da314c1045981bd9777432fad05a9e0c69daa4ed7e308bbaffe405/certifi-2026.6.17.tar.gz", hash = "sha256:024c88eeec92ca068db80f02b8b07c9cef7b9fe261d1d535abfd5abd6f6af432", size = 134594, upload-time = "2026-06-17T10:31:07.894Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" }, + { url = "https://files.pythonhosted.org/packages/ef/2f/c5464532e965badff2f4c4c1a3a83f5697f0d7c407ed0cda44aaa99bb451/certifi-2026.6.17-py3-none-any.whl", hash = "sha256:2227dcbaafe0d2f59279d1762ddddc37783ed4354594f194ffc31d20f41fc3db", size = 133289, upload-time = "2026-06-17T10:31:06.348Z" }, ] [[package]] @@ -310,29 +323,41 @@ wheels = [ [[package]] name = "click" -version = "8.4.0" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/23/e4/796662cd90cf80e3a363c99db2b88e0e394b988a575f60a17e16440cd011/click-8.4.0.tar.gz", hash = "sha256:638f1338fe1235c8f4e008e4a8a254fb5c5fbdcbb40ece3c9142ebb78e792973", size = 350843, upload-time = "2026-05-17T00:47:58.425Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/ae/8e92f8058baf87f6c7d86ee7e457668690195cc77efedb8d3797a06e3940/click-8.4.0-py3-none-any.whl", hash = "sha256:40c50b7c6c6adac2823d411041ec84f3f103f1b280d5e9ce0d7f998995832f81", size = 116147, upload-time = "2026-05-17T00:47:56.842Z" }, + { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, ] [[package]] 
name = "codeclone" -version = "2.0.2" +version = "2.1.0a1" source = { editable = "." } dependencies = [ { name = "orjson" }, { name = "packaging" }, + { name = "pydantic" }, { name = "pygments" }, { name = "rich" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] [package.optional-dependencies] +analytics = [ + { name = "fastembed" }, + { name = "hdbscan" }, + { name = "lancedb" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "umap-learn", marker = "python_full_version < '3.14'" }, +] +coverage-xml = [ + { name = "defusedxml" }, +] dev = [ { name = "build" }, { name = "mypy" }, @@ -346,25 +371,54 @@ mcp = [ { name = "httpx" }, { name = "mcp" }, ] +perf = [ + { name = "psutil" }, +] +semantic-fastembed = [ + { name = "fastembed" }, +] +semantic-lancedb = [ + { name = "lancedb" }, +] +semantic-local = [ + { name = "fastembed" }, + { name = "lancedb" }, +] +token-bench = [ + { name = "tiktoken" }, +] [package.metadata] requires-dist = [ { name = "build", marker = "extra == 'dev'", specifier = ">=1.4.3" }, + { name = "defusedxml", marker = "extra == 'coverage-xml'", specifier = ">=0.7.1,<0.8" }, + { name = "fastembed", marker = "extra == 'analytics'", specifier = ">=0.8.0,<0.9" }, + { name = "fastembed", marker = "extra == 'semantic-fastembed'", specifier = ">=0.8.0,<0.9" }, + { name = "fastembed", marker = "extra == 'semantic-local'", specifier = ">=0.8.0,<0.9" }, + { name = "hdbscan", marker = "extra == 'analytics'", specifier = ">=0.8.0" }, { name = "httpx", marker = "extra == 'mcp'", specifier = ">=0.27.1,<1" }, - { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.27.0,<2" }, + { name = "lancedb", marker = "extra == 'analytics'", specifier = ">=0.33.0" }, + { name = "lancedb", marker = "extra == 
'semantic-lancedb'", specifier = ">=0.33.0" }, + { name = "lancedb", marker = "extra == 'semantic-local'", specifier = ">=0.33.0" }, + { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.28.0,<2" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.20.1" }, { name = "orjson", specifier = ">=3.11.9" }, { name = "packaging", specifier = ">=24.0" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.5.1" }, + { name = "psutil", marker = "extra == 'perf'", specifier = ">=7,<8" }, + { name = "pydantic", specifier = ">=2.13.4" }, { name = "pygments", specifier = ">=2.20.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.1.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=7.1.0" }, { name = "rich", specifier = ">=15.0.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.13" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.17" }, + { name = "scikit-learn", marker = "extra == 'analytics'", specifier = ">=1.5.0" }, + { name = "tiktoken", marker = "extra == 'token-bench'", specifier = ">=0.13.0" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=6.2.0" }, + { name = "umap-learn", marker = "python_full_version < '3.14' and extra == 'analytics'", specifier = ">=0.5.0" }, ] -provides-extras = ["mcp", "dev"] +provides-extras = ["mcp", "token-bench", "coverage-xml", "semantic-lancedb", "semantic-fastembed", "semantic-local", "analytics", "perf", "dev"] [[package]] name = "colorama" @@ -375,117 +429,129 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" 
}, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "humanfriendly", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, +] + [[package]] name = "coverage" -version = "7.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/7f/d0720730a397a999ffc0fd3f5bebef347338e3a47b727da66fbb228e2ff2/coverage-7.14.0.tar.gz", hash = "sha256:057a6af2f160a85384cde4ab36f0d2777bae1057bae255f95413cdd382aa5c74", size = 919489, upload-time = "2026-05-10T18:02:31.397Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/9d/7c83ef51c3eb495f10010094e661833588b7709946da634c8b66520b97c7/coverage-7.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:84c32d90bf4537f0e7b4dec9aaa9a938fb8205136b9d2ecf4d7629d5262dc075", size = 219668, upload-time = "2026-05-10T17:59:23.106Z" }, - { url = "https://files.pythonhosted.org/packages/24/34/898546aefbd28f0af131201d0dc852c9e976f817bd7d5bfb8dc4e02863bb/coverage-7.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7c843572c605ab51cfdb5c6b5f2586e2a8467c0d28eca4bdef4ec70c5fecbd82", size = 220192, upload-time = "2026-05-10T17:59:26.095Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/4a/b457c88aca72b0df13a98167ebd5d947135ccd9881ea88ce6a570e13aa9b/coverage-7.14.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0c451757d3fa2603354fdc789b5e58a0e327a117c370a40e3476ba4eabab228c", size = 246932, upload-time = "2026-05-10T17:59:27.806Z" }, - { url = "https://files.pythonhosted.org/packages/b5/d9/92600e89486fd074c50f0117422b2c9592c3e144e2f25bd5ac0bc62bc7a0/coverage-7.14.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3fd43f0616e765ab78d069cf8358def7363957a45cee446d65c502dcfeea7893", size = 248762, upload-time = "2026-05-10T17:59:29.479Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e1/9ea1eb9c311da7f15853559dc1d9d82bef88ecd3e59fbeb51f16bc2ffa91/coverage-7.14.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:731e535b1498b27d13594a0527a79b0510867b0ad891532be41cb883f2128e20", size = 250625, upload-time = "2026-05-10T17:59:31.33Z" }, - { url = "https://files.pythonhosted.org/packages/a5/03/57afca1b8106f8549a5329139315041fe166d6099bd9381346b9430dfbd1/coverage-7.14.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c7492f2d493b976941c7ca050f273cbda2f43c381124f7586a3e3c16d1804fec", size = 252539, upload-time = "2026-05-10T17:59:32.692Z" }, - { url = "https://files.pythonhosted.org/packages/57/5e/2e9fc63c9928119c1dbae02222be51407d3e7ebac5811ebbda4af3557795/coverage-7.14.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dc38367eaa2abb1b766ac333142bce7655335a73537f5c8b75aaa89c2b987757", size = 247636, upload-time = "2026-05-10T17:59:34.599Z" }, - { url = "https://files.pythonhosted.org/packages/f0/e2/0b7898cda21041cc67546e19b80ba66cbbb47cbece52a76a5904de6a3aaf/coverage-7.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:0a951308cde22cf77f953955a754d04dccb57fe3bb8e345d685778ed9fc1632a", size = 248666, upload-time = "2026-05-10T17:59:36.232Z" }, - { url = "https://files.pythonhosted.org/packages/d6/e3/d33662a2fdaef23229c15921f39c84ec38441f3069ba26e134ed402c833b/coverage-7.14.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fab3877e4ebb06bd9d4d4d00ee53309ee5478e66873c66a382272e3ee33eb7ea", size = 246670, upload-time = "2026-05-10T17:59:38.029Z" }, - { url = "https://files.pythonhosted.org/packages/99/b2/533942c3bfbf6770b5c32d7f2ff029fe013dba31f3fe8b45cabbb250365e/coverage-7.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b812eb847b19876ebf33fb6c4f11819af05ab6050b0bfa1bc53412ae81779adb", size = 250484, upload-time = "2026-05-10T17:59:39.974Z" }, - { url = "https://files.pythonhosted.org/packages/d8/00/15acbad83a96de13c73831486c7627bfed73dfaec53b04e4a6315edf3fd8/coverage-7.14.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d9c8ef6ed820c433de075657d72dda1f89a2984955e58b8a75feb3f184250218", size = 246942, upload-time = "2026-05-10T17:59:41.659Z" }, - { url = "https://files.pythonhosted.org/packages/70/db/cef0228de493f2c740c760a9057a61d00c6849480073b70a75b87c7d4bab/coverage-7.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d128b1bba9361fbaaf6a19e179e6cfd6a9103ce0c0555876f72780acc93efd85", size = 247544, upload-time = "2026-05-10T17:59:43.471Z" }, - { url = "https://files.pythonhosted.org/packages/77/a0/d9ef8e148f3025c2ae8401d77cda1502b6d2a4d8102603a8af31460aedb6/coverage-7.14.0-cp310-cp310-win32.whl", hash = "sha256:65f267ca1370726ec2c1aa38bbe4df9a71a740f22878d2d4bf59d71a4cd8d323", size = 222285, upload-time = "2026-05-10T17:59:44.908Z" }, - { url = "https://files.pythonhosted.org/packages/85/c0/30c454c7d3cf47b2805d4e06f12443f5eece8a5d030d3b0350e7b74ecb49/coverage-7.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:b34ece8065914f938ed7f2c5872bb865336977a52919149846eac3744327267a", size = 223215, upload-time = "2026-05-10T17:59:46.779Z" }, - { url 
= "https://files.pythonhosted.org/packages/fc/e4/649c8d4f7f1709b6dbfc474358aa1bba02f67bcd52e2fec291a5014006cd/coverage-7.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a78e2a9d9c5e3b8d4ab9b9d28c985ea66fced0a7d7c2aec1f216e03a2011480", size = 219795, upload-time = "2026-05-10T17:59:48.198Z" }, - { url = "https://files.pythonhosted.org/packages/7f/8d/46692d24b3f395d4cbf17bfcc57136b4f2f9c0c0df864b0bddfc1d71a014/coverage-7.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1816c505187592dcd1c5a5f226601a549f70365fbd00930ac88b0c225b76bb4", size = 220299, upload-time = "2026-05-10T17:59:49.683Z" }, - { url = "https://files.pythonhosted.org/packages/12/c2/a40f5cb295bbcbb697a76947a56081c494c61950366294ee426ffe261099/coverage-7.14.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d8e1762f0e9cbc26ec315471e7b47855218e833cd5a032d706fbf43845d878c7", size = 250721, upload-time = "2026-05-10T17:59:51.494Z" }, - { url = "https://files.pythonhosted.org/packages/fd/35/202235eb5c3c14c212462cd91d61b7386bf8fc44bc7a77f4742d2a69174b/coverage-7.14.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9336e23e8bb3a3925398261385e2a1533957d3e760e91070dcb0e98bfa514eed", size = 252633, upload-time = "2026-05-10T17:59:53.244Z" }, - { url = "https://files.pythonhosted.org/packages/bb/80/5f596e8995785124ee191c42535664c5e62c65995b66f4ca21e28ae04c81/coverage-7.14.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cd1169b2230f9cbe9c638ba38022ed7a2b1e641cc07f7cea0365e4be2a74980", size = 254743, upload-time = "2026-05-10T17:59:55.021Z" }, - { url = "https://files.pythonhosted.org/packages/1e/6d/0d178825be2350f0adb27984d0aa7cf84bbdab201f6fb926b535d23a8f5f/coverage-7.14.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d1bb3543b58fea74d2cd1abc4054cc927e4724687cb4560cd2ed88d2c7d820c0", size = 256700, 
upload-time = "2026-05-10T17:59:56.511Z" }, - { url = "https://files.pythonhosted.org/packages/19/5b/9e549c2f6e9dfea472adadba06c294e64735dabc2dd19015fac082095013/coverage-7.14.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a93bac2cb577ef60074999ed56d8a1535894398e2ed920d4185c3ec0c8864742", size = 250854, upload-time = "2026-05-10T17:59:57.94Z" }, - { url = "https://files.pythonhosted.org/packages/3d/1c/b94f9f5f36396021ee2f62c5834b12e6a3d31f0bed5d6fc6d1c3caec087c/coverage-7.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5904abf7e18cddc463219b17552229650c6b79e061d31a1059283051169cf7d5", size = 252433, upload-time = "2026-05-10T17:59:59.688Z" }, - { url = "https://files.pythonhosted.org/packages/b5/cb/d192cd8e1345eccabc32016f2d39072ecd10cb4f4b983ed8d0ebdeaf00dc/coverage-7.14.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:741f57cddc9004a8c81b084660215f33a6b597dbe62c31386b983ee26310e327", size = 250494, upload-time = "2026-05-10T18:00:01.953Z" }, - { url = "https://files.pythonhosted.org/packages/53/c5/aac9f460a41d835dbddef1d377f105f6ac2311d0f3c1588e9f51046d8813/coverage-7.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:664123feb0929d7affc135717dbd70d61d98688a08ab1e5ba464739620c6252d", size = 254261, upload-time = "2026-05-10T18:00:03.779Z" }, - { url = "https://files.pythonhosted.org/packages/23/aa/7af7c0081980a9cb3d289c5a435a4b7657dcecbd128e25c580e6a50389b5/coverage-7.14.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:c83d2399a51bbec8429266905d33616f04bc5726b1138c35844d5fcd896b2e20", size = 250216, upload-time = "2026-05-10T18:00:05.262Z" }, - { url = "https://files.pythonhosted.org/packages/35/60/a4257538ce2f6b978aeb51870d6c4208c510928a03db7e0339bb625dccb7/coverage-7.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb2e855b87321259a037429288ae85216d191c74de3e79bf57cd2bc0761992c", size = 251125, upload-time = "2026-05-10T18:00:06.858Z" }, - { url = 
"https://files.pythonhosted.org/packages/a1/ab/f91af47642ec1aa53490e835a95847168d9c77fc39aa58527604c051e145/coverage-7.14.0-cp311-cp311-win32.whl", hash = "sha256:731dc15b385ac52289743d476245b61e1a2927e803bef655b52bc3b2a75a21f3", size = 222300, upload-time = "2026-05-10T18:00:08.608Z" }, - { url = "https://files.pythonhosted.org/packages/f0/f0/a71ddbd874431e7a7cd96071f0c331cfbbad07704833c765d24ffbab8a67/coverage-7.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:bfb0ed8ec5d25e93face268115d7964db9df8b9aae8edcde9ec6b16c726a7cc1", size = 223241, upload-time = "2026-05-10T18:00:10.746Z" }, - { url = "https://files.pythonhosted.org/packages/d8/6e/d9d312a5151a96cd110efee32efc3fc97b01ebd86203fe618ccb29cf4c92/coverage-7.14.0-cp311-cp311-win_arm64.whl", hash = "sha256:7ebb1c6df9f78046a1b1e0a89674cd4bf73b7c648914eebcf976a57fd99a5627", size = 221908, upload-time = "2026-05-10T18:00:12.242Z" }, - { url = "https://files.pythonhosted.org/packages/09/1e/2f996b2c8415cbb6f54b0f5ec1ee850c96d7911961afb4fc05f4a89d8c58/coverage-7.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7ffd19fc8aed057fd686a17a4935eef5f9859d69208f96310e893e64b9b6ccf5", size = 219967, upload-time = "2026-05-10T18:00:13.756Z" }, - { url = "https://files.pythonhosted.org/packages/34/23/35c7aea1274aef7525bdd2dc92f710bdde6d11652239d71d1ec450067939/coverage-7.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:829994cfe1aeb773ca27bf246d4badc1e764893e3bfb98fff820fcecd1ca4662", size = 220329, upload-time = "2026-05-10T18:00:15.264Z" }, - { url = "https://files.pythonhosted.org/packages/75/cf/a8f4b43a16e194b0261257ad28ded5853ec052570afef4a84e1d81189f3b/coverage-7.14.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b4f07cf7edcb7ec39431a5074d7ea83b29a9f71fcfc494f0f40af4e65180420f", size = 251839, upload-time = "2026-05-10T18:00:17.16Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/ff/6699e7b71e60d3049eb2bdcbc95ee3f35707b2b0e48f32e9e63d3ce30c08/coverage-7.14.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca3d9cf2c32b521bd9518385608787fa86f38daf993695307531822c3430ed67", size = 254576, upload-time = "2026-05-10T18:00:18.829Z" }, - { url = "https://files.pythonhosted.org/packages/22/ec/c936d495fcd67f48f03a9c4ad3297ff80d1f222a5df3980f15b34c186c21/coverage-7.14.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92af52828e7f29d827346b0294e5a0853fa206db77db0395b282918d41e28db9", size = 255690, upload-time = "2026-05-10T18:00:20.648Z" }, - { url = "https://files.pythonhosted.org/packages/5c/42/5af63f636cc62a4a2b1b3ba9146f6ee6f53a35a50d5cefc54d5670f60999/coverage-7.14.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b2bb6c9d7e769360d0f20a0f219603fd64f0c8f97de17ab25853261602be0fb", size = 257949, upload-time = "2026-05-10T18:00:22.28Z" }, - { url = "https://files.pythonhosted.org/packages/26/d3/a225317bd2012132a27e1176d51660b826f99bb975876463c44ea0d7ee5a/coverage-7.14.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1c9ed6ef99f88fb8c14aa8e2bf8eb0fe55fa2edfea68f8675d78741df1a5ac0e", size = 252242, upload-time = "2026-05-10T18:00:24.076Z" }, - { url = "https://files.pythonhosted.org/packages/f1/7f/9e65495298c3ea414742998539c37d048b5e81cc818fb1828cc6b51d10bf/coverage-7.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8231ade007f37959fbf58acc677f26b922c02eda6f0428ea307da0fd39681bf3", size = 253608, upload-time = "2026-05-10T18:00:25.588Z" }, - { url = "https://files.pythonhosted.org/packages/94/46/1522b524a35bdad22b2b8c4f9d32d0a104b524726ec380b2db68db1746f5/coverage-7.14.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d8b013632cc1ce1d09dbe4f32667b4d320ec2f54fc326ebeffcd0b0bcc2bb6c4", size = 251753, upload-time 
= "2026-05-10T18:00:27.104Z" }, - { url = "https://files.pythonhosted.org/packages/f3/e9/cdf00d38817742c541ade405e115a3f7bf36e6f2a8b99d4f209861b85a2d/coverage-7.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1733198802d71ec4c524f322e2867ee05c62e9e75df86bdca545407a221827d1", size = 255823, upload-time = "2026-05-10T18:00:29.038Z" }, - { url = "https://files.pythonhosted.org/packages/38/fc/5e7877cf5f902d08a17ff1c532511476d87e1bea355bd5028cb97f902e79/coverage-7.14.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:72a305291fa8ee01332f1aaf38b348ca34097f6aa0b0ef627eef2837e57bbba5", size = 251323, upload-time = "2026-05-10T18:00:30.647Z" }, - { url = "https://files.pythonhosted.org/packages/18/9d/50f05a72dff8487464fdd4178dda5daed642a060e60afb644e3d45123559/coverage-7.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fcaba850dd317c65423a9d63d88f9573c53b00354d6dd95724576cc98a131595", size = 253197, upload-time = "2026-05-10T18:00:32.211Z" }, - { url = "https://files.pythonhosted.org/packages/00/3f/6f61ffe6439df266c3cf60f5c99cfaa21103d0210d706a42fc6c30683ff8/coverage-7.14.0-cp312-cp312-win32.whl", hash = "sha256:5ac83957a80d0701310e96d8bec68cdcf4f90a7674b7d13f15a344315b41ab27", size = 222515, upload-time = "2026-05-10T18:00:33.717Z" }, - { url = "https://files.pythonhosted.org/packages/85/19/93853133df2cb371083285ef6a93982a0173e7a233b0f61373ba9fd30eb2/coverage-7.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:70390b0da32cb90b501953716302906e8bcce087cb283e70d8c97729f22e92b2", size = 223324, upload-time = "2026-05-10T18:00:35.172Z" }, - { url = "https://files.pythonhosted.org/packages/74/18/9f7fe62f659f24b7a82a0be56bf94c1bd0a89e0ae7ab4c668f6e82404294/coverage-7.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:91b993743d959b8be85b4abf9d5478216a69329c321efe5be0433c1a841d691d", size = 221944, upload-time = "2026-05-10T18:00:37.014Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/76/b7c66ee3c66e1b0f9d894c8125983aa0c03fb2336f2fd16559f9c966157f/coverage-7.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f2bbb8254370eb4c628ff3d6fa8a7f74ddc40565394d4f7ab791d1fe568e37ef", size = 219990, upload-time = "2026-05-10T18:00:38.887Z" }, - { url = "https://files.pythonhosted.org/packages/b3/af/e567cbad5ba69c013a50146dfa886dc7193361fda77521f51274ff620e1b/coverage-7.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:23b81107f46d3f21d0cbce30664fcec0f5d9f585638a67081750f99738f6bf66", size = 220365, upload-time = "2026-05-10T18:00:40.864Z" }, - { url = "https://files.pythonhosted.org/packages/44/6f/9ad575d505b4d805b254febc8a5b338a2efe278f8786e56ff1cb8413f9c3/coverage-7.14.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:22a7e06a5f11a757cdfe79018e9095f9f69ae283c5cd8123774c788deec8717b", size = 251363, upload-time = "2026-05-10T18:00:42.489Z" }, - { url = "https://files.pythonhosted.org/packages/6f/5f/b5370068b2f57787454592ed7dcd1002f0f1703b7db1fa30f6a325a4ca6e/coverage-7.14.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9d1aa57a1dc8e05bdc42e81c5d671d849577aeedf279f4c449d6d286f9ed88ca", size = 253961, upload-time = "2026-05-10T18:00:44.079Z" }, - { url = "https://files.pythonhosted.org/packages/29/1e/51adf17738976e8f2b85ddef7b7aa12a0838b056c92f175941d8862767c1/coverage-7.14.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90c1a51bcfddf645b3bb7ec333d9e94393a8e94f55642380fa8a9a5a9e636cb7", size = 255193, upload-time = "2026-05-10T18:00:45.623Z" }, - { url = "https://files.pythonhosted.org/packages/9e/7b/5bfd7ac1df3b881c2ac7a5cbc99c7609e6296c402f5ef587cd81c6f355b3/coverage-7.14.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a841fae2fadcae4f438d43b6ccc4aac2ad609f47cdb6cfdce60cbb3fe5ca7bc2", size = 257326, 
upload-time = "2026-05-10T18:00:47.173Z" }, - { url = "https://files.pythonhosted.org/packages/7d/38/1d37d316b174fad3843a1d76dbdfe4398771c9ecd0515935dd9ece9cd627/coverage-7.14.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c79d2319cabef1fe8e86df73371126931550804738f78ad7d31e3aad85a67367", size = 251582, upload-time = "2026-05-10T18:00:49.152Z" }, - { url = "https://files.pythonhosted.org/packages/34/46/746704f95980ba220214e1a41e18cec5aea80a898eaa53c51bf2d645ff36/coverage-7.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b23b0c6f0b1db6ad769b7050c8b641c0bf215ded26c1816955b17b7f26edfa9", size = 253325, upload-time = "2026-05-10T18:00:51.252Z" }, - { url = "https://files.pythonhosted.org/packages/e1/b9/bbe87206d9687b192352f893797825b5f5b15ecd3aa9c68fbff0c074d77b/coverage-7.14.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:55d3089079ce181a4566b1065ab28d2575eb76d8ac8f81f4fcda2bf037fee087", size = 251291, upload-time = "2026-05-10T18:00:52.816Z" }, - { url = "https://files.pythonhosted.org/packages/46/57/b8cdb12ac0d73ef0243218bd5e22c9df8f92edab8018213a86aec67c5324/coverage-7.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:49c005cba1e2f9677fb2845dcdf9a2e72a52a17d63e8231aaaae35d9f50215ef", size = 255448, upload-time = "2026-05-10T18:00:54.548Z" }, - { url = "https://files.pythonhosted.org/packages/1f/d4/5002019538b2036ce3c84340f54d2fd5100d55b0a6b0894eee56128d03c7/coverage-7.14.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:9117377b823daa28aa8635fbb08cda1cd6be3d7143257345459559aeef852d52", size = 251110, upload-time = "2026-05-10T18:00:56.122Z" }, - { url = "https://files.pythonhosted.org/packages/37/53/20c5009477660f084e6ed60bc02a91894b8e234e617e86ecfd9aaf78e27b/coverage-7.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7b79d646cf46d5cf9a9f40281d4441df5849e445726e369006d2b117710b33fe", size = 252885, upload-time = "2026-05-10T18:00:57.967Z" }, - { url = 
"https://files.pythonhosted.org/packages/ae/ab/3cf6427ac9c1f1db747dbb1ce71dde47984876d4c2cfd018a3fef0a78d4d/coverage-7.14.0-cp313-cp313-win32.whl", hash = "sha256:fb609b3658479e33f9516d46f1a89dbb9b6c261366e3a11844a96ec487533dae", size = 222539, upload-time = "2026-05-10T18:00:59.581Z" }, - { url = "https://files.pythonhosted.org/packages/8f/b8/9228523e80321c2cb4880d1f589bc0171f2f71432c35118ad04dc01decce/coverage-7.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0773d8329cf32b6fd222e4b52622c61fe8d503eb966cfc8d3c3c10c96266d50e", size = 223344, upload-time = "2026-05-10T18:01:01.531Z" }, - { url = "https://files.pythonhosted.org/packages/a3/99/118daa192f95e3a6cb2740100fbf8797cda1734b4134ef0b5d501a7fa8f3/coverage-7.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:b4e26a0f1b696faf283bffe5b8569e44e336c582439df5d53281ab89ee0cba96", size = 221966, upload-time = "2026-05-10T18:01:03.16Z" }, - { url = "https://files.pythonhosted.org/packages/e6/f1/a46cc0c013be170216253184a32366d7cbdb9252feaec866b05c2d12a894/coverage-7.14.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:953f521ca9445300397e65fda3dca58b2dbd68fee983777420b57ac3c77e9f90", size = 220679, upload-time = "2026-05-10T18:01:05.058Z" }, - { url = "https://files.pythonhosted.org/packages/64/8c/9c30a3d311a34177fa432995be7fbfc64477d8bac5630bd38055b1c9b424/coverage-7.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:98af83fd65ae24b1fdd03aaead967a9f523bcd2f1aab2d4f3ffda65bb568a6f1", size = 221033, upload-time = "2026-05-10T18:01:07.002Z" }, - { url = "https://files.pythonhosted.org/packages/9a/cd/3fb5e06c3badefd0c1b47e2044fdca67f8220a4ec2e7fcfb476aa0a67c6c/coverage-7.14.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:668b92e6958c4db7cf92e81caac328dfbbdbb215db2850ad28f0cbe1eea0bfbd", size = 262333, upload-time = "2026-05-10T18:01:08.903Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/e6/fbc322325c7294d3e22c1ad6b79e45d0806b25228c8e5842aed6d8169aa7/coverage-7.14.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9fbd898551762dea00d3fef2b1c4f99afd2c6a3ff952ea07d60a9bd5ed4f34bc", size = 264410, upload-time = "2026-05-10T18:01:10.531Z" }, - { url = "https://files.pythonhosted.org/packages/08/92/c497b264bec1673c47cc77e26f760fcda4654cabf1f39546d1a23a3b8c35/coverage-7.14.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68af363c07ecd8d4b7d4043d85cb376d7d227eceb54e5323ee45da73dbd3e426", size = 266836, upload-time = "2026-05-10T18:01:12.19Z" }, - { url = "https://files.pythonhosted.org/packages/78/fc/045da320987f401af5d2815d351e8aa799aec859f60e29f445e3089eeedb/coverage-7.14.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6e57054a583da8ac55edf24117ea4c9133032cfc4cf72aa2d48c1e5d4b52f899", size = 267974, upload-time = "2026-05-10T18:01:13.926Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ae/227b1e379497fb7a4fc3286e620f80c8a1e7cec66d45695a01639eb1af65/coverage-7.14.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cc3499459bbcdd51a65b64c35ab7ed2764eaf3cba826e0df3f1d7fe2e102b70b", size = 261578, upload-time = "2026-05-10T18:01:15.564Z" }, - { url = "https://files.pythonhosted.org/packages/a0/f5/3570342900f2acea31d33ff1590c5d8bac1a8e1a2e1c6d34a5d5e61de681/coverage-7.14.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:45899ec2138a4346ed34d601dedf5076fb74edf2d1dd9dc76a78e82397edee90", size = 264394, upload-time = "2026-05-10T18:01:17.607Z" }, - { url = "https://files.pythonhosted.org/packages/16/29/de1bbc01c935b28f89b1dc3db85b011c055e843a8e5e3b83141c3f80af7f/coverage-7.14.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8767486808c436f05b23ab98eb963fb29185e32a9357a166971685cb3459900f", size = 262022, 
upload-time = "2026-05-10T18:01:19.304Z" }, - { url = "https://files.pythonhosted.org/packages/35/95/f53890b0bf2fc10ab168e05d38869215e73ca24c4cb521c3bb0eb62fe16b/coverage-7.14.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a3b5ddfd6aa7ddad53ee3edb231e88a2151507a43229b7d71b953916deca127d", size = 265732, upload-time = "2026-05-10T18:01:21.494Z" }, - { url = "https://files.pythonhosted.org/packages/ed/ea/c919e259081dd2bdf0e43b87209709ba7ec2e4117c2a7f5185379c43463c/coverage-7.14.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:63df0fe568e698e1045792399f8ab6da3a6c2dce3182813fb92afa2641087b47", size = 260921, upload-time = "2026-05-10T18:01:23.533Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2c/c2831889705a81dc5d1c6ca12e4d8e9b95dfc146d153488a6c0ea685d28e/coverage-7.14.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:827d6397dbd95144939b18f89edf31f63e1f99633e8d5f32f22ba8bdda567477", size = 263109, upload-time = "2026-05-10T18:01:25.165Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a9/2fcae5003cac3d63fe344d2166243c2756935f48420863c5272b240d550b/coverage-7.14.0-cp313-cp313t-win32.whl", hash = "sha256:7bf43e000d24012599b879791cff41589af90674722421ef11b11a5431920bab", size = 223212, upload-time = "2026-05-10T18:01:27.157Z" }, - { url = "https://files.pythonhosted.org/packages/3f/bb/18e94d7b14b9b398164197114a587a04ab7c9fdbe1d237eef57311c5e883/coverage-7.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3f5549365af25d770e06b1f8f5682d9a5637d06eb494db91c6fa75d3950cc917", size = 224272, upload-time = "2026-05-10T18:01:29.107Z" }, - { url = "https://files.pythonhosted.org/packages/db/56/4f14fad782b035c81c4ffd09159e7103d42bb1d93ac8496d04b90a11b7da/coverage-7.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:6d160217ec6fe890f16ad3a9531761589443749e448f91986c972714fad361c8", size = 222530, upload-time = "2026-05-10T18:01:31.151Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/18/b9a6586d73992807c26f9a5f274131be3d76b56b18a82b9392e2a25d2e45/coverage-7.14.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9aed9fa983514ca032790f3fe0d1c0e42ca7e16b42432af1706b50a9a46bef5d", size = 220036, upload-time = "2026-05-10T18:01:33.057Z" }, - { url = "https://files.pythonhosted.org/packages/f3/9b/4165a1d56ddc302a0e2d518fd9d412a4fd0b57562618c78c5f21c57194f5/coverage-7.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ba3b8390db29296dbbf49e91b6fe08f990743a90c8f447ba4c2ffc29670dfa63", size = 220368, upload-time = "2026-05-10T18:01:34.705Z" }, - { url = "https://files.pythonhosted.org/packages/69/aa/c12e52a5ba148d9995229d557e3be6e554fe469addc0e9241b2f0956d8ea/coverage-7.14.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3a5d8e876dfa2f102e970b183863d6dedd023d3c0eeca1fe7a9787bc5f28b212", size = 251417, upload-time = "2026-05-10T18:01:36.949Z" }, - { url = "https://files.pythonhosted.org/packages/d7/51/ec641c26e6dca1b25a7d2035ba6ecb7c884ef1a100a9e42fbe4ce4405139/coverage-7.14.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5ebb8f4614a3787d567e610bbfdf96a4798dd69a1afb1bd8ad228d4111fe6ff3", size = 253924, upload-time = "2026-05-10T18:01:38.985Z" }, - { url = "https://files.pythonhosted.org/packages/33/c4/59c3de0bd1b538824173fd518fed51c1ce740ca5ed68e74545983f4053a9/coverage-7.14.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b9bf47223dd8db3d4c4b2e443b02bace480d428f0822c3f991600448a176c97", size = 255269, upload-time = "2026-05-10T18:01:40.957Z" }, - { url = "https://files.pythonhosted.org/packages/7b/a9/36dfa153a62040296f6e7febfdb20a5720622f6ef5a81a41e8237b9a5344/coverage-7.14.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3485a836550b303d006d57cc06e3d5afaabc642c77050b7c985a97b13e3776b8", size = 257583, 
upload-time = "2026-05-10T18:01:42.607Z" }, - { url = "https://files.pythonhosted.org/packages/26/7b/cc2c048d4114d9ab1c2409e9ee365e5ae10736df6dffcfc9444effa6c708/coverage-7.14.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3e7e88110bae996d199d1693ca8ec3fd52441d426401ae963437598667b4c5eb", size = 251434, upload-time = "2026-05-10T18:01:44.537Z" }, - { url = "https://files.pythonhosted.org/packages/ee/df/6770eaa576e604575e9a78055313250faef5faa84bd6f71a39fece519c43/coverage-7.14.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15228a6800ce7bdf1b74800595e56db7138cecb338fdbf044806e10dcf182dfe", size = 253280, upload-time = "2026-05-10T18:01:46.175Z" }, - { url = "https://files.pythonhosted.org/packages/ad/9e/1c0264514a3f98259a6d64765a397b2c8373e3ba59ee722a4802d3ec0c61/coverage-7.14.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9d26ac7f5398bafc5b57421ad994e8a4749e8a7a0e62d05ec7d53014d5963bfa", size = 251241, upload-time = "2026-05-10T18:01:48.732Z" }, - { url = "https://files.pythonhosted.org/packages/64/16/4efdf3e3c4079cdbf0ece56a2fea872df9e8a3e15a13a0af4400e1075944/coverage-7.14.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb73254ff43c911c967a899e1359bc5049b4b115d6e8fbdde4937d0a2246cd5", size = 255516, upload-time = "2026-05-10T18:01:50.819Z" }, - { url = "https://files.pythonhosted.org/packages/93/69/b1de96346603881b3d1bc8d6447c83200e1c9700ffbaff926ba01ff5724c/coverage-7.14.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:454a380af72c6adada298ed270d38c7a391288198dbfb8467f786f588751a90c", size = 251059, upload-time = "2026-05-10T18:01:52.773Z" }, - { url = "https://files.pythonhosted.org/packages/a4/66/2881853e0363a5e0a724d1103e53650795367471b6afb234f8b49e713bc6/coverage-7.14.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:65c86fb646d2bd2972e96bd1a8b45817ed907cee68655d6295fe7ec031d04cca", size = 252716, upload-time = "2026-05-10T18:01:54.506Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/5c/0d3305d002c41dcde873dbe456491e663dc55152ca526b630b5c47efd62f/coverage-7.14.0-cp314-cp314-win32.whl", hash = "sha256:6a6516b02a6101398e19a3f44820f69bab2590697f7def4331f668b14adaf828", size = 222788, upload-time = "2026-05-10T18:01:56.487Z" }, - { url = "https://files.pythonhosted.org/packages/f9/58/6e1b8f52fdc3184b47dc5037f5070d83a3d11042db1594b02d2a44d786c8/coverage-7.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:45e0f79d8351fa76e256716df91eab12890d32678b9590df7ae1042e4bd4cf5d", size = 223600, upload-time = "2026-05-10T18:01:58.497Z" }, - { url = "https://files.pythonhosted.org/packages/00/70/a18c408e674bc26281cadaedc7351f929bd2094e191e4b15271c30b084cc/coverage-7.14.0-cp314-cp314-win_arm64.whl", hash = "sha256:4b899594a8b2d81e5cc064a0d7f9cac2081fed91049456cae7676787e41549c9", size = 222168, upload-time = "2026-05-10T18:02:00.411Z" }, - { url = "https://files.pythonhosted.org/packages/3d/89/2681f071d238b62aff8dfc2ab44fc24cfdb38d1c01f391a80522ff5d3a16/coverage-7.14.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f580f8c80acd94ac72e863efe2cab791d8c38d153e0b463b92dfa000d5c84cd1", size = 220766, upload-time = "2026-05-10T18:02:02.313Z" }, - { url = "https://files.pythonhosted.org/packages/bd/c7/c987babafd9207ffa1995e1ef1f9b26762cf4963aa768a66b6f0501e4616/coverage-7.14.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a2bd259c442cd43c49b30fbafc51776eb19ea396faf159d26a83e6a0a5f13b0c", size = 221035, upload-time = "2026-05-10T18:02:04.017Z" }, - { url = "https://files.pythonhosted.org/packages/5a/e9/d6a5ac3b333088143d6fc877d398a9a674dc03124a2f776e131f03864823/coverage-7.14.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a706b908dfa85538863504c624b237a3cc34232bf403c057414ebfdb3b4d9f84", size = 262405, upload-time = "2026-05-10T18:02:05.915Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/b1/e70838d29a7c08e22d44398a46db90815bbcbf28de06992bd9210d1a8d8e/coverage-7.14.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7333cd944ee4393b9b3d3c1b598c936d4fc8d70573a4c7dacfec5590dd50e436", size = 264530, upload-time = "2026-05-10T18:02:07.582Z" }, - { url = "https://files.pythonhosted.org/packages/6b/73/5c31ef97763288d03d9995152b96d5475b527c63d91c84b01caea894b83a/coverage-7.14.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f162bc9a15b82d947b02651b0c7e1609d6f7a8735ca330cfadec8481dd97d5a", size = 266932, upload-time = "2026-05-10T18:02:09.401Z" }, - { url = "https://files.pythonhosted.org/packages/e1/76/dd56d80f29c5f05b4d76f7e7c6d47cafacae017189c75c5759d24f9ff0cc/coverage-7.14.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:362cb78e01a5dc82009d88004cf60f2e6b6d6fcbfdec05b05af73b0abf40118f", size = 268062, upload-time = "2026-05-10T18:02:11.399Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c7/27ba85cd5b95614f159ff93ebff1901584a8d192e2e5e24c4943a7453f59/coverage-7.14.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:acebd068fca5512c3a6fde9c045f901613478781a73f0e82b307b214daef23fb", size = 261504, upload-time = "2026-05-10T18:02:13.257Z" }, - { url = "https://files.pythonhosted.org/packages/13/2e/e8149f60ab5d5684c6eee881bdf34b127115cddbb958b196768dd9d63473/coverage-7.14.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:29fe3da551dface75deb2ccbf87b6b66e2e7ef38f6d89050b428be94afff3490", size = 264398, upload-time = "2026-05-10T18:02:15.063Z" }, - { url = "https://files.pythonhosted.org/packages/d9/7f/1261b025285323225f4b4abffa5a643649dfd67e25ddca7ebcbdea3b7cb3/coverage-7.14.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b4cc4fce8672fffcb09b0eafc167b396b3ba53c4a7230f54b7aaffbf6c835fa9", size = 262000, 
upload-time = "2026-05-10T18:02:16.756Z" }, - { url = "https://files.pythonhosted.org/packages/d3/dc/829c54f60b9d08389439c00f813c752781c496fc5788c78d8006db4b4f2b/coverage-7.14.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5d4a51aad8ba8bdcd2b8bd8f03d4aca19693fa2327a3470e4718a25b03481020", size = 265732, upload-time = "2026-05-10T18:02:18.817Z" }, - { url = "https://files.pythonhosted.org/packages/ed/b0/70bd1419941652fa062689cba9c3eeafb8f5e6fbb890bce41c3bdda5dbd6/coverage-7.14.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:9f323af3e1e4f68b60b7b247e37b8515563a61375518fa59de1af48ba28a3db6", size = 260847, upload-time = "2026-05-10T18:02:20.528Z" }, - { url = "https://files.pythonhosted.org/packages/f2/73/be40b2390656c654d35ea0015ea7ba3d945769cf80790ad5e0bb2d56d2ba/coverage-7.14.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1a0abc7342ea9711c469dd8b821c6c311e6bc6aac1442e5fbd6b27fae0a8f3db", size = 263166, upload-time = "2026-05-10T18:02:22.337Z" }, - { url = "https://files.pythonhosted.org/packages/29/55/4a643f712fcf7cf2881f8ec1e0ccb7b164aff3108f69b51801246c8799f2/coverage-7.14.0-cp314-cp314t-win32.whl", hash = "sha256:a9f864ef57b7172e2db87a096642dd51e179e085ab6b2c371c29e885f65c8fb2", size = 223573, upload-time = "2026-05-10T18:02:24.11Z" }, - { url = "https://files.pythonhosted.org/packages/27/96/3acae5da0953be042c0b4dea6d6789d2f080701c77b88e44d5bd41b9219b/coverage-7.14.0-cp314-cp314t-win_amd64.whl", hash = "sha256:29943e552fdc08e082eb51400fb2f58e118a83b5542bd06531214e084399b644", size = 224680, upload-time = "2026-05-10T18:02:25.896Z" }, - { url = "https://files.pythonhosted.org/packages/93/3d/6ab5d2dd8325d838737c6f8d83d62eb6230e0d70b87b51b57bbfd08fa767/coverage-7.14.0-cp314-cp314t-win_arm64.whl", hash = "sha256:742a73ea621953b012f2c4c2219b512180dd84489acf5b1596b0aafc55b9100b", size = 222703, upload-time = "2026-05-10T18:02:27.822Z" }, - { url = 
"https://files.pythonhosted.org/packages/61/e8/cb8e80d6f9f55b99588625062822bf946cf03ed06315df4bd8397f5632a1/coverage-7.14.0-py3-none-any.whl", hash = "sha256:8de5b61163aee3d05c8a2beab6f47913df7981dad1baf82c414d99158c286ab1", size = 211764, upload-time = "2026-05-10T18:02:29.538Z" }, +version = "7.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/fd/0ab2772530e946e1be1abd0bc09e647ec9b02e88f0867857601fefca8953/coverage-7.14.1.tar.gz", hash = "sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be", size = 920132, upload-time = "2026-05-26T20:41:36.783Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/69/0d2ef01ff4b8fcecd4cba920d11e92fa4f96ae412441d3b56a90a258e69b/coverage-7.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e3680291c4a1d0dadfa84a2c459576a4af5133abb617905714339a0c73138cf", size = 219722, upload-time = "2026-05-26T20:38:14.002Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ae/9afdeaa31b9d9ce98124b6abf8bb49119bf71aecae04f8567c189d91299f/coverage-7.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a5274669f37f2343635a347b91a60777621341ab3378e9c6ac9335eee704bddf", size = 220240, upload-time = "2026-05-26T20:38:17.424Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/c998589871df7ea7dba865cc5ee32b5a3e1d47ba6c68ef91104c7c46fa5e/coverage-7.14.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfe5a5fec635799ef33428f1e5e61bafa45a92a96190ba731561ba558ccc214d", size = 246981, upload-time = "2026-05-26T20:38:19.266Z" }, + { url = "https://files.pythonhosted.org/packages/fc/10/1c7d04c13040dac531d21b712bbe08f902e6dd9b58f5d77875c4d030f8f2/coverage-7.14.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:62a9f70b52e0b5a95cfef4a5c5641b06983cadc5e538a3feeb5c00211f523ac2", size = 248812, upload-time = "2026-05-26T20:38:20.75Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/65/2a38a4607ef27cadcfbcee034dba5830ae2569f90144a0f4c7dbf47d30b0/coverage-7.14.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c18ebc343e15be53049b3a2dce38fe82d58f37e20ab9094b3a39c0aa4f6bb47", size = 250675, upload-time = "2026-05-26T20:38:22.159Z" }, + { url = "https://files.pythonhosted.org/packages/c9/a2/a446ed9752a4a59b79e0fb6cbb319f6facb2183045c0725462625e66f87e/coverage-7.14.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b84ffdf877644e7096aa936991efeed873f7f3df57b9cd001312b7668ab08550", size = 252590, upload-time = "2026-05-26T20:38:23.63Z" }, + { url = "https://files.pythonhosted.org/packages/9e/fd/e81fbd7ba752365546e9842b1cbdaad3d6919d2a522c590aef16a281ec5e/coverage-7.14.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e854312c4103f2ad4c0dc023b69b77ebfd2c89db5f86c4c94dc2353f9a92167e", size = 247691, upload-time = "2026-05-26T20:38:25.057Z" }, + { url = "https://files.pythonhosted.org/packages/53/35/f3c26fdaae9ea937d154ca4d372e5ea0a4167ff70d36c6074ac2eacb2f83/coverage-7.14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c643734307300234fafa36bf2a040a7235f8f177ea1fd6ec1423aea6fb7b929f", size = 248716, upload-time = "2026-05-26T20:38:26.406Z" }, + { url = "https://files.pythonhosted.org/packages/2e/14/940b6c49551fd343e8507ee2b0ba7af5d0aa04ed5bf768285cb7c72a9884/coverage-7.14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84ac9499e48700399a5dd0ea7085b5091961fec52c68d66b4ec0d3cf7f4441b1", size = 246721, upload-time = "2026-05-26T20:38:28.282Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2c/40fc0634186c28292a662dff578866b3913983d6c375a3c2a74020938719/coverage-7.14.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:7f02d09f70776579b926d889a4c9c235070a1f47c40458aeaca563fae5acfdb5", size = 250533, upload-time = "2026-05-26T20:38:29.753Z" }, + { url 
= "https://files.pythonhosted.org/packages/de/e3/2c26bf1e811f9df991ff2a9bdddebdd13ee0665d564df7d05979f9146297/coverage-7.14.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:ce66d8e46da2bb5ee313a745cbd2e391d319176c1f7a9451bfcd3a2fb920859b", size = 246990, upload-time = "2026-05-26T20:38:31.516Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b0/060260ef56bd92363ebdce0c7095ce422b06e69aae71828efeca473ab1ca/coverage-7.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c912c259304cfb5ee584481cfb7ce1ff932b4d61e6c9140b8f19cb7b5ed82332", size = 247593, upload-time = "2026-05-26T20:38:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/63/f3/501502046efeb0d6d94b5ca54941d95f1184183dd6bdb7f283985783bb4a/coverage-7.14.1-cp310-cp310-win32.whl", hash = "sha256:1238cb94638e610e972c60dac68e813f868dc7d6e982535270558443058d9d59", size = 222330, upload-time = "2026-05-26T20:38:35.36Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5d/1bf99f2c558f128faf7906817ccbdb576ba815d3b41ce2ac1719b70a3663/coverage-7.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:fc459e5d73be2d6332fcfe8dbf3d8994671fe33c700f4565988ecfa511547253", size = 223261, upload-time = "2026-05-26T20:38:37.196Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d7/477ad149490e6cb849f28abea1dabb9c823cea72e7500c81b4240ce619c0/coverage-7.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:478b5bcd63c2e1357c5c7e16c070690df7b07f676b1c114d7b93e533c664309f", size = 219848, upload-time = "2026-05-26T20:38:38.715Z" }, + { url = "https://files.pythonhosted.org/packages/91/82/a5eb47257c50601bb7b9a9d2857c67b7a3a85ad74180eb2c98bb1fbe0ce5/coverage-7.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a24a81f9715ee42ef59a316cc11611c98fe23920f7c81861315c9f3ff4a230f4", size = 220354, upload-time = "2026-05-26T20:38:40.232Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/8b/78419b5391a5cb706b6544390507e469d83ffc9a8248b02c4011aceb9365/coverage-7.14.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:196a13319ad88d6d8ef5ab489ec4f44ddde2143c0c7d5b27786f6c3ffd56a7e1", size = 250771, upload-time = "2026-05-26T20:38:41.782Z" }, + { url = "https://files.pythonhosted.org/packages/77/63/e77aaacd491182210d639636b7a8bba23ffffa9b82aa3762da9431855fa9/coverage-7.14.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3d452fd08b5c72c5167c93e6867b5c08500bd40f2a21e1e854a500550b6cc36f", size = 252683, upload-time = "2026-05-26T20:38:43.305Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/a022e3cfbec2ac241640003cb3a817e161d9c7f5aa9b49173756cdc03204/coverage-7.14.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23bf7fa51ac02e07fc7c96849b82946da47ae862dc8f86d183b2a4864fc38129", size = 254791, upload-time = "2026-05-26T20:38:45.361Z" }, + { url = "https://files.pythonhosted.org/packages/61/d6/967e408aca4c1ceb88cb0cc677169110ae7f5995fb5eaf5fb1f5a1bb8f5d/coverage-7.14.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcaa50684dcaadfa599ac48f81103c756d791cfd85c97203d2217c593d48b860", size = 256748, upload-time = "2026-05-26T20:38:46.91Z" }, + { url = "https://files.pythonhosted.org/packages/b8/be/869188f7fe28638078ec479331ace6dc5f7b40b7153eb616f47ab79404d8/coverage-7.14.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4ea1c034f95c9b056e856b794630b17f9fa3d57e4800ff1e503d3be0f9c9078c", size = 250907, upload-time = "2026-05-26T20:38:48.493Z" }, + { url = "https://files.pythonhosted.org/packages/07/aa/adb7d3b4278d690e68703abcd76ab1b948242e3668d921711551b78f9ddb/coverage-7.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:c7e057326434e441306226fbeb5d1aaf14a2637efe97ba668306635835f32ad7", size = 252483, upload-time = "2026-05-26T20:38:50.074Z" }, + { url = "https://files.pythonhosted.org/packages/43/61/331c74103c62dcb0c4b9b3a0de9a61aca016208b0a90f109592a9f9ecc28/coverage-7.14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:59baf88468dbc8d63b1887afd92bda52e40bb1561696e5819670601403810cec", size = 250545, upload-time = "2026-05-26T20:38:51.613Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b6/c5dae3c104d89be04828f61810e6b3473825482e4c288cc4ed04553e08ae/coverage-7.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d75f892b3ab73ba11cab5442cce7b3e168fd64162b16f0e1e0d09c508edef", size = 254310, upload-time = "2026-05-26T20:38:53.503Z" }, + { url = "https://files.pythonhosted.org/packages/ad/a1/2b9d5863e3b83c01ad8199e3c597802fbb3a9dc90b058885804c20296d31/coverage-7.14.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3a56abc20a472baf0304c455721bc601477440d28ecfde8a03dde79ede07e0df", size = 250266, upload-time = "2026-05-26T20:38:55.414Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5e/0e511fbdb269359be26fe678a1c3fa1f2aa2a01573cc3f54268c8d6d4797/coverage-7.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a3cb83d1552c0cd1b4906655b6a33fd4a8473229633a901c6b73bf86914dee9", size = 251174, upload-time = "2026-05-26T20:38:57.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/10/e55307b622b3dd9671cb321824502dc10f93e72f2802b9946159a8edadeb/coverage-7.14.1-cp311-cp311-win32.whl", hash = "sha256:10274a1fbeb8ec5d72966e17bb198a3104257aca4ac09d98667c5f8aca8c8548", size = 222354, upload-time = "2026-05-26T20:38:58.727Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/107421693cfb71e4f1ca5bf70443f64d4161878068d07a3e51c7ad21d17b/coverage-7.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:87ebdf787d4888e3f3f2d523eadc6e18c6d18c6d0eb173801a189641627fb37e", size = 223290, upload-time = "2026-05-26T20:39:00.413Z" }, + { url 
= "https://files.pythonhosted.org/packages/b8/1d/3e3644585eb29e9dafefb19555078529a4d7cce12bd21929664eea989277/coverage-7.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:dd34767fa19848d35659ffc0a75314f58c7af3f1cd87ec521e8292a1238398a3", size = 221953, upload-time = "2026-05-26T20:39:02.159Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b7/bdbb725ba02c5b42825b200c940f38b7a54fcad24627b7192f78f8110d76/coverage-7.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a06c76364a9360e33d6d23769aefdf7f66f38e2ffb60ceb1baaa4989d83b695c", size = 220022, upload-time = "2026-05-26T20:39:03.702Z" }, + { url = "https://files.pythonhosted.org/packages/72/81/fdc0898a55c6219223291ec1a1fe89966ef212ce82276aa0899df84b5de0/coverage-7.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fad54e871165f6ec2f536063ac74c3104508a12963e64072ba44bd822de52b0c", size = 220379, upload-time = "2026-05-26T20:39:05.381Z" }, + { url = "https://files.pythonhosted.org/packages/de/72/de048c4a25e13bce59ac6a339351c10bdf2515e07459afcdaf04dc3143a2/coverage-7.14.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:84b535f00655ecafe1d929d1fb00ed5d6fa3051ea643ab2c161a3887b86f294b", size = 251888, upload-time = "2026-05-26T20:39:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/28/30/300c343f68beb9d4cbb64ec81e58c5b6b80b56927f72d2b38654ac26e013/coverage-7.14.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6b6b0853b895fe0e98cbfc580d1ec3393d9302b4b1e96a77b3f5c91fdab899e6", size = 254624, upload-time = "2026-05-26T20:39:09.037Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ed/7b25642496e8170b6bac14adce00537c6e5fa2d586159401a4de3e8b49e6/coverage-7.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:442cc9c952b2df400cda54bb04ab87330cf2cd08a8692cbbea36773531eb6f37", size = 255739, upload-time = "2026-05-26T20:39:10.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/a2/abd210b8c4e29c24e4624916db97bb519097a91034aaeb767f937e7da794/coverage-7.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8270544c361ed405a27a060dbc9ed2c124b084d96dfdc2d9a2510482aef981ad", size = 257998, upload-time = "2026-05-26T20:39:12.722Z" }, + { url = "https://files.pythonhosted.org/packages/7f/24/7c50beed3792fe62f6ce0545c6686ce83379719e2c0276179333d97eae92/coverage-7.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:48b283b1dd6372e8de2a7a9a4c4d5dc06f4d4fd209b876f3c88a7a205a0c8f84", size = 252296, upload-time = "2026-05-26T20:39:14.259Z" }, + { url = "https://files.pythonhosted.org/packages/15/05/0f874628ebcbfc77ead559ff210281ef06a97db08481832e7dd39274a135/coverage-7.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5b0c99ba93a07d56f6df340bb79be53202a082b2fdb81bfe6190b741a3470d54", size = 253658, upload-time = "2026-05-26T20:39:15.923Z" }, + { url = "https://files.pythonhosted.org/packages/99/6f/ca6ad067364b337ef997802115e7ecad2abd2248b05471464b0dea02b4d4/coverage-7.14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e471bc5769ff073b058cfadb0d736b56ce067c8560eabeb0da88462df98c23e7", size = 251803, upload-time = "2026-05-26T20:39:17.537Z" }, + { url = "https://files.pythonhosted.org/packages/c0/30/b9b4d377cd9f40baf228068f5a81faf8450c6228503011bd499708483a50/coverage-7.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f497a1ea81d4cd7c10ddcaa685135b9aabd291af3d55775a9ddf3cb7a364cdd9", size = 255873, upload-time = "2026-05-26T20:39:19.414Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/7c721a9e5e6bb88547d30a787aefb97512d3f54c1324c7488d9b3743f7f9/coverage-7.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2222be86d0b54f5dd5a38f45f17f315f737245e857bf0bdedc70734f84a13c02", size = 251372, upload-time = "2026-05-26T20:39:21.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/8c/f8ae5a2200130e1503cd7661a6cd3b2b7bacef98277fbf3571fb13f8b766/coverage-7.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85e85586565842f6932abebd4c18bcb1074223dc0b3576e7d173ca710622813a", size = 253245, upload-time = "2026-05-26T20:39:23.097Z" }, + { url = "https://files.pythonhosted.org/packages/34/62/70a9024672a5f6910517d9628c52c9afbdd3cf8f46426af52bb148a56fff/coverage-7.14.1-cp312-cp312-win32.whl", hash = "sha256:4a28fd227808366b196a75476dced2eb35b351d6766ba9c858dc93319e87f4f1", size = 222567, upload-time = "2026-05-26T20:39:24.868Z" }, + { url = "https://files.pythonhosted.org/packages/f6/81/8b7cd386839b039ebe1855733b9f9449a8dec5d79564018234f185a7fa70/coverage-7.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:54acdb6674a4661768d7bf7db32dfb9f46ab1d764f8aba6df75ce1a6a088724e", size = 223372, upload-time = "2026-05-26T20:39:26.603Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ba/b44d472022f620d289d95fa830143235c0c36461c6f2437ea8d51e5481ed/coverage-7.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:99cd41ff91afd94896fea3bc002706b6ae4ce95727d06e4a0f39c0a8d8bd8b1a", size = 221989, upload-time = "2026-05-26T20:39:28.242Z" }, + { url = "https://files.pythonhosted.org/packages/8a/9e/5f6d56327c62b185225d145191c607e07515294a0aa6338e58805cd4a5ac/coverage-7.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:be9f2c802dcfce3f71298303aa5dad0dce440a76c52f2f60dacd8656dab78793", size = 220044, upload-time = "2026-05-26T20:39:29.902Z" }, + { url = "https://files.pythonhosted.org/packages/75/92/e82aca356744cbbc0f77a0b623e38918c1872361963413a3bab5d0340393/coverage-7.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6223a72fd0e4c7156353ec0f08a5f93623e1d3034d0e2683b9bb8ea674131b1d", size = 220412, upload-time = "2026-05-26T20:39:31.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/c9/385bde0bf7ed0f4bf3a7ee5367060a86b5d218718cfd6fb943c0f836b34f/coverage-7.14.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7279d2110a28cebc738b6459ecda2771735a4c18465fbbd36b3288fe5ed92247", size = 251412, upload-time = "2026-05-26T20:39:33.337Z" }, + { url = "https://files.pythonhosted.org/packages/51/8c/23faf6a2343a0d17f960a4bd56c43bc7eb4cf312f774dd6ceebd82c7d8fc/coverage-7.14.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9eeb3fcbc13ba40dfbdb22d01d196a28e9cef9ed4c29b60061a1e0e823a9929d", size = 254008, upload-time = "2026-05-26T20:39:35.009Z" }, + { url = "https://files.pythonhosted.org/packages/42/06/36f4aa9ca8a815e6036156e80706a67828bb97bd826948244f6996dda957/coverage-7.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f0cfc27c539f07cf5c0a4cfe211d0b6cae039f8f40526dbaa71944e64b50a7b", size = 255241, upload-time = "2026-05-26T20:39:36.71Z" }, + { url = "https://files.pythonhosted.org/packages/ca/79/95266316352f90f6b1c6736bb413302edfde2453fb32422d3911642691b3/coverage-7.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:221c70f316241a78e77e607c227cefc8808d4e08f28d99c04f35694690e940be", size = 257373, upload-time = "2026-05-26T20:39:38.412Z" }, + { url = "https://files.pythonhosted.org/packages/e3/9c/58316d1f66c488b5fca8a0eb3e98348807813efa8a0d0833b9021be27488/coverage-7.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da028256b04ec30e5e0114b6f76172938c313991f0a2d3d894271315cf5d5e43", size = 251635, upload-time = "2026-05-26T20:39:40.268Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5a/ca2398a568e16fed7bb713e84ba3603a7164fb65779abe645c565ec890d5/coverage-7.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:76a085d7005236a767e3426148b2c407e53ad61695c562f8a81da2d373324901", size = 253373, upload-time = "2026-05-26T20:39:42.145Z" }, + { url = "https://files.pythonhosted.org/packages/6e/2c/0396562c32deaebe7be51d865b3a41e9a87d7561acafe1a28f53b07e019a/coverage-7.14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b553d04b5e778a8e56d57eb134aff42a92718ecba45e79c4764ecfa40efd92ff", size = 251341, upload-time = "2026-05-26T20:39:43.907Z" }, + { url = "https://files.pythonhosted.org/packages/fd/8f/a94f9221184c9cae1ee115820e3798e48b6b17777a9f19e46fb9a0c8dc74/coverage-7.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:46f714d2fb8ae2f4f29f23ada7f1e79b759fff5a70f94a1dac23af204c3ec9e4", size = 255497, upload-time = "2026-05-26T20:39:46.166Z" }, + { url = "https://files.pythonhosted.org/packages/71/69/505d70e47db1eaebcd002c39759707621ef184cd6b1ae084d9f41293f323/coverage-7.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:1896f5e19ff3f0431c7ce2172adc54890fd97f86b59ced8ca1649145d9ffe35d", size = 251159, upload-time = "2026-05-26T20:39:48.03Z" }, + { url = "https://files.pythonhosted.org/packages/e0/aa/58681c383aa33a9d2ed40a02d7a22fbf780d1fa4d575396365777828198c/coverage-7.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:62fd185ef9df3c33d1c8178c5af105f762afbad96038de9a4ae100aa6297ca33", size = 252934, upload-time = "2026-05-26T20:39:49.872Z" }, + { url = "https://files.pythonhosted.org/packages/eb/fd/11c928cd6bdffc7074bb5965c173d9ebf517fb00205e1da524b98d29ef92/coverage-7.14.1-cp313-cp313-win32.whl", hash = "sha256:ab4af6352741a604c431c6072fce5bee33bf0f20dc7a56618d6bf6bb89e9810c", size = 222584, upload-time = "2026-05-26T20:39:51.68Z" }, + { url = "https://files.pythonhosted.org/packages/6f/92/fb416fc26d340dcba19518c418d6048e913186e17243982c5e435e41fa7a/coverage-7.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:7af486dabe8954d03b087f0021540897afe084f04e16ff5579e08cc46f871416", size = 223394, upload-time = "2026-05-26T20:39:53.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/c6/02d56e3867972f77d5036de924643f26c056e848f00452cafb4dbc3c29b4/coverage-7.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:2224f89ffd0c5605ccce1ed7a584da162bc7c55f601ab1c946bc9de31a486b42", size = 222015, upload-time = "2026-05-26T20:39:55.374Z" }, + { url = "https://files.pythonhosted.org/packages/4d/9e/fcc77914050df73f7662fa1f00902774c79c075a8388ab334074574bf77e/coverage-7.14.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de286598cc65d2b489411174b1faec2f5a7775fb3201fd925db2a76b4030f37d", size = 220733, upload-time = "2026-05-26T20:39:57.189Z" }, + { url = "https://files.pythonhosted.org/packages/f7/67/2963cbdaf5cbadec44efa3a1e39eaa1f02df4079585f05387607a221e126/coverage-7.14.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:042c46ded7c288aeb07cf14a28b6c1e10b78fcba40171c3fa1e939377eeef0b5", size = 221086, upload-time = "2026-05-26T20:39:59.019Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/8701645574e11881f2f47d8930f98bc48b5d43b25eb5b4430dfc4a2f9f48/coverage-7.14.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f4ddbe407477f04c45115d1a4e5bc480f753553b534d338d4c3358b1cdd0ea52", size = 262381, upload-time = "2026-05-26T20:40:00.822Z" }, + { url = "https://files.pythonhosted.org/packages/7c/28/7a64d73598263e0c5abd5084211a8474488d31b3c552ff531c719dfcff62/coverage-7.14.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d13e6725992e2d2fd7d81d4f5241952d13740121dfd501da09201be39b2c003a", size = 264458, upload-time = "2026-05-26T20:40:02.506Z" }, + { url = "https://files.pythonhosted.org/packages/fa/d8/4969179db9f7eb4df218e69540adf829d1c835f59452513d065d15446802/coverage-7.14.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f747dc8edcfe740130f28f32f3995e955494285717e86ee25af51db2219df08a", size = 266884, upload-time = "2026-05-26T20:40:04.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/78/a45d5794dbc9bafd97afc96a4377c86c7820d78b6cf51b89bc1d4e919275/coverage-7.14.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ced2f09ef276fd58611a1ef502164ad266d2b75174e5a40cabbdb4033f9f6cf2", size = 268022, upload-time = "2026-05-26T20:40:06.298Z" }, + { url = "https://files.pythonhosted.org/packages/21/cb/4f5e354e9e3e67af96bd4e57113e6db6b22298c7168b13eec408a549903d/coverage-7.14.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b84800013769a78ccb9ef4659402e26d06867e337b61ec365f77ad008adea80e", size = 261631, upload-time = "2026-05-26T20:40:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/ec/49/eced49af4cb996d5d8b7e94e736175c513e4facd3398507b89892b4326d8/coverage-7.14.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ea8cd6ca0ee9f616aaef3afc6882e32c2cbf18b00d96313ffd76af650574034d", size = 264443, upload-time = "2026-05-26T20:40:10.137Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d8/5603a88a7c5913a6b54f6cb1a8c46f7b39cbb30f27cd3f492908da09b2d7/coverage-7.14.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:aa5e304a873fabddc11e484e9b6b738bd38bd7bed17b09aa84eecf5332e8b8bb", size = 262069, upload-time = "2026-05-26T20:40:11.999Z" }, + { url = "https://files.pythonhosted.org/packages/f0/59/2ae3cb79da554a06c8619d6c88ea19dd1e4aed4b834b6a83bb1fa243bdc5/coverage-7.14.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5a1c5215be81035e629d5bc756650634d0bf31991038db7a0eccb90f025ce16d", size = 265780, upload-time = "2026-05-26T20:40:13.858Z" }, + { url = "https://files.pythonhosted.org/packages/af/5f/b130c1dc999031f2648bd25317fbce505ad8d5562079b4ed81e736a84967/coverage-7.14.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:79058c47dae6788504b5effb319961bcd72d7240551464b91d474bc0ed186d69", size = 260970, upload-time = "2026-05-26T20:40:16.142Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/d1/ec13ccddeb48ec963bdfa72a11224bac2584bd045ba13beca82f8113e9c7/coverage-7.14.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:370c5afae3fa0658e11694a32b24c2778f6bc2d17718121f94ee185e69f26b54", size = 263157, upload-time = "2026-05-26T20:40:18.382Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c2/cd91ead503045161092d3845f7bb95ea2f25131ce96d3e314dd835d91b9c/coverage-7.14.1-cp313-cp313t-win32.whl", hash = "sha256:3758dd0a7f1fa57365ef2e781df0f0731d38b6e3772259d13dae4bd8a958d4b1", size = 223259, upload-time = "2026-05-26T20:40:20.381Z" }, + { url = "https://files.pythonhosted.org/packages/71/9f/1e28d97e6bd2c76b07f38b7c02870f1371255ff6717f54eca578fcbbdd0e/coverage-7.14.1-cp313-cp313t-win_amd64.whl", hash = "sha256:6ff665fb023a77386fe11685190cee1f60a7d635994a30d9b0a061533d470fce", size = 224320, upload-time = "2026-05-26T20:40:22.316Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e0/d936e908f0e1efa55e52b91e01b52f1055cef5e1ab2718493390ed8e2fb8/coverage-7.14.1-cp313-cp313t-win_arm64.whl", hash = "sha256:17a5a241e5997621a956a7f402a7433ef4221e5152809b785bec79e2323799f1", size = 222577, upload-time = "2026-05-26T20:40:24.894Z" }, + { url = "https://files.pythonhosted.org/packages/d6/34/fc2f101b151af3799a101f0550b0454aa008afdc0add677394ec4aa8ea10/coverage-7.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d5ed429d0b8edaac649e889b4ffcedb6c80b06629a3f93050e3dddfb99235bee", size = 220091, upload-time = "2026-05-26T20:40:27.249Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a7/1ebae2ab5b961b5c79bb09fe7b3ac99edb190d8be4a8c510b2cf66f46468/coverage-7.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8011224a62280e50dab346960c03cf47aca1a1e09e608c0fb33fd6e0cc8e9500", size = 220421, upload-time = "2026-05-26T20:40:30.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/90/92aca9cf0acc95123c96cd1eb1f08917897a7f5dee01e15738922971ec31/coverage-7.14.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:12c42ec1e14f553c4f817e989365982e646e27211f10a0f717855b94a79c8906", size = 251466, upload-time = "2026-05-26T20:40:32.542Z" }, + { url = "https://files.pythonhosted.org/packages/26/2b/78048cbe3b999f6cbf9cc0d90abba6a88a3e0863a8c1c6cbc762f3f8802f/coverage-7.14.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:06144cd511cf2624873a035c5069cf297144f6e77a73ee3d7a55b605ec5efb42", size = 253973, upload-time = "2026-05-26T20:40:34.473Z" }, + { url = "https://files.pythonhosted.org/packages/8e/21/c2e33b29d1cfde484a19d437afc343c6cd30b08d78cbbf9f5aff14e57b2b/coverage-7.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a311d8e1da24be5c1ccf85cbfb06315dbaa1703d5a1eab3f6432c72b837917c8", size = 255318, upload-time = "2026-05-26T20:40:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ee/aad2f108d63b769121005302f16bf66db8625c88ceaba466942e09a2607e/coverage-7.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c79cead5b5bc584d9c71451cb984d0e3a84e0c0937379c8efcbf27c8d661b851", size = 257633, upload-time = "2026-05-26T20:40:40.164Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f8/11a2c29b4fd76d9849f81d0bb812ec0017a9396df3217214e38934a8c837/coverage-7.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dcbf65f1f66a26cdd88c35cf68fb4729c5d1cd2e88added72420541dfb212034", size = 251488, upload-time = "2026-05-26T20:40:42.631Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b8/9a5820de4b8ac2b71d85e3b5fb49108d7469c665f0e2ad0dd7569023e305/coverage-7.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:fd86572566fb40189a8260446158235159bc7a82dfbc87a3b39cf4fb57fcec1c", size = 253329, upload-time = "2026-05-26T20:40:45.208Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ff/f33e4823667e27548e8fd8df44217515303f9808d0ff29817db56f87d990/coverage-7.14.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7771b601718fdde84832c3a434ca9bbf4ae9adbc49d84198b4110700c3c77c36", size = 251291, upload-time = "2026-05-26T20:40:47.502Z" }, + { url = "https://files.pythonhosted.org/packages/68/9b/489db0ebb209054766b90a9014a45f6d26eb724c02ec21311c3733b5a644/coverage-7.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:39b21e212c55af06fa375e3dbf90a8a8e38792f3a910c580066d23563830ddd5", size = 255564, upload-time = "2026-05-26T20:40:49.372Z" }, + { url = "https://files.pythonhosted.org/packages/27/b5/16bc2d4c2409b23c7737edb68c83bc89e345f378050549fe1d75ac7d34d5/coverage-7.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f2302660e32562a532b442480121aef8aa61a5bdb20b30bf0adab29f10a5a4b4", size = 251107, upload-time = "2026-05-26T20:40:51.677Z" }, + { url = "https://files.pythonhosted.org/packages/7d/0c/2629997469a00cd069d588a41c9dc887610f2775ae89d250c4791e65272a/coverage-7.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:03a6f93c1ec3b7f2e77b5dbcc5573a2c21f12529a5c6bbe0f16f72303cc2fa4d", size = 252764, upload-time = "2026-05-26T20:40:54.267Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ee/f78d63c8f079e0d7211c7e2401fa17e311514534ba61bae03e4b287ce4ab/coverage-7.14.1-cp314-cp314-win32.whl", hash = "sha256:8a3ce026d73290f42f08dafecbd82c193a74df280461fbf97300fec51fd133ee", size = 222837, upload-time = "2026-05-26T20:40:56.496Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b9/be539854f93a70dfbeec69117f33ec70dc42ff0b65b5b07ab8d40d04228e/coverage-7.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:114c95ef29302423b87d159075805f4ab973254a2638a5d7d046c94887cc87d7", size = 223650, upload-time = "2026-05-26T20:40:58.351Z" }, + { url 
= "https://files.pythonhosted.org/packages/fe/9e/24e2842fef40f35ac82ba3a7719c8023d011bf3bf652d0675316a9d088a1/coverage-7.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:a07891c3f4805442b31b71e84ba3cf29ed1aa9a428284e06deeb4b23e5b46343", size = 222218, upload-time = "2026-05-26T20:41:00.321Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1d/ac0a9df5fe31c1e8bdd658074905fc12844a05c1a7e3fdb8417e97c31e23/coverage-7.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1101a5ebb083aecb625ebb6209d4105b58f647b093cb2dc8122d7b33f743cfe1", size = 220822, upload-time = "2026-05-26T20:41:02.281Z" }, + { url = "https://files.pythonhosted.org/packages/32/cf/f964fd9aff20323f9f1a726c97135f8a76bcd87b92dad141a456a43f3c64/coverage-7.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:851b9e1e4e8a4608e77c79714b2e77c0970d2ed7202a05e92ae407817481887b", size = 221084, upload-time = "2026-05-26T20:41:04.593Z" }, + { url = "https://files.pythonhosted.org/packages/d8/5e/7e5ef2aba844de2b80d678619fcf0841b42e3f37f16411226f3fe4c1016f/coverage-7.14.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d5b89cdfb2ee051b71e8c3c70bd81a9eff81100f736a269136fe1a68efe00474", size = 262454, upload-time = "2026-05-26T20:41:06.641Z" }, + { url = "https://files.pythonhosted.org/packages/64/62/75809bded87015cc4935524218a2a8ed8dd1a8498bfed30a2f4f7a4b4d34/coverage-7.14.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0177614a0370f227888b4e436a7c55686d6a9f90eb1ade2b624ba685a1686e86", size = 264578, upload-time = "2026-05-26T20:41:08.556Z" }, + { url = "https://files.pythonhosted.org/packages/f3/42/d33392dc14633525012d2d504fa1a33b05538bf535f5c1d64675e5754b78/coverage-7.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d69af5dea2de76fc485a83032a630523f985198b7e25be901ec60181587b01e", size = 266981, upload-time = "2026-05-26T20:41:10.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/49/0157c4428c2aca7f1e09d5565930586fd5ae36f1655f08b0daa7cf1fcae1/coverage-7.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:35ab22d91de736e8966b980dc355cbcdd2c6dbbcfe275f9a2991bc8a91b3df65", size = 268112, upload-time = "2026-05-26T20:41:12.966Z" }, + { url = "https://files.pythonhosted.org/packages/96/26/86b9ce71f4092b1ed325ce1421698081df1286b833400b6836912834d6e0/coverage-7.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:357d4e32935c36588aaba057d734fa32428c360c9fc2e4442afbf1b646beee6e", size = 261558, upload-time = "2026-05-26T20:41:15Z" }, + { url = "https://files.pythonhosted.org/packages/20/4c/c311210c5472cf5401d8422b0d7812cdd520f24417673afabda6c323faca/coverage-7.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:51bd64741cc6fa065abd300ede1afe5a5291ece9c31da8b24884deda48bcc3f8", size = 264447, upload-time = "2026-05-26T20:41:17.369Z" }, + { url = "https://files.pythonhosted.org/packages/fb/71/59513f8710ed3e6b0ac0a050a5b7e977bb9c9e880354863b5d00d8809256/coverage-7.14.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9132cd363a68a4c3daa7c8704a654b1e39d3360f6f5b8ddd470608a945236c07", size = 262048, upload-time = "2026-05-26T20:41:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/bceed32dc494f5bbf50f775cd2e78ca814953942b5ea28d3c1c3ac316f14/coverage-7.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:07c6290b1697b862c0478eab545eec949a0d0e4d6d03497f446d706da3b4f2de", size = 265781, upload-time = "2026-05-26T20:41:21.559Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c5/9348fe40dbfd4991aaf78df2c6c3098bfb2cc834d1fd362a64b4efef855a/coverage-7.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5ea0c297e27133853b4d8a3eb799bff5a2dbd9f2f41537a240d337ac9b4df890", size = 260896, upload-time = "2026-05-26T20:41:23.428Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/92/1ea0f03929da7cf87206b1fa24f4c8e9c158be0455481af29ec0a1f3503f/coverage-7.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:01b7733daad0237daa01ef80fe2dfceffc911e6a17fa7b55d14aa8214eaaaecd", size = 263214, upload-time = "2026-05-26T20:41:25.419Z" }, + { url = "https://files.pythonhosted.org/packages/f6/a9/b2493c054c0e01a643266742ab45e15744e60743f9260cd930c7142b1124/coverage-7.14.1-cp314-cp314t-win32.whl", hash = "sha256:6adc5a36984624a70bf11d7184e20fa0a49aa7c47ffab43804106a1a695ea22e", size = 223624, upload-time = "2026-05-26T20:41:27.795Z" }, + { url = "https://files.pythonhosted.org/packages/fc/bd/3e1e6a57fccd2d7c83fcdf338e93ba98eb85c6e877dd34731ac585375490/coverage-7.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ddf799247318f34dbcd2efa8c95a8d0642674e926bb1774cf9b63dfd2a389d1c", size = 224728, upload-time = "2026-05-26T20:41:30.098Z" }, + { url = "https://files.pythonhosted.org/packages/bb/d7/31066cf1d2f0c6c797fce911bcfa01dd35642dc6da992a950256097c5860/coverage-7.14.1-cp314-cp314t-win_arm64.whl", hash = "sha256:145986fe66647eb489f18d9a997567a3fd358584c4b5a808769113abc07466af", size = 222752, upload-time = "2026-05-26T20:41:32.123Z" }, + { url = "https://files.pythonhosted.org/packages/8a/3c/1a983b9a745d7f83d53f057bcc5bf79ba6a2bbc08266b3f0c7d6fe630c9b/coverage-7.14.1-py3-none-any.whl", hash = "sha256:a252f21c27e38347e60111a3266b03827422a7d5525951aceee313aa68bab1d2", size = 211815, upload-time = "2026-05-26T20:41:34.078Z" }, ] [package.optional-dependencies] @@ -495,80 +561,98 @@ toml = [ [[package]] name = "cryptography" -version = "48.0.0" +version = "49.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" }, - { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" }, - { url = "https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" }, - { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" }, - { url = 
"https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" }, - { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" }, - { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" }, - { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 5252579, upload-time = "2026-05-04T22:57:57.207Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" }, - { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 4826672, upload-time = "2026-05-04T22:58:01.996Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" }, - { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" }, - { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" }, - { url = "https://files.pythonhosted.org/packages/6b/84/70e3feea9feea87fd7cbe77efb2712ae1e3e6edf10749dc6e95f4e60e455/cryptography-48.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec", size = 7986556, upload-time = "2026-05-04T22:58:11.172Z" }, - { url = "https://files.pythonhosted.org/packages/89/6e/18e07a618bb5442ba10cf4df16e99c071365528aa570dfcb8c02e25a303b/cryptography-48.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18", size = 4684776, upload-time = "2026-05-04T22:58:13.712Z" }, - { url = "https://files.pythonhosted.org/packages/be/6a/4ea3b4c6c6759794d5ee2103c304a5076dc4b19ae1f9fe47dba439e159e9/cryptography-48.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20", size = 4698121, upload-time = "2026-05-04T22:58:16.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/59/6ff6ad6cae03bb887da2a5860b2c9805f8dac969ef01ce563336c49bd1d1/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff", size = 4690042, upload-time = "2026-05-04T22:58:18.544Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b4/fc334ed8cfd705aca282fe4d8f5ae64a8e0f74932e9feecb344610cf6e4d/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c", size = 5282526, upload-time = "2026-05-04T22:58:20.75Z" }, - { url = "https://files.pythonhosted.org/packages/11/08/9f8c5386cc4cd90d8255c7cdd0f5baf459a08502a09de30dc51f553d38dc/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db", size = 4733116, upload-time = "2026-05-04T22:58:23.627Z" }, - { url = "https://files.pythonhosted.org/packages/b8/77/99307d7574045699f8805aa500fa0fb83422d115b5400a064ddd306d7750/cryptography-48.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741", size = 4316030, upload-time = "2026-05-04T22:58:25.581Z" }, - { url = "https://files.pythonhosted.org/packages/fd/36/a608b98337af3cb2aff4818e406649d30572b7031918b04c87d979495348/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166", size = 4689640, upload-time = "2026-05-04T22:58:27.747Z" }, - { url = "https://files.pythonhosted.org/packages/dd/a6/825010a291b4438aecc1f568bc428189fc1175515223632477c07dc0a6df/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336", size = 5237657, upload-time = "2026-05-04T22:58:29.848Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/09/4e76a09b4caa29aad535ddc806f5d4c5d01885bd978bd984fbc6ca032cae/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057", size = 4732362, upload-time = "2026-05-04T22:58:32.009Z" }, - { url = "https://files.pythonhosted.org/packages/18/78/444fa04a77d0cb95f417dda20d450e13c56ba8e5220fc892a1658f44f882/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae", size = 4819580, upload-time = "2026-05-04T22:58:34.254Z" }, - { url = "https://files.pythonhosted.org/packages/38/85/ea67067c70a1fd4be2c63d35eeed82658023021affccc7b17705f8527dd2/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c", size = 4963283, upload-time = "2026-05-04T22:58:36.376Z" }, - { url = "https://files.pythonhosted.org/packages/75/54/cc6d0f3deac3e81c7f847e8a189a12b6cdd65059b43dad25d4316abd849a/cryptography-48.0.0-cp314-cp314t-win32.whl", hash = "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f", size = 3270954, upload-time = "2026-05-04T22:58:38.791Z" }, - { url = "https://files.pythonhosted.org/packages/49/67/cc947e288c0758a4e5473d1dcb743037ab7785541265a969240b8885441a/cryptography-48.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12", size = 3797313, upload-time = "2026-05-04T22:58:40.746Z" }, - { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" }, - { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" }, - { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" }, - { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" }, - { url = "https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" }, - { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" }, - { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" }, - { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" }, - { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" }, - { url = "https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" }, - { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" }, - { url = "https://files.pythonhosted.org/packages/be/d2/024b5e06be9d44cb021fb0e1a03d34d63989cf56a0fe62f3dfbab695b9b4/cryptography-48.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:84cf79f0dc8b36ac5da873481716e87aef31fcfa0444f9e1d8b4b2cece142855", size = 3950391, upload-time = "2026-05-04T22:59:17.415Z" }, - { url = "https://files.pythonhosted.org/packages/bc/17/3861e17c56fa0fd37491a14a8673fdb77c57fc5693cafe745ea8b06dba75/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:fdfef35d751d510fcef5252703621574364fec16418c4a1e5e1055248401054b", size = 4637126, upload-time = "2026-05-04T22:59:20.197Z" }, - { url = "https://files.pythonhosted.org/packages/f0/0a/7e226dbff530f21480727eb764973a7bff2b912f8e15cd4f129e71b56d1d/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:0890f502ddf7d9c6426129c3f49f5c0a39278ed7cd6322c8755ffca6ee675a13", size = 4667270, upload-time = "2026-05-04T22:59:22.647Z" }, - { url = "https://files.pythonhosted.org/packages/3b/f2/5a72274ca9f1b2a8b44a662ee0bf1b435909deb473d6f97bcd035bcdbc71/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:ecde28a596bead48b0cfd2a1b4416c3d43074c2d785e3a398d7ec1fc4d0f7fbb", size = 4636797, upload-time = "2026-05-04T22:59:24.912Z" }, - { url = "https://files.pythonhosted.org/packages/b4/e1/48cedb2fe63626e91ded1edad159e2a4fb8b6906c4425eb7749673077ce7/cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:4defde8685ae324a9eb9d818717e93b4638ef67070ac9bc15b8ca85f63048355", size = 4666800, upload-time = "2026-05-04T22:59:27.474Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/ca/7e8365deec19afb2b2c7be7c1c0aa8f99633b54e90c570999acda93260fc/cryptography-48.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:db63bf618e5dea46c07de12e900fe1cdd2541e6dc9dbae772a70b7d4d4765f6a", size = 3739536, upload-time = "2026-05-04T22:59:29.61Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/1f/99/d1c90d6041656cc6ee229dc99cd67fd0cd5aec3c5f7d72fffc27cc750054/cryptography-49.0.0.tar.gz", hash = "sha256:f89660a348f4f78a92366240a61404e337586ef7f5909a2fef59ca88ef505493", size = 854345, upload-time = "2026-06-12T20:02:30.512Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/22/adf66990e63584a68dfb50c24f48a125c07b1699899381c8151e63ed458c/cryptography-49.0.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:966fe0e9c67490071f14c0d2b1cb2dfb3023c5ce39457343931415f08382f2db", size = 4032100, upload-time = "2026-06-12T20:02:32.143Z" }, + { url = "https://files.pythonhosted.org/packages/09/41/3797cfaf69cae04a13ee78ebd83f0678d9c02b4779d21ce24445326f1a69/cryptography-49.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:36d1709f992593689b45bda411498d62c6e365f2ca00b84657d4dadd24de16db", size = 4692978, upload-time = "2026-06-12T20:01:21.305Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8b/43011f7ebe515a8aa20d61f290a326cd890c2e738e16e59eaff8d9c3a412/cryptography-49.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0e959b578856a3924bc0cbb710fc12c387b9412a951389f3ca61704a9e25f325", size = 4716422, upload-time = "2026-06-12T20:01:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/4a/91/01ce7303a4579e6d3a6abef01bd322848e9ea7a219adcabc5048b9033571/cryptography-49.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:53ecee2e23f7169b6117e99fc8a944e5e50f79e69758a83b52a00cb98ab2b2d2", size = 4700503, upload-time = "2026-06-12T20:02:47.091Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/99/a2c95cf8293f07491e9e27c20cc4dcd18176d944e674679adeb1d0173fd6/cryptography-49.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:2eda353d8a27bcbcaa4cbed18994a74ab4d19a2ca897db188ea269ab9b71419b", size = 5309779, upload-time = "2026-06-12T20:02:08.987Z" }, + { url = "https://files.pythonhosted.org/packages/20/2c/0622f20ff02b2ef32558733443805dc82fd4c275be01b2d19d14676f3a1b/cryptography-49.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2afe9051da7ae7bd5905da5a949280c7d2bb75682e188f650a9d0f2756b834c6", size = 4749683, upload-time = "2026-06-12T20:02:03.335Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5b/c5246635d5fd3b64e0d45ae10e99fd32fe9676a79915ccfe5a61ba9af1a5/cryptography-49.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:0b82e28ee398a386f0807bba7884d30f25218855690f45115831bcce5d90822c", size = 4337874, upload-time = "2026-06-12T20:02:54.323Z" }, + { url = "https://files.pythonhosted.org/packages/6d/88/05563c7fe2e914e87d1a536d06fe83e66b4e1d95cb593e05aea375531da8/cryptography-49.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ccac2bfebc306b862133e3bb71f3f6ee8bb525240089b2d952e4144b3a6d5da7", size = 4700283, upload-time = "2026-06-12T20:01:34.822Z" }, + { url = "https://files.pythonhosted.org/packages/c4/b6/d7696e4e890d6ae1469935164c9e5215c557671cb78d6e3f458ccceaa632/cryptography-49.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d0527ce944105f257f605a827d6ebead966c752038b6e8656abb9c5edee6fc68", size = 5265844, upload-time = "2026-06-12T20:01:24.09Z" }, + { url = "https://files.pythonhosted.org/packages/a9/3c/f3ad17eecc1a57b0ba236dc01f90e783c51f4a2f35f64777cc4f47a184b2/cryptography-49.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:cbc77da8c523d5abd028635ba850a6966fcee2c82e2bf65a41d1d8afe0f98be9", size = 4749290, upload-time = "2026-06-12T20:01:30.848Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/01/339573cf1023163a400b0b5d16f6d507de413b9f60be6fd1b77feeaf6737/cryptography-49.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b87e65d263b3e5d3bb92a57e2a6638e2f31110fa7aa890c7b2dbba42248d0a3f", size = 4834612, upload-time = "2026-06-12T20:01:29.246Z" }, + { url = "https://files.pythonhosted.org/packages/71/fd/577302e213a1be9468f92d1afef66fcf1ef83d516819d9992ca547f592bd/cryptography-49.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:66ec79c3904820572d7e987abdf304281f141d37ad9a489b8e97066e7b9b6459", size = 4980804, upload-time = "2026-06-12T20:01:42.853Z" }, + { url = "https://files.pythonhosted.org/packages/1f/09/f42b1d190c5ba75f72062a387f8030d1d75f6ab035788f1d9c4b01de6525/cryptography-49.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:e5dfc1e64de5677cec922ffa8da89c546d0415bf6efdf081842e5d44c84e1f0e", size = 3810026, upload-time = "2026-06-12T20:02:39.262Z" }, + { url = "https://files.pythonhosted.org/packages/ec/9e/db72b3ae7fc9cfad53e630e56c6ae83b9b6ff0bf3718ffb8012d20b3aabf/cryptography-49.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:73a205dce83953d131a4aa1e0fd917a2fd1c5b1eef251e9d7152efefcbf5caf7", size = 4013892, upload-time = "2026-06-12T20:02:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/86/12/c48a424f38db03027be9f7ed5c7dc5de9933dbee992865f98b13727a009d/cryptography-49.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:196ecd6a36e4e9aa10270393bb98d8df88fccee0bf1e5128b91ae4eb4375896d", size = 4678835, upload-time = "2026-06-12T20:02:48.743Z" }, + { url = "https://files.pythonhosted.org/packages/68/28/8a3ad4653662c93fc44dc4e5d8fd374c25c42e07b34bbfbadf49cf57a5a8/cryptography-49.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7abcee80084cda3f7691f3eb1ce480d8df49cec637b429aa35986c1de71738aa", size = 4697239, upload-time = "2026-06-12T20:02:56.03Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/b2/2193fc74f81aee4f9b62733133b73b5176718932ed8f2e4b03fa040480a6/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4ae387c9cb68ea569ca17e490d66d8142b81c3cc814bf179974b7d146e490bbb", size = 4685593, upload-time = "2026-06-12T20:02:50.666Z" }, + { url = "https://files.pythonhosted.org/packages/47/f1/1d3eaa243bfc5de4a187b22aa8c048b3e4980bfbe830ac46e6bac2e66947/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:f37d847238971164fdbc68ade6f6574aecc9c0af714190e2083429ff68f4ce9d", size = 5289961, upload-time = "2026-06-12T20:01:46.468Z" }, + { url = "https://files.pythonhosted.org/packages/58/39/2d51306721330c486495853eda1c567880ff036de15a14c4b74f399934af/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:c2bc30226390d60ea19d9f82b19db005fe0452154a23c1c410c12ea801e43561", size = 4731145, upload-time = "2026-06-12T20:02:16.832Z" }, + { url = "https://files.pythonhosted.org/packages/17/50/983e838c7fd0d87fd8c969bcdd328edaf5f756e38df5281637424c155873/cryptography-49.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:07cab27cc7b7e0fd28e5e26bb9eeedde5c135c868b46de4a27845abe94af6122", size = 4321719, upload-time = "2026-06-12T20:02:52.611Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f5/8f571d7e27c55bce9f76f026143bcb1e040a4233149ecca0bea5fa5dd5f7/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:b20133d204d2bb56ba047642199603876c872026ca53e79c35b83772ab2cc505", size = 4685209, upload-time = "2026-06-12T20:02:07.282Z" }, + { url = "https://files.pythonhosted.org/packages/e7/84/0e27016a6fc5a0886f797018b26aa42f40c09a82332bff77822a451deaaa/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b970c6da94d5bb18629db453d14f2a1300f6bf59b61e9b82377931ef95504866", size = 5246285, upload-time = "2026-06-12T20:01:32.439Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/2d/5e1fb307cb5931881516b464c98774b3f2c36b5d4bb9a2830253cf553cad/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d8ecde755e2e91bf773fc94e8c9d730cd7f2007004cb492263a794ec3899a1c8", size = 4730441, upload-time = "2026-06-12T20:02:01.469Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c0/bff5a02ee731d207d6a1ed51732549d8c53d2bc8da1d10ec6f2844201d68/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3fb64c420688e5319ae25113a354015abbd8dffbfbc41781a1ea66fc7622ac3", size = 4815869, upload-time = "2026-06-12T20:01:36.574Z" }, + { url = "https://files.pythonhosted.org/packages/b9/26/814681d14248d95d73d5c3eea0c39a94eb8302df966f670a2c60de90974b/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32703d93296f5c1f4b53349ad3a250c2cae0fdecd3a3dd5d47e616d8d616af27", size = 4960948, upload-time = "2026-06-12T20:02:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fe/93ecac273d3738939d023612ad12cca9a3740a5345d69fda04134c43fd96/cryptography-49.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:33cd0565932807baddb67b96dbee92f2c374b5c89dee09fd74079aeb8c8dba61", size = 3799153, upload-time = "2026-06-12T20:01:39.059Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/5bb823f5bedcf80718cea7fbc95ec5515cca3769633c4b01a32be7f30e7c/cryptography-49.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ec5e529fb80935c94fe7b729f9972b50e351a0e6b50aa294fd5cabb109fcc29a", size = 4025947, upload-time = "2026-06-12T20:01:25.745Z" }, + { url = "https://files.pythonhosted.org/packages/3d/df/40577043ca124e17012f408ddddaeb213b856336ac82ddb3bc915f39e29f/cryptography-49.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f78ff2c9ed8dc2d036b0f4d640e22522213d047c1b14e61205a7e55c80a494d4", size = 4692429, upload-time = "2026-06-12T20:01:53.628Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/99/2d13299eb3dd27b02dcfaafcc91d6b5cb3329f7cbd6d8f51921acd566c1a/cryptography-49.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:35b151772baff2c74cba7fa290ceaff4c3b11c0c881eb93eb5dbc05a7cfbba18", size = 4700968, upload-time = "2026-06-12T20:02:45.383Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4d/9c0cd02f95e2602dd5e563da149ee0830abef3537be8b34dc56281ebe27a/cryptography-49.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0f21641cf4b30fca7aee061ced0ec7ad7b073518088b7c9969a297c0ae796c69", size = 4697758, upload-time = "2026-06-12T20:01:41.13Z" }, + { url = "https://files.pythonhosted.org/packages/24/01/186c825898477d77e2324d5360fefe622ff1d8d1963ec0554e2cada8ec77/cryptography-49.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9e82dcc8e56052715fb18b2429e3bca4823b1629136a2084fc45a9a5cecb9b64", size = 5298863, upload-time = "2026-06-12T20:02:24.579Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7b/62cbbab75d0659865bf0273790031544a0b16c8072d258f9428dcd8190dc/cryptography-49.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6f2debedf9ca60cf1d5bd466475638af5130f89965605cd818484d19987d3a21", size = 4735983, upload-time = "2026-06-12T20:01:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/6c/72/3e798c064bc39e471008075d0f9bc9daf77a80879c092e4a8e170c585ed4/cryptography-49.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:8c25ceb16df5b9435f3f6a9829204985b0e0cbee3b48aacd432c7d2c850b44d9", size = 4334173, upload-time = "2026-06-12T20:01:44.743Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ee/6fca21d1ac73e06f8bef71940abfd4d2f6472b4bca284d770f32bd4086f6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:28d8b15e6275f12c8a207dc309dfa957903c927d08d0cc937ee3f63f200693cc", size = 4697298, upload-time = "2026-06-12T20:02:20.918Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/d0/a5fcd3515f0bae49a7b6d0413cc1bdccdcc1fc0047037a0d480642cdc5d6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6fc361c34fb6aac015ce19435876635e5c6d21db31998b0920f675f131e043b8", size = 5254338, upload-time = "2026-06-12T20:02:22.737Z" }, + { url = "https://files.pythonhosted.org/packages/a0/84/84fe36f19caf857d61cb7fc9c63035a47ffabd84ea12d1d393148efa3615/cryptography-49.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2400ef9c9e2299a25614eb1dea3db54a69b1349efd043bfac9c67630d136df36", size = 4735650, upload-time = "2026-06-12T20:02:41.389Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a0/db537264e234f7273a73ec020873d6d6b39dfd8a53db78b550ca8320440e/cryptography-49.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:67e1d20ad9ef3a563c59ef22e7a8a0b8210bd26604369ea4a30a7c66aefe504e", size = 4834820, upload-time = "2026-06-12T20:01:51.847Z" }, + { url = "https://files.pythonhosted.org/packages/93/77/8df9eb486495979bccecd1062e2eaf435250e84437040295b57d09048b0b/cryptography-49.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:42b0684e0e40cf26122427802486f6d93aea593612603a94fbf260c7eb1e9c1b", size = 4967968, upload-time = "2026-06-12T20:02:12.524Z" }, + { url = "https://files.pythonhosted.org/packages/c2/e6/f60198ea8d9dfa15fff9ed4ca02ce362f6eadd9ba757dcc50634c4257b63/cryptography-49.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:026ac7423e6fa66872d3bf889be5974507da3944f866f704fa200eadacd00001", size = 3785547, upload-time = "2026-06-12T20:02:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/63/d3/4a83af35d65e3fad632c926fad684c193ea4398569ccb0bbbc7fe8f5dc9a/cryptography-49.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc1e275c2f1d97b1a6450b8b0ea3ebfa6e087a611c2b26cb2404d48588abab7b", size = 3993685, upload-time = "2026-06-12T20:02:14.883Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/a7/f9dac0ab7f80368c56993a7bf638ef9935f825c91902798481fac0898138/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83782480a4a9da4d0feb51950131ba32e12e70813848b3343f6e18c28a66838", size = 4676239, upload-time = "2026-06-12T20:02:28.793Z" }, + { url = "https://files.pythonhosted.org/packages/d7/70/2ba3769dd0ae167e2f33dfa9592d45db6ff9a61d62ca1a5b3d1bdd09068f/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b39efa323140595abd3ecca8529d321ae50f55f3aa3ba9cc81ea56a6011953d5", size = 4715584, upload-time = "2026-06-12T20:01:27.495Z" }, + { url = "https://files.pythonhosted.org/packages/94/64/2923570ac1c0bd3a737aa366ac3abbbbde273042308b8cde95e2364a6e6a/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:b47db11c2c3525083296069b98ac5221907455e989ae0c2e3008bde851921615", size = 4675885, upload-time = "2026-06-12T20:01:55.49Z" }, + { url = "https://files.pythonhosted.org/packages/ab/f8/614dc7e051418cfe53d55173c1e24c6b0085e89996fe90508c2fdf769aef/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:084ef1af862eb07ec46d25f68689f2102a9fc0e05ce7b80f14f5fe51e4eef0f6", size = 4715449, upload-time = "2026-06-12T20:02:05.469Z" }, + { url = "https://files.pythonhosted.org/packages/aa/50/a9caea39ad19c431c1a3f8a31114df65b260cdfe67786b6c7e7c040c4c44/cryptography-49.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be9fcb48a55f023493482827d4f459bd263cc20efde64f204b97c123201850c6", size = 3783731, upload-time = "2026-06-12T20:02:43.319Z" }, +] + +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = 
"2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788, upload-time = "2020-04-20T14:23:38.738Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, ] [[package]] name = "distlib" -version = "0.4.0" +version = "0.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/02/bd72be9134d25ed783ecbbc38a539ffaefbf90c78418c7fb7229600dbac7/distlib-0.4.3.tar.gz", hash = "sha256:f152097224a0ae24be5a0f6bae1b9359af82133bce63f98a95f86cae1aede9ed", size = 615141, upload-time = "2026-06-12T08:04:52.847Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = 
"sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, + { url = "https://files.pythonhosted.org/packages/02/08/9c41fb51ab5b43eb21674aff13df270e8ba6c4b29c8624e328dc7a9482af/distlib-0.4.3-py2.py3-none-any.whl", hash = "sha256:4b0ce306c966eb73bc3a7b6abad017c556dadd92c44701562cd528ac7fde4d5b", size = 470628, upload-time = "2026-06-12T08:04:50.506Z" }, ] [[package]] name = "docutils" -version = "0.22.4" +version = "0.23" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/39/a4/5180d9afc57e8fca05601dd652bdff19604c218814037fe90ffc7625a50a/docutils-0.23.tar.gz", hash = "sha256:746f5060322511280a1e50eb76846ed6bf2342984b2ac04dc42caa1a8d78799e", size = 2303823, upload-time = "2026-05-27T17:41:06.934Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, + { url = "https://files.pythonhosted.org/packages/32/91/30151a39f7570f448ed84529390628a651d7f27c87d73c9b887f8189695e/docutils-0.23-py3-none-any.whl", hash = "sha256:25d013af9bf23bc1c7b2b093dff4208166c53a94786c9e447808335ef1185fea", size = 634701, upload-time = "2026-05-27T17:40:58.442Z" }, ] [[package]] @@ -576,20 +660,60 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = 
"python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, ] +[[package]] +name = "fastembed" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "loguru" }, + { name = "mmh3" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "onnxruntime", version = "1.23.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "onnxruntime", version = "1.27.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "py-rust-stemmers" }, + { name = "requests" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/25/58865e36b6e8a9a0d0ff905b5601aa30db97956327c0df42ec4ed6accc21/fastembed-0.8.0.tar.gz", hash = "sha256:75966edfa8b006ee78514c726bd7f6a50721dadc89305279052be9db72fd53e8", size = 75115, upload-time = "2026-03-23T16:34:41.699Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/e8/26b7d78bb8972498c467ca34cb12ee2e60d26ba5eae6d8443189a1af37a5/fastembed-0.8.0-py3-none-any.whl", hash = 
"sha256:40bee672657574a1009e35ec50030a55f2b426842cb011845379817641bbbbd0", size = 116572, upload-time = "2026-03-23T16:34:40.69Z" }, +] + [[package]] name = "filelock" -version = "3.29.0" +version = "3.29.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/dc/be6cbe99670cd6e4ad387123647cb08e0c32975e223f82551e914c5568a6/filelock-3.29.4.tar.gz", hash = "sha256:10cdb3656fc44541cdf30652a93fb10ec6b05325620eb316bd26893e4201538a", size = 63028, upload-time = "2026-06-13T16:12:00.744Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/37/a065dc3bd6e49423a6532c642ca7378d3f467b1ef44c2800c937af7f9739/filelock-3.29.4-py3-none-any.whl", hash = "sha256:dac1648087d5115554850d113e7dd8c83ab2d38e3435dde2d4f163847e57b767", size = 42757, upload-time = "2026-06-13T16:11:59.582Z" }, +] + +[[package]] +name = "flatbuffers" +version = "25.12.19" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } +sdist = { url = "https://files.pythonhosted.org/packages/10/a1/ae4e3e5003468d6391d2c77b6fa1cd73bd5d13511d81c642d7b28ac90ed4/fsspec-2026.6.0.tar.gz", hash = "sha256:f5bac145310fe30e16e1471bd6840b2d990d609e872251d7e674241822abf01a", size = 313646, upload-time = "2026-06-16T01:57:28.105Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, + { url = "https://files.pythonhosted.org/packages/e5/22/4222d7ddf3da30f363edaa98e329c2bce6c65497c9cb2810931c8b2c0fbc/fsspec-2026.6.0-py3-none-any.whl", hash = "sha256:02e0b71817df9b2169dc30a16832045764def1191b43dcff5bb85bdee212d2a1", size = 203949, upload-time = "2026-06-16T01:57:26.358Z" }, ] [[package]] @@ -601,6 +725,71 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "hdbscan" +version = "0.8.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/51/b476849d27980d1ce5ba0a172891d37441b0112047894350951f6b169266/hdbscan-0.8.44.tar.gz", hash = "sha256:1ac6196fabdd42072284b60c9be7b9b504b5f4f25cf7a551a8af29a3c7963a4d", size = 7094270, upload-time = "2026-06-01T18:56:31.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/11/2eaf3dda078c19b26194c4332e387d21b0a8986eb6cab18a0c2a0b51d9c7/hdbscan-0.8.44-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:80101ed4897ecbbb5ec36358c4d4c5c38ce4e53b9da5996923d1ec72d21ce665", size = 2615940, upload-time = "2026-06-01T18:55:48.073Z" }, + { url = "https://files.pythonhosted.org/packages/0a/70/dee6ed06d46a45f2054c2c9b5a1563058e775a0ace1412a52a41e5c1d826/hdbscan-0.8.44-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:91dac2f5668b946e3b6335bf6ea4c95fd446af45f7169cf0ab93fea34d4b0e73", size = 2025466, upload-time = "2026-06-01T19:13:28.405Z" }, + { url = "https://files.pythonhosted.org/packages/96/2f/646551ef9caf71211648a0accec50fa625a391e1530abc0d0db9a1c5ab78/hdbscan-0.8.44-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c72c1f357adfd9e9c0f402d7cc256bcf38a04ed833754661ec2600cc04e47c1b", size = 5712965, upload-time = "2026-06-01T18:56:39.348Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6a/7c94a9b888248074b2082fe88ca72a705c2ff7601a7bdbb824111f3686e1/hdbscan-0.8.44-cp310-cp310-win_amd64.whl", hash = "sha256:be163d32e71a7ca9e3fd0c6867fdaa7a0c1989f3edc5048ba211beb3949eb96d", size = 1965101, upload-time = "2026-06-01T18:56:23.273Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/7ea947869eeb877c3d2b23ce2cbfcc9c5e8e31dd10d81e04add53f3f0c7e/hdbscan-0.8.44-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e83276c6147d1ec74359ac2d07b2df9d0deb5e248194660967bcde6437a8518c", size = 2603287, upload-time = "2026-06-01T18:55:43.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/99/17dd8a7e845504c3b8f9443bb1b15d20461c70c0c305186fa3526bb55277/hdbscan-0.8.44-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6089b8accd805a3a7409b20247e29e54dea6118785771127a4fc6c5eda24f0ef", size = 5929276, upload-time = "2026-06-01T18:56:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/13/8d/eec0040bd273c9e43409672c8cec1956775aca3ebafad8a9e3f5d25cfd80/hdbscan-0.8.44-cp311-cp311-win_amd64.whl", hash = "sha256:df6d6268022747a60c9990cecf446bc7a71621ff92bc51c86f5958d1cd451870", size = 1964877, upload-time = "2026-06-01T18:56:12.515Z" }, + { url = "https://files.pythonhosted.org/packages/5e/f0/4f719c1275158a13918124ce7d798be4b4ae2898469b467216fde974e443/hdbscan-0.8.44-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cc4917a57f73984137bc6f14cddd1a140907102522174af8d8add42668a5e196", size = 2590501, upload-time = "2026-06-01T18:55:52.587Z" }, + { url = "https://files.pythonhosted.org/packages/5c/95/4f08d8adeba894453a057f1b87d24fb155e040601137cb82536d9708ff30/hdbscan-0.8.44-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d67f2b3628a80764a07fff3a994df2c6b2d9a6b9c8024edde7b36c184f6657f6", size = 5868686, upload-time = "2026-06-01T18:56:33.787Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a2/959f617dbff9ebe0ef9b55f61c84b7ad00e5eb91b47b838b3a1a2bf0aa95/hdbscan-0.8.44-cp312-cp312-win_amd64.whl", hash = "sha256:29bffe0ef8a8191e6cd5af3dc7fdf5e7f6334eea8fa39ee8488f2f16911c1cdf", size = 1947427, upload-time = "2026-06-01T18:56:14.433Z" }, + { url = "https://files.pythonhosted.org/packages/a2/cb/c4d5c76145a053a7ac04d5106872ce44b199783a4249536c2d9f140c3c07/hdbscan-0.8.44-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9256a7028f017b257c5eba05fe332a744a410c0dcb9971eaa9ed6c5efaac50d5", size = 2580855, upload-time = "2026-06-01T18:55:37.972Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/73/12786f8e999959dbf56dfd2b0e4a7e7cc3ccfdcf3a464b0f019bc93e9556/hdbscan-0.8.44-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3400d93a299228e86dec77cc114b834cdd48413f7999eaddaecba045c38ee915", size = 5850796, upload-time = "2026-06-01T18:56:40.535Z" }, + { url = "https://files.pythonhosted.org/packages/4a/83/02a8d5c03e1fa535965c1339ee3f4d2c0ffd30163904aa817a970d1200c9/hdbscan-0.8.44-cp313-cp313-win_amd64.whl", hash = "sha256:0f4fa78c76459e1e00282fd8d67a6834ca46ba232e38414f69bb1bacc2c263ce", size = 1947052, upload-time = "2026-06-01T18:57:01.255Z" }, + { url = "https://files.pythonhosted.org/packages/15/7e/bd23accbaf40a4cc7a179c22129b81023b5daeb3a4d7af6c5f825f29e0ab/hdbscan-0.8.44-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:2351eb81f21491b5b50bdc5ecaa2d88f58608f51f5906774cb38747f21b18053", size = 2603654, upload-time = "2026-06-01T18:55:41.641Z" }, + { url = "https://files.pythonhosted.org/packages/87/10/1b1379a3576e3d9af5c931f942d321017fd2129dad21bc850063706a8ec9/hdbscan-0.8.44-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7ee278a8da6043671031079b245488ee27565f48a83616637dae0b2c4886ea1", size = 5817791, upload-time = "2026-06-01T18:56:40.999Z" }, + { url = "https://files.pythonhosted.org/packages/db/ae/74d1aee2a7c6c39b16a8d4fa7aa3d029d7d08d0c5461d7b55f9619b48598/hdbscan-0.8.44-cp314-cp314-win_amd64.whl", hash = "sha256:5ea248dcaca951861e811411bf3eb9954f932f3a90c8bbe5629b5ee8479e011e", size = 1969244, upload-time = "2026-06-01T18:56:29.289Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/2d/57fd21d84d93efb4bd0b962383790e19dd1bc053501b4264c97903b4e83e/hf_xet-1.5.1.tar.gz", hash = "sha256:51ef4500dab3764b41135ee1381a4b62ce56fc54d4c92b719b59e597d6df5bf6", size = 876636, upload-time = 
"2026-06-08T23:02:53.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/ee/dd9ba7beae1005e54131b7d45263cc74c8a066d47d354e6d58ae9445a388/hf_xet-1.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:dbf48c0d02cf0b2e568944330c60d9120c272dabe013bd892d48e25bc6797577", size = 4069485, upload-time = "2026-06-08T23:02:13.193Z" }, + { url = "https://files.pythonhosted.org/packages/b6/bc/9cae6cfeb4e03070874e73e5c97c66eb90369d3206b6a2b1ef5f96520888/hf_xet-1.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78e4e5192ad2b674c2e1160b651cb9134db974f8ae1835bdfbfb0166b894a43", size = 3838493, upload-time = "2026-06-08T23:02:15.282Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b4/d5c01e0eb6d9f2ca2dacd84d0d1b71e6cfbb2ef3208c968528e010e9b3d7/hf_xet-1.5.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6f7a04a8ad962422e225bc49fbbac99dc1806764b1f3e54dbd154bffa7593947", size = 4505658, upload-time = "2026-06-08T23:02:17.196Z" }, + { url = "https://files.pythonhosted.org/packages/76/c5/29a7598c0c6383c523dc22186d577f4e04267a626cd95ae60f67c00bfe66/hf_xet-1.5.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d48199c2bf4f8df0adc55d31d1368b6ec0e4d4f45bc86b08038089c23db0bed8", size = 4292822, upload-time = "2026-06-08T23:02:18.608Z" }, + { url = "https://files.pythonhosted.org/packages/04/9a/dceaf6ca69390126b86ea825fb354b93d01163199070b7bd849225de9468/hf_xet-1.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:97f212a88d14bbf573619a74b7fecb238de77d08fc702e54dec6f78276ca3283", size = 4491255, upload-time = "2026-06-08T23:02:20.124Z" }, + { url = "https://files.pythonhosted.org/packages/48/a7/e5a7afaacf6c1791fdbeeac42951fb81c3d2bc482992b115dedcc86d963e/hf_xet-1.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f61e3665892a6c8c5e765395838b8ddf36185da835253d4bc4509a81e49fb342", size = 4711062, upload-time = "2026-06-08T23:02:21.863Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/49/2802f8433c9742ce281bddc1e65c02c32268ca3098d66828b05e12e45ee2/hf_xet-1.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f4ad3ebd4c32dd2b27099d69dc7b2df821e30767e46fb6ee6a0713778243b8ff", size = 4017205, upload-time = "2026-06-08T23:02:23.495Z" }, + { url = "https://files.pythonhosted.org/packages/9e/5a/50c71195b9fb883659f596e7252faf4c18c58e753a9013bdbf9bac5d2250/hf_xet-1.5.1-cp313-cp313t-win_arm64.whl", hash = "sha256:8298485c1e36e7e67cbd01eeb1376619b7af43d4f1ec245caae306f890a8a32d", size = 3845426, upload-time = "2026-06-08T23:02:25.124Z" }, + { url = "https://files.pythonhosted.org/packages/05/24/5e0c28f80371c17d49fed004597d9d132cb75c1f6f53db2cb95f459d2312/hf_xet-1.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:3474760d10e3bb6f92ff3f024fcb00c0b3e4001e9b035c7483e49a5dd17aa70f", size = 4069676, upload-time = "2026-06-08T23:02:26.759Z" }, + { url = "https://files.pythonhosted.org/packages/d2/17/261ba565b6a4d960fb478f61fdf919c0be5824645aaf1c319eca660c1611/hf_xet-1.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6762d89b9e3267dfd502b29b2a327b4525f33b17e7b509a78d94e2151a30ce30", size = 3838509, upload-time = "2026-06-08T23:02:28.573Z" }, + { url = "https://files.pythonhosted.org/packages/4e/44/7ffdc2e184b0d41fc0f683ba3936ef669ab63cf242cf36ef50e57d683668/hf_xet-1.5.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bf67e6ed10260cef62e852789dc91ebb03f382d5bdc4b1dbeb64763ea275e7d6", size = 4505881, upload-time = "2026-06-08T23:02:30.257Z" }, + { url = "https://files.pythonhosted.org/packages/63/b6/788060d5aa4d5e671f1a31bf69624c314eb2d8babab3aa562f9e5d53444e/hf_xet-1.5.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c6b6cd08ca095058780b50b8ce4d6cbf6787bcf27841705d58a9d32246e3e47a", size = 4292995, upload-time = "2026-06-08T23:02:31.993Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/93/c5540cbd6b55529b7dc42f6734e88cebee21aefbea34128b66229df56c57/hf_xet-1.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e1af0de8ca6f190d4294a28b88023db64a1e2d1d719cab044baf75bec569e7a9", size = 4491570, upload-time = "2026-06-08T23:02:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/03/f3/9d8ceab30f44f36c1679b1b8683054c71a0dadc787dbf07421891742d3ca/hf_xet-1.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4f561cbbb92f80960772059864b7fb07eae879adde1b2e781ec6f86f6ac26c59", size = 4711565, upload-time = "2026-06-08T23:02:35.454Z" }, + { url = "https://files.pythonhosted.org/packages/cd/54/27ed9a5e2cc583b4df82f75a03a4df8dbf55f5a9fa1f47f1fadfb20dbeac/hf_xet-1.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:e7dbb40617410f432182d918e37c12303fe6700fd6aa6c5964e30a535a4461d6", size = 4017343, upload-time = "2026-06-08T23:02:37.14Z" }, + { url = "https://files.pythonhosted.org/packages/ae/12/ecb2fc8d45e767580e3a37faa97cb895608b614965567efb4f18cff67e27/hf_xet-1.5.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6071d5ccb4d8d2cbd5fea5cc798da4f0ba3f44e25369591c4e89a4987050e61d", size = 3845716, upload-time = "2026-06-08T23:02:39.073Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d8/5e54cf37434759d1f4f2ba9b66077ff9d4c4e1f37b6bd7975da5c40d94ab/hf_xet-1.5.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6abd35c3221eff63836618ddfb954dcf84798603f71d8e33e3ed7b04acfdbe6e", size = 4077794, upload-time = "2026-06-08T23:02:40.656Z" }, + { url = "https://files.pythonhosted.org/packages/35/94/4b2ecfbad8f8b04701a23aefb62f540b9137d058b7e1dbef16a32676f0e9/hf_xet-1.5.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:94e761bbd266bf4c03cee73753916062665ce8365aa40ed321f45afcb934b41e", size = 3845354, upload-time = "2026-06-08T23:02:42.702Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/cc/f99f4bc7295023d7bd9ebbfd51f75cc530ca262c1227666268b8208f4b77/hf_xet-1.5.1-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:892e3a3a3aecc12aded8b93cf4f9cd059282c7de0732f7d55026f3abdf474350", size = 4514864, upload-time = "2026-06-08T23:02:44.497Z" }, + { url = "https://files.pythonhosted.org/packages/cd/6e/21f7e5a2381278bd3b7b7a5a4d90038518bb6308a0c1daf5d9f8268bb178/hf_xet-1.5.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a93df2039190502835b1db8cd7e178b0b7b889fe9ab51299d5ced26e0dd879a4", size = 4303784, upload-time = "2026-06-08T23:02:46.203Z" }, + { url = "https://files.pythonhosted.org/packages/35/0e/f992bb6927ac1cb30ef74e62268f551f338bc32b2191f7c96a44c6f7283e/hf_xet-1.5.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0c97106032ef70467b4f6bc2d0ccc266d7613ee076afc56516c502f87ce1c4a6", size = 4500703, upload-time = "2026-06-08T23:02:47.628Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d1/90a498d05447980b977b1669246eeeeae4cfb0ea3e7a286eaba627f91bf9/hf_xet-1.5.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6208adb15d192b90e4c2ad2a27ed864359b2cb0f2494eb6d7c7f3699ac02e2bf", size = 4719498, upload-time = "2026-06-08T23:02:49.268Z" }, + { url = "https://files.pythonhosted.org/packages/6d/b6/20f99cfe97cc663a711f7b33cc21d4793e51968e9a26125b4afcd77315ba/hf_xet-1.5.1-cp37-abi3-win_amd64.whl", hash = "sha256:f7b3002f95d1c13e24bcb4537baa8f0eb3838957067c91bb4959bc004a6435f5", size = 4026419, upload-time = "2026-06-08T23:02:50.829Z" }, + { url = "https://files.pythonhosted.org/packages/f9/fa/77453694888f03e5a8c8852d1514a0894d8e81c622d39edbaf308ea0dcf4/hf_xet-1.5.1-cp37-abi3-win_arm64.whl", hash = "sha256:93d090b57b211133f6c0dab0205ef5cb6d89162979ba75a74845045cc3063b8e", size = 3855178, upload-time = "2026-06-08T23:02:52.452Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -638,6 +827,39 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, ] +[[package]] +name = "huggingface-hub" +version = "1.19.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/88/27/629cfe58c582f92ded066c4a07d1a057ff617118ab7973200f770bd853cb/huggingface_hub-1.19.0.tar.gz", hash = "sha256:fd771622182d40977272a923953ee3b1b13538f9f8a7f5d78398f10af0f1c0bd", size = 824721, upload-time = "2026-06-11T12:33:18.665Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a5/558da89f66464d8d0229ff497e8b8666977de2d8cf48c28a2862ecf1250f/huggingface_hub-1.19.0-py3-none-any.whl", hash = "sha256:1dc72e1f6b4d6df6b30eb72e57d00514ef453d660f04af2b87f0e67267f31ee0", size = 693398, upload-time = "2026-06-11T12:33:16.695Z" }, +] + +[[package]] +name = "humanfriendly" +version = "10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "python_full_version < '3.11' and sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, +] + [[package]] name = "id" version = "1.6.1" @@ -661,11 +883,11 @@ wheels = [ [[package]] name = "idna" -version = "3.15" +version = "3.18" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/63/9496c57188a2ee585e0f1db071d75089a11e98aa86eb99d9d7618fc1edce/idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848", size = 196711, upload-time = "2026-06-02T14:34:07.794Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" }, + { url = "https://files.pythonhosted.org/packages/1e/5e/d4e9f1a599fb8e573b7b87160658329fbf28d19eac2718f51fc3def3aa5a/idna-3.18-py3-none-any.whl", hash = "sha256:7f952cbe720b688055e3f87de14f5c3e5fdaa8bc3928985c4077ca689de849a2", size = 65455, upload-time = "2026-06-02T14:34:06.319Z" }, ] [[package]] @@ -673,7 +895,7 @@ name = "importlib-metadata" version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp" }, + { name = "zipp", marker = "python_full_version < '3.12'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } wheels = [ @@ -734,6 +956,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, ] +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -742,7 +973,8 @@ dependencies = [ { name = "attrs" }, { name = "jsonschema-specifications" }, { name = "referencing" }, - { name = "rpds-py" }, + { name = "rpds-py", version = "0.30.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "rpds-py", version = "2026.5.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = 
"sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } wheels = [ @@ -779,6 +1011,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, ] +[[package]] +name = "lance-namespace" +version = "0.8.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lance-namespace-urllib3-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/12/f7ab93b29be3edbf5fc3610714bf2d06088e7f4524bfb38dfd6852458b08/lance_namespace-0.8.6.tar.gz", hash = "sha256:18232e721c8188145f4ec9389cc2dfbeeabf54a619d94885ea1b3375bee9f4af", size = 11529, upload-time = "2026-06-12T17:36:41.651Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/1b/5b1668ee2dc8910965f390640359112a31157092fcf8e000b89c79b58708/lance_namespace-0.8.6-py3-none-any.whl", hash = "sha256:571eae34f9aad70e5b05020416c2860889b9ec82993ccd0eb015e7b39c3ea309", size = 13383, upload-time = "2026-06-12T17:36:43.456Z" }, +] + +[[package]] +name = "lance-namespace-urllib3-client" +version = "0.8.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/80/fb224b4a89c1c1638cde949cb6cce6c3aca7759effbfea46a3d9c3960b21/lance_namespace_urllib3_client-0.8.6.tar.gz", hash = "sha256:b6fb1d306e74a7576e5309919020be744527de484a63dbf5eed10f8b368548df", size = 228772, upload-time = "2026-06-12T17:36:42.609Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c5/90/1e27de15cd1b16785a1c7312beb0a59e75c8344a815f600f58173a565bd1/lance_namespace_urllib3_client-0.8.6-py3-none-any.whl", hash = "sha256:9d78249c3fb15aa3d15d668f78f04a275af3d08d800a7027492f37996ac4968b", size = 369950, upload-time = "2026-06-12T17:36:40.438Z" }, +] + +[[package]] +name = "lancedb" +version = "0.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecation" }, + { name = "lance-namespace" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "overrides", marker = "python_full_version < '3.12'" }, + { name = "packaging" }, + { name = "pyarrow" }, + { name = "pydantic" }, + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/2f/d5a4b2a5bb1f800936c76a6d8a4daf127a86fcab621eeb70b574a5adc774/lancedb-0.33.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:d4eaf6fa7c2eac619208f1d396f4de635ee0f535673067118a31c1181575c48b", size = 48338115, upload-time = "2026-05-28T20:37:55.88Z" }, + { url = "https://files.pythonhosted.org/packages/07/12/31787b93a856b2c31382c7771dc22fb05575b70b87c9efe454269f4f0948/lancedb-0.33.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c6c2402ed2744245ae76c4167c0461da0a7a80f1608e0ec491c1548ea2b4302", size = 51162262, upload-time = "2026-05-28T20:37:59.101Z" }, + { url = "https://files.pythonhosted.org/packages/49/b7/081cc29f8e06bf12191b99ab3fe702aceebdb0914476b821a8c0445cacc8/lancedb-0.33.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ebf1ffad811e6254a93931a79489ba1f21f48564bdfa06abae846f5fcaaf3e8", size = 54381368, upload-time = "2026-05-28T20:38:02.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/bd/e0f4bd621f10ecf96a801b0166e87799ed7ca5a9dbabcef9a6c766a58ef3/lancedb-0.33.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:13da39f80adfea59e5831fe64e4166b2d70a2f843e6507bf644c4fe4c350087c", size = 51188986, upload-time = "2026-05-28T20:38:05.375Z" }, + { url = "https://files.pythonhosted.org/packages/d9/1a/a8647a432ac6aa59cdce1fc061a7050ea4278bcab364539b78af2ecf72d2/lancedb-0.33.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:21b712825f0a00225e8974a41352c4ea84b0899ef8c23b17f672fadc38bd8346", size = 54440958, upload-time = "2026-05-28T20:38:08.474Z" }, + { url = "https://files.pythonhosted.org/packages/08/6c/d0cc8da784cd7ed3b4940a5d1f3e7702e2d99a0a348ba81a376eed782810/lancedb-0.33.0-cp39-abi3-win_amd64.whl", hash = "sha256:4ba78c6202b0f6c2ce8edc7aa470e550d2da56271c7cbdd10428613f1f7126f9", size = 58751944, upload-time = "2026-05-28T20:38:11.549Z" }, +] + [[package]] name = "librt" version = "0.11.0" @@ -864,6 +1147,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/62/b40b382fa0c66fee1478073eb8db352a4a6beda4a1adccf1df911d8c289c/librt-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dee008f20b542e3cd162ba338a7f9ec0f6d23d395f66fe8aeeec3c9d067ea253", size = 102572, upload-time = "2026-05-10T18:17:06.809Z" }, ] +[[package]] +name = "llvmlite" +version = "0.47.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/88/a8952b6d5c21e74cbf158515b779666f692846502623e9e3c39d8e8ba25f/llvmlite-0.47.0.tar.gz", hash = "sha256:62031ce968ec74e95092184d4b0e857e444f8fdff0b8f9213707699570c33ccc", size = 193614, upload-time = "2026-03-31T18:29:53.497Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/f5/a1bde3aa8c43524b0acaf3f72fb3d80a32dd29dbb42d7dc434f84584cdcc/llvmlite-0.47.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41270b0b1310717f717cf6f2a9c68d3c43bd7905c33f003825aebc361d0d1b17", size = 37232772, upload-time = 
"2026-03-31T18:28:12.198Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fb/76d88fc05ee1f9c1a6efe39eb493c4a727e5d1690412469017cd23bcb776/llvmlite-0.47.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f9d118bc1dd7623e0e65ca9ac485ec6dd543c3b77bc9928ddc45ebd34e1e30a7", size = 56275179, upload-time = "2026-03-31T18:28:15.725Z" }, + { url = "https://files.pythonhosted.org/packages/4d/08/29da7f36217abd56a0c389ef9a18bea47960826e691ced1a36c92c6ce93c/llvmlite-0.47.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ea5cfb04a6ab5b18e46be72b41b015975ba5980c4ddb41f1975b83e19031063", size = 55128632, upload-time = "2026-03-31T18:28:19.946Z" }, + { url = "https://files.pythonhosted.org/packages/df/f8/5e12e9ed447d65f04acf6fcf2d79cded2355640b5131a46cee4c99a5949d/llvmlite-0.47.0-cp310-cp310-win_amd64.whl", hash = "sha256:166b896a2262a2039d5fc52df5ee1659bd1ccd081183df7a2fba1b74702dd5ea", size = 38138402, upload-time = "2026-03-31T18:28:23.327Z" }, + { url = "https://files.pythonhosted.org/packages/34/0b/b9d1911cfefa61399821dfb37f486d83e0f42630a8d12f7194270c417002/llvmlite-0.47.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74090f0dcfd6f24ebbef3f21f11e38111c4d7e6919b54c4416e1e357c3446b07", size = 37232770, upload-time = "2026-03-31T18:28:26.765Z" }, + { url = "https://files.pythonhosted.org/packages/46/27/5799b020e4cdfb25a7c951c06a96397c135efcdc21b78d853bbd9c814c7d/llvmlite-0.47.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ca14f02e29134e837982497959a8e2193d6035235de1cb41a9cb2bd6da4eedbb", size = 56275177, upload-time = "2026-03-31T18:28:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/7e/51/48a53fedf01cb1f3f43ef200be17ebf83c8d9a04018d3783c1a226c342c2/llvmlite-0.47.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:12a69d4bb05f402f30477e21eeabe81911e7c251cecb192bed82cd83c9db10d8", size = 55128631, upload-time = 
"2026-03-31T18:28:36.046Z" }, + { url = "https://files.pythonhosted.org/packages/a2/50/59227d06bdc96e23322713c381af4e77420949d8cd8a042c79e0043096cc/llvmlite-0.47.0-cp311-cp311-win_amd64.whl", hash = "sha256:c37d6eb7aaabfa83ab9c2ff5b5cdb95a5e6830403937b2c588b7490724e05327", size = 38138400, upload-time = "2026-03-31T18:28:40.076Z" }, + { url = "https://files.pythonhosted.org/packages/fa/48/4b7fe0e34c169fa2f12532916133e0b219d2823b540733651b34fdac509a/llvmlite-0.47.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:306a265f408c259067257a732c8e159284334018b4083a9e35f67d19792b164f", size = 37232769, upload-time = "2026-03-31T18:28:43.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/4b/e3f2cd17822cf772a4a51a0a8080b0032e6d37b2dbe8cfb724eac4e31c52/llvmlite-0.47.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5853bf26160857c0c2573415ff4efe01c4c651e59e2c55c2a088740acfee51cd", size = 56275178, upload-time = "2026-03-31T18:28:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a3b4a543185305a9bdf3d9759d53646ed96e55e7dfd43f53e7a421b8fbae/llvmlite-0.47.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:003bcf7fa579e14db59c1a1e113f93ab8a06b56a4be31c7f08264d1d4072d077", size = 55128632, upload-time = "2026-03-31T18:28:52.901Z" }, + { url = "https://files.pythonhosted.org/packages/2f/f5/d281ae0f79378a5a91f308ea9fdb9f9cc068fddd09629edc0725a5a8fde1/llvmlite-0.47.0-cp312-cp312-win_amd64.whl", hash = "sha256:f3079f25bdc24cd9d27c4b2b5e68f5f60c4fdb7e8ad5ee2b9b006007558f9df7", size = 38138692, upload-time = "2026-03-31T18:28:57.147Z" }, + { url = "https://files.pythonhosted.org/packages/77/6f/4615353e016799f80fa52ccb270a843c413b22361fadda2589b2922fb9b0/llvmlite-0.47.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a3c6a735d4e1041808434f9d440faa3d78d9b4af2ee64d05a66f351883b6ceec", size = 37232771, upload-time = "2026-03-31T18:29:01.324Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/b8/69f5565f1a280d032525878a86511eebed0645818492feeb169dfb20ae8e/llvmlite-0.47.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2699a74321189e812d476a43d6d7f652f51811e7b5aad9d9bba842a1c7927acb", size = 56275178, upload-time = "2026-03-31T18:29:05.748Z" }, + { url = "https://files.pythonhosted.org/packages/d6/da/b32cafcb926fb0ce2aa25553bf32cb8764af31438f40e2481df08884c947/llvmlite-0.47.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c6951e2b29930227963e53ee152441f0e14be92e9d4231852102d986c761e40", size = 55128632, upload-time = "2026-03-31T18:29:11.235Z" }, + { url = "https://files.pythonhosted.org/packages/46/9f/4898b44e4042c60fafcb1162dfb7014f6f15b1ec19bf29cfea6bf26df90d/llvmlite-0.47.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2e9adf8698d813a9a5efb2d4370caf344dbc1e145019851fee6a6f319ba760e", size = 38138695, upload-time = "2026-03-31T18:29:15.43Z" }, + { url = "https://files.pythonhosted.org/packages/1c/d4/33c8af00f0bf6f552d74f3a054f648af2c5bc6bece97972f3bfadce4f5ec/llvmlite-0.47.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:de966c626c35c9dff5ae7bf12db25637738d0df83fc370cf793bc94d43d92d14", size = 37232773, upload-time = "2026-03-31T18:29:19.453Z" }, + { url = "https://files.pythonhosted.org/packages/64/1d/a760e993e0c0ba6db38d46b9f48f6c7dceb8ac838824997fb9e25f97bc04/llvmlite-0.47.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ddbccff2aeaff8670368340a158abefc032fe9b3ccf7d9c496639263d00151aa", size = 56275176, upload-time = "2026-03-31T18:29:24.149Z" }, + { url = "https://files.pythonhosted.org/packages/84/3b/e679bc3b29127182a7f4aa2d2e9e5bea42adb93fb840484147d59c236299/llvmlite-0.47.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4a7b778a2e144fc64468fb9bf509ac1226c9813a00b4d7afea5d988c4e22fca", size = 55128631, upload-time = "2026-03-31T18:29:29.536Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/f7/19e2a09c62809c9e63bbd14ce71fb92c6ff7b7b3045741bb00c781efc3c9/llvmlite-0.47.0-cp314-cp314-win_amd64.whl", hash = "sha256:694e3c2cdc472ed2bd8bd4555ca002eec4310961dd58ef791d508f57b5cc4c94", size = 39153826, upload-time = "2026-03-31T18:29:33.681Z" }, + { url = "https://files.pythonhosted.org/packages/40/a1/581a8c707b5e80efdbbe1dd94527404d33fe50bceb71f39d5a7e11bd57b7/llvmlite-0.47.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:92ec8a169a20b473c1c54d4695e371bde36489fc1efa3688e11e99beba0abf9c", size = 37232772, upload-time = "2026-03-31T18:29:37.952Z" }, + { url = "https://files.pythonhosted.org/packages/11/03/16090dd6f74ba2b8b922276047f15962fbeea0a75d5601607edb301ba945/llvmlite-0.47.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa1cbd800edd3b20bc141521f7fd45a6185a5b84109aa6855134e81397ffe72b", size = 56275178, upload-time = "2026-03-31T18:29:42.58Z" }, + { url = "https://files.pythonhosted.org/packages/f5/cb/0abf1dd4c5286a95ffe0c1d8c67aec06b515894a0dd2ac97f5e27b82ab0b/llvmlite-0.47.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6725179b89f03b17dabe236ff3422cb8291b4c1bf40af152826dfd34e350ae8", size = 55128632, upload-time = "2026-03-31T18:29:46.939Z" }, + { url = "https://files.pythonhosted.org/packages/4f/79/d3bbab197e86e0ff4f9c07122895b66a3e0d024247fcff7f12c473cb36d9/llvmlite-0.47.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6842cf6f707ec4be3d985a385ad03f72b2d724439e118fcbe99b2929964f0453", size = 39153839, upload-time = "2026-03-31T18:29:51.004Z" }, +] + +[[package]] +name = "loguru" +version = "0.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", 
hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, +] + [[package]] name = "markdown-it-py" version = "4.2.0" @@ -878,7 +1206,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.27.1" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -896,9 +1224,9 @@ dependencies = [ { name = "typing-inspection" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/83/d1efe7c2980d8a3afa476f4e3d42d53dd54c0ab94c27bee5d755b45c8b73/mcp-1.27.1.tar.gz", hash = "sha256:0f47e1820f8f8f941466b39749eb1d1839a04caddca2bc60e9d46e8a99914924", size = 608458, upload-time = "2026-05-08T16:50:12.601Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/ee/94c6c50ffc5b5cf4737052275d11b57367f32d1a8516e31dcd60591b3916/mcp-1.28.0.tar.gz", hash = "sha256:559d3f9943674cafbe5744c5d3794f3237e8b47f9bbc58e20c0fad680d8487c2", size = 636040, upload-time = "2026-06-16T21:37:17.996Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/73/42d9596facebdb533b7f0b86c1b0364ef350d1f8ba78b1052e8a58b48b65/mcp-1.27.1-py3-none-any.whl", hash = "sha256:1af3c4203b329430fde7a87b4fcb6392a041f5cb851fd68fc674016ab4e7c06f", size = 216260, upload-time = "2026-05-08T16:50:10.547Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e1/4c1dc1fbb688641a712d34650c3d58bbbdcb314ddb75bc5817bbf33515a4/mcp-1.28.0-py3-none-any.whl", hash = "sha256:9c1e7cf3a9125557e418ecd4fed8e9adddce81b0dfdae4d6601d700f5beb71a4", size = 221959, upload-time = "2026-06-16T21:37:16.579Z" }, ] [[package]] @@ 
-910,13 +1238,136 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mmh3" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/1a/edb23803a168f070ded7a3014c6d706f63b90c84ccc024f89d794a3b7a6d/mmh3-5.2.1.tar.gz", hash = "sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad", size = 33775, upload-time = "2026-03-05T15:55:57.716Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/bb/88ee54afa5644b0f35ab5b435f208394feb963e5bb47c4e404deb625ffa4/mmh3-5.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5d87a3584093e1a89987e3d36d82c98d9621b2cb944e22a420aa1401e096758f", size = 56080, upload-time = "2026-03-05T15:53:40.452Z" }, + { url = "https://files.pythonhosted.org/packages/cc/bf/5404c2fd6ac84819e8ff1b7e34437b37cf55a2b11318894909e7bb88de3f/mmh3-5.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30e4d2084df019880d55f6f7bea35328d9b464ebee090baa372c096dc77556fb", size = 40462, upload-time = "2026-03-05T15:53:41.751Z" }, + { url = "https://files.pythonhosted.org/packages/de/0b/52bffad0b52ae4ea53e222b594bd38c08ecac1fc410323220a7202e43da5/mmh3-5.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bbc17250b10d3466875a40a52520a6bac3c02334ca709207648abd3c223ed5c", size = 40077, upload-time = "2026-03-05T15:53:42.753Z" }, + { url = "https://files.pythonhosted.org/packages/a0/9e/326c93d425b9fa4cbcdc71bc32aaba520db37577d632a24d25d927594eca/mmh3-5.2.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:76219cd1eefb9bf4af7856e3ae563d15158efa145c0aab01e9933051a1954045", size = 95302, upload-time = "2026-03-05T15:53:43.867Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/b1/e20d5f0d19c4c0f3df213fa7dcfa0942c4fb127d38e11f398ae8ddf6cccc/mmh3-5.2.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb9d44c25244e11c8be3f12c938ca8ba8404620ef8092245d2093c6ab3df260f", size = 101174, upload-time = "2026-03-05T15:53:45.194Z" }, + { url = "https://files.pythonhosted.org/packages/7f/4a/1a9bb3e33c18b1e1cee2c249a3053c4d4d9c93ecb30738f39a62249a7e86/mmh3-5.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d5d542bf2abd0fd0361e8017d03f7cb5786214ceb4a40eef1539d6585d93386", size = 103979, upload-time = "2026-03-05T15:53:46.334Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/dab9ee7545429e7acdd38d23d0104471d31de09a0c695f1b751e0ff34532/mmh3-5.2.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:08043f7cb1fb9467c3fbbbaea7896986e7fbc81f4d3fd9289a73d9110ab6207a", size = 110898, upload-time = "2026-03-05T15:53:47.443Z" }, + { url = "https://files.pythonhosted.org/packages/72/08/408f11af7fe9e76b883142bb06536007cc7f237be2a5e9ad4e837716e627/mmh3-5.2.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:add7ac388d1e0bf57259afbcf9ed05621a3bf11ce5ee337e7536f1e1aaf056b0", size = 118308, upload-time = "2026-03-05T15:53:49.1Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/0551be7fe0000736d9ad12ffa1f130d7a0c17b49193d6dc41c82bd9404c6/mmh3-5.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:41105377f6282e8297f182e393a79cfffd521dde37ace52b106373bdcd9ca5cb", size = 101671, upload-time = "2026-03-05T15:53:50.317Z" }, + { url = "https://files.pythonhosted.org/packages/44/17/6e4f80c4e6ad590139fa2017c3aeca54e7cc9ef68e08aa142a0c90f40a97/mmh3-5.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3cb61db880ec11e984348227b333259994c2c85caa775eb7875decb3768db890", size = 96682, upload-time = 
"2026-03-05T15:53:51.48Z" }, + { url = "https://files.pythonhosted.org/packages/ad/a7/b82fccd38c1fa815de72e94ebe9874562964a10e21e6c1bc3b01d3f15a0e/mmh3-5.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e8b5378de2b139c3a830f0209c1e91f7705919a4b3e563a10955104f5097a70a", size = 110287, upload-time = "2026-03-05T15:53:52.68Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a1/2644069031c8cec0be46f0346f568a53f42fddd843f03cc890306699c1e2/mmh3-5.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e904f2417f0d6f6d514f3f8b836416c360f306ddaee1f84de8eef1e722d212e5", size = 111899, upload-time = "2026-03-05T15:53:53.791Z" }, + { url = "https://files.pythonhosted.org/packages/51/7b/6614f3eb8fb33f931fa7616c6d477247e48ec6c5082b02eeeee998cffa94/mmh3-5.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f1fbb0a99125b1287c6d9747f937dc66621426836d1a2d50d05aecfc81911b57", size = 100078, upload-time = "2026-03-05T15:53:55.234Z" }, + { url = "https://files.pythonhosted.org/packages/27/9a/dd4d5a5fb893e64f71b42b69ecae97dd78db35075412488b24036bc5599c/mmh3-5.2.1-cp310-cp310-win32.whl", hash = "sha256:b4cce60d0223074803c9dbe0721ad3fa51dafe7d462fee4b656a1aa01ee07518", size = 40756, upload-time = "2026-03-05T15:53:56.319Z" }, + { url = "https://files.pythonhosted.org/packages/c9/34/0b25889450f8aeffcec840aa73251e853f059c1b72ed1d1c027b956f95f5/mmh3-5.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:6f01f044112d43a20be2f13a11683666d87151542ad627fe41a18b9791d2802f", size = 41519, upload-time = "2026-03-05T15:53:57.41Z" }, + { url = "https://files.pythonhosted.org/packages/fd/31/8fd42e3c526d0bcb1db7f569c0de6729e180860a0495e387a53af33c2043/mmh3-5.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:7501e9be34cb21e72fcfe672aafd0eee65c16ba2afa9dcb5500a587d3a0580f0", size = 39285, upload-time = "2026-03-05T15:53:58.697Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/d7/3312a59df3c1cdd783f4cf0c4ee8e9decff9c5466937182e4cc7dbbfe6c5/mmh3-5.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dae0f0bd7d30c0ad61b9a504e8e272cb8391eed3f1587edf933f4f6b33437450", size = 56082, upload-time = "2026-03-05T15:53:59.702Z" }, + { url = "https://files.pythonhosted.org/packages/61/96/6f617baa098ca0d2989bfec6d28b5719532cd8d8848782662f5b755f657f/mmh3-5.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9aeaf53eaa075dd63e81512522fd180097312fb2c9f476333309184285c49ce0", size = 40458, upload-time = "2026-03-05T15:54:01.548Z" }, + { url = "https://files.pythonhosted.org/packages/c1/b4/9cd284bd6062d711e13d26c04d4778ab3f690c1c38a4563e3c767ec8802e/mmh3-5.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0634581290e6714c068f4aa24020acf7880927d1f0084fa753d9799ae9610082", size = 40079, upload-time = "2026-03-05T15:54:02.743Z" }, + { url = "https://files.pythonhosted.org/packages/f6/09/a806334ce1d3d50bf782b95fcee8b3648e1e170327d4bb7b4bad2ad7d956/mmh3-5.2.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080c0637aea036f35507e803a4778f119a9b436617694ae1c5c366805f1e997", size = 97242, upload-time = "2026-03-05T15:54:04.536Z" }, + { url = "https://files.pythonhosted.org/packages/ee/93/723e317dd9e041c4dc4566a2eb53b01ad94de31750e0b834f1643905e97c/mmh3-5.2.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db0562c5f71d18596dcd45e854cf2eeba27d7543e1a3acdafb7eef728f7fe85d", size = 103082, upload-time = "2026-03-05T15:54:06.387Z" }, + { url = "https://files.pythonhosted.org/packages/61/b5/f96121e69cc48696075071531cf574f112e1ffd08059f4bffb41210e6fc5/mmh3-5.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d9f9a3ce559a5267014b04b82956993270f63ec91765e13e9fd73daf2d2738e", size = 106054, upload-time = "2026-03-05T15:54:07.506Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/49/192b987ec48d0b2aecf8ac285a9b11fbc00030f6b9c694664ae923458dde/mmh3-5.2.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:960b1b3efa39872ac8b6cc3a556edd6fb90ed74f08c9c45e028f1005b26aa55d", size = 112910, upload-time = "2026-03-05T15:54:09.403Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a1/03e91fd334ed0144b83343a76eb11f17434cd08f746401488cfeafb2d241/mmh3-5.2.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d30b650595fdbe32366b94cb14f30bb2b625e512bd4e1df00611f99dc5c27fd4", size = 120551, upload-time = "2026-03-05T15:54:10.587Z" }, + { url = "https://files.pythonhosted.org/packages/93/b9/b89a71d2ff35c3a764d1c066c7313fc62c7cc48fa48a4b3b0304a4a0146f/mmh3-5.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82f3802bfc4751f420d591c5c864de538b71cea117fce67e4595c2afede08a15", size = 99096, upload-time = "2026-03-05T15:54:11.76Z" }, + { url = "https://files.pythonhosted.org/packages/36/b5/613772c1c6ed5f7b63df55eb131e887cc43720fec392777b95a79d34e640/mmh3-5.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:915e7a2418f10bd1151b1953df06d896db9783c9cfdb9a8ee1f9b3a4331ab503", size = 98524, upload-time = "2026-03-05T15:54:13.122Z" }, + { url = "https://files.pythonhosted.org/packages/5e/0e/1524566fe8eaf871e4f7bc44095929fcd2620488f402822d848df19d679c/mmh3-5.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:fc78739b5ec6e4fb02301984a3d442a91406e7700efbe305071e7fd1c78278f2", size = 106239, upload-time = "2026-03-05T15:54:14.601Z" }, + { url = "https://files.pythonhosted.org/packages/04/94/21adfa7d90a7a697137ad6de33eeff6445420ca55e433a5d4919c79bc3b5/mmh3-5.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:41aac7002a749f08727cb91babff1daf8deac317c0b1f317adc69be0e6c375d1", size = 109797, upload-time = "2026-03-05T15:54:15.819Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/e6/1aacc3a219e1aa62fa65669995d4a3562b35be5200ec03680c7e4bec9676/mmh3-5.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9d8089d853c7963a8ce87fff93e2a67075c0bc08684a08ea6ad13577c38ffc38", size = 97228, upload-time = "2026-03-05T15:54:16.992Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b9/5e4cca8dcccf298add0a27f3c357bc8cf8baf821d35cdc6165e4bd5a48b0/mmh3-5.2.1-cp311-cp311-win32.whl", hash = "sha256:baeb47635cb33375dee4924cd93d7f5dcaa786c740b08423b0209b824a1ee728", size = 40751, upload-time = "2026-03-05T15:54:18.714Z" }, + { url = "https://files.pythonhosted.org/packages/72/fc/5b11d49247f499bcda591171e9cf3b6ee422b19e70aa2cef2e0ae65ca3b9/mmh3-5.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:1e4ecee40ba19e6975e1120829796770325841c2f153c0e9aecca927194c6a2a", size = 41517, upload-time = "2026-03-05T15:54:19.764Z" }, + { url = "https://files.pythonhosted.org/packages/8a/5f/2a511ee8a1c2a527c77726d5231685b72312c5a1a1b7639ad66a9652aa84/mmh3-5.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:c302245fd6c33d96bd169c7ccf2513c20f4c1e417c07ce9dce107c8bc3f8411f", size = 39287, upload-time = "2026-03-05T15:54:20.904Z" }, + { url = "https://files.pythonhosted.org/packages/92/94/bc5c3b573b40a328c4d141c20e399039ada95e5e2a661df3425c5165fd84/mmh3-5.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0cc21533878e5586b80d74c281d7f8da7932bc8ace50b8d5f6dbf7e3935f63f1", size = 56087, upload-time = "2026-03-05T15:54:21.92Z" }, + { url = "https://files.pythonhosted.org/packages/f6/80/64a02cc3e95c3af0aaa2590849d9ed24a9f14bb93537addde688e039b7c3/mmh3-5.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4eda76074cfca2787c8cf1bec603eaebdddd8b061ad5502f85cddae998d54f00", size = 40500, upload-time = "2026-03-05T15:54:22.953Z" }, + { url = "https://files.pythonhosted.org/packages/8b/72/e6d6602ce18adf4ddcd0e48f2e13590cc92a536199e52109f46f259d3c46/mmh3-5.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:eee884572b06bbe8a2b54f424dbd996139442cf83c76478e1ec162512e0dd2c7", size = 40034, upload-time = "2026-03-05T15:54:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/59/c2/bf4537a8e58e21886ef16477041238cab5095c836496e19fafc34b7445d2/mmh3-5.2.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d0b7e803191db5f714d264044e06189c8ccd3219e936cc184f07106bd17fd7b", size = 97292, upload-time = "2026-03-05T15:54:25.335Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e2/51ed62063b44d10b06d975ac87af287729eeb5e3ed9772f7584a17983e90/mmh3-5.2.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e6c219e375f6341d0959af814296372d265a8ca1af63825f65e2e87c618f006", size = 103274, upload-time = "2026-03-05T15:54:26.44Z" }, + { url = "https://files.pythonhosted.org/packages/75/ce/12a7524dca59eec92e5b31fdb13ede1e98eda277cf2b786cf73bfbc24e81/mmh3-5.2.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26fb5b9c3946bf7f1daed7b37e0c03898a6f062149127570f8ede346390a0825", size = 106158, upload-time = "2026-03-05T15:54:28.578Z" }, + { url = "https://files.pythonhosted.org/packages/86/1f/d3ba6dd322d01ab5d44c46c8f0c38ab6bbbf9b5e20e666dfc05bf4a23604/mmh3-5.2.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3c38d142c706201db5b2345166eeef1e7740e3e2422b470b8ba5c8727a9b4c7a", size = 113005, upload-time = "2026-03-05T15:54:29.767Z" }, + { url = "https://files.pythonhosted.org/packages/b6/a9/15d6b6f913294ea41b44d901741298e3718e1cb89ee626b3694625826a43/mmh3-5.2.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50885073e2909251d4718634a191c49ae5f527e5e1736d738e365c3e8be8f22b", size = 120744, upload-time = "2026-03-05T15:54:30.931Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/b3/70b73923fd0284c439860ff5c871b20210dfdbe9a6b9dd0ee6496d77f174/mmh3-5.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3f99e1756fc48ad507b95e5d86f2fb21b3d495012ff13e6592ebac14033f166", size = 99111, upload-time = "2026-03-05T15:54:32.353Z" }, + { url = "https://files.pythonhosted.org/packages/dd/38/99f7f75cd27d10d8b899a1caafb9d531f3903e4d54d572220e3d8ac35e89/mmh3-5.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:62815d2c67f2dd1be76a253d88af4e1da19aeaa1820146dec52cf8bee2958b16", size = 98623, upload-time = "2026-03-05T15:54:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/fd/68/6e292c0853e204c44d2f03ea5f090be3317a0e2d9417ecb62c9eb27687df/mmh3-5.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8f767ba0911602ddef289404e33835a61168314ebd3c729833db2ed685824211", size = 106437, upload-time = "2026-03-05T15:54:35.177Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c6/fedd7284c459cfb58721d461fcf5607a4c1f5d9ab195d113d51d10164d16/mmh3-5.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:67e41a497bac88cc1de96eeba56eeb933c39d54bc227352f8455aa87c4ca4000", size = 110002, upload-time = "2026-03-05T15:54:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ac/ca8e0c19a34f5b71390171d2ff0b9f7f187550d66801a731bb68925126a4/mmh3-5.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d74a03fb57757ece25aa4b3c1c60157a1cece37a020542785f942e2f827eed5", size = 97507, upload-time = "2026-03-05T15:54:37.804Z" }, + { url = "https://files.pythonhosted.org/packages/df/94/6ebb9094cfc7ac5e7950776b9d13a66bb4a34f83814f32ba2abc9494fc68/mmh3-5.2.1-cp312-cp312-win32.whl", hash = "sha256:7374d6e3ef72afe49697ecd683f3da12f4fc06af2d75433d0580c6746d2fa025", size = 40773, upload-time = "2026-03-05T15:54:40.077Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/cd3527198cf159495966551c84a5f36805a10ac17b294f41f67b83f6a4d6/mmh3-5.2.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:3a9fed49c6ce4ed7e73f13182760c65c816da006debe67f37635580dfb0fae00", size = 41560, upload-time = "2026-03-05T15:54:41.148Z" }, + { url = "https://files.pythonhosted.org/packages/15/96/6fe5ebd0f970a076e3ed5512871ce7569447b962e96c125528a2f9724470/mmh3-5.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:bbfcb95d9a744e6e2827dfc66ad10e1020e0cac255eb7f85652832d5a264c2fc", size = 39313, upload-time = "2026-03-05T15:54:42.171Z" }, + { url = "https://files.pythonhosted.org/packages/25/a5/9daa0508a1569a54130f6198d5462a92deda870043624aa3ea72721aa765/mmh3-5.2.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:723b2681ed4cc07d3401bbea9c201ad4f2a4ca6ba8cddaff6789f715dd2b391e", size = 40832, upload-time = "2026-03-05T15:54:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6b/3230c6d80c1f4b766dedf280a92c2241e99f87c1504ff74205ec8cebe451/mmh3-5.2.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:3619473a0e0d329fd4aec8075628f8f616be2da41605300696206d6f36920c3d", size = 41964, upload-time = "2026-03-05T15:54:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/62/fb/648bfddb74a872004b6ee751551bfdda783fe6d70d2e9723bad84dbe5311/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6", size = 39114, upload-time = "2026-03-05T15:54:45.205Z" }, + { url = "https://files.pythonhosted.org/packages/95/c2/ab7901f87af438468b496728d11264cb397b3574d41506e71b92128e0373/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f", size = 39819, upload-time = "2026-03-05T15:54:46.509Z" }, + { url = "https://files.pythonhosted.org/packages/2f/ed/6f88dda0df67de1612f2e130ffea34cf84aaee5bff5b0aff4dbff2babe34/mmh3-5.2.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8", size = 40330, upload-time = "2026-03-05T15:54:47.864Z" }, + { url 
= "https://files.pythonhosted.org/packages/3d/66/7516d23f53cdf90f43fce24ab80c28f45e6851d78b46bef8c02084edf583/mmh3-5.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6", size = 56078, upload-time = "2026-03-05T15:54:48.9Z" }, + { url = "https://files.pythonhosted.org/packages/bc/34/4d152fdf4a91a132cb226b671f11c6b796eada9ab78080fb5ce1e95adaab/mmh3-5.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:19bbd3b841174ae6ed588536ab5e1b1fe83d046e668602c20266547298d939a9", size = 40498, upload-time = "2026-03-05T15:54:49.942Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4c/8e3af1b6d85a299767ec97bd923f12b06267089c1472c27c1696870d1175/mmh3-5.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03", size = 40033, upload-time = "2026-03-05T15:54:50.994Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/966ea560e32578d453c9e9db53d602cbb1d0da27317e232afa7c38ceba11/mmh3-5.2.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fd96476f04db5ceba1cfa0f21228f67c1f7402296f0e73fee3513aa680ad237b", size = 97320, upload-time = "2026-03-05T15:54:52.072Z" }, + { url = "https://files.pythonhosted.org/packages/bb/0d/2c5f9893b38aeb6b034d1a44ecd55a010148054f6a516abe53b5e4057297/mmh3-5.2.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5", size = 103299, upload-time = "2026-03-05T15:54:53.569Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/2ebaef4a4d4376f89761274dc274035ffd96006ab496b4ee5af9b08f21a9/mmh3-5.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593", size = 106222, upload-time = "2026-03-05T15:54:55.092Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/09/ea7ffe126d0ba0406622602a2d05e1e1a6841cc92fc322eb576c95b27fad/mmh3-5.2.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4", size = 113048, upload-time = "2026-03-05T15:54:56.305Z" }, + { url = "https://files.pythonhosted.org/packages/85/57/9447032edf93a64aa9bef4d9aa596400b1756f40411890f77a284f6293ca/mmh3-5.2.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1", size = 120742, upload-time = "2026-03-05T15:54:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/53/82/a86cc87cc88c92e9e1a598fee509f0409435b57879a6129bf3b3e40513c7/mmh3-5.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:169e0d178cb59314456ab30772429a802b25d13227088085b0d49b9fe1533104", size = 99132, upload-time = "2026-03-05T15:54:58.583Z" }, + { url = "https://files.pythonhosted.org/packages/54/f7/6b16eb1b40ee89bb740698735574536bc20d6cdafc65ae702ea235578e05/mmh3-5.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e4e1f580033335c6f76d1e0d6b56baf009d1a64d6a4816347e4271ba951f46d", size = 98686, upload-time = "2026-03-05T15:55:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/e8/88/a601e9f32ad1410f438a6d0544298ea621f989bd34a0731a7190f7dec799/mmh3-5.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f", size = 106479, upload-time = "2026-03-05T15:55:01.532Z" }, + { url = "https://files.pythonhosted.org/packages/d6/5c/ce29ae3dfc4feec4007a437a1b7435fb9507532a25147602cd5b52be86db/mmh3-5.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2", size = 110030, upload-time = "2026-03-05T15:55:02.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/30/ae444ef2ff87c805d525da4fa63d27cda4fe8a48e77003a036b8461cfd5c/mmh3-5.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a", size = 97536, upload-time = "2026-03-05T15:55:04.135Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f9/dc3787ee5c813cc27fe79f45ad4500d9b5437f23a7402435cc34e07c7718/mmh3-5.2.1-cp313-cp313-win32.whl", hash = "sha256:54b64fb2433bc71488e7a449603bf8bd31fbcf9cb56fbe1eb6d459e90b86c37b", size = 40769, upload-time = "2026-03-05T15:55:05.277Z" }, + { url = "https://files.pythonhosted.org/packages/43/67/850e0b5a1e97799822ebfc4ca0e8c6ece3ed8baf7dcdf64de817dfdda2ca/mmh3-5.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:cae6383181f1e345317742d2ddd88f9e7d2682fa4c9432e3a74e47d92dce0229", size = 41563, upload-time = "2026-03-05T15:55:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/98c90b28e1da5458e19fbfaf4adb5289208d3bfccd45dd14eab216a2f0bb/mmh3-5.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:022aa1a528604e6c83d0a7705fdef0b5355d897a9e0fa3a8d26709ceaa06965d", size = 39310, upload-time = "2026-03-05T15:55:07.323Z" }, + { url = "https://files.pythonhosted.org/packages/63/b4/65bc1fb2bb7f83e91c30865023b1847cf89a5f237165575e8c83aa536584/mmh3-5.2.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:d771f085fcdf4035786adfb1d8db026df1eb4b41dac1c3d070d1e49512843227", size = 40794, upload-time = "2026-03-05T15:55:09.773Z" }, + { url = "https://files.pythonhosted.org/packages/c4/86/7168b3d83be8eb553897b1fac9da8bbb06568e5cfe555ffc329ebb46f59d/mmh3-5.2.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:7f196cd7910d71e9d9860da0ff7a77f64d22c1ad931f1dd18559a06e03109fc0", size = 41923, upload-time = "2026-03-05T15:55:10.924Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9b/b653ab611c9060ce8ff0ba25c0226757755725e789292f3ca138a58082cd/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = 
"sha256:b1f12bd684887a0a5d55e6363ca87056f361e45451105012d329b86ec19dbe0b", size = 39131, upload-time = "2026-03-05T15:55:11.961Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b4/5a2e0d34ab4d33543f01121e832395ea510132ea8e52cdf63926d9d81754/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d106493a60dcb4aef35a0fac85105e150a11cf8bc2b0d388f5a33272d756c966", size = 39825, upload-time = "2026-03-05T15:55:13.013Z" }, + { url = "https://files.pythonhosted.org/packages/bd/69/81699a8f39a3f8d368bec6443435c0c392df0d200ad915bf0d222b588e03/mmh3-5.2.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:44983e45310ee5b9f73397350251cdf6e63a466406a105f1d16cb5baa659270b", size = 40344, upload-time = "2026-03-05T15:55:14.026Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b3/71c8c775807606e8fd8acc5c69016e1caf3200d50b50b6dd4b40ce10b76c/mmh3-5.2.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:368625fb01666655985391dbad3860dc0ba7c0d6b9125819f3121ee7292b4ac8", size = 56291, upload-time = "2026-03-05T15:55:15.137Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/2c24517d4b2ce9e4917362d24f274d3d541346af764430249ddcc4cb3a08/mmh3-5.2.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:72d1cc63bcc91e14933f77d51b3df899d6a07d184ec515ea7f56bff659e124d7", size = 40575, upload-time = "2026-03-05T15:55:16.518Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b9/e4a360164365ac9f07a25f0f7928e3a66eb9ecc989384060747aa170e6aa/mmh3-5.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e8b4b5580280b9265af3e0409974fb79c64cf7523632d03fbf11df18f8b0181e", size = 40052, upload-time = "2026-03-05T15:55:17.735Z" }, + { url = "https://files.pythonhosted.org/packages/97/ca/120d92223a7546131bbbc31c9174168ee7a73b1366f5463ffe69d9e691fe/mmh3-5.2.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4cbbde66f1183db040daede83dd86c06d663c5bb2af6de1142b7c8c37923dd74", size = 97311, 
upload-time = "2026-03-05T15:55:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/b6/71/c1a60c1652b8813ef9de6d289784847355417ee0f2980bca002fe87f4ae5/mmh3-5.2.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8ff038d52ef6aa0f309feeba00c5095c9118d0abf787e8e8454d6048db2037fc", size = 103279, upload-time = "2026-03-05T15:55:20.448Z" }, + { url = "https://files.pythonhosted.org/packages/48/29/ad97f4be1509cdcb28ae32c15593ce7c415db47ace37f8fad35b493faa9a/mmh3-5.2.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4130d0b9ce5fad6af07421b1aecc7e079519f70d6c05729ab871794eded8617", size = 106290, upload-time = "2026-03-05T15:55:21.6Z" }, + { url = "https://files.pythonhosted.org/packages/77/29/1f86d22e281bd8827ba373600a4a8b0c0eae5ca6aa55b9a8c26d2a34decc/mmh3-5.2.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e0bfe77d238308839699944164b96a2eeccaf55f2af400f54dc20669d8d5f2", size = 113116, upload-time = "2026-03-05T15:55:22.826Z" }, + { url = "https://files.pythonhosted.org/packages/a7/7c/339971ea7ed4c12d98f421f13db3ea576a9114082ccb59d2d1a0f00ccac1/mmh3-5.2.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f963eafc0a77a6c0562397da004f5876a9bcf7265a7bcc3205e29636bc4a1312", size = 120740, upload-time = "2026-03-05T15:55:24.3Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/3c7c4bdb8e926bb3c972d1e2907d77960c1c4b250b41e8366cf20c6e4373/mmh3-5.2.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92883836caf50d5255be03d988d75bc93e3f86ba247b7ca137347c323f731deb", size = 99143, upload-time = "2026-03-05T15:55:25.456Z" }, + { url = "https://files.pythonhosted.org/packages/df/0a/33dd8706e732458c8375eae63c981292de07a406bad4ec03e5269654aa2c/mmh3-5.2.1-cp314-cp314-musllinux_1_2_i686.whl", hash = 
"sha256:57b52603e89355ff318025dd55158f6e71396c0f1f609d548e9ea9c94cc6ce0a", size = 98703, upload-time = "2026-03-05T15:55:26.723Z" }, + { url = "https://files.pythonhosted.org/packages/51/04/76bbce05df76cbc3d396f13b2ea5b1578ef02b6a5187e132c6c33f99d596/mmh3-5.2.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f40a95186a72fa0b67d15fef0f157bfcda00b4f59c8a07cbe5530d41ac35d105", size = 106484, upload-time = "2026-03-05T15:55:28.214Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8f/c6e204a2c70b719c1f62ffd9da27aef2dddcba875ea9c31ca0e87b975a46/mmh3-5.2.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:58370d05d033ee97224c81263af123dea3d931025030fd34b61227a768a8858a", size = 110012, upload-time = "2026-03-05T15:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/7181efd8e39db386c1ebc3e6b7d1f702a09d7c1197a6f2742ed6b5c16597/mmh3-5.2.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7be6dfb49e48fd0a7d91ff758a2b51336f1cd21f9d44b20f6801f072bd080cdd", size = 97508, upload-time = "2026-03-05T15:55:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/42/0f/afa7ca2615fd85e1469474bb860e381443d0b868c083b62b41cb1d7ca32f/mmh3-5.2.1-cp314-cp314-win32.whl", hash = "sha256:54fe8518abe06a4c3852754bfd498b30cc58e667f376c513eac89a244ce781a4", size = 41387, upload-time = "2026-03-05T15:55:32.403Z" }, + { url = "https://files.pythonhosted.org/packages/71/0d/46d42a260ee1357db3d486e6c7a692e303c017968e14865e00efa10d09fc/mmh3-5.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:3f796b535008708846044c43302719c6956f39ca2d93f2edda5319e79a29efbb", size = 42101, upload-time = "2026-03-05T15:55:33.646Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7b/848a8378059d96501a41159fca90d6a99e89736b0afbe8e8edffeac8c74b/mmh3-5.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:cd471ede0d802dd936b6fab28188302b2d497f68436025857ca72cd3810423fe", size = 39836, upload-time = "2026-03-05T15:55:35.026Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/61/1dabea76c011ba8547c25d30c91c0ec22544487a8750997a27a0c9e1180b/mmh3-5.2.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:5174a697ce042fa77c407e05efe41e03aa56dae9ec67388055820fb48cf4c3ba", size = 57727, upload-time = "2026-03-05T15:55:36.162Z" }, + { url = "https://files.pythonhosted.org/packages/b7/32/731185950d1cf2d5e28979cc8593016ba1619a295faba10dda664a4931b5/mmh3-5.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0a3984146e414684a6be2862d84fcb1035f4984851cb81b26d933bab6119bf00", size = 41308, upload-time = "2026-03-05T15:55:37.254Z" }, + { url = "https://files.pythonhosted.org/packages/76/aa/66c76801c24b8c9418b4edde9b5e57c75e72c94e29c48f707e3962534f18/mmh3-5.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:bd6e7d363aa93bd3421b30b6af97064daf47bc96005bddba67c5ffbc6df426b8", size = 40758, upload-time = "2026-03-05T15:55:38.61Z" }, + { url = "https://files.pythonhosted.org/packages/9e/bb/79a1f638a02f0ae389f706d13891e2fbf7d8c0a22ecde67ba828951bb60a/mmh3-5.2.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:113f78e7463a36dbbcea05bfe688efd7fa759d0f0c56e73c974d60dcfec3dfcc", size = 109670, upload-time = "2026-03-05T15:55:40.13Z" }, + { url = "https://files.pythonhosted.org/packages/26/94/8cd0e187a288985bcfc79bf5144d1d712df9dee74365f59d26e3a1865be6/mmh3-5.2.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e8ec5f606e0809426d2440e0683509fb605a8820a21ebd120dcdba61b74ef7f", size = 117399, upload-time = "2026-03-05T15:55:42.076Z" }, + { url = "https://files.pythonhosted.org/packages/42/94/dfea6059bd5c5beda565f58a4096e43f4858fb6d2862806b8bbd12cbb284/mmh3-5.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22b0f9971ec4e07e8223f2beebe96a6cfc779d940b6f27d26604040dd74d3a44", size = 120386, upload-time = "2026-03-05T15:55:43.481Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/cb/f9c45e62aaa67220179f487772461d891bb582bb2f9783c944832c60efd9/mmh3-5.2.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85ffc9920ffc39c5eee1e3ac9100c913a0973996fbad5111f939bbda49204bb7", size = 125924, upload-time = "2026-03-05T15:55:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/a5/83/fe54a4a7c11bc9f623dfc1707decd034245602b076dfc1dcc771a4163170/mmh3-5.2.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7aec798c2b01aaa65a55f1124f3405804184373abb318a3091325aece235f67c", size = 135280, upload-time = "2026-03-05T15:55:45.866Z" }, + { url = "https://files.pythonhosted.org/packages/97/67/fe7e9e9c143daddd210cd22aef89cbc425d58ecf238d2b7d9eb0da974105/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:55dbbd8ffbc40d1697d5e2d0375b08599dae8746b0b08dea05eee4ce81648fac", size = 110050, upload-time = "2026-03-05T15:55:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/43/c4/6d4b09fcbef80794de447c9378e39eefc047156b290fa3dd2d5257ca8227/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6c85c38a279ca9295a69b9b088a2e48aa49737bb1b34e6a9dc6297c110e8d912", size = 111158, upload-time = "2026-03-05T15:55:48.239Z" }, + { url = "https://files.pythonhosted.org/packages/81/a6/ca51c864bdb30524beb055a6d8826db3906af0834ec8c41d097a6e8573d5/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:6290289fa5fb4c70fd7f72016e03633d60388185483ff3b162912c81205ae2cf", size = 116890, upload-time = "2026-03-05T15:55:49.405Z" }, + { url = "https://files.pythonhosted.org/packages/cc/04/5a1fe2e2ad843d03e89af25238cbc4f6840a8bb6c4329a98ab694c71deda/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4fc6cd65dc4d2fdb2625e288939a3566e36127a84811a4913f02f3d5931da52d", size = 123121, upload-time = "2026-03-05T15:55:50.61Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/4d/3c820c6f4897afd25905270a9f2330a23f77a207ea7356f7aadace7273c0/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:623f938f6a039536cc02b7582a07a080f13fdfd48f87e63201d92d7e34d09a18", size = 110187, upload-time = "2026-03-05T15:55:52.143Z" }, + { url = "https://files.pythonhosted.org/packages/21/54/1d71cd143752361c0aebef16ad3f55926a6faf7b112d355745c1f8a25f7f/mmh3-5.2.1-cp314-cp314t-win32.whl", hash = "sha256:29bc3973676ae334412efdd367fcd11d036b7be3efc1ce2407ef8676dabfeb82", size = 41934, upload-time = "2026-03-05T15:55:53.564Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e4/63a2a88f31d93dea03947cccc2a076946857e799ea4f7acdecbf43b324aa/mmh3-5.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:28cfab66577000b9505a0d068c731aee7ca85cd26d4d63881fab17857e0fe1fb", size = 43036, upload-time = "2026-03-05T15:55:55.252Z" }, + { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, +] + [[package]] name = "more-itertools" -version = "11.0.2" +version = "11.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/f7/139d22fef48ac78127d18e01d80cf1be40236ae489769d17f35c3d425293/more_itertools-11.0.2.tar.gz", hash = "sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804", size = 144659, upload-time = "2026-04-09T15:01:33.297Z" } +sdist = { url = "https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/cb/98/6af411189d9413534c3eb691182bff1f5c6d44ed2f93f2edfe52a1bbceb8/more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4", size = 71939, upload-time = "2026-04-09T15:01:32.21Z" }, + { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] [[package]] @@ -987,6 +1438,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "narwhals" +version = "2.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/62/3c/c4ef2164a71c1a63d7f1ae411c4082c5fa872405106db60a4b7114989ad7/narwhals-2.22.1.tar.gz", hash = "sha256:d62920805a0a43b7ff8b54b0c0d3142d796f8a9301836ada37e573d6a33cbcd9", size = 647493, upload-time = 
"2026-06-05T12:34:34.051Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/ca/36339329c4604adbcc99c899b7eb1ce1a555c499b6a6860757dc9bfed36d/narwhals-2.22.1-py3-none-any.whl", hash = "sha256:60567d774edf77db53906f89d9fbd164e66e56d66d388e1e6990f17ac33cfb53", size = 454815, upload-time = "2026-06-05T12:34:32.289Z" }, +] + [[package]] name = "nh3" version = "0.3.5" @@ -1030,6 +1490,278 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, ] +[[package]] +name = "numba" +version = "0.65.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "llvmlite" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/c5/db2ac3685833d626c0dcae6bd2330cd68433e1fd248d15f70998160d3ad7/numba-0.65.1.tar.gz", hash = "sha256:19357146c32fe9ed25059ab915e8465fb13951cf6b0aace3826b76886373ab23", size = 2765600, upload-time = "2026-04-24T02:02:56.551Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/1b/3c5a7daf683a95465bf23504bcd1a2d5db8cd5e5e276ca87505d020dffe9/numba-0.65.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9d993ed0a257aa4116e6f553f114004bcfdee540c7276ab8ea48f650d514c452", size = 2680870, upload-time = "2026-04-24T02:02:10.623Z" }, + { url = "https://files.pythonhosted.org/packages/0f/a4/1831836814018a898e7d252aebe09c0f3ce1f26d145b68264b4ae0be6822/numba-0.65.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:5f098109f361681e57295f7e84d8ab2426902539a141811de0703ace52826981", size = 3739780, upload-time = "2026-04-24T02:02:13.097Z" }, + { url = "https://files.pythonhosted.org/packages/9c/1b/a813ddc81def09e257d2b1f67521982ce4b06204a87268796ffc8187271c/numba-0.65.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:973fd8173f2312815e6b7aaae887c4ce8a817eeff46a4f8840b828305b75bc95", size = 3446722, upload-time = "2026-04-24T02:02:15.083Z" }, + { url = "https://files.pythonhosted.org/packages/09/52/ee1d8b3becda384fe0552221641e05aa668a35e8a77470db4db7f6475000/numba-0.65.1-cp310-cp310-win_amd64.whl", hash = "sha256:c63aa0c4193694026452da55d0ef9d85156c1a7a333454c103bb30dec81b7bf8", size = 2747539, upload-time = "2026-04-24T02:02:16.79Z" }, + { url = "https://files.pythonhosted.org/packages/96/b3/650500c2eab4534d98e9166f4298e0f3c69c742afdf24e6eabccd1f16ad8/numba-0.65.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7020d74b19cdb8cff16506542fdd510756e28c5e7f3bd0b7f574f0f42272fcd9", size = 2680563, upload-time = "2026-04-24T02:02:18.414Z" }, + { url = "https://files.pythonhosted.org/packages/44/0b/0615dbedb98f5b32a35a53290fbdc6e22306968109278d7e58df82d7a9f6/numba-0.65.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f80ed83774b5173abd6581cd8d2165d1d38e13d2e5c8155c0c0b421784745420", size = 3745018, upload-time = "2026-04-24T02:02:20.252Z" }, + { url = "https://files.pythonhosted.org/packages/49/aa/4361698f35bf63bff67dfe6c90493731177f48ede954f77b0588731537bc/numba-0.65.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ed425a43b0a5f9772f2f4e2dd0bbd12eabecae1af0b24efcfd4e053f012aac6", size = 3450962, upload-time = "2026-04-24T02:02:22.449Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9a/af61ec03b3116c161fd7a06b9e8a265729a8718458333e8ffbb06d9a3978/numba-0.65.1-cp311-cp311-win_amd64.whl", hash = "sha256:df40a5028a975b9ea66f6a2a3f7abbdbd541a863070e34ed367aff21141248e4", size = 
2747417, upload-time = "2026-04-24T02:02:24.43Z" }, + { url = "https://files.pythonhosted.org/packages/57/bc/76f8f8c5cf9adee47fdb7bbb03be8900f76f902d451d7477cf12b845e1de/numba-0.65.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ac3f1e77c352dd0ea9712732c2d8f9ca507717435eec5b5013bf138ac33c4a08", size = 2681371, upload-time = "2026-04-24T02:02:26.105Z" }, + { url = "https://files.pythonhosted.org/packages/69/47/a415af0283e4db0398104c6d1c11c9861a98dc67a7aa442a7769ed5d6196/numba-0.65.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:52bc6f3ceb8fcaff9b2ae26b4c6b1e9fee39db8d355534c0fe4f39a901246b84", size = 3802467, upload-time = "2026-04-24T02:02:27.712Z" }, + { url = "https://files.pythonhosted.org/packages/46/36/246f73ec99cfeab2f2cb2ce7d4218766cc36a2da418901223f4f4da9c813/numba-0.65.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90ca10b3463bae0bd70589726fe3c77d01d6b5fc86bee54bcdf9fb6b47c28977", size = 3502628, upload-time = "2026-04-24T02:02:29.763Z" }, + { url = "https://files.pythonhosted.org/packages/db/9e/3c679b2ee078425b9e99a91e44f8d132a6830d8ccce5227bc5e9181aeed8/numba-0.65.1-cp312-cp312-win_amd64.whl", hash = "sha256:5971c632be2a2351500431f46213821dba8d02b18a9f7d02fd36bd2743e41a6a", size = 2750611, upload-time = "2026-04-24T02:02:31.477Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/14a4579049c1eb673afd0de0cb4842982acd55b9ce2643e763db858bcea0/numba-0.65.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1735c15c1134a5108b4d6a5c77fc0947924ea066a738dc09a52008c13df9cad3", size = 2681344, upload-time = "2026-04-24T02:02:33.65Z" }, + { url = "https://files.pythonhosted.org/packages/a0/22/b8d873f6466b20aa563fc9b33acd48dec89a07803ddaa2f1c8ca1cd33126/numba-0.65.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c09f49117ef255e1f1c6dad0c7a1ed39868243862a73be5706793241a3755f1b", size = 3810619, upload-time = "2026-04-24T02:02:36.041Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/08/e16a8b5d9a018962ebb5c66be662317cde32b9f5dab08441f90bed5522fb/numba-0.65.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:594a8680b3fadac99e97e489b1fd89007177e5336713745c3b769528c635a464", size = 3509783, upload-time = "2026-04-24T02:02:38.245Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a5/03c970d57f4c1741354837353ce39fb5206952ae1dba8922d29c86f64805/numba-0.65.1-cp313-cp313-win_amd64.whl", hash = "sha256:85be74c0d036842699a30058f82fb88fc5ffdc59f7615cab5792ea92914c9b62", size = 2750534, upload-time = "2026-04-24T02:02:39.903Z" }, + { url = "https://files.pythonhosted.org/packages/4f/2e/8aed9b726d9ba5f11ad287645fd479e88278db3060a25cb1225d730eb2b7/numba-0.65.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:33f5eb68eb1c843511615d14663ce60258525d6a4c65ab040e2c2b0c4cf17450", size = 2681554, upload-time = "2026-04-24T02:02:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/87/96/f3eb235fafa82a34e2ab5dd7dc9ffff998ebf5f0bbc23fa56a96aeb44da6/numba-0.65.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71e73029bf53a62cc6afcf96be4bd942290d8b4c55f0a454fb536158115790f7", size = 3779602, upload-time = "2026-04-24T02:02:43.726Z" }, + { url = "https://files.pythonhosted.org/packages/09/90/b0f09b48752d23640b8284f22aa597737e8adaddc7fbfacc4708b7f73a4c/numba-0.65.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a07635e0be926b9bdbffb09137c230fb13f6ec0e564914ba937cee12ce3eb35", size = 3479532, upload-time = "2026-04-24T02:02:45.427Z" }, + { url = "https://files.pythonhosted.org/packages/56/46/3f7fc04fb853559e74b210e0b62c19974ec844cefec611f9e535f4da3761/numba-0.65.1-cp314-cp314-win_amd64.whl", hash = "sha256:2a20fcdabdefbdacf88d85caf70c3b18c4bcb7ebb8f82e6a19486383dd26ab63", size = 2752637, upload-time = "2026-04-24T02:02:47.664Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/7b/c1a341a9067367778f4152a5f01061cf281fb09582c92c510ec4918cabf6/numba-0.65.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:548dd4b3a4508d5062768d1514b2cd7b015f9a25ec7af651c50dee243965e652", size = 2684600, upload-time = "2026-04-24T02:02:49.653Z" }, + { url = "https://files.pythonhosted.org/packages/03/36/98ddbcf3e4f04a6dd07e1c67249955920579ba4af6bb6868e3088f4ed282/numba-0.65.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78abc28feff2c2ff8307fff3975b6438352759c9acb797ecd6b1fb6e7e39e31d", size = 3817198, upload-time = "2026-04-24T02:02:51.266Z" }, + { url = "https://files.pythonhosted.org/packages/a3/83/0dad21057ece5a835599f5d24099b091703995e23dbbf894f259e91c010b/numba-0.65.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee7676cb389555805f9b9a1840cbcd1ea6c8bd5376ab6918e3a29c5ea1dbda20", size = 3533862, upload-time = "2026-04-24T02:02:52.987Z" }, + { url = "https://files.pythonhosted.org/packages/32/36/8be7118ffd4c8440881046eac3d0982cc5ab42909508cf5d67024d62a2e4/numba-0.65.1-cp314-cp314t-win_amd64.whl", hash = "sha256:20609346e3bd75204950dcbbfe383a8d7dbf4902f442aedbf00f97fef4aa8f38", size = 2758237, upload-time = "2026-04-24T02:02:54.612Z" }, +] + +[[package]] +name = "numpy" +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 
21165245, upload-time = "2025-05-17T21:27:58.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, + { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, + { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = 
"sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = "2025-05-17T21:33:50.273Z" }, + { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = 
"2025-05-17T21:34:09.135Z" }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", 
hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, 
upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, + { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/ad/fed0499ce6a338d2a03ebae59cd15093910c8875328855781952abf6c2fe/numpy-2.4.6.tar.gz", hash = "sha256:f3a3570c4a2a16746ac2c31a7c7c7b0c186b95ce902e33db6f28094ed7387dda", size = 20735807, upload-time = "2026-05-18T23:37:14.07Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/49/ec46835a70be8fa6446c495126ac84fdb28cb2558e1620ffb87a10c8b64c/numpy-2.4.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0280e0356c0829a18d9de1cb7eee50ec22ca639878d7240307ca0943d73cd2c4", size = 16969194, upload-time = "2026-05-18T23:33:13.503Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/0d/f5957185c0ee2f3e12f78715aa9e3b353fd83633316c8532b38faa37e3f6/numpy-2.4.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:110f8b71aacb688ec69062bb7f6938a0f8acb01b7c1c4beb453c65b6d234584d", size = 14964111, upload-time = "2026-05-18T23:33:17.795Z" }, + { url = "https://files.pythonhosted.org/packages/ad/40/40a40ee0ddf7ceb782c49af278894b686e586d65d8c1889c8b5da01a3d7d/numpy-2.4.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4cfe66903cc32a9921a6733d96b19bb6abf310397581bbad89c228f5abaf0ee8", size = 5469159, upload-time = "2026-05-18T23:33:20.654Z" }, + { url = "https://files.pythonhosted.org/packages/63/13/f9a8046535cb21deae82f8d03de9617e08882d274fad2539630761888228/numpy-2.4.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8155154c7c691289fe18f510b5d4657c68c67989f293f0535a91360392ff6538", size = 6798936, upload-time = "2026-05-18T23:33:22.987Z" }, + { url = "https://files.pythonhosted.org/packages/33/a8/6fa8c1a345a8c85dbb21932c447bee07c30a2c2a3f31e369c0a84b300147/numpy-2.4.6-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ab0a9c4ffb1a6d95ef519fe4247dba8eb6b18ad93999f76b7f657039acabd47", size = 15966692, upload-time = "2026-05-18T23:33:26.62Z" }, + { url = "https://files.pythonhosted.org/packages/02/03/74fe2a4cb3817d94d86402f2506554130a2f01414e299b5a843e5a8a957f/numpy-2.4.6-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89cd468399cfd2504718f0ba50e410dca55a170b61a02ad92bb18c8a65186e93", size = 16918164, upload-time = "2026-05-18T23:33:29.955Z" }, + { url = "https://files.pythonhosted.org/packages/c5/80/3615be3313f7e7696609bc194b9f0101da809df79e859bdb84e0cd043f46/numpy-2.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c2d37ab77531417474168eb79d6d80b14f821a966818505d03013d0833edb7a8", size = 17322877, upload-time = "2026-05-18T23:33:34.724Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/ac/a691e0fe2675e370d0e08ff905adc49a1c8830e8cae03efe4477e92cd55d/numpy-2.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f407cb6b8e9d6d8c626bc73c945db1706035af8fd632295547bf1c9e46d092d6", size = 18651487, upload-time = "2026-05-18T23:33:38.217Z" }, + { url = "https://files.pythonhosted.org/packages/15/a7/9bc1cd626d7bf6869bfedf27b91b6ab5dd607758bf8e959d6fa80c6a59cb/numpy-2.4.6-cp311-cp311-win32.whl", hash = "sha256:ddea102b48f9e339f3948bf22040944184627a30fdf7f858667673b9c5f033c8", size = 6233945, upload-time = "2026-05-18T23:33:41.331Z" }, + { url = "https://files.pythonhosted.org/packages/c5/31/7fc6239c12bce7e931463251cca4426c465e1876ba3cc785402ef4dd8f4e/numpy-2.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:1e254a00cdf42b1e4d5b3d68d33af63268d41340d8885df2ab6470f2e1500147", size = 12608406, upload-time = "2026-05-18T23:33:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/27/83/140f85a466595a16382996a1bf06b2b54bcd597488921b0c9daaeeda72af/numpy-2.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:ed9749eef4cbd126da3dc1d6bcb3a57f5eb7ac6a6484146bdbf743f552dfc577", size = 10479528, upload-time = "2026-05-18T23:33:50.725Z" }, + { url = "https://files.pythonhosted.org/packages/95/2a/3d7b5ac8aac24feaf9ad7ed58f45b0bbc06d37e4338ae84c9f2298b570f9/numpy-2.4.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:001fbb8e08d942dd57599e781f2472269ee7f2755fae407b4f67b2f0b17da3f1", size = 16689119, upload-time = "2026-05-18T23:33:54.065Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/92c4c131527599e8288d6918e888d88726f84d805d784b771f32408aeaef/numpy-2.4.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ebfb099f8dcf083deef3ac1ca4c1503f387cf76296fcb3816b66f5ecb5f54fdb", size = 14699246, upload-time = "2026-05-18T23:33:57.621Z" }, + { url = "https://files.pythonhosted.org/packages/ad/fe/c0a6b7b2ca128a8fb228575147073b660656734b8ebe4d76c8fd748dcc79/numpy-2.4.6-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:3213d622a0283a39a93d188f3cf72b26862df52fbb4ca3697f51705016523d41", size = 5204410, upload-time = "2026-05-18T23:34:00.302Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d4/9770d14ba719432bb90a421bfd443872ed0f70f7264b64bec12ea363d5fd/numpy-2.4.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:357cc07a6d7b0b182ff02249616a03742827ebb1277546b5c7cd7f7620a45698", size = 6551240, upload-time = "2026-05-18T23:34:02.852Z" }, + { url = "https://files.pythonhosted.org/packages/c9/c6/50a46a6205feba2343f1d6d17438107c5dc491ed1c736e6ea68689fd906b/numpy-2.4.6-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f9fb9157b4ce2971008323afe46053787b526ef624fea915b261468a8421a0f", size = 15671012, upload-time = "2026-05-18T23:34:05.485Z" }, + { url = "https://files.pythonhosted.org/packages/99/60/14115e6364fa676c5397c2ad3004e527e9aa487abf5d0706ec81bbd08529/numpy-2.4.6-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90f9849678c75fe7afa2d348ac842c168b0a4d3d61919687216dfc547976d853", size = 16645538, upload-time = "2026-05-18T23:34:09.265Z" }, + { url = "https://files.pythonhosted.org/packages/ae/c5/693cbe59e57db94d2231fa519ca3978dc9e19da5a8f088588f5c6e947ff2/numpy-2.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1a2af6c6ef86344a6b0db6b97834208bf598db514f2b155042439b62605601a", size = 17020706, upload-time = "2026-05-18T23:34:13.053Z" }, + { url = "https://files.pythonhosted.org/packages/ef/fc/85b7c4eff9b4966ade25c2273cf7e7012e92366c032058653934b37de044/numpy-2.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e5805d5a22fd19c8ccff10a9561f9df94436b0545619ea579db2d3c35294bce2", size = 18368541, upload-time = "2026-05-18T23:34:17.024Z" }, + { url = "https://files.pythonhosted.org/packages/f6/81/e1b27545deedce7f4a0b348618c6b62d74e36a4dc9ccd42f3eb2f85eee32/numpy-2.4.6-cp312-cp312-win32.whl", hash = "sha256:e3eeb0aabd6bd5ce64faae67e9935203a6991b4bc2a485a767fbafb2c5125f45", size = 5962825, 
upload-time = "2026-05-18T23:34:20.3Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ca/feab00bd44aa5fe1ad2c18f08b4d3bb92e26484b0b1d1443897809ed528c/numpy-2.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:d8e8286dd7cea7895157318d1b91cdacac64c479f3cbc8dce548331728484751", size = 12321687, upload-time = "2026-05-18T23:34:23.095Z" }, + { url = "https://files.pythonhosted.org/packages/63/cf/5a6d34850a39d1093558564f77ee8e8e0bee5061151b8f05a55711001ec7/numpy-2.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:4081eb135ac24158bd51cdfbef16f1c64df7063b1143f24731387137c092bec8", size = 10221482, upload-time = "2026-05-18T23:34:25.876Z" }, + { url = "https://files.pythonhosted.org/packages/fb/82/bdab26d7438c6791ca31b7c024ca37c1eab8b726ba236129005cd4a06e45/numpy-2.4.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:511dbaf848decaaaf4b4ca48032619fb3138710c4bf7da7617765edad1ef96b0", size = 16684648, upload-time = "2026-05-18T23:34:29.41Z" }, + { url = "https://files.pythonhosted.org/packages/1b/30/a80189bcc7f5e4258b3fbc3968d909d1756f54d023299ecc39ad6fdb9ef8/numpy-2.4.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf162abab1c1a736333192707cef898e735a5ca00f38f27eeedf44b39d9e85eb", size = 14693902, upload-time = "2026-05-18T23:34:33.013Z" }, + { url = "https://files.pythonhosted.org/packages/97/12/70b5d0d7c15e1ebb8a6a84a8caa1d19e181d84fb58bb6d70aca29099dec1/numpy-2.4.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:043191bfa8eab18c776647b62723ac9dddece59743b13f49b2016094129c2b3f", size = 5198992, upload-time = "2026-05-18T23:34:36.132Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8c/ebd2a8f8a83541f8d38cc5667e8c2b69cecfd30da6e45693e8158857d44b/numpy-2.4.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:6180d8b35af935aed8ece3a85e0a43f87393ae0ac87c8d2c8bd2c993f7270ef3", size = 6546944, upload-time = "2026-05-18T23:34:38.484Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/c5/7b863a97a91671a0338f4253bd3b5a3d3852f0692dae91711c9f4a10e787/numpy-2.4.6-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72fbe16c6fac95aedf5937fa873445cec2110be35d8a4e9433d7501fd98dae6b", size = 15669392, upload-time = "2026-05-18T23:34:41.257Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9d/3584b9984ca4c047aea75214ce1a4c4c73d849bd71b604264b7f5653f8a8/numpy-2.4.6-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7830bab239b79cda9c08c2da014761cafb48da6150e1da17ac06283f43b6089", size = 16633220, upload-time = "2026-05-18T23:34:45.075Z" }, + { url = "https://files.pythonhosted.org/packages/05/ae/7c67fba23bd98caec7c99261f3a16072ade14813486b0282cb29846de832/numpy-2.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ef4aea96ce4d3b074422cb4f2f64e216bf9e213004bb58ecfdf50ea02ea8eb9a", size = 17020800, upload-time = "2026-05-18T23:34:49.065Z" }, + { url = "https://files.pythonhosted.org/packages/d9/5d/3b6725cb31d983c5e66916f5d36f6d7e5521129e4c4404d64f918292a5b6/numpy-2.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dfa20cc6ca228e6b155b11da03825975ce66aea520985dbbddf0f2a5a495c605", size = 18357600, upload-time = "2026-05-18T23:34:52.709Z" }, + { url = "https://files.pythonhosted.org/packages/f7/da/2ccc6c2fe8898dee01d90c75c5f5f914a23daf99e3e0f59516a08760c8b5/numpy-2.4.6-cp313-cp313-win32.whl", hash = "sha256:56b39e5e0622a09a25bf5baf62f4bcf0cb8a41ae6e2819cf49bbc5a74c083f91", size = 5961134, upload-time = "2026-05-18T23:34:55.618Z" }, + { url = "https://files.pythonhosted.org/packages/b5/cd/9cc4dc876fb065d5c220aae4d5e14826b2715331bb7618ce1fb07a679d99/numpy-2.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:c4fc99836233ea196540b17ab0983aff60ed07941751930f5f4d05bc3b3b7359", size = 12318598, upload-time = "2026-05-18T23:34:58.928Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/1e/c0bcba1f8694116485fe28fd1be698c278fcda4141c5b0e53a2aed8b12a8/numpy-2.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a7c711e21628b52034bb5ab8d1bce291f752fcc5e92accc615778acee1ff4778", size = 10222272, upload-time = "2026-05-18T23:35:02.167Z" }, + { url = "https://files.pythonhosted.org/packages/63/6d/cc5619247c8f4204e507f5883528372e4ac4bb189e579fb859a12e480b1f/numpy-2.4.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:112b06a867b235ef466ed3508ddf0238050df9c727cafb5301ac385b899189a1", size = 14821197, upload-time = "2026-05-18T23:35:05.468Z" }, + { url = "https://files.pythonhosted.org/packages/00/58/f1c39161c87d9e9bed660f1ed4bafc0e403d5ec9650b6dd77aead07d489b/numpy-2.4.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:eaf7fa2de5c0be8ae6ff8e9bea2ccd725e980541244521d8d4b5f3354a27babe", size = 5326287, upload-time = "2026-05-18T23:35:08.693Z" }, + { url = "https://files.pythonhosted.org/packages/af/57/3917ab0fd97f271a8694513581b8a36c655f111c446852c302f04ccdb6fc/numpy-2.4.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:7265a2f3d436e54ef9f2b52b5c937e6be778781bd97a590319d7348f1c1ca997", size = 6646763, upload-time = "2026-05-18T23:35:11.459Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0f/037e64c494b67581ae18193d770adef354c41f3f2c8ebf865602d949bf8f/numpy-2.4.6-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f74a575920ab21fe304421a3fc28793d82e299cae9eccb37084e9fc7f3617c20", size = 15728070, upload-time = "2026-05-18T23:35:14.79Z" }, + { url = "https://files.pythonhosted.org/packages/21/a6/5d2bae9c9542eb4df16dc9c46dc79c186e9bad53805dfa5399a6023c6db0/numpy-2.4.6-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ede83e07a75dd06bc501566c1eca2afc0d61677c1472ac9ad93fdee6e638a48d", size = 16681752, upload-time = "2026-05-18T23:35:18.836Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/14/23d1dfb410ae362cd59ce53e936b1513d545eb40db3949ced632e19a459e/numpy-2.4.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:68bb27509ac1b9a3443094260f6326150663b06abe40b73a2f81160623da5b67", size = 17086024, upload-time = "2026-05-18T23:35:22.52Z" }, + { url = "https://files.pythonhosted.org/packages/4b/6e/23595a2c642cdf3bc567877064bdd7f91c8b0038a4453cf2daf7248eafe9/numpy-2.4.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a0df0043bdb289bde1f62da130d20df23d58b45429f752bc7a8fc5325a225ecd", size = 18403398, upload-time = "2026-05-18T23:35:26.398Z" }, + { url = "https://files.pythonhosted.org/packages/8a/90/0ac3bc947217e66dec77e7cbc6a1979d1af70b6461b82f620d3bccd5e4c8/numpy-2.4.6-cp313-cp313t-win32.whl", hash = "sha256:29a287e0cf63ff528da061de6b9f64a4618da591ca1046aafc54062e40ca7eab", size = 6084971, upload-time = "2026-05-18T23:35:29.387Z" }, + { url = "https://files.pythonhosted.org/packages/77/71/5673e351671a1d2bd6063b91b44f70c0affea7d1516fa7a6572941ba4aa1/numpy-2.4.6-cp313-cp313t-win_amd64.whl", hash = "sha256:25c692919ac5a01f170a3bfcd62d745b24fd095c353d50812637d6fcab442e75", size = 12458532, upload-time = "2026-05-18T23:35:32.175Z" }, + { url = "https://files.pythonhosted.org/packages/3f/88/19d3503c5046e688f049274b27a3ef3d771152fa80d3ba3d01a3dff61abe/numpy-2.4.6-cp313-cp313t-win_arm64.whl", hash = "sha256:1e978ec1e8bd0e0e4de6bb75de9d30cbb74db6b6a2bb727618613703ca0167dd", size = 10291881, upload-time = "2026-05-18T23:35:35.465Z" }, + { url = "https://files.pythonhosted.org/packages/f8/91/3ab2044d05fd16d343c5ac2e69b127f1b2854040dd20b193257c78028bd3/numpy-2.4.6-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06ca2f61ec4385a07a6977c55ba998a4466c123642b4a32694d3128fce18c079", size = 16683458, upload-time = "2026-05-18T23:35:38.353Z" }, + { url = "https://files.pythonhosted.org/packages/8e/62/764ce66fa4147ae6d73071a3abf804ffe606f174618697c571acdf26a7c9/numpy-2.4.6-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:38efbc8de75c7a0fc1ac190162d892787f3f47b57cc291231aafee36b80982b7", size = 14704559, upload-time = "2026-05-18T23:35:42.14Z" }, + { url = "https://files.pythonhosted.org/packages/60/61/23f27c172f022e04025b7dc2367f4d63c1a398120607ec896228649a6f48/numpy-2.4.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d581b735e177fdcdce6fed8e7e8880a3fb6ee4e3653a3ac6af01c6f4c03effc5", size = 5209716, upload-time = "2026-05-18T23:35:45.377Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/21cf70dc6ea3e3acb95fc53a265b2fc248b981f0194ceb5b475271b8809d/numpy-2.4.6-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:0a041d3d761dc3c35cc56ce0351506a02bcbc25f7b169f652435141a17db9096", size = 6543947, upload-time = "2026-05-18T23:35:47.926Z" }, + { url = "https://files.pythonhosted.org/packages/d5/91/64288395ee1799bd2e0b04a305dce9666da90c961e1f3fe982a05ee1c036/numpy-2.4.6-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40fdc1ae7125e518ea98e53e69a4ebc27e1fd50510c47b7ea130cf21e5e1d42b", size = 15685197, upload-time = "2026-05-18T23:35:50.863Z" }, + { url = "https://files.pythonhosted.org/packages/f3/eb/ebffaa97dc55502df69584a8f0dcf07f69a3e0b3e2323670a2722db9aa39/numpy-2.4.6-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a2c306dea656c12c68f51f4cea133cbe78ca7435eb28c735eac1d3ebe73be6e8", size = 16638245, upload-time = "2026-05-18T23:35:54.752Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0b/54f9da33128d7e350fab89c7455902eeae70349ee52bddb448dc4a576f45/numpy-2.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:33111801a01c12a8a1e3721f0a9232f8cfc8ae2c6b7098167e6f623c6073f402", size = 17036587, upload-time = "2026-05-18T23:35:58.355Z" }, + { url = "https://files.pythonhosted.org/packages/b6/f0/fdebc1052db1cc37c64beb22072d67cd6d1c71adca1299f53dec2b5e20d3/numpy-2.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae506e6902902557576a26ff33eda8695e7ecb3cb36c3b573a0765dee114ebdb", size = 
18363226, upload-time = "2026-05-18T23:36:02.845Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b4/298628d98c72b57e57f7165ae6a481a1deaf6f3c28262a6e4c739c275930/numpy-2.4.6-cp314-cp314-win32.whl", hash = "sha256:aaf159caa35993cb1f56fb9b8e4610d35758e7ca005412eb1daa856a78c9c4b1", size = 6010196, upload-time = "2026-05-18T23:36:05.92Z" }, + { url = "https://files.pythonhosted.org/packages/df/ac/46de6dda46478f7942f839e094970be2d4a861e005c4b3bf07c92e291a09/numpy-2.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:b507f5c4c1d508876d1819b6bf9a49d365b96320b5d4993426b33a23ca4b8261", size = 12450334, upload-time = "2026-05-18T23:36:09.107Z" }, + { url = "https://files.pythonhosted.org/packages/78/92/b8b798ac784102c0da830d2257d59358e3d3d90d1e2b3f2575dad976c5cf/numpy-2.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:6f41ae150c4e32db4f3310cdaf64b1593a03dbabe29eec77fc9b50fe64061df6", size = 10495678, upload-time = "2026-05-18T23:36:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/30/34/ec28d1aa8115971537c01469ab2011ee96827930f0a124de1000cc2a7ed7/numpy-2.4.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ece3d2cfe132e7d51f44a832b303895e6f2d499c5e74dfbdb06ee246147a304a", size = 14823672, upload-time = "2026-05-18T23:36:16.473Z" }, + { url = "https://files.pythonhosted.org/packages/16/bd/f6d1fede4e54e8042a7ff97bb495510f3c220f94bcd9e8b228e87c92cc0d/numpy-2.4.6-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:e3e5193ef5a3dc73bceee50f7fdc2c90dbb76c42df8d8fae3d1067a583df579e", size = 5328731, upload-time = "2026-05-18T23:36:19.767Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f0/e105b9e2fd728a9910103884decd6951d9dd73896b914a98d9a231de02ee/numpy-2.4.6-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:17f9ade344e7d9b464a084d69bcf18fc691cb1db67c62ed80820bf4926d78f0e", size = 6649805, upload-time = "2026-05-18T23:36:22.266Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/dd/1206a7ca6ab15e3f02069707ca96222e202af681bb73756da7527f3cb837/numpy-2.4.6-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cd5ffd25db4e7ba6a375693b3fc0fc1791ec636c17db3720da19bde7180ec43", size = 15730496, upload-time = "2026-05-18T23:36:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/51/e7/38d3ea825dcab85a591734decb2f6c67caa7c8367d374df1a1c3842f9b07/numpy-2.4.6-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d92c3819208a60205a12a245c91ad70cb0a85336659b19b834205573ac8456e", size = 16679616, upload-time = "2026-05-18T23:36:29.652Z" }, + { url = "https://files.pythonhosted.org/packages/93/b7/caabfdf53edf663e0b4eb74d7d405d83baef09eb5e83bcd32d601d72b93e/numpy-2.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e85b752a1e912b70eaad4fafbd4d1238007ab221de2009b9a2f5ae7461239895", size = 17085145, upload-time = "2026-05-18T23:36:33.449Z" }, + { url = "https://files.pythonhosted.org/packages/f9/45/68d7c33a6bcf3e5aa3bdbd57a367e6f615286dfd6482f97e8ffeb734306e/numpy-2.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:29cb7f67d10b479ff07c17d33e39f78c07f71c40ef30d63c153d340e96cd3fb4", size = 18403813, upload-time = "2026-05-18T23:36:37.369Z" }, + { url = "https://files.pythonhosted.org/packages/9c/50/0753655aa844c99cd9e018aacf76f130f1bd81d881bb74bc0aef5d73a8ba/numpy-2.4.6-cp314-cp314t-win32.whl", hash = "sha256:260a5d70215b61ab4fadf5c7baacd64821842975eea312125ed3c39a6391b063", size = 6156982, upload-time = "2026-05-18T23:36:40.817Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d4/7c67becf668f973cb490cec3e98dfd799d866f9c989a54d355672cfa0db6/numpy-2.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:81a1cca95ed5bb92aa8b10dd2cdc9a0d3853a50fad926c28b5d7e8ea54389627", size = 12638908, upload-time = "2026-05-18T23:36:43.996Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/bb/e1c71a4295b1b1d1393d50dbb4f2a36283c6859d9d3892e84f00ec5a91d5/numpy-2.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:0c9136e14ed34a9e343a31c533d78a9813a69a3148332bce5e9821cb2f996e66", size = 10565867, upload-time = "2026-05-18T23:36:47.114Z" }, + { url = "https://files.pythonhosted.org/packages/de/12/b422cc84439adc0d00de605bf4a308890ae5c26f2c71fbd73e5d08fbb0dd/numpy-2.4.6-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:55cced7c52e981362f708ad635198e97a752dfba412cc03c23bbf3bd8d5cd662", size = 16847511, upload-time = "2026-05-18T23:36:50.673Z" }, + { url = "https://files.pythonhosted.org/packages/44/53/f481bef68011740f8849418d82db07230e825013f31f4eef5ba5b805316a/numpy-2.4.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d6da64deb6b8ed903e7560180a92f2d804ee1ba5eeb849ac2748b8c1aba1f6d7", size = 14889064, upload-time = "2026-05-18T23:36:53.879Z" }, + { url = "https://files.pythonhosted.org/packages/7f/57/42ed575c10ced8af951d426bc4e1f8aff16fd851db33f067036215a7f860/numpy-2.4.6-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:68a5124b13fa6cc2086764a20005d30bc0548146f7f5322f02fce212ca14317f", size = 5394157, upload-time = "2026-05-18T23:36:57.194Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ef/f66cc724fcc36c1e364c67f51ae9146090b8b584f27d58b97fdae3edd737/numpy-2.4.6-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:948424b06129ce883307e8cff868c31396d8dc7630a59c61d70d98dbe70f222c", size = 6708728, upload-time = "2026-05-18T23:36:59.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/9c/c531f2293b91265d8b48e9b329f54fdd7ffae73cb4134ea10cca4237e9cc/numpy-2.4.6-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5dbbdb29840ca3d91ee0fece42fc29278886d908280bfec0a5846c6f901a3eb0", size = 15798374, upload-time = "2026-05-18T23:37:02.674Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/b0/413077f6b1153ed3cba361401c6783bbad6114804a000cc22eb71c13e190/numpy-2.4.6-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8ad03c0965fb3c692200e74d458ca28c1dbb4ce96f9a479a8aa041ad5fabca02", size = 16747286, upload-time = "2026-05-18T23:37:06.327Z" }, + { url = "https://files.pythonhosted.org/packages/15/ce/e5ec180bc41812edcd8daeb8639d205622c0e8c02259d8ab25a0201b3c2a/numpy-2.4.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2803abfebfc990042cd494d8ce2d5f82e9d847af6d35ec486923aa19dbad5e73", size = 12504263, upload-time = "2026-05-18T23:37:09.715Z" }, +] + +[[package]] +name = "onnxruntime" +version = "1.23.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "coloredlogs", marker = "python_full_version < '3.11'" }, + { name = "flatbuffers", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "protobuf", marker = "python_full_version < '3.11'" }, + { name = "sympy", marker = "python_full_version < '3.11'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/d6/311b1afea060015b56c742f3531168c1644650767f27ef40062569960587/onnxruntime-1.23.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:a7730122afe186a784660f6ec5807138bf9d792fa1df76556b27307ea9ebcbe3", size = 17195934, upload-time = "2025-10-27T23:06:14.143Z" }, + { url = "https://files.pythonhosted.org/packages/db/db/81bf3d7cecfbfed9092b6b4052e857a769d62ed90561b410014e0aae18db/onnxruntime-1.23.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:b28740f4ecef1738ea8f807461dd541b8287d5650b5be33bca7b474e3cbd1f36", size = 19153079, upload-time = "2025-10-27T23:05:57.686Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/4d/a382452b17cf70a2313153c520ea4c96ab670c996cb3a95cc5d5ac7bfdac/onnxruntime-1.23.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f7d1fe034090a1e371b7f3ca9d3ccae2fabae8c1d8844fb7371d1ea38e8e8d2", size = 15219883, upload-time = "2025-10-22T03:46:21.66Z" }, + { url = "https://files.pythonhosted.org/packages/fb/56/179bf90679984c85b417664c26aae4f427cba7514bd2d65c43b181b7b08b/onnxruntime-1.23.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ca88747e708e5c67337b0f65eed4b7d0dd70d22ac332038c9fc4635760018f7", size = 17370357, upload-time = "2025-10-22T03:46:57.968Z" }, + { url = "https://files.pythonhosted.org/packages/cd/6d/738e50c47c2fd285b1e6c8083f15dac1a5f6199213378a5f14092497296d/onnxruntime-1.23.2-cp310-cp310-win_amd64.whl", hash = "sha256:0be6a37a45e6719db5120e9986fcd30ea205ac8103fd1fb74b6c33348327a0cc", size = 13467651, upload-time = "2025-10-27T23:06:11.904Z" }, + { url = "https://files.pythonhosted.org/packages/44/be/467b00f09061572f022ffd17e49e49e5a7a789056bad95b54dfd3bee73ff/onnxruntime-1.23.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:6f91d2c9b0965e86827a5ba01531d5b669770b01775b23199565d6c1f136616c", size = 17196113, upload-time = "2025-10-22T03:47:33.526Z" }, + { url = "https://files.pythonhosted.org/packages/9f/a8/3c23a8f75f93122d2b3410bfb74d06d0f8da4ac663185f91866b03f7da1b/onnxruntime-1.23.2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:87d8b6eaf0fbeb6835a60a4265fde7a3b60157cf1b2764773ac47237b4d48612", size = 19153857, upload-time = "2025-10-22T03:46:37.578Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d8/506eed9af03d86f8db4880a4c47cd0dffee973ef7e4f4cff9f1d4bcf7d22/onnxruntime-1.23.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bbfd2fca76c855317568c1b36a885ddea2272c13cb0e395002c402f2360429a6", size = 15220095, upload-time = "2025-10-22T03:46:24.769Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/80/113381ba832d5e777accedc6cb41d10f9eca82321ae31ebb6bcede530cea/onnxruntime-1.23.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da44b99206e77734c5819aa2142c69e64f3b46edc3bd314f6a45a932defc0b3e", size = 17372080, upload-time = "2025-10-22T03:47:00.265Z" }, + { url = "https://files.pythonhosted.org/packages/3a/db/1b4a62e23183a0c3fe441782462c0ede9a2a65c6bbffb9582fab7c7a0d38/onnxruntime-1.23.2-cp311-cp311-win_amd64.whl", hash = "sha256:902c756d8b633ce0dedd889b7c08459433fbcf35e9c38d1c03ddc020f0648c6e", size = 13468349, upload-time = "2025-10-22T03:47:25.783Z" }, + { url = "https://files.pythonhosted.org/packages/1b/9e/f748cd64161213adeef83d0cb16cb8ace1e62fa501033acdd9f9341fff57/onnxruntime-1.23.2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:b8f029a6b98d3cf5be564d52802bb50a8489ab73409fa9db0bf583eabb7c2321", size = 17195929, upload-time = "2025-10-22T03:47:36.24Z" }, + { url = "https://files.pythonhosted.org/packages/91/9d/a81aafd899b900101988ead7fb14974c8a58695338ab6a0f3d6b0100f30b/onnxruntime-1.23.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:218295a8acae83905f6f1aed8cacb8e3eb3bd7513a13fe4ba3b2664a19fc4a6b", size = 19157705, upload-time = "2025-10-22T03:46:40.415Z" }, + { url = "https://files.pythonhosted.org/packages/3c/35/4e40f2fba272a6698d62be2cd21ddc3675edfc1a4b9ddefcc4648f115315/onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76ff670550dc23e58ea9bc53b5149b99a44e63b34b524f7b8547469aaa0dcb8c", size = 15226915, upload-time = "2025-10-22T03:46:27.773Z" }, + { url = "https://files.pythonhosted.org/packages/ef/88/9cc25d2bafe6bc0d4d3c1db3ade98196d5b355c0b273e6a5dc09c5d5d0d5/onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f9b4ae77f8e3c9bee50c27bc1beede83f786fe1d52e99ac85aa8d65a01e9b77", size = 17382649, upload-time = "2025-10-22T03:47:02.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/b4/569d298f9fc4d286c11c45e85d9ffa9e877af12ace98af8cab52396e8f46/onnxruntime-1.23.2-cp312-cp312-win_amd64.whl", hash = "sha256:25de5214923ce941a3523739d34a520aac30f21e631de53bba9174dc9c004435", size = 13470528, upload-time = "2025-10-22T03:47:28.106Z" }, + { url = "https://files.pythonhosted.org/packages/3d/41/fba0cabccecefe4a1b5fc8020c44febb334637f133acefc7ec492029dd2c/onnxruntime-1.23.2-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:2ff531ad8496281b4297f32b83b01cdd719617e2351ffe0dba5684fb283afa1f", size = 17196337, upload-time = "2025-10-22T03:46:35.168Z" }, + { url = "https://files.pythonhosted.org/packages/fe/f9/2d49ca491c6a986acce9f1d1d5fc2099108958cc1710c28e89a032c9cfe9/onnxruntime-1.23.2-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:162f4ca894ec3de1a6fd53589e511e06ecdc3ff646849b62a9da7489dee9ce95", size = 19157691, upload-time = "2025-10-22T03:46:43.518Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a1/428ee29c6eaf09a6f6be56f836213f104618fb35ac6cc586ff0f477263eb/onnxruntime-1.23.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45d127d6e1e9b99d1ebeae9bcd8f98617a812f53f46699eafeb976275744826b", size = 15226898, upload-time = "2025-10-22T03:46:30.039Z" }, + { url = "https://files.pythonhosted.org/packages/f2/2b/b57c8a2466a3126dbe0a792f56ad7290949b02f47b86216cd47d857e4b77/onnxruntime-1.23.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bace4e0d46480fbeeb7bbe1ffe1f080e6663a42d1086ff95c1551f2d39e7872", size = 17382518, upload-time = "2025-10-22T03:47:05.407Z" }, + { url = "https://files.pythonhosted.org/packages/4a/93/aba75358133b3a941d736816dd392f687e7eab77215a6e429879080b76b6/onnxruntime-1.23.2-cp313-cp313-win_amd64.whl", hash = "sha256:1f9cc0a55349c584f083c1c076e611a7c35d5b867d5d6e6d6c823bf821978088", size = 13470276, upload-time = "2025-10-22T03:47:31.193Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/3d/6830fa61c69ca8e905f237001dbfc01689a4e4ab06147020a4518318881f/onnxruntime-1.23.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9d2385e774f46ac38f02b3a91a91e30263d41b2f1f4f26ae34805b2a9ddef466", size = 15229610, upload-time = "2025-10-22T03:46:32.239Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ca/862b1e7a639460f0ca25fd5b6135fb42cf9deea86d398a92e44dfda2279d/onnxruntime-1.23.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e2b9233c4947907fd1818d0e581c049c41ccc39b2856cc942ff6d26317cee145", size = 17394184, upload-time = "2025-10-22T03:47:08.127Z" }, +] + +[[package]] +name = "onnxruntime" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "flatbuffers", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "protobuf", marker = "python_full_version >= '3.11'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/e4/5353d7e09ced4a8f473f843223fc75d726b2b5519dcefc12f22a6c92852d/onnxruntime-1.27.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:8ba14a38c570087f3cdb8cfba33f7a38a1e826c1e5b29e17c28ceda0cc910016", size = 18416484, upload-time = "2026-06-15T22:43:43.894Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1f/a2117aa3f144fce88774efa37440d0ca72d0c9144854dfc0961f2b04c6fc/onnxruntime-1.27.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2eb083321af8a236a84c7c140a7f4cecbfa2a987a18c07c78db471c20cd390ef", size = 16419330, upload-time 
= "2026-06-15T22:42:37.58Z" }, + { url = "https://files.pythonhosted.org/packages/e0/cd/74bb804170ceb622fda9111df31a07b3024f7491472256d3a90b5391a4d2/onnxruntime-1.27.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4f7b0e90d2d212e2c2deaa6c8291616183ab815d3ec558ea12d3ac8b26d36f4", size = 18636930, upload-time = "2026-06-15T22:43:01.584Z" }, + { url = "https://files.pythonhosted.org/packages/fe/8f/5b8e2b85e81735696887175dbaf6409f215683f5ca9d4928fbb038211d32/onnxruntime-1.27.0-cp311-cp311-win_amd64.whl", hash = "sha256:ff050e4f6bf7f12918fa14dcb047c0b02e295f35e86d42532552be4b3d54e977", size = 13356110, upload-time = "2026-06-15T22:43:32.172Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3a/4f568de678126b6a371a93862f015a82138359decd97fcac61fc84b5b774/onnxruntime-1.27.0-cp311-cp311-win_arm64.whl", hash = "sha256:75fbc1e1fb43a39a856c8209c544cca7817b5de7ac16b15b1bdf55d1cc67b9df", size = 13098635, upload-time = "2026-06-15T22:43:19.607Z" }, + { url = "https://files.pythonhosted.org/packages/c3/b7/dd3a524ed93a820dff1af902d0412957ab12499953333e9daa01af5bc480/onnxruntime-1.27.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a14c2ce45312def86b77aea651f46565e45960cf5f0721bfdff449165086ab76", size = 18433506, upload-time = "2026-06-15T22:43:47.026Z" }, + { url = "https://files.pythonhosted.org/packages/84/86/c3b6b17745a1997d784dadc9bd88d713d2e6721139a5a0e885b28cfb79b1/onnxruntime-1.27.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fddce0539a4898c7bef35b052ffd37935b2190e35488eab99ce91887743ea1", size = 16438140, upload-time = "2026-06-15T22:42:40.666Z" }, + { url = "https://files.pythonhosted.org/packages/26/81/24dd9b31b0fb912ee19ca53ac1c9764bfd79d58a2ccef564eb693be831a5/onnxruntime-1.27.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c65a7438632d55dfbc8a02ee60bd6cf7dd9d1ba05a43d4b851452f32338e194", size = 18658316, upload-time = "2026-06-15T22:43:04.012Z" }, + { url 
= "https://files.pythonhosted.org/packages/4f/88/8ec9db1a4d126bb8b758992beb40d1249df171917d75f44a327eb5f20dda/onnxruntime-1.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:20c321cf187ba496e648acf6b4cf90b4d398b0d17c2a77fdaeba365b908cc1c1", size = 13358769, upload-time = "2026-06-15T22:43:34.581Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9f/fdad359dfcba7e7cd8815569b304a596531d4efa77a75d77f8b4981891a2/onnxruntime-1.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:d0d1f68868e2ef30ef70998ba9bbbc5c305e9b17041e3936751c1b8aa6aade06", size = 13104440, upload-time = "2026-06-15T22:43:22.893Z" }, + { url = "https://files.pythonhosted.org/packages/fb/2b/54208fd03ad410480bc17edf4869376362da8bbf46fe186ddf4cb5cc20fe/onnxruntime-1.27.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:b3e5b58b8c89c2b20e086e890aa9527377e5c240dc3ecc1640d18e07705eeb1c", size = 18432958, upload-time = "2026-06-15T22:42:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/24fc51fcbb126da6d032372314e47b55c3faad58f2aa78c0e199ccd20b9c/onnxruntime-1.27.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48b3d87eb560ff6a772240506f3c78d6d27c63cafedd5c775672e1194f968cfd", size = 16438180, upload-time = "2026-06-15T22:42:43.093Z" }, + { url = "https://files.pythonhosted.org/packages/cb/19/14929c3c2fe0b79b41cce24463062bf3afa4cdd3c19dccf00319caa92bff/onnxruntime-1.27.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6872443f236a554921cda6f318c900e2d0c226792cf3534d00e5057c6926e5d2", size = 18658445, upload-time = "2026-06-15T22:43:08.053Z" }, + { url = "https://files.pythonhosted.org/packages/7f/76/59ed932b0244acd7bbbd6449480053a6d958ea66357f022f932872e19287/onnxruntime-1.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:760021bca514d64a811837820d351a08a41741f16f8b4c26450da708fecf14e6", size = 13357856, upload-time = "2026-06-15T22:43:37.315Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/51/d1ec60ec7b1e2ae2d7340ba52b8a13529140039cd4407ba8dddbbc046582/onnxruntime-1.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:2fdfa9df40a0ded0028ce6f9cd863264237f3970559dea2b81456e9ac4622b94", size = 13104412, upload-time = "2026-06-15T22:43:27.457Z" }, + { url = "https://files.pythonhosted.org/packages/5e/7d/e6bb1c6445c94f708c38cd8fbb7bf0264108c33498b9445c93e60fe6d329/onnxruntime-1.27.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54c0c4e9202c36c4ecdb1f3443f5dfbfd5ee3b54d1362c4b4c6134110e74fb32", size = 16443331, upload-time = "2026-06-15T22:42:45.649Z" }, + { url = "https://files.pythonhosted.org/packages/72/1b/b18b31e806eabc41077810199fbbb36fbc2d5f19912416e5ccfbf73053d1/onnxruntime-1.27.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b215aa662c8f983f7d6dedafe65a9be72c26e5338e0fe98b3e0422c32c85428", size = 18670967, upload-time = "2026-06-15T22:43:10.621Z" }, + { url = "https://files.pythonhosted.org/packages/3a/37/48ab79c39b58a7c9f6f5aac1fa0ff2b993eb2643393d6ed9e839ddb6f347/onnxruntime-1.27.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0874edc171f470fc4dd2bbb60bc0989612ed1a8b89b365cda016630a93227f13", size = 18433941, upload-time = "2026-06-15T22:42:58.867Z" }, + { url = "https://files.pythonhosted.org/packages/6e/24/d535ca8a09dbf697f853377c8dc0820dbcaae5f334316b400b953afbcba8/onnxruntime-1.27.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5b51c014cf1a4fcd93c29a97eac8071fa27710dae05a4d0380bb60a66d60a62c", size = 16439970, upload-time = "2026-06-15T22:42:48.023Z" }, + { url = "https://files.pythonhosted.org/packages/f9/b1/ea9ee80c0bdaa4efb13f29f8c236f3740f6655e8c092a2d119515a5a652c/onnxruntime-1.27.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:445fb702ea5241ba813a3ce2febe2e9408a64f6ad2eb610924322c536165f7cd", size = 18659240, upload-time = "2026-06-15T22:43:13.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/f2/1404507d76a21940e8bf46f414e3d1abd94dc888cb89a30f4a540275846f/onnxruntime-1.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:49e416be0d717338b6d041b99911b716d70c397d277056450724f93bdded3fc2", size = 13685306, upload-time = "2026-06-15T22:43:40.416Z" }, + { url = "https://files.pythonhosted.org/packages/10/e5/ca5cf012ccccb806c70e94aadfebca5606acc62b33eb88cec13352d0778f/onnxruntime-1.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:856032937dd3bc7a7c141909c8d7ae4fde3e3f59bddf061ae627b9a051bda95c", size = 13456280, upload-time = "2026-06-15T22:43:29.693Z" }, + { url = "https://files.pythonhosted.org/packages/67/7b/dca330a8397e9d816c976d7aed4e24a4a2d279bb1e551e3d0221d1389b1d/onnxruntime-1.27.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6197a02e3f620c4dc13cff51b80672409fc1ffab3aa2593911b19fd322ff48b", size = 16443274, upload-time = "2026-06-15T22:42:50.467Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f6/2bac21f722aa45d876d4a51f26bd0ef30e704068a3cd5021a5a7cd784271/onnxruntime-1.27.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:370d211e1ceeac4cd5f45301655463ac59e27cdc74d9f7aeb2d19ff4b7a76715", size = 18670781, upload-time = "2026-06-15T22:43:17.151Z" }, +] + [[package]] name = "orjson" version = "3.11.9" @@ -1111,6 +1843,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/21/5a3f1e8913103b703a436a5664238e5b965ec392b555fe68943ea3691e6b/orjson-3.11.9-cp314-cp314-win_arm64.whl", hash = "sha256:eebdbdeef0094e4f5aefa20dcd4eb2368ab5e7a3b4edea27f1e7b2892e009cf9", size = 126687, upload-time = "2026-05-06T15:11:06.602Z" }, ] +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = 
"sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812, upload-time = "2024-01-27T21:01:33.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832, upload-time = "2024-01-27T21:01:31.393Z" }, +] + [[package]] name = "packaging" version = "26.2" @@ -1129,13 +1870,111 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, ] +[[package]] +name = "pillow" +version = "12.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/21/c2bcdd5906101a30244eaffc1b6e6ce71a31bd0742a01eb89e660ebfac2d/pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5", size = 46987819, upload-time = "2026-04-01T14:46:17.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/aa/d0b28e1c811cd4d5f5c2bfe2e022292bd255ae5744a3b9ac7d6c8f72dd75/pillow-12.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:a4e8f36e677d3336f35089648c8955c51c6d386a13cf6ee9c189c5f5bd713a9f", size = 5354355, upload-time = "2026-04-01T14:42:15.402Z" }, + { url = "https://files.pythonhosted.org/packages/27/8e/1d5b39b8ae2bd7650d0c7b6abb9602d16043ead9ebbfef4bc4047454da2a/pillow-12.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e589959f10d9824d39b350472b92f0ce3b443c0a3442ebf41c40cb8361c5b97", size = 4695871, upload-time = "2026-04-01T14:42:18.234Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/c5/dcb7a6ca6b7d3be41a76958e90018d56c8462166b3ef223150360850c8da/pillow-12.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a52edc8bfff4429aaabdf4d9ee0daadbbf8562364f940937b941f87a4290f5ff", size = 6269734, upload-time = "2026-04-01T14:42:20.608Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f1/aa1bb13b2f4eba914e9637893c73f2af8e48d7d4023b9d3750d4c5eb2d0c/pillow-12.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:975385f4776fafde056abb318f612ef6285b10a1f12b8570f3647ad0d74b48ec", size = 8076080, upload-time = "2026-04-01T14:42:23.095Z" }, + { url = "https://files.pythonhosted.org/packages/a1/2a/8c79d6a53169937784604a8ae8d77e45888c41537f7f6f65ed1f407fe66d/pillow-12.2.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd9c0c7a0c681a347b3194c500cb1e6ca9cab053ea4d82a5cf45b6b754560136", size = 6382236, upload-time = "2026-04-01T14:42:25.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/42/bbcb6051030e1e421d103ce7a8ecadf837aa2f39b8f82ef1a8d37c3d4ebc/pillow-12.2.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88d387ff40b3ff7c274947ed3125dedf5262ec6919d83946753b5f3d7c67ea4c", size = 7070220, upload-time = "2026-04-01T14:42:28.68Z" }, + { url = "https://files.pythonhosted.org/packages/3f/e1/c2a7d6dd8cfa6b231227da096fd2d58754bab3603b9d73bf609d3c18b64f/pillow-12.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:51c4167c34b0d8ba05b547a3bb23578d0ba17b80a5593f93bd8ecb123dd336a3", size = 6493124, upload-time = "2026-04-01T14:42:31.579Z" }, + { url = "https://files.pythonhosted.org/packages/5f/41/7c8617da5d32e1d2f026e509484fdb6f3ad7efaef1749a0c1928adbb099e/pillow-12.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34c0d99ecccea270c04882cb3b86e7b57296079c9a4aff88cb3b33563d95afaa", size = 7194324, upload-time = "2026-04-01T14:42:34.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/de/a777627e19fd6d62f84070ee1521adde5eeda4855b5cf60fe0b149118bca/pillow-12.2.0-cp310-cp310-win32.whl", hash = "sha256:b85f66ae9eb53e860a873b858b789217ba505e5e405a24b85c0464822fe88032", size = 6376363, upload-time = "2026-04-01T14:42:37.19Z" }, + { url = "https://files.pythonhosted.org/packages/e7/34/fc4cb5204896465842767b96d250c08410f01f2f28afc43b257de842eed5/pillow-12.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:673aa32138f3e7531ccdbca7b3901dba9b70940a19ccecc6a37c77d5fdeb05b5", size = 7083523, upload-time = "2026-04-01T14:42:39.62Z" }, + { url = "https://files.pythonhosted.org/packages/2d/a0/32852d36bc7709f14dc3f64f929a275e958ad8c19a6deba9610d458e28b3/pillow-12.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:3e080565d8d7c671db5802eedfb438e5565ffa40115216eabb8cd52d0ecce024", size = 2463318, upload-time = "2026-04-01T14:42:42.063Z" }, + { url = "https://files.pythonhosted.org/packages/68/e1/748f5663efe6edcfc4e74b2b93edfb9b8b99b67f21a854c3ae416500a2d9/pillow-12.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:8be29e59487a79f173507c30ddf57e733a357f67881430449bb32614075a40ab", size = 5354347, upload-time = "2026-04-01T14:42:44.255Z" }, + { url = "https://files.pythonhosted.org/packages/47/a1/d5ff69e747374c33a3b53b9f98cca7889fce1fd03d79cdc4e1bccc6c5a87/pillow-12.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:71cde9a1e1551df7d34a25462fc60325e8a11a82cc2e2f54578e5e9a1e153d65", size = 4695873, upload-time = "2026-04-01T14:42:46.452Z" }, + { url = "https://files.pythonhosted.org/packages/df/21/e3fbdf54408a973c7f7f89a23b2cb97a7ef30c61ab4142af31eee6aebc88/pillow-12.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f490f9368b6fc026f021db16d7ec2fbf7d89e2edb42e8ec09d2c60505f5729c7", size = 6280168, upload-time = "2026-04-01T14:42:49.228Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/f1/00b7278c7dd52b17ad4329153748f87b6756ec195ff786c2bdf12518337d/pillow-12.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8bd7903a5f2a4545f6fd5935c90058b89d30045568985a71c79f5fd6edf9b91e", size = 8088188, upload-time = "2026-04-01T14:42:51.735Z" }, + { url = "https://files.pythonhosted.org/packages/ad/cf/220a5994ef1b10e70e85748b75649d77d506499352be135a4989c957b701/pillow-12.2.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3997232e10d2920a68d25191392e3a4487d8183039e1c74c2297f00ed1c50705", size = 6394401, upload-time = "2026-04-01T14:42:54.343Z" }, + { url = "https://files.pythonhosted.org/packages/e9/bd/e51a61b1054f09437acfbc2ff9106c30d1eb76bc1453d428399946781253/pillow-12.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e74473c875d78b8e9d5da2a70f7099549f9eb37ded4e2f6a463e60125bccd176", size = 7079655, upload-time = "2026-04-01T14:42:56.954Z" }, + { url = "https://files.pythonhosted.org/packages/6b/3d/45132c57d5fb4b5744567c3817026480ac7fc3ce5d4c47902bc0e7f6f853/pillow-12.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:56a3f9c60a13133a98ecff6197af34d7824de9b7b38c3654861a725c970c197b", size = 6503105, upload-time = "2026-04-01T14:42:59.847Z" }, + { url = "https://files.pythonhosted.org/packages/7d/2e/9df2fc1e82097b1df3dce58dc43286aa01068e918c07574711fcc53e6fb4/pillow-12.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:90e6f81de50ad6b534cab6e5aef77ff6e37722b2f5d908686f4a5c9eba17a909", size = 7203402, upload-time = "2026-04-01T14:43:02.664Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2e/2941e42858ebb67e50ae741473de81c2984e6eff7b397017623c676e2e8d/pillow-12.2.0-cp311-cp311-win32.whl", hash = "sha256:8c984051042858021a54926eb597d6ee3012393ce9c181814115df4c60b9a808", size = 6378149, upload-time = "2026-04-01T14:43:05.274Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/42/836b6f3cd7f3e5fa10a1f1a5420447c17966044c8fbf589cc0452d5502db/pillow-12.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e6b2a0c538fc200b38ff9eb6628228b77908c319a005815f2dde585a0664b60", size = 7082626, upload-time = "2026-04-01T14:43:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/c2/88/549194b5d6f1f494b485e493edc6693c0a16f4ada488e5bd974ed1f42fad/pillow-12.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:9a8a34cc89c67a65ea7437ce257cea81a9dad65b29805f3ecee8c8fe8ff25ffe", size = 2463531, upload-time = "2026-04-01T14:43:10.743Z" }, + { url = "https://files.pythonhosted.org/packages/58/be/7482c8a5ebebbc6470b3eb791812fff7d5e0216c2be3827b30b8bb6603ed/pillow-12.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2d192a155bbcec180f8564f693e6fd9bccff5a7af9b32e2e4bf8c9c69dbad6b5", size = 5308279, upload-time = "2026-04-01T14:43:13.246Z" }, + { url = "https://files.pythonhosted.org/packages/d8/95/0a351b9289c2b5cbde0bacd4a83ebc44023e835490a727b2a3bd60ddc0f4/pillow-12.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3f40b3c5a968281fd507d519e444c35f0ff171237f4fdde090dd60699458421", size = 4695490, upload-time = "2026-04-01T14:43:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/de/af/4e8e6869cbed569d43c416fad3dc4ecb944cb5d9492defaed89ddd6fe871/pillow-12.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:03e7e372d5240cc23e9f07deca4d775c0817bffc641b01e9c3af208dbd300987", size = 6284462, upload-time = "2026-04-01T14:43:18.268Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9e/c05e19657fd57841e476be1ab46c4d501bffbadbafdc31a6d665f8b737b6/pillow-12.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b86024e52a1b269467a802258c25521e6d742349d760728092e1bc2d135b4d76", size = 8094744, upload-time = "2026-04-01T14:43:20.716Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/54/1789c455ed10176066b6e7e6da1b01e50e36f94ba584dc68d9eebfe9156d/pillow-12.2.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7371b48c4fa448d20d2714c9a1f775a81155050d383333e0a6c15b1123dda005", size = 6398371, upload-time = "2026-04-01T14:43:23.443Z" }, + { url = "https://files.pythonhosted.org/packages/43/e3/fdc657359e919462369869f1c9f0e973f353f9a9ee295a39b1fea8ee1a77/pillow-12.2.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62f5409336adb0663b7caa0da5c7d9e7bdbaae9ce761d34669420c2a801b2780", size = 7087215, upload-time = "2026-04-01T14:43:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f8/2f6825e441d5b1959d2ca5adec984210f1ec086435b0ed5f52c19b3b8a6e/pillow-12.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:01afa7cf67f74f09523699b4e88c73fb55c13346d212a59a2db1f86b0a63e8c5", size = 6509783, upload-time = "2026-04-01T14:43:29.56Z" }, + { url = "https://files.pythonhosted.org/packages/67/f9/029a27095ad20f854f9dba026b3ea6428548316e057e6fc3545409e86651/pillow-12.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc3d34d4a8fbec3e88a79b92e5465e0f9b842b628675850d860b8bd300b159f5", size = 7212112, upload-time = "2026-04-01T14:43:32.091Z" }, + { url = "https://files.pythonhosted.org/packages/be/42/025cfe05d1be22dbfdb4f264fe9de1ccda83f66e4fc3aac94748e784af04/pillow-12.2.0-cp312-cp312-win32.whl", hash = "sha256:58f62cc0f00fd29e64b29f4fd923ffdb3859c9f9e6105bfc37ba1d08994e8940", size = 6378489, upload-time = "2026-04-01T14:43:34.601Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7b/25a221d2c761c6a8ae21bfa3874988ff2583e19cf8a27bf2fee358df7942/pillow-12.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f84204dee22a783350679a0333981df803dac21a0190d706a50475e361c93f5", size = 7084129, upload-time = "2026-04-01T14:43:37.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/e1/542a474affab20fd4a0f1836cb234e8493519da6b76899e30bcc5d990b8b/pillow-12.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:af73337013e0b3b46f175e79492d96845b16126ddf79c438d7ea7ff27783a414", size = 2463612, upload-time = "2026-04-01T14:43:39.421Z" }, + { url = "https://files.pythonhosted.org/packages/4a/01/53d10cf0dbad820a8db274d259a37ba50b88b24768ddccec07355382d5ad/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c", size = 4100837, upload-time = "2026-04-01T14:43:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/0f/98/f3a6657ecb698c937f6c76ee564882945f29b79bad496abcba0e84659ec5/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2", size = 4176528, upload-time = "2026-04-01T14:43:43.773Z" }, + { url = "https://files.pythonhosted.org/packages/69/bc/8986948f05e3ea490b8442ea1c1d4d990b24a7e43d8a51b2c7d8b1dced36/pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c", size = 3640401, upload-time = "2026-04-01T14:43:45.87Z" }, + { url = "https://files.pythonhosted.org/packages/34/46/6c717baadcd62bc8ed51d238d521ab651eaa74838291bda1f86fe1f864c9/pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795", size = 5308094, upload-time = "2026-04-01T14:43:48.438Z" }, + { url = "https://files.pythonhosted.org/packages/71/43/905a14a8b17fdb1ccb58d282454490662d2cb89a6bfec26af6d3520da5ec/pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f", size = 4695402, upload-time = "2026-04-01T14:43:51.292Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/dd/42107efcb777b16fa0393317eac58f5b5cf30e8392e266e76e51cff28c3d/pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed", size = 6280005, upload-time = "2026-04-01T14:43:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/a8/68/b93e09e5e8549019e61acf49f65b1a8530765a7f812c77a7461bca7e4494/pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9", size = 8090669, upload-time = "2026-04-01T14:43:57.335Z" }, + { url = "https://files.pythonhosted.org/packages/4b/6e/3ccb54ce8ec4ddd1accd2d89004308b7b0b21c4ac3d20fa70af4760a4330/pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed", size = 6395194, upload-time = "2026-04-01T14:43:59.864Z" }, + { url = "https://files.pythonhosted.org/packages/67/ee/21d4e8536afd1a328f01b359b4d3997b291ffd35a237c877b331c1c3b71c/pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3", size = 7082423, upload-time = "2026-04-01T14:44:02.74Z" }, + { url = "https://files.pythonhosted.org/packages/78/5f/e9f86ab0146464e8c133fe85df987ed9e77e08b29d8d35f9f9f4d6f917ba/pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9", size = 6505667, upload-time = "2026-04-01T14:44:05.381Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1e/409007f56a2fdce61584fd3acbc2bbc259857d555196cedcadc68c015c82/pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795", size = 7208580, upload-time = "2026-04-01T14:44:08.39Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/c4/7349421080b12fb35414607b8871e9534546c128a11965fd4a7002ccfbee/pillow-12.2.0-cp313-cp313-win32.whl", hash = "sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e", size = 6375896, upload-time = "2026-04-01T14:44:11.197Z" }, + { url = "https://files.pythonhosted.org/packages/3f/82/8a3739a5e470b3c6cbb1d21d315800d8e16bff503d1f16b03a4ec3212786/pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b", size = 7081266, upload-time = "2026-04-01T14:44:13.947Z" }, + { url = "https://files.pythonhosted.org/packages/c3/25/f968f618a062574294592f668218f8af564830ccebdd1fa6200f598e65c5/pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06", size = 2463508, upload-time = "2026-04-01T14:44:16.312Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a4/b342930964e3cb4dce5038ae34b0eab4653334995336cd486c5a8c25a00c/pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b", size = 5309927, upload-time = "2026-04-01T14:44:18.89Z" }, + { url = "https://files.pythonhosted.org/packages/9f/de/23198e0a65a9cf06123f5435a5d95cea62a635697f8f03d134d3f3a96151/pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f", size = 4698624, upload-time = "2026-04-01T14:44:21.115Z" }, + { url = "https://files.pythonhosted.org/packages/01/a6/1265e977f17d93ea37aa28aa81bad4fa597933879fac2520d24e021c8da3/pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612", size = 6321252, upload-time = "2026-04-01T14:44:23.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/83/5982eb4a285967baa70340320be9f88e57665a387e3a53a7f0db8231a0cd/pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c", size = 8126550, upload-time = "2026-04-01T14:44:26.772Z" }, + { url = "https://files.pythonhosted.org/packages/4e/48/6ffc514adce69f6050d0753b1a18fd920fce8cac87620d5a31231b04bfc5/pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea", size = 6433114, upload-time = "2026-04-01T14:44:29.615Z" }, + { url = "https://files.pythonhosted.org/packages/36/a3/f9a77144231fb8d40ee27107b4463e205fa4677e2ca2548e14da5cf18dce/pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4", size = 7115667, upload-time = "2026-04-01T14:44:32.773Z" }, + { url = "https://files.pythonhosted.org/packages/c1/fc/ac4ee3041e7d5a565e1c4fd72a113f03b6394cc72ab7089d27608f8aaccb/pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4", size = 6538966, upload-time = "2026-04-01T14:44:35.252Z" }, + { url = "https://files.pythonhosted.org/packages/c0/a8/27fb307055087f3668f6d0a8ccb636e7431d56ed0750e07a60547b1e083e/pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea", size = 7238241, upload-time = "2026-04-01T14:44:37.875Z" }, + { url = "https://files.pythonhosted.org/packages/ad/4b/926ab182c07fccae9fcb120043464e1ff1564775ec8864f21a0ebce6ac25/pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24", size = 6379592, upload-time = "2026-04-01T14:44:40.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c4/f9e476451a098181b30050cc4c9a3556b64c02cf6497ea421ac047e89e4b/pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98", size = 7085542, upload-time = "2026-04-01T14:44:43.251Z" }, + { url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" }, + { url = "https://files.pythonhosted.org/packages/bf/98/4595daa2365416a86cb0d495248a393dfc84e96d62ad080c8546256cb9c0/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:3adc9215e8be0448ed6e814966ecf3d9952f0ea40eb14e89a102b87f450660d8", size = 4100848, upload-time = "2026-04-01T14:44:48.48Z" }, + { url = "https://files.pythonhosted.org/packages/0b/79/40184d464cf89f6663e18dfcf7ca21aae2491fff1a16127681bf1fa9b8cf/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:6a9adfc6d24b10f89588096364cc726174118c62130c817c2837c60cf08a392b", size = 4176515, upload-time = "2026-04-01T14:44:51.353Z" }, + { url = "https://files.pythonhosted.org/packages/b0/63/703f86fd4c422a9cf722833670f4f71418fb116b2853ff7da722ea43f184/pillow-12.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:6a6e67ea2e6feda684ed370f9a1c52e7a243631c025ba42149a2cc5934dec295", size = 3640159, upload-time = "2026-04-01T14:44:53.588Z" }, + { url = "https://files.pythonhosted.org/packages/71/e0/fb22f797187d0be2270f83500aab851536101b254bfa1eae10795709d283/pillow-12.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2bb4a8d594eacdfc59d9e5ad972aa8afdd48d584ffd5f13a937a664c3e7db0ed", size = 5312185, upload-time = "2026-04-01T14:44:56.039Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/8c/1a9e46228571de18f8e28f16fabdfc20212a5d019f3e3303452b3f0a580d/pillow-12.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80b2da48193b2f33ed0c32c38140f9d3186583ce7d516526d462645fd98660ae", size = 4695386, upload-time = "2026-04-01T14:44:58.663Z" }, + { url = "https://files.pythonhosted.org/packages/70/62/98f6b7f0c88b9addd0e87c217ded307b36be024d4ff8869a812b241d1345/pillow-12.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22db17c68434de69d8ecfc2fe821569195c0c373b25cccb9cbdacf2c6e53c601", size = 6280384, upload-time = "2026-04-01T14:45:01.5Z" }, + { url = "https://files.pythonhosted.org/packages/5e/03/688747d2e91cfbe0e64f316cd2e8005698f76ada3130d0194664174fa5de/pillow-12.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b14cc0106cd9aecda615dd6903840a058b4700fcb817687d0ee4fc8b6e389be", size = 8091599, upload-time = "2026-04-01T14:45:04.5Z" }, + { url = "https://files.pythonhosted.org/packages/f6/35/577e22b936fcdd66537329b33af0b4ccfefaeabd8aec04b266528cddb33c/pillow-12.2.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cbeb542b2ebc6fcdacabf8aca8c1a97c9b3ad3927d46b8723f9d4f033288a0f", size = 6396021, upload-time = "2026-04-01T14:45:07.117Z" }, + { url = "https://files.pythonhosted.org/packages/11/8d/d2532ad2a603ca2b93ad9f5135732124e57811d0168155852f37fbce2458/pillow-12.2.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bfd07bc812fbd20395212969e41931001fd59eb55a60658b0e5710872e95286", size = 7083360, upload-time = "2026-04-01T14:45:09.763Z" }, + { url = "https://files.pythonhosted.org/packages/5e/26/d325f9f56c7e039034897e7380e9cc202b1e368bfd04d4cbe6a441f02885/pillow-12.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9aba9a17b623ef750a4d11b742cbafffeb48a869821252b30ee21b5e91392c50", size = 6507628, upload-time = "2026-04-01T14:45:12.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/f7/769d5632ffb0988f1c5e7660b3e731e30f7f8ec4318e94d0a5d674eb65a4/pillow-12.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:deede7c263feb25dba4e82ea23058a235dcc2fe1f6021025dc71f2b618e26104", size = 7209321, upload-time = "2026-04-01T14:45:15.122Z" }, + { url = "https://files.pythonhosted.org/packages/6a/7a/c253e3c645cd47f1aceea6a8bacdba9991bf45bb7dfe927f7c893e89c93c/pillow-12.2.0-cp314-cp314-win32.whl", hash = "sha256:632ff19b2778e43162304d50da0181ce24ac5bb8180122cbe1bf4673428328c7", size = 6479723, upload-time = "2026-04-01T14:45:17.797Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8b/601e6566b957ca50e28725cb6c355c59c2c8609751efbecd980db44e0349/pillow-12.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:4e6c62e9d237e9b65fac06857d511e90d8461a32adcc1b9065ea0c0fa3a28150", size = 7217400, upload-time = "2026-04-01T14:45:20.529Z" }, + { url = "https://files.pythonhosted.org/packages/d6/94/220e46c73065c3e2951bb91c11a1fb636c8c9ad427ac3ce7d7f3359b9b2f/pillow-12.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:b1c1fbd8a5a1af3412a0810d060a78b5136ec0836c8a4ef9aa11807f2a22f4e1", size = 2554835, upload-time = "2026-04-01T14:45:23.162Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ab/1b426a3974cb0e7da5c29ccff4807871d48110933a57207b5a676cccc155/pillow-12.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:57850958fe9c751670e49b2cecf6294acc99e562531f4bd317fa5ddee2068463", size = 5314225, upload-time = "2026-04-01T14:45:25.637Z" }, + { url = "https://files.pythonhosted.org/packages/19/1e/dce46f371be2438eecfee2a1960ee2a243bbe5e961890146d2dee1ff0f12/pillow-12.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5d38f1411c0ed9f97bcb49b7bd59b6b7c314e0e27420e34d99d844b9ce3b6f3", size = 4698541, upload-time = "2026-04-01T14:45:28.355Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/c3/7fbecf70adb3a0c33b77a300dc52e424dc22ad8cdc06557a2e49523b703d/pillow-12.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c0a9f29ca8e79f09de89293f82fc9b0270bb4af1d58bc98f540cc4aedf03166", size = 6322251, upload-time = "2026-04-01T14:45:30.924Z" }, + { url = "https://files.pythonhosted.org/packages/1c/3c/7fbc17cfb7e4fe0ef1642e0abc17fc6c94c9f7a16be41498e12e2ba60408/pillow-12.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1610dd6c61621ae1cf811bef44d77e149ce3f7b95afe66a4512f8c59f25d9ebe", size = 8127807, upload-time = "2026-04-01T14:45:33.908Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c3/a8ae14d6defd2e448493ff512fae903b1e9bd40b72efb6ec55ce0048c8ce/pillow-12.2.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a34329707af4f73cf1782a36cd2289c0368880654a2c11f027bcee9052d35dd", size = 6433935, upload-time = "2026-04-01T14:45:36.623Z" }, + { url = "https://files.pythonhosted.org/packages/6e/32/2880fb3a074847ac159d8f902cb43278a61e85f681661e7419e6596803ed/pillow-12.2.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e9c4f5b3c546fa3458a29ab22646c1c6c787ea8f5ef51300e5a60300736905e", size = 7116720, upload-time = "2026-04-01T14:45:39.258Z" }, + { url = "https://files.pythonhosted.org/packages/46/87/495cc9c30e0129501643f24d320076f4cc54f718341df18cc70ec94c44e1/pillow-12.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb043ee2f06b41473269765c2feae53fc2e2fbf96e5e22ca94fb5ad677856f06", size = 6540498, upload-time = "2026-04-01T14:45:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/18/53/773f5edca692009d883a72211b60fdaf8871cbef075eaa9d577f0a2f989e/pillow-12.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f278f034eb75b4e8a13a54a876cc4a5ab39173d2cdd93a638e1b467fc545ac43", size = 7239413, upload-time = "2026-04-01T14:45:44.705Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/e4/4b64a97d71b2a83158134abbb2f5bd3f8a2ea691361282f010998f339ec7/pillow-12.2.0-cp314-cp314t-win32.whl", hash = "sha256:6bb77b2dcb06b20f9f4b4a8454caa581cd4dd0643a08bacf821216a16d9c8354", size = 6482084, upload-time = "2026-04-01T14:45:47.568Z" }, + { url = "https://files.pythonhosted.org/packages/ba/13/306d275efd3a3453f72114b7431c877d10b1154014c1ebbedd067770d629/pillow-12.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6562ace0d3fb5f20ed7290f1f929cae41b25ae29528f2af1722966a0a02e2aa1", size = 7225152, upload-time = "2026-04-01T14:45:50.032Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6e/cf826fae916b8658848d7b9f38d88da6396895c676e8086fc0988073aaf8/pillow-12.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:aa88ccfe4e32d362816319ed727a004423aab09c5cea43c01a4b435643fa34eb", size = 2556579, upload-time = "2026-04-01T14:45:52.529Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b7/2437044fb910f499610356d1352e3423753c98e34f915252aafecc64889f/pillow-12.2.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538bd5e05efec03ae613fd89c4ce0368ecd2ba239cc25b9f9be7ed426b0af1f", size = 5273969, upload-time = "2026-04-01T14:45:55.538Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f4/8316e31de11b780f4ac08ef3654a75555e624a98db1056ecb2122d008d5a/pillow-12.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:394167b21da716608eac917c60aa9b969421b5dcbbe02ae7f013e7b85811c69d", size = 4659674, upload-time = "2026-04-01T14:45:58.093Z" }, + { url = "https://files.pythonhosted.org/packages/d4/37/664fca7201f8bb2aa1d20e2c3d5564a62e6ae5111741966c8319ca802361/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d04bfa02cc2d23b497d1e90a0f927070043f6cbf303e738300532379a4b4e0f", size = 5288479, upload-time = "2026-04-01T14:46:01.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/62/5b0ed78fce87346be7a5cfcfaaad91f6a1f98c26f86bdbafa2066c647ef6/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0c838a5125cee37e68edec915651521191cef1e6aa336b855f495766e77a366e", size = 7032230, upload-time = "2026-04-01T14:46:03.874Z" }, + { url = "https://files.pythonhosted.org/packages/c3/28/ec0fc38107fc32536908034e990c47914c57cd7c5a3ece4d8d8f7ffd7e27/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a6c9fa44005fa37a91ebfc95d081e8079757d2e904b27103f4f5fa6f0bf78c0", size = 5355404, upload-time = "2026-04-01T14:46:06.33Z" }, + { url = "https://files.pythonhosted.org/packages/5e/8b/51b0eddcfa2180d60e41f06bd6d0a62202b20b59c68f5a132e615b75aecf/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25373b66e0dd5905ed63fa3cae13c82fbddf3079f2c8bf15c6fb6a35586324c1", size = 6002215, upload-time = "2026-04-01T14:46:08.83Z" }, + { url = "https://files.pythonhosted.org/packages/bc/60/5382c03e1970de634027cee8e1b7d39776b778b81812aaf45b694dfe9e28/pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e", size = 7080946, upload-time = "2026-04-01T14:46:11.734Z" }, +] + [[package]] name = "platformdirs" -version = "4.9.6" +version = "4.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/47/e4501f49c178ae1d9f4a75073fda4204f52647993f075a9db4d14930e0c5/platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7", size = 31224, 
upload-time = "2026-05-28T03:32:53.587Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, + { url = "https://files.pythonhosted.org/packages/81/e6/cd9575ac904136b3cbf7aa7ee819ef86eedb7274e46f230e94ea4342e729/platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a", size = 22743, upload-time = "2026-05-28T03:32:52.175Z" }, ] [[package]] @@ -1163,6 +2002,168 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/80/6e/4b28b62ecb6aae56769c34a8ff1d661473ec1e9519e2d5f8b2c150086b26/pre_commit-4.6.0-py2.py3-none-any.whl", hash = "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b", size = 226472, upload-time = "2026-04-21T20:31:40.092Z" }, ] +[[package]] +name = "protobuf" +version = "7.35.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/01/9ef0afd7999eb9badb3a768b4aedd78c86d4c65cfaf1958ab276199e76b4/protobuf-7.35.1.tar.gz", hash = "sha256:ce115a26fe0c39a2c29973d914d327e516a6455464489fe3cd1e51a1b354f81a", size = 458717, upload-time = "2026-06-11T21:55:40.257Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/03/8aeeb7458d22546bf64b5250ca1daeb5ff757d900e8e4a7476c6f0db843e/protobuf-7.35.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:24f857477359a85c0c235261b8ba905fd51b2562f4a64ca1df5473f29850cbf6", size = 433226, upload-time = "2026-06-11T21:55:31.719Z" }, + { url = "https://files.pythonhosted.org/packages/37/4b/dfb89eb0e652a1ff073c39a59fb5e3a83cfe9b57a2c83fa6d78270101767/protobuf-7.35.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:11d6b0ec246892d85215b0a13ca6e0233cf5284b68f0ac02646427f4ff88a799", size = 328847, upload-time = 
"2026-06-11T21:55:34.035Z" }, + { url = "https://files.pythonhosted.org/packages/0f/58/dc12f2cd484951524af6e3382c785869b9b3fb5e52ee95ae23add53ee8f9/protobuf-7.35.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:b73f9489a4b8b1c9cb1f8ed951c736392592edb24b9d6819f36d2e10b171d5b4", size = 344030, upload-time = "2026-06-11T21:55:34.941Z" }, + { url = "https://files.pythonhosted.org/packages/e4/be/5b3cfe508bfab6761414ff944e3366eb13be4fd71efcd69450f89ba39f43/protobuf-7.35.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:74758715c53d7158fb76caf4f0cfdacc5329a4b1bb994f865d6cf302d413a1c4", size = 327130, upload-time = "2026-06-11T21:55:35.921Z" }, + { url = "https://files.pythonhosted.org/packages/d8/bc/6d6c7ba8709c85f8f2c390b2b118d6fb08a783676a572271851bf45a7d22/protobuf-7.35.1-cp310-abi3-win32.whl", hash = "sha256:353652e4efd0bca5b5fc2656abf8307ef351f0cf938c9eba09f0e09c20a25c30", size = 428945, upload-time = "2026-06-11T21:55:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/0a/19/8d0cb6f20a1ef7b18f1c8986ad5783f22f84cce39c6ce9a6e645ea55192e/protobuf-7.35.1-cp310-abi3-win_amd64.whl", hash = "sha256:230a75ddfc2de4806e56696ce9640c1cdfdb6543b7cfce98d42a4c0a0e7bdb87", size = 439996, upload-time = "2026-06-11T21:55:38.123Z" }, + { url = "https://files.pythonhosted.org/packages/19/c7/5f7c636ec43e0c545e28d1f1db71990108306f7bdcb89f069ba97e428e7f/protobuf-7.35.1-py3-none-any.whl", hash = "sha256:4bc97768d8fe4ad6743c8a19403e314511ed9f6d13205b687e52421c023ac1b9", size = 171659, upload-time = "2026-06-11T21:55:39.155Z" }, +] + +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + +[[package]] +name = "py-rust-stemmers" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/c1/9763f9fb1cd73f9c317a83feeed6e0d4af320c6bbddab47b4a94f3a47d0c/py_rust_stemmers-0.1.8.tar.gz", hash = "sha256:6b0f6f48bc54d607aed802de872fcd5a71bae969a6760976dc78ce55e8eaf3da", size = 9732, upload-time = "2026-05-22T11:00:24.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/d6/28285b1c6fb9e6689a78135659679f637edc7395a2b994f48123094f1c99/py_rust_stemmers-0.1.8-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:36b952ce65a794faf15553b8f5b60431483c2d5bec00bc6982bf490e727250f9", size = 290828, upload-time = "2026-05-22T10:59:19.4Z" }, + { url = "https://files.pythonhosted.org/packages/42/da/cfe72e8213390079be9db139ec3b2f9e810f33e0d1f5fc0ebe30effd608e/py_rust_stemmers-0.1.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3bef8062d28251b465299cc676de7c11dde003858caf2c2b5c14de7298dc63db", size = 276052, upload-time = "2026-05-22T10:59:20.715Z" }, + { url = "https://files.pythonhosted.org/packages/e5/81/2a670bf588cf255698d3c5133c13ce8d5e018c6c0bf6ac64b77abc897999/py_rust_stemmers-0.1.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af749b3b9f6531342250dd05854c0ae93e01f79b0049a8769012e0b50e9aba5b", size = 314770, upload-time = "2026-05-22T10:59:21.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/a5/45b5fba9c25b00f4ae17ae81a54a4555b0466f5c8d774465591b11dd9745/py_rust_stemmers-0.1.8-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:45d0c42346f8e5d04b86a0b0f895bb15c53788bf551e7fad36be1dad093e856f", size = 319086, upload-time = "2026-05-22T10:59:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/ba/9b/fcc7f3e0b01b570b646478b16461d9934b39eae4f34009c104a2428aa631/py_rust_stemmers-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:342b6cc9eb833f102d86e146ee71bccb3c1ed1e8320db8e6553cc81b716b1b14", size = 320186, upload-time = "2026-05-22T10:59:23.91Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7f/a406c7fada4fc8281dd01a389efb15c9cbe81e07afbd70e089e6b6574020/py_rust_stemmers-0.1.8-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:25bb9b0b6b8d79b32c151c7f5f94af9af9aea201ca8736e6f117c841b017f028", size = 320502, upload-time = "2026-05-22T10:59:24.903Z" }, + { url = "https://files.pythonhosted.org/packages/47/ab/da7228d7f68d156b3d690c355eed98438f0e9564f04cb5bccef66189c4f7/py_rust_stemmers-0.1.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dab8a862fa8e4c9e715848e9d64c317229d7a2c37238cd1c73237b85d655ab7e", size = 492445, upload-time = "2026-05-22T10:59:26.318Z" }, + { url = "https://files.pythonhosted.org/packages/e4/87/fa4b5dba78e1e5597419f1cdad25139165031cdf63adff96fbb3e01b0e17/py_rust_stemmers-0.1.8-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:da0326c913070d5f3fabd56393ca4118167bb0b13c2932a77c7a1b31f85f651a", size = 595744, upload-time = "2026-05-22T10:59:27.585Z" }, + { url = "https://files.pythonhosted.org/packages/ff/84/e1212e47f7db3d468c9c4555f85594019a15b948a614e60b190adf9c477a/py_rust_stemmers-0.1.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0f1d2135974bbbea2c15087a7d8cec8697338b2a748c9694c92943775f4d6c14", size = 538125, upload-time = "2026-05-22T10:59:28.92Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/af/af00e6b00f0aa2bc3c164615af362b962cc79d2ddedf53d0e9e92920c425/py_rust_stemmers-0.1.8-cp310-cp310-win_amd64.whl", hash = "sha256:22d037a82920bed8fccbec62cf5ef47d821ac3966a3d098fa48a2053397ea6b7", size = 208538, upload-time = "2026-05-22T10:59:30.403Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5b/fcc991636129fb2840fd1c7560112798046f26fa085b7a377382d50d2679/py_rust_stemmers-0.1.8-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4b1159a38a198eabeabd908015f9425c4220b61b42c6603c58870481ff2b50bb", size = 290471, upload-time = "2026-05-22T10:59:32.033Z" }, + { url = "https://files.pythonhosted.org/packages/48/0a/c88c9a7b5c94acc1175a33964637aff9cf8fa4c2e595846ab1df04c1f0bf/py_rust_stemmers-0.1.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1686fc009869ff8bcc1d5a305f071eeb8c3b3612a9827bcadd4e61fdb5727179", size = 275775, upload-time = "2026-05-22T10:59:32.979Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e2/e685cd31655a1ac56ebe0d571d221c199b1971eb5a2fdad88c889dc25983/py_rust_stemmers-0.1.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:769f37882905da2311cb720681b112eb70a4e6bd56fb424d473427b5379c8396", size = 314523, upload-time = "2026-05-22T10:59:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/65/93/a6c0f30109c259199ac171cb6a0c69addefdba454ee0a8d51bb94e767c11/py_rust_stemmers-0.1.8-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3007ad4ec51e0c352ae410234a24a9ac75fab0c1e06c585fbac9fcced69385f8", size = 318808, upload-time = "2026-05-22T10:59:35.719Z" }, + { url = "https://files.pythonhosted.org/packages/59/87/ecaffed03e4b78d35ffb44740ca779e57d9f49d7d764f3f56b633b1e1c8c/py_rust_stemmers-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a1e11d22a240318dc917266eb3c85919455b6ea834445b95997712d9ede6b93", size = 319990, upload-time = "2026-05-22T10:59:36.84Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/0d/2976bb288240e25110be687e6be5ecb0623a17f667f186e07033e429985f/py_rust_stemmers-0.1.8-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:08c258deab6d994551a92e9468ce88e58f97e636e73d9c5763978a57d7675a13", size = 320291, upload-time = "2026-05-22T10:59:38.263Z" }, + { url = "https://files.pythonhosted.org/packages/2e/fb/7b1a93f63600633b2c741714f0f6024b2caff54e5aed77c5f6e0be384947/py_rust_stemmers-0.1.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eee4af7ada2ce9cb3ec59ffe8458148c3933a86507d816bf954ee506a0e45b61", size = 492171, upload-time = "2026-05-22T10:59:39.537Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3b/8e829e709542f928beb0613f4dffca4797a817f740c1be07eabd11bd2db4/py_rust_stemmers-0.1.8-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f16deb1557b8253d8c11693047bec4ed67d6b09ae0f84c8b896ea03ac2fc8925", size = 595398, upload-time = "2026-05-22T10:59:41.016Z" }, + { url = "https://files.pythonhosted.org/packages/27/8b/b3972f0fc14e6bfc602a9260a1747742aaf86737ad57872998b085a2f1aa/py_rust_stemmers-0.1.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:870afb2d1d4731bd2d74b715b34439b29734e4dc94c55342096f07669f7f9fa0", size = 537820, upload-time = "2026-05-22T10:59:42.307Z" }, + { url = "https://files.pythonhosted.org/packages/0e/90/54c2949cc4fef544810305526e0fd658e2bc87abcc046283379a7044abec/py_rust_stemmers-0.1.8-cp311-cp311-win_amd64.whl", hash = "sha256:13b25ce65509ff7e37725bd38c62704f32ae0604ac0899f43c8cce41d5543212", size = 208396, upload-time = "2026-05-22T10:59:43.335Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6a/39080bc8f4a441a35378c0faeeb834fb27974997f40d51342574e70f9662/py_rust_stemmers-0.1.8-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6a9a4b8733d0b307bd0879ab7e321aa8a0bfd054a75a5cb23c647df5ca7d17c3", size = 290230, upload-time = "2026-05-22T10:59:44.551Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/15/ae60b9010924adac465f418822d9c514690aba6846edd67b6e2b5c227745/py_rust_stemmers-0.1.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:51d0042d2a92ef0f7048bfc06b6c2a02306af31ea47f09d24b34e4b7e63c4e80", size = 275449, upload-time = "2026-05-22T10:59:45.547Z" }, + { url = "https://files.pythonhosted.org/packages/ec/7c/94be8b932179823d66e0d2be03a94706132a7d16a640d5e5710de1cb1b8f/py_rust_stemmers-0.1.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89d3d34094b9b6078a8ea6fe1c7044e5fd32f14e76c94818c5008f49ae075f08", size = 316676, upload-time = "2026-05-22T10:59:46.522Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a4/8bd5c9f31207136830457d819e3f98bb21c54c0cdc40d6f1845ce4efdf7c/py_rust_stemmers-0.1.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:40c86be90cee4a709ad84fde4db7f11ca44d65630a56b77ec86fe84c23adfc09", size = 319458, upload-time = "2026-05-22T10:59:47.914Z" }, + { url = "https://files.pythonhosted.org/packages/f9/95/95da2b353b164a3a2b8a1c799866a58060693be4f1dc21065663dc67dc17/py_rust_stemmers-0.1.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:515884bcfb47b10335146648f276930d0c1201ae5e8b7b400fb46d8ea05c0ec2", size = 323541, upload-time = "2026-05-22T10:59:48.894Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ce/f34403b68808519dfa3220e1d94a40f26d5025f27e28893e2388ab9cfde5/py_rust_stemmers-0.1.8-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:fa42f5f8feb694aaaa869eedf477fcaf66f67a192cd64d94302d06920c33864a", size = 323873, upload-time = "2026-05-22T10:59:49.872Z" }, + { url = "https://files.pythonhosted.org/packages/57/01/fb8527f6474d576975415405c985a97260e0403829e062103d334230b7d2/py_rust_stemmers-0.1.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2e86ad68fe297a6652f0f0390625ea81858b6f27862fd4c5ee1214bf5af29b9d", size = 494761, upload-time = "2026-05-22T10:59:51.021Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/ac/73816237dbec20a7299abf901e2f7b6061d238754e033b48e423603f5336/py_rust_stemmers-0.1.8-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4b90fc81411943b114e8eb4988a876ba3b12bd2d20741559803eddc4131575dc", size = 596141, upload-time = "2026-05-22T10:59:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/52/0a/dd48debf386a206ee1c6ad75a0827eac89428441291c90d98bc3803fccf1/py_rust_stemmers-0.1.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:56cc2c2df742fa6529285b7d204720f34b7da789ed78eb578442f93c6de97d89", size = 541633, upload-time = "2026-05-22T10:59:53.18Z" }, + { url = "https://files.pythonhosted.org/packages/92/ca/ebb707ab280636b8f46d040ccb051d1a9ddbc1f1ca2d90cdba626872f405/py_rust_stemmers-0.1.8-cp312-cp312-win_amd64.whl", hash = "sha256:dd967eea2f808a1e73aa71ecccef0f4925a4cca4eb02ced94057afe3303153ef", size = 212134, upload-time = "2026-05-22T10:59:54.245Z" }, + { url = "https://files.pythonhosted.org/packages/c2/98/f078f3930311e7b6154ccdf9166c4e30a416c7d199e136b5f09265d58a35/py_rust_stemmers-0.1.8-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5bd15b89203ecd886960e237124d1aa6e55498d76418c36c967d3b12168d43dc", size = 290427, upload-time = "2026-05-22T10:59:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/c9/46/21d784a3f1db6a23051ffd5826d8ee667d26a64587c1cfbda0443ed87fff/py_rust_stemmers-0.1.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6c92733b020534470ca5a0d7fe8b85c85622ff383d4f37fec75a1c677aa84921", size = 275628, upload-time = "2026-05-22T10:59:56.687Z" }, + { url = "https://files.pythonhosted.org/packages/57/d5/701c73a4f6a7fecfd96a6588f0cafe98d6b0acde93adf8a2e45535f3d1d5/py_rust_stemmers-0.1.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ab605a86c950ba7e8ab1392cf91296c0bec3084babb897a4aecf90a10c82395", size = 316656, upload-time = "2026-05-22T10:59:57.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/0d/c58fe98153cfdb6abf4dfb6ac335c923000d4af4e736080c3a3045b7aea7/py_rust_stemmers-0.1.8-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:21ed8055cec1f78d666afad8ffd7a51775ba419d2c615b8a1df7b32ca7f33e2b", size = 319377, upload-time = "2026-05-22T10:59:58.664Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d7/e60d04849e90aa3ad457211cc4999c30401f433341f9a5588c12b81f9877/py_rust_stemmers-0.1.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae773e1d01e9aa328d175f461475d0cd7074a82bfcc71de6dc5765e51f1cc9f7", size = 323719, upload-time = "2026-05-22T10:59:59.845Z" }, + { url = "https://files.pythonhosted.org/packages/6a/48/c0e4fb955db784cc354e0756354602f7043ff4c10fcbd9d901a2f8fe3239/py_rust_stemmers-0.1.8-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5cc8fab9d0f1b274a26935a632362b8278f03e81b65e8b8644d5ca3f62a5a1a4", size = 324110, upload-time = "2026-05-22T11:00:01.26Z" }, + { url = "https://files.pythonhosted.org/packages/48/eb/981b26baff37cf7a26ee206763cc4d2fb3e1db8f0f86ec030074431fae05/py_rust_stemmers-0.1.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:35570098da02eb439afcd7270a12bf850bbe874b85cb912e0fb2d87a6e703920", size = 494645, upload-time = "2026-05-22T11:00:02.737Z" }, + { url = "https://files.pythonhosted.org/packages/6d/af/f16e805b7aefc2257b192b83a89300c8360b0fdffd3dfefa92dee4ec9b15/py_rust_stemmers-0.1.8-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0a68745d4b3c7f5abc778ca967e8711df6154873abcfe4e62a6631fa2363cc32", size = 596124, upload-time = "2026-05-22T11:00:04.499Z" }, + { url = "https://files.pythonhosted.org/packages/76/8c/e7a2c940ba00e0792ae346aed5e755d51d37cf6d6853f6b141e5380e285d/py_rust_stemmers-0.1.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7cc0cc0b8eb45d2158c28ea43e2f338c110aad63052ad3bd00bc7446a595e12f", size = 541771, upload-time = "2026-05-22T11:00:06.081Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/a0/dd7c5fc6ade6d2a2a49e49937f06f2d488511454e8ab1b313d277ee8c3b1/py_rust_stemmers-0.1.8-cp313-cp313-win_amd64.whl", hash = "sha256:15af4e12e1288de2e5241eec375afc6ad6be4c125a28ca010599d9f92db23f01", size = 212438, upload-time = "2026-05-22T11:00:07.244Z" }, + { url = "https://files.pythonhosted.org/packages/b0/7e/f4346adfd44acbd7eaedcbd7d21b7f40ec9712e6c699e71fddad8dae6f8d/py_rust_stemmers-0.1.8-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:526b58958c6ffa36c4a805326cfb624ecbd665d16ba435027dbed0bcbcaa09d2", size = 290379, upload-time = "2026-05-22T11:00:08.192Z" }, + { url = "https://files.pythonhosted.org/packages/c2/d8/988fc3f5dc0dbbd4bf5909f50ff953ab55ee8b5f79a835d00e57847d3123/py_rust_stemmers-0.1.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2b607f0b270951fb66479baf4b68716cc63a981585cbd898b0b6b5c359efde7e", size = 275458, upload-time = "2026-05-22T11:00:09.522Z" }, + { url = "https://files.pythonhosted.org/packages/f4/94/e04c8b6a8364bca1b368785cef143755dd2d1ffe74df8f8b47b075bb1043/py_rust_stemmers-0.1.8-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b0327b151ab8a338fb54fdac114ba34394327fc1e2c4c425ad1caf2013e5de3", size = 314711, upload-time = "2026-05-22T11:00:10.878Z" }, + { url = "https://files.pythonhosted.org/packages/4f/cb/f59f9a80caa099cb6625a46c9a8e6e7e80bb3ed284f17e80245c8240a66e/py_rust_stemmers-0.1.8-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dadd0e369703817fc7026987b3093f461f9f58d8dde74e689d546184bc8f3451", size = 319370, upload-time = "2026-05-22T11:00:11.961Z" }, + { url = "https://files.pythonhosted.org/packages/06/59/8211cd0f56e53f7770debd9a78de37985fb5662ae66e3b7b380f4c79888b/py_rust_stemmers-0.1.8-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:245e2c61c52e073341893a9682cd1396b61047154548aee30bb1af3d8ed4b4cc", size = 321373, upload-time = "2026-05-22T11:00:13.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/72/fe33e614c114264d1ba54d39da4b5a4abeb6aedd0d26e5a8fd0637d6ddba/py_rust_stemmers-0.1.8-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:451ee1c02a3f5cf1e161b46ba9032cdda4ba10a8b03ff9ee61c1d34d42a0bc81", size = 321707, upload-time = "2026-05-22T11:00:14.177Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/3cd18902fe2fa54557d3fe9132552256372d381c7aca71346163055d78b1/py_rust_stemmers-0.1.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d396dd25c473c1bc4248c79cd223f4b36356b55a124652f015c6a001547f81ac", size = 492457, upload-time = "2026-05-22T11:00:15.245Z" }, + { url = "https://files.pythonhosted.org/packages/90/d7/32c6d3995e7036b73683389de2771f4dbbf40de192b7efe73c2528ee1eb5/py_rust_stemmers-0.1.8-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:479c77c32d8be692f3cfcde7e19273f02ac81d6f45c6aef49887ef95cab7abbb", size = 596085, upload-time = "2026-05-22T11:00:16.404Z" }, + { url = "https://files.pythonhosted.org/packages/00/8c/e68fa5d862ea6a27fced3535c25ea4eaa26ba1ce00dfef5841924c74b167/py_rust_stemmers-0.1.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c786235275c5c2abb7f206b8236aee3ca0bc53c7497daf7fb7b01d3491469547", size = 539747, upload-time = "2026-05-22T11:00:17.414Z" }, + { url = "https://files.pythonhosted.org/packages/44/48/aa584cf3772e01231641c95dc1aa73327a7d986c562639d78d0013733acf/py_rust_stemmers-0.1.8-cp314-cp314-win_amd64.whl", hash = "sha256:931d13570962b093417e5443a9d1bd63d73fa239ebb81e5b1d346663571403e4", size = 209636, upload-time = "2026-05-22T11:00:18.662Z" }, + { url = "https://files.pythonhosted.org/packages/c0/8c/7c6d581412a6f33d316e72a8f3442ae0c61a7b6190ca30e1a06ee17ea234/py_rust_stemmers-0.1.8-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c03f51280d5d72f7f9b07101ad248845279dc1c82c47a74149303d25937464b7", size = 290748, upload-time = "2026-05-22T11:00:19.794Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/fe/04436ffe3aa4c02a40500835fc1a80d52375c738aa7ef66ebe0c4ccc2900/py_rust_stemmers-0.1.8-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:234fdcb58f4d907877ed03c9358668a149b5a66d096abcf43c324a4f5697d36d", size = 276111, upload-time = "2026-05-22T11:00:21.026Z" }, + { url = "https://files.pythonhosted.org/packages/45/24/6b32c86dd4eecdc309bfe6c15529a11e90b1e2c7af015366498c14e925f7/py_rust_stemmers-0.1.8-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dca0ae40715238582d6f1824b61d09ea3982359a061b69798ab5732b3ba0d4c5", size = 314816, upload-time = "2026-05-22T11:00:22.207Z" }, + { url = "https://files.pythonhosted.org/packages/22/78/3bf351dbcc7f51eb03a506c0bcf8aead8b1401cf26aaa1328968471531aa/py_rust_stemmers-0.1.8-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc185b599e646a0e39d11df3f5e6d15edefb110496601556385d33b55fed5de", size = 320180, upload-time = "2026-05-22T11:00:23.387Z" }, +] + +[[package]] +name = "pyarrow" +version = "24.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/bf/a34fee1d624152124fa8355c42f34195ad5fe5233ce5bb87946432047d52/pyarrow-24.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7c2b98645d576a0b9616892ead22b64a83a5f043c5e2ca15ebcefcb5b70c80cb", size = 35076681, upload-time = "2026-04-21T08:51:46.845Z" }, + { url = "https://files.pythonhosted.org/packages/1d/41/64180033d7027afce12dc96d0fe1f504c6fa112190582b458acea2399530/pyarrow-24.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:644a246325b8c69c595ad1dd4b463eba4b0cdb731370e4a86137d433208d6147", size = 
36684260, upload-time = "2026-04-21T08:51:53.642Z" }, + { url = "https://files.pythonhosted.org/packages/57/02/9b9320e673dd8a99411fac78690f3df92f6dd6f59754c750110bca66d64e/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:3a577bd840ca83f646f0a625dbc571dba7044c43c2d1503afc378b570954345c", size = 45698566, upload-time = "2026-04-21T10:46:02.133Z" }, + { url = "https://files.pythonhosted.org/packages/67/33/f75e91b9a64c3f33c787e263c93b871ad91b8a4a68c1d5cebddd9840e835/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:e3268e43984d0b1a185c89b4cfff282a7ead12fc93f56cfd7088bdbcbe727041", size = 48835562, upload-time = "2026-04-21T10:46:10.278Z" }, + { url = "https://files.pythonhosted.org/packages/a5/63/097510448e47e4091faa41c43ba92f97cecaab8f4535b56a3d149578f634/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2392d954fcb920f42d230284b677605e4e2fbb11f2821e823e642abd67fbb491", size = 49394997, upload-time = "2026-04-21T10:46:18.08Z" }, + { url = "https://files.pythonhosted.org/packages/60/6b/c047d6222ab279024a062742d1807e2fbaf27bba88a98637299ff47b9236/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bec9373df11544592b0ba7ec2af0e35059e5f0e7647c6183a854dedd193298f1", size = 51911424, upload-time = "2026-04-21T10:46:25.347Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ba/464cc70761c2a525d97ebd84e21c31ebd47f3ef4bdcee117009f51c46f24/pyarrow-24.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:c42ab9439498270139cc63e18847a02afe5c8b3ed9c931266533cfe378bd3591", size = 27251730, upload-time = "2026-04-21T10:46:30.913Z" }, + { url = "https://files.pythonhosted.org/packages/62/c9/a47ab7ece0d86cbe6678418a0fbd1ac4bb493b9184a3891dfa0e7f287ae0/pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74", size = 35068898, upload-time = "2026-04-21T10:46:36.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/bc/8db86617a9a58008acf8913d6fed68ea2a46acb6de928db28d724c891a68/pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3", size = 36679915, upload-time = "2026-04-21T10:46:42.602Z" }, + { url = "https://files.pythonhosted.org/packages/eb/8e/fb178720400ef69db251eb4a9c3ccf4af269bc1feb5055529b8fc87170d1/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868", size = 45697931, upload-time = "2026-04-21T10:46:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/f3/27/99c42abe8e21b44f4917f62631f3aa31404882a2c41d8a4cd5c110e13d52/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e", size = 48837449, upload-time = "2026-04-21T10:46:55.329Z" }, + { url = "https://files.pythonhosted.org/packages/36/b6/333749e2666e9032891125bf9c691146e92901bece62030ac1430e2e7c88/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57", size = 49395949, upload-time = "2026-04-21T10:47:01.869Z" }, + { url = "https://files.pythonhosted.org/packages/17/25/c5201706a2dd374e8ba6ee3fd7a8c89fb7ffc16eed5217a91fd2bd7f7626/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c", size = 51912986, upload-time = "2026-04-21T10:47:09.872Z" }, + { url = "https://files.pythonhosted.org/packages/f8/d2/4d1bbba65320b21a49678d6fbdc6ff7c649251359fdcfc03568c4136231d/pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981", size = 27255371, upload-time = "2026-04-21T10:47:15.943Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/a9/9686d9f07837f91f775e8932659192e02c74f9d8920524b480b85212cc68/pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810", size = 34981559, upload-time = "2026-04-21T10:47:22.17Z" }, + { url = "https://files.pythonhosted.org/packages/80/b6/0ddf0e9b6ead3474ab087ae598c76b031fc45532bf6a63f3a553440fb258/pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a", size = 36663654, upload-time = "2026-04-21T10:47:28.315Z" }, + { url = "https://files.pythonhosted.org/packages/7c/3b/926382efe8ce27ba729071d3566ade6dfb86bdf112f366000196b2f5780a/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66", size = 45679394, upload-time = "2026-04-21T10:47:34.821Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/829f7d9dfd37c207206081d6dad474d81dde29952401f07f2ba507814818/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb", size = 48863122, upload-time = "2026-04-21T10:47:42.056Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e8/f88ce625fe8babaae64e8db2d417c7653adb3019b08aae85c5ed787dc816/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e", size = 49376032, upload-time = "2026-04-21T10:47:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/1c/e3e72c8014ad2743ca64a701652c733cc5cbcee15c0463a32a8c55518d9e/pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826", size = 27355660, upload-time = "2026-04-21T10:48:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, + { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, + { url = "https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, + { url = "https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, + { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, + { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, + { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, + { url = "https://files.pythonhosted.org/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997, upload-time = "2026-04-21T10:49:48.796Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720, upload-time = "2026-04-21T10:49:55.858Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852, upload-time = "2026-04-21T10:50:04.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852, upload-time = "2026-04-21T10:50:12.293Z" }, + { url = "https://files.pythonhosted.org/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207, upload-time = "2026-04-21T10:50:20.677Z" }, + { url = "https://files.pythonhosted.org/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117, upload-time = "2026-04-21T10:50:29.14Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155, upload-time = "2026-04-21T10:51:22.337Z" }, + { url = "https://files.pythonhosted.org/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387, upload-time = "2026-04-21T10:50:35.552Z" }, + { url = "https://files.pythonhosted.org/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102, upload-time = "2026-04-21T10:50:42.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118, upload-time = "2026-04-21T10:50:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765, upload-time = "2026-04-21T10:50:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890, upload-time = "2026-04-21T10:51:02.439Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250, upload-time = "2026-04-21T10:51:10.576Z" }, + { url = "https://files.pythonhosted.org/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282, upload-time = "2026-04-21T10:51:16.815Z" }, +] + [[package]] name = "pycparser" version = "3.0" @@ -1328,14 +2329,14 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.12.1" +version = "2.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/81/58d0ac84e1ef3a3843791d6954d94c0b33d526c75eeb1efbce9d0a4c4077/pyjwt-2.13.0.tar.gz", hash = "sha256:41571c89ca91598c79e8ef18a2d07367d4810fbbd6f637794879baf1b7703423", size = 107515, upload-time = "2026-05-21T19:54:36.618Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5e/ecf12fdb62546d64385c158514e9b2b671f7832108ef2ecd2020ce0af2d1/pyjwt-2.13.0-py3-none-any.whl", hash = "sha256:66adcc2aff09b3f1bbd95fc1e1577df8ac8723c978552fd43304c8a290ac5728", size = 31274, upload-time = "2026-05-21T19:54:35.362Z" }, ] [package.optional-dependencies] @@ -1343,6 +2344,24 @@ crypto = [ { name = "cryptography" }, ] +[[package]] +name = "pynndescent" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "llvmlite" }, + { name = "numba" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = 
{ url = "https://files.pythonhosted.org/packages/4a/fb/7f58c397fb31666756457ee2ac4c0289ef2daad57f4ae4be8dec12f80b03/pynndescent-0.6.0.tar.gz", hash = "sha256:7ffde0fb5b400741e055a9f7d377e3702e02250616834231f6c209e39aac24f5", size = 2992987, upload-time = "2026-01-08T21:29:58.943Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/e6/94145d714402fd5ade00b5661f2d0ab981219e07f7db9bfa16786cdb9c04/pynndescent-0.6.0-py3-none-any.whl", hash = "sha256:dc8c74844e4c7f5cbd1e0cd6909da86fdc789e6ff4997336e344779c3d5538ef", size = 73511, upload-time = "2026-01-08T21:29:57.306Z" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -1352,9 +2371,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216, upload-time = "2024-09-29T09:24:11.978Z" }, ] +[[package]] +name = "pyreadline3" +version = "3.5.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/6d/f94028646d7bbe6d9d873c47ee7c246f2d29129d253f0d96cb6fcab70733/pyreadline3-3.5.6.tar.gz", hash = "sha256:61e53218b99656091ddb077df9e71f25850e72e030b6183b39c9b7e6e4f4a9bf", size = 100368, upload-time = "2026-05-14T17:55:04.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/5e/35c856e186b74678c24927847ad9895a51f1bc02a0c6126477a6c6040064/pyreadline3-3.5.6-py3-none-any.whl", hash = "sha256:8449b734232e42a5dcd74048e39b60db2839a4c38cf3ae2bf7707d58b5389c0d", size = 85243, upload-time = "2026-05-14T17:55:03.262Z" }, +] + [[package]] name = "pytest" -version = "9.0.3" +version = "9.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1365,9 +2393,9 @@ dependencies = [ { name = "pygments" }, { name = "tomli", marker = "python_full_version < 
'3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/84/0e/b5858858d74958632c49b72cb25a3976ff9f632397626715be71c89d3971/pytest-9.1.0.tar.gz", hash = "sha256:41dd9148c08072446394cefd3d79701701335a9f4cae69ba92e39f6c7f5c061c", size = 1634181, upload-time = "2026-06-13T18:52:45.983Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, + { url = "https://files.pythonhosted.org/packages/8b/5a/ba30a81239b909821b3153e303e7def45178bf353da4f72380e6c5e8793b/pytest-9.1.0-py3-none-any.whl", hash = "sha256:8ebb0e7888bdf2bdfc602ec51f8f62d50200af37356c74e503c79a94f5c81f32", size = 386453, upload-time = "2026-06-13T18:52:44.045Z" }, ] [[package]] @@ -1384,17 +2412,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "python-discovery" -version = "1.3.1" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/60/e88788207d81e46362cfbef0d4aaf4c0f49efc3c12d4c3fa3f542c34ebec/python_discovery-1.3.1.tar.gz", hash = "sha256:62f6db28064c9613e7ca76cb3f00c38c839a07c31c00dfe7ed0986493d2150a6", size = 68011, upload-time = "2026-05-12T20:53:36.336Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/1a/cbbaf13b730abb0a16b964d984e19f2fe520c21a4dc664051359a3f5a9e7/python_discovery-1.4.2.tar.gz", hash = "sha256:8f3746c4b4968d22afbb97d36e1a0e5b66e6c0f297290f2e95f05b9b8bf18690", size = 70277, upload-time = "2026-06-11T16:10:42.383Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/6f/a05a317a66fee0aad270011461f1a63a453ed12471249f172f7d2e2bc7b4/python_discovery-1.3.1-py3-none-any.whl", hash = "sha256:ed188687ebb3b82c01a17cd5ac62fc94d9f6487a7f1a0f9dfe89753fec91039c", size = 33185, upload-time = "2026-05-12T20:53:34.969Z" }, + { url = "https://files.pythonhosted.org/packages/1a/82/a70006589557f267f15bd384c0642ad49f0d97b690c3a05b166b9dcbad3b/python_discovery-1.4.2-py3-none-any.whl", hash = "sha256:475803f53b7b2ed6e490e27373f9d8340f7d2eebf9acdaf645d7d714c97bb500", size = 33886, upload-time = "2026-06-11T16:10:41.192Z" }, ] [[package]] @@ -1408,33 +2448,36 @@ wheels = [ [[package]] name = "python-multipart" -version = "0.0.29" +version = "0.0.32" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/4e/fe/70bd71a6738b09a0bdf6480ca6436b167469ca4578b2a0efbe390b4b0e70/python_multipart-0.0.29.tar.gz", hash = "sha256:643e93849196645e2dbdd81a0f8829a23123ad7f797a84a364c6fb3563f18904", size = 45678, upload-time = "2026-05-17T17:29:47.654Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/42/55c32bb9b12693c092ad250a0e82edb5b31ddeda6eb772de5f308b3804ad/python_multipart-0.0.32.tar.gz", hash = "sha256:be54b7f3fa167bb83e4fcd936b887b708f4e57fe75911c02aebf53efaf8d938e", size = 46881, upload-time = "2026-06-04T16:18:58.647Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/cb/769cfc37177252872a45a71f3fbdde9d51b471a3f3c14bfe95dde3407386/python_multipart-0.0.29-py3-none-any.whl", hash = "sha256:2ddcc971cef266225f54f552d8fa10bcfbb1f14446caec199060daac59ff2d69", size = 29640, upload-time = "2026-05-17T17:29:45.69Z" }, + { url = "https://files.pythonhosted.org/packages/e1/04/e8135ebd1ad02c56ec633277529b2602ff99ff634be76cdba5744cf554fd/python_multipart-0.0.32-py3-none-any.whl", hash = "sha256:ff6d3f776f16878c894e52e107296ffc890e913c611b1a4ec6c44e2821fe2e23", size = 30042, upload-time = "2026-06-04T16:18:57.319Z" }, ] [[package]] name = "pywin32" -version = "311" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/40/44efbb0dfbd33aca6a6483191dae0716070ed99e2ecb0c53683f400a0b4f/pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3", size = 8760432, upload-time = "2025-07-14T20:13:05.9Z" }, - { url = "https://files.pythonhosted.org/packages/5e/bf/360243b1e953bd254a82f12653974be395ba880e7ec23e3731d9f73921cc/pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b", size = 9590103, upload-time = "2025-07-14T20:13:07.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/38/d290720e6f138086fb3d5ffe0b6caa019a791dd57866940c82e4eeaf2012/pywin32-311-cp310-cp310-win_arm64.whl", hash = "sha256:0502d1facf1fed4839a9a51ccbcc63d952cf318f78ffc00a7e78528ac27d7a2b", size = 8778557, upload-time = "2025-07-14T20:13:11.11Z" }, - { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, - { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, - { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, - { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = 
"sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, - { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, - { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, - { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, - { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, - { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, - { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +version = "312" +source = { registry = "https://pypi.org/simple" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/1b/9cfdeac80ee45bebbbcb31f1b7b99a0d81a1c72de48d837be984e0e88b1d/pywin32-312-cp310-cp310-win32.whl", hash = "sha256:772235332b5d1024c696f11cea1ae4be7930f0a8b894bb43db14e3f435f1ff7e", size = 6361387, upload-time = "2026-06-04T07:49:14.329Z" }, + { url = "https://files.pythonhosted.org/packages/33/b1/7afc96d041d982c27bc2df6f853d43f01fd273e3d39d04be3647ddeb533d/pywin32-312-cp310-cp310-win_amd64.whl", hash = "sha256:5dbc35d2b5320dc07f25fa31269cfb767471002b17de5eb067d03da68c7cb2db", size = 6926780, upload-time = "2026-06-04T07:49:16.881Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/4140da9ad54108e517f4a16b2d83da3033e08662144623e1239587cb7db6/pywin32-312-cp310-cp310-win_arm64.whl", hash = "sha256:3020656e34f1cf7faeb7bccd2b84653a607c6ff0c55ada85e6487d61716deabd", size = 4307203, upload-time = "2026-06-04T07:49:18.993Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f5/10a6e845a00fc5e7afd0a988b744f403d4d57162a28d160a093c4d9322f0/pywin32-312-cp311-cp311-win32.whl", hash = "sha256:17948aeadbdb091f0ced6ef0841620794e68327b94ee415571c1203594b7215c", size = 6362659, upload-time = "2026-06-04T07:49:21.349Z" }, + { url = "https://files.pythonhosted.org/packages/35/c4/dcd2d62b5944b6d5db53413a5899016ccd57ffcb7278f3f81655d25d2027/pywin32-312-cp311-cp311-win_amd64.whl", hash = "sha256:d11417d84412f859b722fad0841b3614459ed0047f7542d8362e77884f6b6e8a", size = 6928825, upload-time = "2026-06-04T07:49:23.934Z" }, + { url = "https://files.pythonhosted.org/packages/b7/56/3cbb433fe4501cdba2eb9040f56a4e1a8243faa4186b25295564d1a7a79d/pywin32-312-cp311-cp311-win_arm64.whl", hash = "sha256:b2200a054ca6d6625c4842fc56a4976a4b47f96b73dbe5538c3f813a80359f47", size = 6721875, upload-time = "2026-06-04T07:49:26.416Z" }, + { url = "https://files.pythonhosted.org/packages/83/ff/32aa7d2ed0ab12b323aaa64f9b75e6ad4f8fd09f9ccfc28c79414d46838d/pywin32-312-cp312-cp312-win32.whl", hash = 
"sha256:dab4f65ac9c4e48400a2a0530c46c3c579cd5905ecd11b80692373915269208b", size = 6371877, upload-time = "2026-06-04T07:49:28.836Z" }, + { url = "https://files.pythonhosted.org/packages/03/d9/77040d3b43df3f3be32ea289433d660d2727f5ba327bc73be835127d9d60/pywin32-312-cp312-cp312-win_amd64.whl", hash = "sha256:b457f6d628a47e8a7346ce22acb7e1a46a4a78b52e1d17e1af56871bd19a93bc", size = 6914841, upload-time = "2026-06-04T07:49:31.85Z" }, + { url = "https://files.pythonhosted.org/packages/e3/cc/7b1ec671775756020a0ee7f4feeaf3c568f0ab86bd3900088cf986937a92/pywin32-312-cp312-cp312-win_arm64.whl", hash = "sha256:6017c58e12f6809fbb0555b75df144c2922a9ffd18e4b9b5afa863b6c1a9d950", size = 6727901, upload-time = "2026-06-04T07:49:34.244Z" }, + { url = "https://files.pythonhosted.org/packages/2d/41/12fbfd7f36ed2146d8bc9de96c2741296bf0d490b98508496cff322e274c/pywin32-312-cp313-cp313-win32.whl", hash = "sha256:7a27df850933d16a8eabfbaeb73d52b273e2da667f80d70b01a89d1f6828d02c", size = 6370184, upload-time = "2026-06-04T07:49:36.253Z" }, + { url = "https://files.pythonhosted.org/packages/ba/db/36a78e3403099d31d9746d13fdcde5accc43c1155f375a34d15983a479a7/pywin32-312-cp313-cp313-win_amd64.whl", hash = "sha256:c53e878d15a1c44788082bfe712a905433473aa38f86375b7cf8b45e3acbaaf9", size = 6914298, upload-time = "2026-06-04T07:49:38.876Z" }, + { url = "https://files.pythonhosted.org/packages/84/37/c1697194092b76de9ed47ca124323f02c57ffc8a45c06f88a3d5acaf01eb/pywin32-312-cp313-cp313-win_arm64.whl", hash = "sha256:59aba5d5940842075343a5ddc6b11f1cdf0d1567fe745290359dfbcc7c2eb831", size = 6727640, upload-time = "2026-06-04T07:49:41.083Z" }, + { url = "https://files.pythonhosted.org/packages/fc/2b/1f3cded5822fd49c02f40544cbb5f58c7cfd6b1694869fd476cb6170ee97/pywin32-312-cp314-cp314-win32.whl", hash = "sha256:a77a90fbb6881238d2ca9c6fd797b25817f3768fe78d214a90137ff055a75f5b", size = 6468928, upload-time = "2026-06-04T07:49:43.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/82/3bf86d2e2808902013132e1ce905a7da0da53790f3836c64bf44d55e24f3/pywin32-312-cp314-cp314-win_amd64.whl", hash = "sha256:a4dd3a848290ef724347b19f301045831d8e802fa4464f491b98b1e0a081432e", size = 7024157, upload-time = "2026-06-04T07:49:45.34Z" }, + { url = "https://files.pythonhosted.org/packages/a4/0e/73f6d6800b4f27655abd9e9f6aaeaefcddb2b946e4674efa2bab184a7f7b/pywin32-312-cp314-cp314-win_arm64.whl", hash = "sha256:9fce94568364e0155e6dfb781ac5d95903be8baf28670632beab1b523f300daa", size = 6839598, upload-time = "2026-06-04T07:49:47.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/61/caa39686032d2ebdd04ff0ab5cbe163126c0066d98e00c9018646e42393b/pywin32-312-cp315-cp315-win32.whl", hash = "sha256:5c1fbe4a937a73ae9297384a3da38518cbc694c68ad8a809b2e19acd350f03ed", size = 6471159, upload-time = "2026-06-04T07:49:50.035Z" }, + { url = "https://files.pythonhosted.org/packages/0f/cd/7e1de64a4a6f69c04214169657ccab0d93a670ea50e35eb8f489d7378249/pywin32-312-cp315-cp315-win_amd64.whl", hash = "sha256:c2f03a0f73f804a13c2735b99392b0cd426bb4f2c4d0178e5ac966a0f21618d5", size = 7025293, upload-time = "2026-06-04T07:49:54.857Z" }, + { url = "https://files.pythonhosted.org/packages/23/ed/4532e9388e65fa16b46776ef47ad631a64eda1631884488af707666350ed/pywin32-312-cp315-cp315-win_arm64.whl", hash = "sha256:a8597d28f267b39074aef51fa593530082b39cbe5a074226096857b1fed2dfb9", size = 6840337, upload-time = "2026-06-04T07:49:57.531Z" }, ] [[package]] @@ -1512,16 +2555,16 @@ wheels = [ [[package]] name = "readme-renderer" -version = "44.0" +version = "45.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docutils" }, { name = "nh3" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/a9/104ec9234c8448c4379768221ea6df01260cd6c2ce13182d4eac531c8342/readme_renderer-44.0.tar.gz", hash = "sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1", size = 32056, 
upload-time = "2024-07-08T15:00:57.805Z" } +sdist = { url = "https://files.pythonhosted.org/packages/02/51/d3a6ea424652c60f05600d8c2e01a55c913755e7cdad64afabbd1aa16f44/readme_renderer-45.0.tar.gz", hash = "sha256:030a8fac74904f8fba11ad1bb6964e3f76e896dc7e5e71f16af190c9056696d1", size = 36172, upload-time = "2026-06-09T21:05:17.37Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/67/921ec3024056483db83953ae8e48079ad62b92db7880013ca77632921dd0/readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151", size = 13310, upload-time = "2024-07-08T15:00:56.577Z" }, + { url = "https://files.pythonhosted.org/packages/97/1b/295bf2fa3e740131778065e5ffa2c481f0e7210182d408e9a2c244ff5b0c/readme_renderer-45.0-py3-none-any.whl", hash = "sha256:3385ed220117104a2bceb4a9dac8c5fdf6d1f96890d7ea2a9c7174fd5c84091f", size = 14134, upload-time = "2026-06-09T21:05:15.85Z" }, ] [[package]] @@ -1530,7 +2573,8 @@ version = "0.37.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, - { name = "rpds-py" }, + { name = "rpds-py", version = "0.30.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "rpds-py", version = "2026.5.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } @@ -1538,6 +2582,127 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = 
"sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] +[[package]] +name = "regex" +version = "2026.5.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/0e/49aee608ad09480e7fd276898c99ec6192985fa331abe4eb3a986094490b/regex-2026.5.9.tar.gz", hash = "sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270", size = 416074, upload-time = "2026-05-09T23:15:19.37Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ed/0ad2c8edf634918eb4484365d3819fa7bd7f58daf807fe7fb21812c316e5/regex-2026.5.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a9e1328e17c84c1a5d22ec9f785ecef4a967fab9a42b6a8dc3bcbebd0a0c9e44", size = 489438, upload-time = "2026-05-09T23:11:29.374Z" }, + { url = "https://files.pythonhosted.org/packages/89/a9/4ed972ad263963b860b7c3e86e0e1bcc791def47b43b8c8efe57e710f139/regex-2026.5.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfe1ce50cbfb569d74e1e4337da6468961f31dbea55fd85aa5de59c0947a805a", size = 291270, upload-time = "2026-05-09T23:11:33.254Z" }, + { url = "https://files.pythonhosted.org/packages/16/81/075930d9fa28c4ea1f53398dd015ee7c882f623539759113cda1257f4b82/regex-2026.5.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15ee42209947f4ca045412eae98416317238163618ace2a8e54f99586a466733", size = 289198, upload-time = "2026-05-09T23:11:35.769Z" }, + { url = "https://files.pythonhosted.org/packages/d4/c8/5cdfbf0b5dc6599e1b6131eff43262e5275d4ec3469ce10216061659aadb/regex-2026.5.9-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4bb445ff3f725f59df8f6014edb547ee928ec7023a774f6a39a3f953038cbb2", size = 784765, upload-time = "2026-05-09T23:11:37.689Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/ca/ae5fd6edc59b7f84b904b31d6ec39a860cbcecd10f64bd5a062ca83a4864/regex-2026.5.9-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:446ddd671e43ab535810c4b21cff7104945c701d4a14d1e6d1cd6f4e445a8bea", size = 852115, upload-time = "2026-05-09T23:11:39.973Z" }, + { url = "https://files.pythonhosted.org/packages/f6/ce/a91cf555afb51f3b74a182e24ba073b91ea7bb64592fc4b315c111bb19fd/regex-2026.5.9-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7b92817338591505f282cf3864c145244b1edcf5381d237038df955001091538", size = 899503, upload-time = "2026-05-09T23:11:42.48Z" }, + { url = "https://files.pythonhosted.org/packages/55/7f/725a0a2b245a4cf0c4bab29d0e97c74285d94136a65d1b55a6459a583502/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b8a143aca6c39b446ea8092cde25cc8fe9304d4f5fecfbc1a9dbb0282703c2", size = 794093, upload-time = "2026-05-09T23:11:44.681Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2a/996efbd59ce6b5d4a09e3af6180ceb62af171f4a9a6fb557d2f0ae0d462b/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0f03aa6898aaaac4592479821df16e68e8d0e29e903e65d8f2dfb2f19028a989", size = 786234, upload-time = "2026-05-09T23:11:46.882Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0a/8731e8b8806174c9cdd5903f80a14990331c1f42fc4209b540952e9e010d/regex-2026.5.9-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ed457d8e98ae812ed7732bef7bf78de78e834eae0372a74e23ca90ef21d910f9", size = 769895, upload-time = "2026-05-09T23:11:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/9a/0b/932473194bd563f342a412ae2ffbbd6da608306a2bc4e99249a41c2b0b92/regex-2026.5.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71b61c5bfe1c806332defc42ad6c780b3c55f661986d7f40283a3a88274b4c00", size = 
774991, upload-time = "2026-05-09T23:11:51.261Z" }, + { url = "https://files.pythonhosted.org/packages/98/80/9523d196010031df25f7177ee0a467efbee436324038e5d99def17a57515/regex-2026.5.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3b1e39888c5e0c7d92cea4fc777396c4a90363b05de75d02eb459a4752200808", size = 848790, upload-time = "2026-05-09T23:11:53.232Z" }, + { url = "https://files.pythonhosted.org/packages/3c/07/56987b35e89edf47e4a38cf2845aeee476bfa688a6bdbd3e820cda461dc1/regex-2026.5.9-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:6ba42b2e7e7f46cf68cc6a5ca36fa07959f9bbd9c6bdcc47b6ee76549a590248", size = 757679, upload-time = "2026-05-09T23:11:55.82Z" }, + { url = "https://files.pythonhosted.org/packages/04/2a/ff713fff0c566507c06a4ce2dc0ae8e7eeebc88811a95fc81cf1e7d534dd/regex-2026.5.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:c010eb8caca74bdb40c07498d7ece26b4428fd3f04aa8a72c9ac6f79e8faaac6", size = 837116, upload-time = "2026-05-09T23:11:57.934Z" }, + { url = "https://files.pythonhosted.org/packages/77/90/df6d982b03e3614785c6937ba51b57f6733d97d2ee1c9bc7531dbfab3a54/regex-2026.5.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a6a563446a41adc451393dc6b8e6ad87979efaee3c8738690a8d1b08ebead1b4", size = 782081, upload-time = "2026-05-09T23:11:59.607Z" }, + { url = "https://files.pythonhosted.org/packages/c7/8a/4e88a5f7c3e98489aac4dd23142723d907b2a595b4a6abcbacabefeded09/regex-2026.5.9-cp310-cp310-win32.whl", hash = "sha256:954cc214c04663ee6d266fc61739cad83054683048de65c5bd1d640ad28098ac", size = 266247, upload-time = "2026-05-09T23:12:01.116Z" }, + { url = "https://files.pythonhosted.org/packages/6a/40/4b224cb0582b2dca1786726e6cdabe26abbf757d7f6718332f186da155d2/regex-2026.5.9-cp310-cp310-win_amd64.whl", hash = "sha256:b310768746dd314ea6e2ff4cc89ef215426813396ff4e94ee8e6f7096c8b6e03", size = 278416, upload-time = "2026-05-09T23:12:03.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/4d/014fbe803204cab0947ee428f09f658a29632053dde1d3c6176bb4f0fd4c/regex-2026.5.9-cp310-cp310-win_arm64.whl", hash = "sha256:19c16ceb4a267a8789e25733e583983eeab9f0f8664e66b0bd1c5d21f14c2d4b", size = 270413, upload-time = "2026-05-09T23:12:04.649Z" }, + { url = "https://files.pythonhosted.org/packages/c2/dc/c1f2df4027e82fc54b5a473e4b250f5139faca49a0fbe29a48668d228f34/regex-2026.5.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ccf5249114cc3e772ecdd88a98a86eca0fd74c61ce32a94743758c083fc05d48", size = 489445, upload-time = "2026-05-09T23:12:06.111Z" }, + { url = "https://files.pythonhosted.org/packages/03/d2/59f01110660081cce9c0bc30ebd0b5ee250dacf658e3248ed92f01e0e8ee/regex-2026.5.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46f1326ca6e65b0879d23ca302c0f2415aad42ff0309b9c818e7949fe19a41d8", size = 291271, upload-time = "2026-05-09T23:12:07.731Z" }, + { url = "https://files.pythonhosted.org/packages/58/b6/14b2c84ff90ddb370c81d27503f4a0fcf071496416f4855f6cc8c5d81c35/regex-2026.5.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef31cbfe458e21c6122ba8150ff060e0c7789ed0d26eb423f25472584920b555", size = 289212, upload-time = "2026-05-09T23:12:09.266Z" }, + { url = "https://files.pythonhosted.org/packages/03/d0/4db86529117320de0c84afd90e70bb47434625875e34fcef9d8c127c5b16/regex-2026.5.9-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:992604d02e6d9c6d786c24a706a71ecffe1020fc1ef264044474cd81fa2c3919", size = 792310, upload-time = "2026-05-09T23:12:11.416Z" }, + { url = "https://files.pythonhosted.org/packages/07/78/fe4800cd322f862ecffd2d553409b20d80650e5ed71b9d178f853d020b82/regex-2026.5.9-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9411dd64ca95477225734a93dfc8583b51916b8d5942f99d6cac21e09965451", size = 861721, upload-time = "2026-05-09T23:12:13.681Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/d0/b3618a895dd8feb897c61bb2954edd265e1767d82a01d53065d5871127a3/regex-2026.5.9-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4a3ff360dfb836fecdb93a4598f9d6e2ac81e3e397125145c6221bf58cf4c", size = 906460, upload-time = "2026-05-09T23:12:15.443Z" }, + { url = "https://files.pythonhosted.org/packages/33/6f/1481597e859ef19508b345eec4afd1416ed6e6b459c75a64026ef193aecf/regex-2026.5.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a661a7d270a61f7cf460caee8b9fa2d5ef9e5c681234bcb9e0fe14f488e7dfc", size = 799843, upload-time = "2026-05-09T23:12:16.892Z" }, + { url = "https://files.pythonhosted.org/packages/73/59/955734c803f59108deccba3597ae440c76b62a652733c0006e6243758420/regex-2026.5.9-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f079e50a0d3cc3cd5091fa9ff45869a2e6b2cd35895731edafb0327901a8d86d", size = 773610, upload-time = "2026-05-09T23:12:19.127Z" }, + { url = "https://files.pythonhosted.org/packages/68/8f/70c04a236d651c81881dac42ef8538bddda6121434509d0a22d9e601503b/regex-2026.5.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4ebe8f0b5ec5a5024dc4a4c59f444c4e9afc5f2abdbb8962065b75d27fb971f9", size = 781645, upload-time = "2026-05-09T23:12:20.806Z" }, + { url = "https://files.pythonhosted.org/packages/1d/96/05c7434d88185e5d27fe54aeb74df86bd77cd79f52f0b4eae54faa8fea70/regex-2026.5.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:97cf3bc1b7d7d2306772ec07366c80d9df00ff79e79cea32898883a646d2fae2", size = 854473, upload-time = "2026-05-09T23:12:22.465Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/6e3d8202d981f3117004bf341ee74893ba4ba8a9fbaf4b94615846550a08/regex-2026.5.9-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0f9eede6a5cbdc02d4978090186390936e1776a7d1359b21e41014c609880bcf", size = 763311, upload-time = "2026-05-09T23:12:24.351Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/c7/e7737f1526b3fb32bd4c337fd6c71c3ebb5c8296fc34d11197e0955d2e35/regex-2026.5.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:01f0f5f55f4b64dacec85dc116d3c05fd23ad3ff037bbc73a2085775953c2611", size = 844593, upload-time = "2026-05-09T23:12:26.341Z" }, + { url = "https://files.pythonhosted.org/packages/a5/27/0daffb1a535bb39f422c3d200f4ab023c71110ad66a32b366bee708baba0/regex-2026.5.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1268eddd8486dc561d08eee1156e40aa3a8fe10f4bdec8fa653b455fcbffd12c", size = 789167, upload-time = "2026-05-09T23:12:27.975Z" }, + { url = "https://files.pythonhosted.org/packages/ce/fc/294fe4fac4f2ed67207b17471815870c1c45b3a489e08e0ac96daea16ef6/regex-2026.5.9-cp311-cp311-win32.whl", hash = "sha256:8676474c07469d6f33dd1085ca2cd45f65785f32518f2b20e36d9953ca07f994", size = 266249, upload-time = "2026-05-09T23:12:30.141Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b0/8dce459f6245bcf8f6e9f23ac9569f1a0f15c131cc0745e82b43226204cf/regex-2026.5.9-cp311-cp311-win_amd64.whl", hash = "sha256:246de9d60aa3f8538b519834dd95cbf276ea263d6a7bd5a3666dc3fa0230505b", size = 278423, upload-time = "2026-05-09T23:12:31.676Z" }, + { url = "https://files.pythonhosted.org/packages/db/8d/f9aeff6ad63a3ef720386f2907e6d34a35a510a6e498ebad28b0fb3f6ab6/regex-2026.5.9-cp311-cp311-win_arm64.whl", hash = "sha256:d726ca3f0d76969bf1e8e477d160d3d666bbf999f6860bd314889e5345782046", size = 270420, upload-time = "2026-05-09T23:12:33.194Z" }, + { url = "https://files.pythonhosted.org/packages/50/9b/6550044bc44e17c84d312c031c2ec42fbdb6a4ec4e29093be3a172d08772/regex-2026.5.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57eeeb05db7979413dec5438f2db21d7ecbba787cde7a711df1a6f6df672aa06", size = 490451, upload-time = "2026-05-09T23:12:34.72Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/95/fc7ba4303b5a0f92446a12ee6778ef2c6c799233f5060042a31bf390cfe9/regex-2026.5.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:398c521292f4c7fb807001dcd54694d3a1fcafc179a36ad9cc56f98df85930b6", size = 292112, upload-time = "2026-05-09T23:12:36.285Z" }, + { url = "https://files.pythonhosted.org/packages/54/4b/ee27938d1b2c443e89a9a10e00d2d19aa5ee300cd3d61140644e93bb083e/regex-2026.5.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f7a7c26137296beba7784de6eba69c6a93a63ccebc385e4962fe67e267a91225", size = 289599, upload-time = "2026-05-09T23:12:38.089Z" }, + { url = "https://files.pythonhosted.org/packages/d8/dd/ba103dc19614e25f3880800ca67ce093d6e21b325d72b8383c7bf906e9fa/regex-2026.5.9-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6441cc660d76107934a09c22167200839a0e89604a6297f78a974e66e931d2c0", size = 796732, upload-time = "2026-05-09T23:12:40.062Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e7/f035b4fd858b050b0080bf302968dc0f59ba34e391872d54936758e6844e/regex-2026.5.9-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91328f1c23d47595ca3ef0a7557fa129c5a23404b775c770697d2f35b33e0107", size = 865440, upload-time = "2026-05-09T23:12:42.059Z" }, + { url = "https://files.pythonhosted.org/packages/0a/51/8cd301ecc899aea28124357f729f4272f44de7806fc7ca02490bfbe253e8/regex-2026.5.9-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:93a7860539414dddaefba2b40f8771765ae17949d4c7182b876ce429e11a8309", size = 912329, upload-time = "2026-05-09T23:12:44.373Z" }, + { url = "https://files.pythonhosted.org/packages/cc/1e/3fbe2fa1e8cebd62f3bb7d3321cff1640aca2e240b51d9bd624aad949260/regex-2026.5.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd2810d22146b6d838acc5ec15602cb6b47920aa4e33015df3868eedfd20bab8", size = 801239, 
upload-time = "2026-05-09T23:12:46.268Z" }, + { url = "https://files.pythonhosted.org/packages/17/2f/6f6008682bf2cf98040a0d3153a8e557b6ab728d7713d045cee4ce544ab8/regex-2026.5.9-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daff2bdbaf1d23e52fdff7c0b7bc2048b68f978df6a4d107ac981f94caef2e66", size = 777054, upload-time = "2026-05-09T23:12:48.051Z" }, + { url = "https://files.pythonhosted.org/packages/19/2b/eee0d20a6842ba04df4b8847a920b57ef56853f14ef85405473e586b605a/regex-2026.5.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4eeb011098fcb77af513dcef521a3dbecbf8849b1e38940759d293b7a93f5026", size = 785098, upload-time = "2026-05-09T23:12:49.851Z" }, + { url = "https://files.pythonhosted.org/packages/4a/98/6fc1e6410feefb92159edaed5041992bfe390e8d26c721865434acbca558/regex-2026.5.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ea9c8ecfa1b73c73b626534d6626e5340d429630943672b8480724f44e84b962", size = 860095, upload-time = "2026-05-09T23:12:51.666Z" }, + { url = "https://files.pythonhosted.org/packages/18/a3/bd855e0f2cb1a978ecf6fa6bb69632dd9c3f6ea3b81cde62fde14c9daec7/regex-2026.5.9-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:cd2846168eb9ee3c513902bc8225409cb1caab31d04728b145171fa1625d9621", size = 765762, upload-time = "2026-05-09T23:12:53.413Z" }, + { url = "https://files.pythonhosted.org/packages/dc/66/0ae8c092e60b14c79d24f8e0b7f0aea5bfbffdcab00b5483d13404d3c3a5/regex-2026.5.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39617fb0cde9c0e6306dc70e3bfc096f3da793219879f7ae7aa341a69fbdcf6d", size = 852100, upload-time = "2026-05-09T23:12:55.256Z" }, + { url = "https://files.pythonhosted.org/packages/21/de/8dfde60fc1b21c946a893ba273403b72617edb261370cb1087099a83f088/regex-2026.5.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd03c4f0e33280d15cae17159b899245d6b7c53d21def19b263b39655061f5ce", size = 789479, upload-time = "2026-05-09T23:12:57.573Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/1c/bdcc98f9a4af4fdd166c74941174619ccff4726d3ce32faa8e9a2ecd38dd/regex-2026.5.9-cp312-cp312-win32.whl", hash = "sha256:164eba9b755ea6f244b0d881196fbc1fac09714e9782c9e2732b813142033c8e", size = 266699, upload-time = "2026-05-09T23:12:59.14Z" }, + { url = "https://files.pythonhosted.org/packages/78/87/240d36864f9e48ace85f72e79ced97ceb7f27ce87739a947dcb834b4e6bc/regex-2026.5.9-cp312-cp312-win_amd64.whl", hash = "sha256:86f40a5d6444db30a125c9c9177e6b25dad981cbc37451fd838f145e6edac92e", size = 277783, upload-time = "2026-05-09T23:13:00.789Z" }, + { url = "https://files.pythonhosted.org/packages/4f/b5/7b30f312b0669dff5beebe5b0989dc2d1a312b1a44fab852199c387a5b96/regex-2026.5.9-cp312-cp312-win_arm64.whl", hash = "sha256:96f5f58b54a063d7ea9dca08e1cf57bfe10499c4d579ee672da284f57f5f0070", size = 270513, upload-time = "2026-05-09T23:13:02.426Z" }, + { url = "https://files.pythonhosted.org/packages/aa/da/797e91ecec6f84135da778ddce78c20e0af5d2a15c26f87a81bc3eadb6db/regex-2026.5.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb", size = 490303, upload-time = "2026-05-09T23:13:04.382Z" }, + { url = "https://files.pythonhosted.org/packages/44/da/bf30abaaa737b58f4a4b8c4a03659e02fd92092c822e0197ed9e0daab917/regex-2026.5.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d7bdc0ab8f3dd7e1b4f9ab88634e13374669db86bb3c72e8292f07ae313f539f", size = 292019, upload-time = "2026-05-09T23:13:06.022Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e7/d0eaf5713828417b9e5648cf81fa9bacd4961f6ab98c380c2034f8716e35/regex-2026.5.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c", size = 289468, upload-time = "2026-05-09T23:13:08.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/9b/b3fdd62b003baa1a9b593cd8c8699c9651c2e80cc21a5c715707983c42d7/regex-2026.5.9-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed", size = 796749, upload-time = "2026-05-09T23:13:10.573Z" }, + { url = "https://files.pythonhosted.org/packages/d4/30/66ab84588765f5b4b271a9ca09ef7ce2b87caa95176ec3d2ad65d7bc4902/regex-2026.5.9-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020", size = 865445, upload-time = "2026-05-09T23:13:12.523Z" }, + { url = "https://files.pythonhosted.org/packages/1a/89/f05169e8588aac365f35ffc7f3bc3184f095ef4cfded7cfaa3c7fd5dbd89/regex-2026.5.9-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2", size = 912322, upload-time = "2026-05-09T23:13:14.281Z" }, + { url = "https://files.pythonhosted.org/packages/30/e1/c93444052cf41581f3c884ab3fb5823daf0992f11cd4388d4275ca610558/regex-2026.5.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2", size = 801269, upload-time = "2026-05-09T23:13:16.569Z" }, + { url = "https://files.pythonhosted.org/packages/50/fe/0cf96b882f540e62e8b9956599798203d599c44cf4c77917ca27400ff69b/regex-2026.5.9-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04", size = 777085, upload-time = "2026-05-09T23:13:18.675Z" }, + { url = "https://files.pythonhosted.org/packages/23/5c/d78d4924e7fc875557b9e9b768423925fdfaac5549d06da7810019a9bd26/regex-2026.5.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c", size = 785153, upload-time = "2026-05-09T23:13:20.525Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e0/5214774090e7b4524dcea3e3c4aa74141d43043f8beb49c1599db1c8b53a/regex-2026.5.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f", size = 860164, upload-time = "2026-05-09T23:13:22.263Z" }, + { url = "https://files.pythonhosted.org/packages/6e/e1/4a57a83350319b1271f0d7a249b8672513ed928b237a741631270de6caea/regex-2026.5.9-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8", size = 765731, upload-time = "2026-05-09T23:13:24.277Z" }, + { url = "https://files.pythonhosted.org/packages/12/f4/499e74a20c156fc75836ee04a72a38d1a063978f600937f9760467beb1b0/regex-2026.5.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6", size = 852062, upload-time = "2026-05-09T23:13:26.125Z" }, + { url = "https://files.pythonhosted.org/packages/5b/92/7eebc0d0a01e78629695f342ba17e0deaff8fb45e79cc0d7b98287da6e3e/regex-2026.5.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21", size = 789577, upload-time = "2026-05-09T23:13:27.814Z" }, + { url = "https://files.pythonhosted.org/packages/05/a4/018e71f7d2ad48c1ebe6d3ae0026f9b7cb4802fd15c7cc02fdf724355102/regex-2026.5.9-cp313-cp313-win32.whl", hash = "sha256:f3844f134e834076677dd369976e9f5068679fcb8e50102fdf6b7ac96a3ec127", size = 266691, upload-time = "2026-05-09T23:13:29.549Z" }, + { url = "https://files.pythonhosted.org/packages/e6/1d/861a93719fb9ee7dbfc3761b3797b7a3e112a5d42c6129459d2d741be9b5/regex-2026.5.9-cp313-cp313-win_amd64.whl", hash = "sha256:3527bb4942d2c14552155406cdedd906567456821848aed1cb4933a391bf5eca", size = 277747, upload-time = "2026-05-09T23:13:31.859Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/c6/0a2436ae4da1ba76e51cb98943c6838a9a721faa40ebe2dce07694ae34e3/regex-2026.5.9-cp313-cp313-win_arm64.whl", hash = "sha256:56a33f191f17d8c417f99945ebdc1e691d3af9605d86ec68c7e54a57e3e17af6", size = 270500, upload-time = "2026-05-09T23:13:33.525Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e9/d21346f7b60ed58789371358ed66b09d00f832e1bd7c06e55d9da5679882/regex-2026.5.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3", size = 494172, upload-time = "2026-05-09T23:13:35.935Z" }, + { url = "https://files.pythonhosted.org/packages/c4/43/fd1177a2032037c681baecdb3422ee4e1424aec4e4f470ef47793d325274/regex-2026.5.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:48036f6374aaa79eb3b754ec29c61d1c6b1606749d705a13f8854fa2539671f6", size = 293952, upload-time = "2026-05-09T23:13:38.307Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7d/9fbf919768368d3f8a4f6c692cf2aa61e482b2b81ec6a298ace4cbf02480/regex-2026.5.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff", size = 292314, upload-time = "2026-05-09T23:13:40.353Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6c/e41bfeecb589716843e7c4df09ba46ff2a42961457afece19059d85caeef/regex-2026.5.9-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88", size = 811681, upload-time = "2026-05-09T23:13:42.543Z" }, + { url = "https://files.pythonhosted.org/packages/87/83/a5c1c525fba0aa656e88ad0face0b1829788ef4c2fb6b26df58aa1151b84/regex-2026.5.9-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178", size = 871135, upload-time = "2026-05-09T23:13:44.326Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/d4/80882e799e440dd878b0979cbebf8fa4d54624a332c83037c7a701649e3f/regex-2026.5.9-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100", size = 917265, upload-time = "2026-05-09T23:13:47.295Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ff/8db60211e2286e396aad7dc7725356c502bff0901ea05bd6cdc2e1a042b9/regex-2026.5.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e", size = 816311, upload-time = "2026-05-09T23:13:49.885Z" }, + { url = "https://files.pythonhosted.org/packages/4c/47/742ef579c61730f8d268e5cf1f9ce0e37e2ea041ad0f5644724f2378e463/regex-2026.5.9-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2", size = 785498, upload-time = "2026-05-09T23:13:52.25Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ab/cb0999802dcb0fb95b1ab005e8d4163d8afdd67efc2cb6b6630ac13f8cb1/regex-2026.5.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b", size = 801348, upload-time = "2026-05-09T23:13:54.127Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/8ca59a24c55bc34d166eefaf3717bd77772f329fdbf984d86581e0a3571c/regex-2026.5.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e", size = 866493, upload-time = "2026-05-09T23:13:56.067Z" }, + { url = "https://files.pythonhosted.org/packages/8d/3d/30f2ae62cef3278bb5bb821f467277a55fb73f01032cf85997e15e8289a8/regex-2026.5.9-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041", size = 772811, upload-time = "2026-05-09T23:13:57.867Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/ae/7d2089bcd78ad0c0161bc684339df50032acb438a7bd3305e7ddb1193cec/regex-2026.5.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0", size = 856584, upload-time = "2026-05-09T23:13:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/a9/29/92ff47f75990131ea4f24ba17819e5a9d141e10819807e09addd73409af6/regex-2026.5.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081", size = 803453, upload-time = "2026-05-09T23:14:01.978Z" }, + { url = "https://files.pythonhosted.org/packages/04/99/eff29f1037dcab36702c9ee5d6858cf1ce2336ea8ea2987f64245b99ea5e/regex-2026.5.9-cp313-cp313t-win32.whl", hash = "sha256:ed2c9e8068b614c574d8d30e543d617cf5379b0535d46f97ef00e904745a08b5", size = 269951, upload-time = "2026-05-09T23:14:03.661Z" }, + { url = "https://files.pythonhosted.org/packages/0e/9d/8870b8981d27b22cda77bb26a5ac7ebfa9c7d9e0dea195a834a82380e748/regex-2026.5.9-cp313-cp313t-win_amd64.whl", hash = "sha256:b46b0f094dc1d3b90356c85a0bd2c9bafc4a6a190b9d6f8ddd5a033b6e088ed4", size = 281240, upload-time = "2026-05-09T23:14:05.56Z" }, + { url = "https://files.pythonhosted.org/packages/72/b1/3379415e8f135c13ac551353397cc4fe97b4978f3cac73c5fcbcded548b8/regex-2026.5.9-cp313-cp313t-win_arm64.whl", hash = "sha256:872acc074bd29ffc9913ecdfedf6ea77502312ca44a4aa0d3779089c6069d8de", size = 272383, upload-time = "2026-05-09T23:14:07.843Z" }, + { url = "https://files.pythonhosted.org/packages/13/3e/9c3cd292d8808b3645a2ce517e200179b6d0e903f176300bd8b542e14de5/regex-2026.5.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:1bd7587a2948b4085195d5a3374eaf4a425dc3e55784c038175355ecf3bbbf8a", size = 490376, upload-time = "2026-05-09T23:14:09.64Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/70/d43ee8a2ca0a8b68d167f21658b85520ac0574617c7f320367c5047f7556/regex-2026.5.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:dea2e88e1cce4522496cce630e11e67b98b7076620bc4336c3f674bc21a375f4", size = 291964, upload-time = "2026-05-09T23:14:11.424Z" }, + { url = "https://files.pythonhosted.org/packages/21/91/9d50b433828d8e74196904e168a43abf1e6e88b2a15d47ed742456720c37/regex-2026.5.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2099f7e7ff7b6aa3192312650a56e91cc091e49d50b04e4f6f8b6e28b3b27f1c", size = 289682, upload-time = "2026-05-09T23:14:13.123Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/b835e3cafbb9d977736912436259ff551d60919f7d7b3d37d46659c63564/regex-2026.5.9-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecd353045824e4477562a2ac718c25799cdaaa41f7aa925a806a8a3e6848a5b9", size = 796996, upload-time = "2026-05-09T23:14:14.923Z" }, + { url = "https://files.pythonhosted.org/packages/2c/a6/9f992d00019166b9de01c546dd4549bc679f2a68df11b877740b0760b7c2/regex-2026.5.9-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65c8c8c37377794bd5b2f3ebe51919042bf17aec802e23c833d89782ed0c78af", size = 866089, upload-time = "2026-05-09T23:14:17.757Z" }, + { url = "https://files.pythonhosted.org/packages/e0/08/4d32af657e049b19cb62b02e46e38fe1518797bfb2203ee93a510b21b0dc/regex-2026.5.9-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b73ab8afcf66c622db143d1c6fda4e58e4d537ee4f125229ad47b1ab80f34c0", size = 911530, upload-time = "2026-05-09T23:14:20.353Z" }, + { url = "https://files.pythonhosted.org/packages/d9/27/2af43dd1dc201d1fecefda64a45f4ad0995855b92724f795a777b402ee69/regex-2026.5.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0de5cf193997384ed2ca6f1cd4f78055b255d93d82d5a8cd6ba0d11c10b167e4", size = 800643, 
upload-time = "2026-05-09T23:14:22.265Z" }, + { url = "https://files.pythonhosted.org/packages/a4/dd/23a249047013b5321d4a60c4d2437462086f601b061776a525e5fba2a59f/regex-2026.5.9-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d641a8c9a61618047796d572a39a79b26167b0411d2c3031937b2fe2d081e2cf", size = 777223, upload-time = "2026-05-09T23:14:24.179Z" }, + { url = "https://files.pythonhosted.org/packages/94/6a/e85ed9538cd19586d0465076a4578a12e093ce776d15f3f8ce92733a8dd6/regex-2026.5.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:24b2355ef5cc9aa5b8f07d17704face1c166fdcc2290fa7bd6e6c925655a8346", size = 785760, upload-time = "2026-05-09T23:14:26.065Z" }, + { url = "https://files.pythonhosted.org/packages/2a/c4/f25473209438638e947c55f9156fd8f236f74169229028cc99116380868e/regex-2026.5.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a24852d3c29ad9e47593593d8a247c44ccc3d0548ef12c822d6ed0810affe676", size = 860891, upload-time = "2026-05-09T23:14:28.17Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f7/f4f86e3c74419c37370e91f150ae0c2ef7d34b2e0e4cdd5da046a02e4022/regex-2026.5.9-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:916714069da19329ef7de197dcbc77bb3104145c7c2c864dbfbe318f46b88b14", size = 765891, upload-time = "2026-05-09T23:14:30.06Z" }, + { url = "https://files.pythonhosted.org/packages/26/70/704d8e13765939146b1cd0ef4e2feb71d7929727d2290f026eed10095955/regex-2026.5.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:fa411799ca8da32a8d38d020a88faa5b6f91657d284761352940ecf9f7c3bbdd", size = 851380, upload-time = "2026-05-09T23:14:32.123Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/1a13582a8460038edc38e49f64ceb0dd7c60f5caba77571f4bf6601965d9/regex-2026.5.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e6da47d679b7010ef27556b6e0f99771b744936db1792a10ceac6547ae1503e", size = 789350, upload-time = "2026-05-09T23:14:34.799Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/56/3dcafe34fc72e271d62ad9a291801e88a1457bb251c132f15fcc2e5aad1a/regex-2026.5.9-cp314-cp314-win32.whl", hash = "sha256:98bd73080e8756255137e1bd3f3f00295bbc5aa383c0e0f973920e9134d7c4ad", size = 272130, upload-time = "2026-05-09T23:14:36.729Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9c/02eebf0be95efe416c664db7fb8b6b05b7a0b06a7544f2884f2558b0526f/regex-2026.5.9-cp314-cp314-win_amd64.whl", hash = "sha256:ff8d372ac2acdc048d1c19916f27ee61bc5722728458ba6ca5052f2c72d51763", size = 280999, upload-time = "2026-05-09T23:14:39.126Z" }, + { url = "https://files.pythonhosted.org/packages/70/5a/1dd1abee76cb7a846a0bcf42fdc87e5720c3c33c24f3e37814310a513d9f/regex-2026.5.9-cp314-cp314-win_arm64.whl", hash = "sha256:e1d93bf647916292e8edcec150c07ddf3dc50179ccaf770c04a7f9e452155372", size = 273500, upload-time = "2026-05-09T23:14:41.059Z" }, + { url = "https://files.pythonhosted.org/packages/86/c1/c5f619b0057a7965cb78ec559c1d7a45ce8c99a35bea95483d64959a93d9/regex-2026.5.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:83d0ee4a57d1c87cb549e195ec300b8f0ec3a82eba66d835e4e2ed8634fe4499", size = 494269, upload-time = "2026-05-09T23:14:42.869Z" }, + { url = "https://files.pythonhosted.org/packages/05/2c/5d01f1aee33de4bbe60c8452945bfc8477ca7c5ae4450f6bfe711036cb36/regex-2026.5.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d3d7eb5c9a7f6df82ed3cfac9beb93882a5cbcb5b8b157b56cb2b3b276574ac1", size = 293954, upload-time = "2026-05-09T23:14:44.822Z" }, + { url = "https://files.pythonhosted.org/packages/7a/fe/e8988b2ae2108c6ef71bd4aa8d87fbe257976dd0810e826cd75f701c68b6/regex-2026.5.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:075160bf16658e16d35233300b8453aac25de4cbea808d22348b6979668e924d", size = 292405, upload-time = "2026-05-09T23:14:47.211Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/34/d2b0937faa7859263f7f0a3c6b103a1296306be6952dc173d0154e9a2f49/regex-2026.5.9-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45375819235558a4ff1c4971dc32881f022613abdb180128f5cb4768c1765a1c", size = 811855, upload-time = "2026-05-09T23:14:49.21Z" }, + { url = "https://files.pythonhosted.org/packages/80/fe/daf53a47457a8486db66c66c01ceb9c2303eecee3f87197f1e77eb1a736d/regex-2026.5.9-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ead4b163ac30a29574510cd4b3e2e985ac5290c05fc7095557d6a5f403fc31b5", size = 871189, upload-time = "2026-05-09T23:14:51.555Z" }, + { url = "https://files.pythonhosted.org/packages/1c/75/058fc4470cbfbf57d800aff1a0022b929a3f9fa553ee10a0cdf2070eb31f/regex-2026.5.9-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c6e4218fbdfbcd4f6c19efca40930d24a621bf4b48cb76bc6640543bd28ef20", size = 917485, upload-time = "2026-05-09T23:14:53.633Z" }, + { url = "https://files.pythonhosted.org/packages/88/e7/179cfda3a28bc843b5c6cfe7f79f23489c791ed95f151083803660878432/regex-2026.5.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6351571c8a42b505eb555c0dc47d740d0fb66977dc142919eea6f4325b7c56a0", size = 816369, upload-time = "2026-05-09T23:14:56.198Z" }, + { url = "https://files.pythonhosted.org/packages/41/90/6f0cc422071688266d344fca8462d787cba0a2c144acb25721f9a61ec265/regex-2026.5.9-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:002205cafd2a9e78c6290c7d1df277bf3277b3b7a30e0b4bb0dac2e2e3f7cb2d", size = 785869, upload-time = "2026-05-09T23:14:58.602Z" }, + { url = "https://files.pythonhosted.org/packages/02/67/a31f1760f09c27b251ef39e9beb541f462cf977381d067faa764c2c0e393/regex-2026.5.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:8abd33fef90b2a9efac5557d6033ca82d1195ed3a15fea5af15ba7b463c6a63b", size = 801427, upload-time = "2026-05-09T23:15:00.642Z" }, + { url = "https://files.pythonhosted.org/packages/e3/c4/1a80654597b6bc1e1ea0494824c31200e8a956abe290afae9b19a166a148/regex-2026.5.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:31037c82eccb44b7ea2e9e221d7c01429430e989a1f4b91ea5a855f6017b509a", size = 866482, upload-time = "2026-05-09T23:15:03.384Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/960724e06482c08466ff5611e242e86f80062949cdf6b4b9cc317b9dd93d/regex-2026.5.9-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5604dfd046dc37eca90250fc3be938b076c8059fa772ac0ed6f499b0f0fb0415", size = 773022, upload-time = "2026-05-09T23:15:05.625Z" }, + { url = "https://files.pythonhosted.org/packages/50/a8/a9979c3e7918280e93159ebcab5ef1a65116dd4f3bd6091be0eae4a126e8/regex-2026.5.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0e1b1b4e496afbb24f4a62aba855ee4f88f25578927697b340702e48c9ee6bc2", size = 856642, upload-time = "2026-05-09T23:15:07.966Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d4/a9b732f2f0072c0ab12227483abb24fffcb9f73f8a2b203df0a6d0434735/regex-2026.5.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:be3372b9df6ddecff6486d37e19095a7b4973137caf5512407a89f4455361f41", size = 803552, upload-time = "2026-05-09T23:15:10.215Z" }, + { url = "https://files.pythonhosted.org/packages/d5/fe/1b3113817447a1d4155e4ac76d2e072f42c0bcba2f43fa8a0e756ea2cd91/regex-2026.5.9-cp314-cp314t-win32.whl", hash = "sha256:3ddd90103f9e5c471c49c7852ecc1fe27c7e45eb99e977aefe7caa4e779f4f58", size = 275746, upload-time = "2026-05-09T23:15:12.609Z" }, + { url = "https://files.pythonhosted.org/packages/92/73/93d42045302636c91f2e5ef588b65b84b01428f28ec77de256b1dfdfbe5c/regex-2026.5.9-cp314-cp314t-win_amd64.whl", hash = "sha256:ca518ed29c46eecba6010b15f1b9a479314d2de409536e71b6a13aa04e3b8a77", size = 285685, upload-time = "2026-05-09T23:15:15.086Z" }, + { 
url = "https://files.pythonhosted.org/packages/da/80/35b4c33c804a165a7f55289afda3ea9e3eb6d15800341a2d66455c0f1f30/regex-2026.5.9-cp314-cp314t-win_arm64.whl", hash = "sha256:5e41809d2683fcde7d5a8c87a6567ba1fb1ce0de9f31bff578de00a4b2d76daa", size = 275713, upload-time = "2026-05-09T23:15:16.98Z" }, +] + [[package]] name = "requests" version = "2.34.2" @@ -1591,6 +2756,9 @@ wheels = [ name = "rpds-py" version = "0.30.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/06/0c/0c411a0ec64ccb6d104dcabe0e713e05e153a9a2c3c2bd2b32ce412166fe/rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288", size = 370490, upload-time = "2025-11-30T20:21:33.256Z" }, @@ -1709,29 +2877,409 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] +[[package]] +name = "rpds-py" +version = "2026.5.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = 
"sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459, upload-time = "2026-05-28T12:02:13.232Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/a0/acf8b6fc20bfdcd3a45bd3f57680fb198e157b7e997b9123b10763798bd2/rpds_py-2026.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3397a5ed7174dc2786bb214030232fc36fe8e5584fec43a9952cc542b1a12036", size = 355609, upload-time = "2026-05-28T11:58:50.78Z" }, + { url = "https://files.pythonhosted.org/packages/b6/95/f8203fd997484b1690a6869cd0e503b6c3c6be55b0ecc36d1a491fe742f0/rpds_py-2026.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99ab6ba7bfa2cb0f96a04e3652355bf04e3f51aceb1e943b8541dab7ba4828cc", size = 348460, upload-time = "2026-05-28T11:58:52.374Z" }, + { url = "https://files.pythonhosted.org/packages/33/8c/b47326ad2f0be545a5e5c1a55937a12afaea7d392ba2837bb9680f57e6c9/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0efbe45632665e53e3db8fe1e5692db58fc5cb9bab4459d570b83efefe11164", size = 381031, upload-time = "2026-05-28T11:58:53.775Z" }, + { url = "https://files.pythonhosted.org/packages/22/0b/e83bbd97ffac6f6389b605cd4e1c8ac5761dc7e977769c9255d8c5adb7bd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:01d17b29c0c23d82b1f4751147ec49cf451f1fc2554eb9ef5f957e55d2656ead", size = 387121, upload-time = "2026-05-28T11:58:55.243Z" }, + { url = "https://files.pythonhosted.org/packages/fd/0e/d285d1bc8864245919c61e1ca82263e4a66d337759c3a4cef72766ff9afc/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7559f72b94ae52659086c595dfa017cde03155f7832071d30959049052cb3ece", size = 501026, upload-time = "2026-05-28T11:58:56.788Z" }, + { url = "https://files.pythonhosted.org/packages/86/06/ccb2109a1e543437b5e43816f2b43b9554cc6783145528a4e3711e05c011/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:9e25b7088f9ccbfc0dfcaa52bf969300ca229e10ecf758974ebcbb080a4b37bb", size = 391865, upload-time = "2026-05-28T11:58:58.298Z" }, + { url = "https://files.pythonhosted.org/packages/3d/33/237173db1cfef10105b3839a24de00eb8d2a523711add4632447cdf0aedd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613fc4ee9eaef26dc5840666214dd6fbcebcf32f46e76f4abc473059f4e13dda", size = 378012, upload-time = "2026-05-28T11:58:59.589Z" }, + { url = "https://files.pythonhosted.org/packages/97/64/1eae54e34d5161f9969295e80bd6b62a55f2b6ac5f2a5b60d02c2140e758/rpds_py-2026.5.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:85264a90ff4c05c1568dd65f5921c837614b67c60358fb4c17df3b7f2e90690a", size = 391111, upload-time = "2026-05-28T11:59:01.104Z" }, + { url = "https://files.pythonhosted.org/packages/d8/34/5bb334a5a0f65d77869217c4654f34c78a7d11b93938a3c076a2edeafc52/rpds_py-2026.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe71bca7d547acb17027c7fd1624ff8aae623499c498d3e7011182c4de5c25e0", size = 409225, upload-time = "2026-05-28T11:59:02.433Z" }, + { url = "https://files.pythonhosted.org/packages/16/0f/007ec21283b5b040b4ec3bd95e0402591e22bfa7d5c93dfe01c465c2d2d7/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05fa4f41f37ec97c9c260441a940450a192f78d774d2b097eee1379f1e1246a", size = 556487, upload-time = "2026-05-28T11:59:04.012Z" }, + { url = "https://files.pythonhosted.org/packages/ff/10/5437c94508169b6b22d8418fef7a66e9ffb5f3b9e9c94460f2eedafe06ff/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df1d2a1996755b24b9ecee92cb4d36c28f86f464a6a173349c26bab41e94b8c2", size = 620798, upload-time = "2026-05-28T11:59:05.485Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d5/9937dce4d6bda74157b954e7d1460db05a22f5929dccfeeba1ed27a93df0/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:8895840ac4809e5f60c88fd07617cd71326e73d6e5a8aa783c5c0f7c24985de2", size = 584053, upload-time = "2026-05-28T11:59:06.837Z" }, + { url = "https://files.pythonhosted.org/packages/6c/31/750617dd0ae1752471bf43f9e41d263398fae7cde7849d23b8574a70e617/rpds_py-2026.5.1-cp311-cp311-win32.whl", hash = "sha256:3684a59b158a7683aaeb8e25352e9a9dd2122cec78f2d8530266e4f91b4c7b3f", size = 214390, upload-time = "2026-05-28T11:59:08.402Z" }, + { url = "https://files.pythonhosted.org/packages/3c/bb/3dcab0e1d9516303f2eb672a5d6f62eca5a69e2886301e9c8c54b520c39b/rpds_py-2026.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:7bd530e6a530bb3ea892f194fafa455f3516ac25ecf7143fd33c09be62b0470a", size = 231097, upload-time = "2026-05-28T11:59:09.786Z" }, + { url = "https://files.pythonhosted.org/packages/49/d6/c6bbf5cb1cf12b9732df8074b57f6ef8341ba884c95d40632ae8bddb44e4/rpds_py-2026.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:0a5ae4dbe43c1076983b72616496919872ae7bbe7a1e21cc48336bc3154d130b", size = 226361, upload-time = "2026-05-28T11:59:11.079Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e7/a78582dc57caa592dcc7d4fb69b61390561e908eb3d2f5df5928a8e354c0/rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d", size = 353040, upload-time = "2026-05-28T11:59:12.531Z" }, + { url = "https://files.pythonhosted.org/packages/a3/43/35e3f136343aef451e545ce8c38d36c2f93c0ed88703db8b64ba2b205c68/rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c", size = 345775, upload-time = "2026-05-28T11:59:13.827Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/0f2160c5982d3157734d5cb3ed63d8b2d583a73c9864f77b666449f32cf8/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08", size = 376329, upload-time = 
"2026-05-28T11:59:15.271Z" }, + { url = "https://files.pythonhosted.org/packages/d0/11/ee0ba42aff83bf4effdbc576673c6be64c5e173978c3f6d537e94482f77d/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb", size = 383539, upload-time = "2026-05-28T11:59:16.665Z" }, + { url = "https://files.pythonhosted.org/packages/11/df/d94aa6a499d4ac40afe2d7620f2c597fd3c0f182e854ad7cf3f596a81cb6/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1", size = 494674, upload-time = "2026-05-28T11:59:17.991Z" }, + { url = "https://files.pythonhosted.org/packages/1f/75/33d30f43bb2f458de11979486a591b1bf6e5651765ed1704c6197c2dc773/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5", size = 389268, upload-time = "2026-05-28T11:59:19.434Z" }, + { url = "https://files.pythonhosted.org/packages/f4/1e/2c9096fc19d5fd084b0184ca2b651e659aa0a37e6fdbecf6ece47f147fe1/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644", size = 376280, upload-time = "2026-05-28T11:59:21Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e5/61ec9f8be8211ea7f48448195549e4aaf02004083475493b0e137702ecb2/rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4", size = 387233, upload-time = "2026-05-28T11:59:22.454Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/bcec1005c4f4a234f92a29078631fee49206c7265ccae966f18fd332e80e/rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6", size = 405009, upload-time = 
"2026-05-28T11:59:23.845Z" }, + { url = "https://files.pythonhosted.org/packages/72/e6/4d5718c5cf26c522dc7c9999e238da1e77380b81d0c5d1df11e271ddfeb1/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4", size = 553113, upload-time = "2026-05-28T11:59:25.184Z" }, + { url = "https://files.pythonhosted.org/packages/d4/25/2ee807bdb3e1f0b7eddf7782acd5665a8b5205a331a7d7244a52c4812fd9/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24", size = 618838, upload-time = "2026-05-28T11:59:26.749Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c1/7d4c26f167f8c41501cc073d30ee22082b16ce358cf5b00ec97cbc7804ea/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732", size = 582436, upload-time = "2026-05-28T11:59:28.11Z" }, + { url = "https://files.pythonhosted.org/packages/04/1d/9d12b0a337bab46f4769f8857f4007e3b2d639e14f9a44a0efe157696e64/rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed", size = 212734, upload-time = "2026-05-28T11:59:29.689Z" }, + { url = "https://files.pythonhosted.org/packages/c5/93/e4116f2de7f56bc7406a76033dc501811ddeb22b7f056b92d632871ebb0c/rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870", size = 229045, upload-time = "2026-05-28T11:59:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/53/6c3419d85eb2ec5938a37627c585b42d76a63bb731d6e42ed4b079ebf486/rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473", size = 223967, upload-time = "2026-05-28T11:59:32.318Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/32/14c961ad295f490eb0849ada8b79683e93a59b9de3afdd983eaf55fa6867/rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d", size = 352787, upload-time = "2026-05-28T11:59:33.655Z" }, + { url = "https://files.pythonhosted.org/packages/ca/bb/d1b85117967c11191441a7274ae616c65d93901d082c588f89a50a8da5ae/rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3", size = 345179, upload-time = "2026-05-28T11:59:35Z" }, + { url = "https://files.pythonhosted.org/packages/7c/46/d84105f062e626a1b233f863907288a4708c2d833b8b4c6fb2764bc080c0/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559", size = 376173, upload-time = "2026-05-28T11:59:36.43Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ae/469d7959ce5b1201e1de135dc735b86db3b35dd0d1734f6a44246d5f061c/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db", size = 383162, upload-time = "2026-05-28T11:59:37.995Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a2/57853d31a1116a561aa072794602ad3f6341e18d70a8523f1bd5b9fc1e5a/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02", size = 495093, upload-time = "2026-05-28T11:59:39.453Z" }, + { url = "https://files.pythonhosted.org/packages/99/63/3a8eabcad9314b7daf5c65f451d2c33d989235cd8a5762186cf2c3f5a4f8/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b", size = 389829, upload-time = "2026-05-28T11:59:40.896Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/25/05678d97fc25e2622df14dc530fb82023174ecfff6733991ed0d78f167bd/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e", size = 374786, upload-time = "2026-05-28T11:59:42.626Z" }, + { url = "https://files.pythonhosted.org/packages/88/d1/8c90b6431e80a3b91b284a5c7c8c0c4f9c006444d90477a740d6e0f9c694/rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b", size = 386920, upload-time = "2026-05-28T11:59:44.124Z" }, + { url = "https://files.pythonhosted.org/packages/ff/99/4638f672ab356682d633ee0da9255f5b67ce6efd0b85eb94ad3e255e65a5/rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46", size = 405059, upload-time = "2026-05-28T11:59:47.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/3f/3546524b6eb4cc2e1f363a3d638fa52f6c24faae3500c25fb488b02f1740/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf", size = 553030, upload-time = "2026-05-28T11:59:48.603Z" }, + { url = "https://files.pythonhosted.org/packages/c6/c3/7b3388c796fcf471bd17194242d4dc1a7608567c0fa422bcc1c5e79f9c1e/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f", size = 618975, upload-time = "2026-05-28T11:59:50.314Z" }, + { url = "https://files.pythonhosted.org/packages/61/1e/a3cb07f2795075d1d88efddae2f541359fde5f08c81ee114c29c2949c90a/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89", size = 581178, upload-time = "2026-05-28T11:59:51.673Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/74/e758c03a5ef46f04c37f2651a2893db846d569ba8a7bca469d4b58939bcd/rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842", size = 212481, upload-time = "2026-05-28T11:59:53.148Z" }, + { url = "https://files.pythonhosted.org/packages/70/ec/a2aca432db9c7359b40fa393eeeaa0d166c2f70175be956e75fa24197c44/rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf", size = 228519, upload-time = "2026-05-28T11:59:54.505Z" }, + { url = "https://files.pythonhosted.org/packages/29/60/a73bfdd45b096574556acf303bbd9fa9eed36ca8a818b514e2a5d5fe2b9d/rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd", size = 223446, upload-time = "2026-05-28T11:59:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/18/e2/408105fd611823f00882aea810f3989a30d26b1bab8b6beb20f98c724e0e/rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600", size = 355287, upload-time = "2026-05-28T11:59:57.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/58/5c4a43436843c90d0f6d19f82c200c80e3843ca9fa07b237623327f6d384/rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa", size = 347033, upload-time = "2026-05-28T11:59:58.881Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c2/1a71acdacaf4e259b10278fb87b039ded3cf80041bcd89dd8a3ea702ded6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00", size = 376891, upload-time = "2026-05-28T12:00:00.516Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c8/535f3d9b65addd8e28aa87b83c6e526799c3717a88273db8ea795beeef7a/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0", size = 385646, upload-time = "2026-05-28T12:00:02.394Z" }, + { url = "https://files.pythonhosted.org/packages/1c/91/dc033f313345c354ade914dbe73cdb90b615a4409ea02430d5356794f3d8/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97", size = 498830, upload-time = "2026-05-28T12:00:04.189Z" }, + { url = "https://files.pythonhosted.org/packages/27/fc/90fcbea459dbb8ddc18a2e0fd1de9412b48bc84ffff2db771cf714bacfd6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef", size = 392830, upload-time = "2026-05-28T12:00:05.797Z" }, + { url = "https://files.pythonhosted.org/packages/b2/1d/46cd11a228c9750684a798d98f878be6f614aa762438da7378f035e79e35/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d", size = 379613, upload-time = "2026-05-28T12:00:07.433Z" }, + { url = "https://files.pythonhosted.org/packages/24/4a/d9b0c6af3a1de03eb93741bbe8be2bdce84d8fda8224f3005451d86df389/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83", size = 388183, upload-time = "2026-05-28T12:00:09.227Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/db7aaabdda6d020afc87d981bcc2f57a434c7dec60ecfc2ab3dd50b20351/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2", size = 408578, upload-time = "2026-05-28T12:00:10.779Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/d6/070f6a41cbb343e2ac4171859bf3f3623e0ab002f72619d6d505313ec2de/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd", size = 553573, upload-time = "2026-05-28T12:00:12.443Z" }, + { url = "https://files.pythonhosted.org/packages/75/ab/1a71ea3589c4345dac0a0518f0e6a031cb42689277851b683c46d27463a5/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1", size = 620861, upload-time = "2026-05-28T12:00:14.09Z" }, + { url = "https://files.pythonhosted.org/packages/8a/22/9bf80a56069c0c443fcfefac639a86a744550a2898817a6dfd3e26654924/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3", size = 585633, upload-time = "2026-05-28T12:00:15.66Z" }, + { url = "https://files.pythonhosted.org/packages/da/68/3b2c0a75c9e04125696f84ebdbbf304acf5a40b58ba4481cdb98a922c3ba/rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc", size = 210074, upload-time = "2026-05-28T12:00:17.291Z" }, + { url = "https://files.pythonhosted.org/packages/e7/8b/609157d5a25d37d4f29f92840ba531f416907c34ae5c5739dd21fc2bef98/rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55", size = 228635, upload-time = "2026-05-28T12:00:18.73Z" }, + { url = "https://files.pythonhosted.org/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756, upload-time = "2026-05-28T12:00:20.217Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831, upload-time = "2026-05-28T12:00:21.711Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127, upload-time = "2026-05-28T12:00:23.326Z" }, + { url = "https://files.pythonhosted.org/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034, upload-time = "2026-05-28T12:00:24.89Z" }, + { url = "https://files.pythonhosted.org/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823, upload-time = "2026-05-28T12:00:26.676Z" }, + { url = "https://files.pythonhosted.org/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144, upload-time = "2026-05-28T12:00:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959, upload-time = "2026-05-28T12:00:30.304Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558, upload-time = "2026-05-28T12:00:31.764Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789, upload-time = "2026-05-28T12:00:33.247Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405, upload-time = "2026-05-28T12:00:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975, upload-time = "2026-05-28T12:00:36.268Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701, upload-time = "2026-05-28T12:00:37.82Z" }, + { url = "https://files.pythonhosted.org/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806, upload-time = "2026-05-28T12:00:39.492Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985, upload-time = "2026-05-28T12:00:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219, upload-time = "2026-05-28T12:00:43.15Z" }, + { url = "https://files.pythonhosted.org/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148, upload-time = "2026-05-28T12:00:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196, upload-time = "2026-05-28T12:00:46.14Z" }, + { url = "https://files.pythonhosted.org/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981, upload-time = "2026-05-28T12:00:47.531Z" }, + { url = "https://files.pythonhosted.org/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961, upload-time = "2026-05-28T12:00:49.216Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965, upload-time = "2026-05-28T12:00:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526, upload-time = "2026-05-28T12:00:52.77Z" }, + { url = "https://files.pythonhosted.org/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190, upload-time = "2026-05-28T12:00:54.215Z" }, + { url = "https://files.pythonhosted.org/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921, upload-time = "2026-05-28T12:00:55.673Z" }, + { url = "https://files.pythonhosted.org/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766, upload-time = "2026-05-28T12:00:57.518Z" }, + { url = "https://files.pythonhosted.org/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343, upload-time = "2026-05-28T12:00:58.978Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 618502, upload-time = "2026-05-28T12:01:00.656Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916, upload-time = "2026-05-28T12:01:02.22Z" }, + { url = "https://files.pythonhosted.org/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855, upload-time = "2026-05-28T12:01:03.821Z" }, + { url = "https://files.pythonhosted.org/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422, upload-time = "2026-05-28T12:01:05.194Z" }, + { url = "https://files.pythonhosted.org/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576, upload-time = "2026-05-28T12:01:06.722Z" }, + { url = "https://files.pythonhosted.org/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640, upload-time = "2026-05-28T12:01:08.441Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322, upload-time = "2026-05-28T12:01:09.934Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066, upload-time = "2026-05-28T12:01:11.48Z" }, + { url = "https://files.pythonhosted.org/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586, upload-time = "2026-05-28T12:01:13.051Z" }, + { url = "https://files.pythonhosted.org/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415, upload-time = "2026-05-28T12:01:14.631Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427, upload-time = "2026-05-28T12:01:16.153Z" }, + { url = "https://files.pythonhosted.org/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615, upload-time = "2026-05-28T12:01:17.809Z" }, + { 
url = "https://files.pythonhosted.org/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786, upload-time = "2026-05-28T12:01:19.419Z" }, + { url = "https://files.pythonhosted.org/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583, upload-time = "2026-05-28T12:01:21.013Z" }, + { url = "https://files.pythonhosted.org/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941, upload-time = "2026-05-28T12:01:22.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349, upload-time = "2026-05-28T12:01:24.118Z" }, + { url = "https://files.pythonhosted.org/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922, upload-time = "2026-05-28T12:01:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003, upload-time = "2026-05-28T12:01:27.062Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245, upload-time = "2026-05-28T12:01:28.51Z" }, + { url = "https://files.pythonhosted.org/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015, upload-time = "2026-05-28T12:01:30.214Z" }, + { url = "https://files.pythonhosted.org/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016, upload-time = "2026-05-28T12:01:31.656Z" }, + { url = "https://files.pythonhosted.org/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775, upload-time = "2026-05-28T12:01:33.136Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270, upload-time = "2026-05-28T12:01:34.574Z" }, + { url = "https://files.pythonhosted.org/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285, upload-time = "2026-05-28T12:01:36.127Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581, upload-time = "2026-05-28T12:01:37.635Z" }, + { url = "https://files.pythonhosted.org/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041, upload-time = "2026-05-28T12:01:39.307Z" }, + { url = "https://files.pythonhosted.org/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946, upload-time = "2026-05-28T12:01:41.043Z" }, + { url = "https://files.pythonhosted.org/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526, upload-time = "2026-05-28T12:01:43.032Z" }, + { url = "https://files.pythonhosted.org/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165, upload-time = "2026-05-28T12:01:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778, upload-time = "2026-05-28T12:01:46.337Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839, upload-time = "2026-05-28T12:01:48.109Z" }, + { url = "https://files.pythonhosted.org/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866, upload-time = "2026-05-28T12:01:49.648Z" }, + { url = "https://files.pythonhosted.org/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441, upload-time = "2026-05-28T12:01:51.408Z" }, + { url = "https://files.pythonhosted.org/packages/42/56/3fe0fb34820ff667be791b3a3c22b85e8bcba54e9c832f47438c191fa7be/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:edf2765d84e42447f112ad877af8fe1db0089aaec5b28e88d6eab45e7fe99cea", size = 357151, upload-time = "2026-05-28T12:01:53.43Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/3eb9ccdb9f143b8c9b003978898cb497f942a324c077401e6b8834238e63/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad3773236e95f7f33991eb125224b7da66f206504d032a253a02da7e134519fb", size = 350195, upload-time = "2026-05-28T12:01:54.901Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/dbda232bc4f3ed732120692ab0d2c8402cb020516556d8bee622dcef2413/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a04df86b3f0fade39ec8fd0e0aab089b1da9fbd2b48df778a57ef96f5e7d38df", size = 381850, upload-time = "2026-05-28T12:01:56.601Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/30/32e769839a358f78810c234f160f2cc21d1e4e47e1c0e0e0d535be5a0219/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6142dbd80c4df62a5d899f0d616d417f84e0bc8d32526c8e5589019d75d028a7", size = 387899, upload-time = "2026-05-28T12:01:58.212Z" }, + { url = "https://files.pythonhosted.org/packages/ab/86/ec84d243aadb3b34b71dd26a010d0930b2d284ff5fc9a69fec53810ee6fd/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b35217adefe87f2fe4db7e9766cabe84744bfe9616d9667be18988928c7f2dc", size = 501618, upload-time = "2026-05-28T12:01:59.888Z" }, + { url = "https://files.pythonhosted.org/packages/74/25/b60e52686bbff777a64f9e4f4d3dd57980dc846913777177a2c92e4937aa/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b95d5e11fc712b752081183a55a244c03cd00570489edd7014d8899f8ceb8162", size = 394003, upload-time = "2026-05-28T12:02:01.482Z" }, + { url = "https://files.pythonhosted.org/packages/9b/c7/b3a6a588cc2219510ef3f42e207483a93950bedd1e3a0fd4015c95cff9e5/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141c9498daf2ace9eda35d2b0e376f9ea8b058d84f2aef4f96fccfd449a2f251", size = 379778, upload-time = "2026-05-28T12:02:03.197Z" }, + { url = "https://files.pythonhosted.org/packages/31/00/c7dba3fc8a3da8cb3f6db1eb3386be4d79c2e97c6890d20eb9ac66ae8c43/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:6f249f8b860a200ad35193af961183ebe9132710484e6f6ce0cf89fd83c63a9a", size = 392359, upload-time = "2026-05-28T12:02:04.817Z" }, + { url = "https://files.pythonhosted.org/packages/93/dd/472ba494c70753f93745992c99855bee0636daf74e6984e5e003f150316f/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4abbf391a70be864920858bf360f4fb380577c9a0f732438a1996726e2c195b", size = 412820, upload-time = 
"2026-05-28T12:02:06.401Z" }, + { url = "https://files.pythonhosted.org/packages/1d/6f/93831a3bfe789542ed0c1d0d74b78b440f055d6dc3ea4640eba2d95e6e23/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c74005a7bb87752acf351c93897ec63ad77a07a0da7ecad9c050e32e7286ba34", size = 557243, upload-time = "2026-05-28T12:02:08.013Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ff/0b3d604614ffc77522c6b288fdbce68957eb583da1002aa65ba38ac0ee40/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:8213afbe8a3a906fb9acb2014423fe3359ee783d0bf90995f70623a3217bfa6c", size = 623541, upload-time = "2026-05-28T12:02:09.661Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ea/e7b0251441da9adfeaebcf29601d10f2a1455fcf0772fae9e7e19032bd96/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8c43a8a973270fd173bf48cdf80bbe66312421cba68d40845034f174f2389049", size = 586326, upload-time = "2026-05-28T12:02:11.47Z" }, +] + [[package]] name = "ruff" -version = "0.15.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/21/a7d5c126d5b557715ef81098f3db2fe20f622a039ff2e626af28d674ab80/ruff-0.15.13.tar.gz", hash = "sha256:f9d89f17f7ba7fb2ed42921f0df75da797a9a5d71bc39049e2c687cf2baf44b7", size = 4678180, upload-time = "2026-05-14T13:44:37.869Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/61/11d458dc6ac22504fd8e237b29dfd40504c7fbbcc8930402cfe51a8e63ed/ruff-0.15.13-py3-none-linux_armv6l.whl", hash = "sha256:444b580fc72fd6887e650acd3e575e18cdc79dbcf42fb4030b491057921f61f8", size = 10738279, upload-time = "2026-05-14T13:44:18.7Z" }, - { url = "https://files.pythonhosted.org/packages/86/ca/caa871ee7be718c45256fada4e16a218ee3e33f0c4a46b729a60a24912e6/ruff-0.15.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6590d009e7cb7ebf36f83dbdd44a3fa48a0994ff6f1cdc1b08006abe58f98dc7", size = 11124798, upload-time = 
"2026-05-14T13:44:06.427Z" }, - { url = "https://files.pythonhosted.org/packages/d3/19/43f5f2e568dddde567fc41f8471f9432c09563e19d3e617a48cfa52f8f0a/ruff-0.15.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1c26d2f66163deeb6e08d8b39fbbe983ce3c71cea06a6d7591cfd1421793c629", size = 10460761, upload-time = "2026-05-14T13:44:04.375Z" }, - { url = "https://files.pythonhosted.org/packages/99/df/cf938cd6de3003178f03ad7c1ea2a6c099468c03a35037985070b37e76be/ruff-0.15.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbd6f94b434f896308e4d57fb7bfde0d02b99f7a64b3bdab0fdfa6a864203a5", size = 10804451, upload-time = "2026-05-14T13:44:25.221Z" }, - { url = "https://files.pythonhosted.org/packages/c7/7d/5d0973129b154ded2225729169d7068f26b467760b146493fde138415f23/ruff-0.15.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf3259f3be4d181bda591da5db2571aed6853c6a048157756448020bc6c5cd22", size = 10534285, upload-time = "2026-05-14T13:44:08.888Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e3/6b999bbc66cd51e5f073842bc2a3995e99c5e0e72e16b15e7261f7abf57a/ruff-0.15.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae9c17e5eb4430c154e76abc25d79a318190f5a997f38fb6b114416c5319ffc9", size = 11312063, upload-time = "2026-05-14T13:44:11.274Z" }, - { url = "https://files.pythonhosted.org/packages/af/5a/642639e9f5db04f1e97fbd6e091c6fd20725bdf072fb114d00eefb9e6eb8/ruff-0.15.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e2e39bff6c341f4b577a21b801326fab0b11847f48fcaa83f00a113c9b3cb55", size = 12183079, upload-time = "2026-05-14T13:44:01.634Z" }, - { url = "https://files.pythonhosted.org/packages/19/4c/7585735f6b53b0f12de13618b2f7d250a844f018822efc899df2e7b8295f/ruff-0.15.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e8d9a8e08013542e94d3220bc5b62cc3e5ef87c5f74bff367d3fac14fab013e6", size = 11440833, upload-time = "2026-05-14T13:43:59.043Z" }, - { 
url = "https://files.pythonhosted.org/packages/e8/31/bf1a0803d077e679cfeee5f2f67290a0fa79c7385b5d9a8c17b9db2c48f0/ruff-0.15.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc411dfebe5eebe55ce041c6ae080eb7668955e866daa2fbb16692a784f1c4ca", size = 11434486, upload-time = "2026-05-14T13:44:27.761Z" }, - { url = "https://files.pythonhosted.org/packages/e1/4e/62c9b999875d4f14db80f277c030578f5e249c9852d65b7ac7ad0b43c041/ruff-0.15.13-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:768494eb08b9cee54e2fd27969966f74db5a57f6eaa7a90fcb3306af34dfc4bd", size = 11385189, upload-time = "2026-05-14T13:44:13.704Z" }, - { url = "https://files.pythonhosted.org/packages/fc/89/7e959047a104df3eb12863447c110140191fc5b6c4f379ea2e803fcdb0e4/ruff-0.15.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:fb75f9a3a7e42ffe117d734494e6c5e5cb3565d66e12612cb63d0e572a41a5b6", size = 10781380, upload-time = "2026-05-14T13:43:56.734Z" }, - { url = "https://files.pythonhosted.org/packages/ff/52/5fd18f3b88cab63e88aa11516b3b4e1e5f720e5c330f8dbe5c26210f41f8/ruff-0.15.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8cb74dd33bb2f6613faf7fc03b660053b5ac4f80e706d5788c6335e2a8048d51", size = 10540605, upload-time = "2026-05-14T13:44:20.748Z" }, - { url = "https://files.pythonhosted.org/packages/e8/e0/9e35f338990d3e41a82875ff7053ffe97541dae81c9d02143177f381d572/ruff-0.15.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7ef823f817fcd191dc934e984be9cf4094f808effa16f2542ad8e821ba02bbf2", size = 11036554, upload-time = "2026-05-14T13:44:16.256Z" }, - { url = "https://files.pythonhosted.org/packages/c2/13/070fb048c24080fba188f66371e2a92785be257ad02242066dc7255ac6e9/ruff-0.15.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f345a13937bd7f09f6f5d19fa0721b0c103e00e7f62bc67089a8e5e037719e0b", size = 11528133, upload-time = "2026-05-14T13:44:22.808Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/8c/b1e1666aef7fc6555094d73ae6cd981701781ae85b97ceefc0eebd0b4668/ruff-0.15.13-py3-none-win32.whl", hash = "sha256:4044f94208b3b05ba0fc4a4abd0558cf4d6459bd18325eead7fd8cc66f909b41", size = 10721455, upload-time = "2026-05-14T13:44:35.697Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a6/870a3e8a50590bb92be184ad928c2922f088b00d9dc5c5ec7b924ee08c22/ruff-0.15.13-py3-none-win_amd64.whl", hash = "sha256:7064884d442b7d477b4e7473d12da7f08851d2b1982763c5d3f388a19468a1a4", size = 11900409, upload-time = "2026-05-14T13:44:30.389Z" }, - { url = "https://files.pythonhosted.org/packages/9b/36/9c015cd052fca743dae8cb2aeb16b551444787467db42ceab0fc968865af/ruff-0.15.13-py3-none-win_arm64.whl", hash = "sha256:2471da9bd1068c8c064b5fd9c0c4b6dddffd6369cb1cd68b29993b1709ff1b21", size = 11179336, upload-time = "2026-05-14T13:44:33.026Z" }, +version = "0.15.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/a9/3abdf488f1bf3d24c699415e454ed554a6350d5d89ce183be1ee0a3361ac/ruff-0.15.17.tar.gz", hash = "sha256:2ec446937fd16c8c4de2674a209cc5af64d9c6f17d21fbf1151054fa0bcf5219", size = 4743346, upload-time = "2026-06-11T17:54:47.663Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/4d/e11259f5da07cb6afb2d074c31bf09da9671993f7329d4f15d2fdc458301/ruff-0.15.17-py3-none-linux_armv6l.whl", hash = "sha256:d9feddb927fc68bd295f5eebc587a7e42cfaf9b65f60ca4a2386febff575da8f", size = 10856677, upload-time = "2026-06-11T17:54:49.533Z" }, + { url = "https://files.pythonhosted.org/packages/29/3e/772d679e1a0dc058e58875bd2c0cb713a0530877b4a76fee3c7966df0d49/ruff-0.15.17-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:25805a226d741c47d274a35ad5c10a7dde175fcddfa511d7cf3da0a21eb3eab7", size = 11223443, upload-time = "2026-06-11T17:55:00.573Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/58/bd41f7688b2fd5623012605130ed70e60aa7f2244baa3d5066bdd61530c8/ruff-0.15.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f6ad73b14c2d18a3bf8ad7cb6974294d7f613a7898604826058e6ac64918ef4d", size = 10566458, upload-time = "2026-06-11T17:55:07.52Z" }, + { url = "https://files.pythonhosted.org/packages/d8/5b/733371013fcf1ec339e477ece6ab42bfe10bdd9bba8ee88a9516aa56bfc0/ruff-0.15.17-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ba0c1e4f95bcb3869d0d30cbd5917071ef2e28665abfec970cdab0492c713ed", size = 10914483, upload-time = "2026-06-11T17:55:05.501Z" }, + { url = "https://files.pythonhosted.org/packages/bd/cc/6f24251cc0252f7239391ccb85833f320efad14ebe5b443943f37ced6332/ruff-0.15.17-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81647960f10bff57d2e51cadd0c3950fe598400c852863a038720ef5b8cca91e", size = 10647497, upload-time = "2026-06-11T17:54:57.733Z" }, + { url = "https://files.pythonhosted.org/packages/68/dd/0d10c17ce1a1624d6fc3156309c3f834fdb5dfaad026ec90c85684f3990e/ruff-0.15.17-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e01a84ddbc8c16c23055ba3924476850f1bbc1917cebbb9376665a63e74260d", size = 11416967, upload-time = "2026-06-11T17:54:51.461Z" }, + { url = "https://files.pythonhosted.org/packages/2f/91/556bfb156f6144f355e831c23db00b2fc4120f86b3ce81cc5f7fd2df51f3/ruff-0.15.17-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fe9f653152f8f294f9f7e03bf3a453d8b4a27f7a59c78c8666167f2b17b96c", size = 12335770, upload-time = "2026-06-11T17:54:45.793Z" }, + { url = "https://files.pythonhosted.org/packages/88/82/8b5999aa13355e926f06d9f42a32dcca862f623bf0363785ff89d607dffd/ruff-0.15.17-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c0fe88a7676e7a05b73174d4d4a59cb2ac21ff8263583f87a81a6018475a978", size = 11575441, upload-time = "2026-06-11T17:54:32.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/93/f10377bb04109ca0e8cbc483ff1982c54b6d418210041776f93e8cdc7fa9/ruff-0.15.17-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecfc3c7878fff94633ab0348524e093f9ce3243080416dd7d14f8ba400174719", size = 11557614, upload-time = "2026-06-11T17:54:34.698Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a6/eeeae7f7d5493df41649ab3db92f086b2d0a30199e4efdf8e3dd7a033f24/ruff-0.15.17-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:b8461180b22420b1bdc289909410930761629fddf2a5aaf60fae1ab26cedc4c4", size = 11544450, upload-time = "2026-06-11T17:54:39.042Z" }, + { url = "https://files.pythonhosted.org/packages/32/88/5991ce565129a24dd4a00db1254b3b5db2e53018cbe4018ea5a89738e727/ruff-0.15.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6eccbe50a038b503e7140b441aa9c7fc8c1f36edf23ebef9f4165c2f28f568b7", size = 10892524, upload-time = "2026-06-11T17:55:09.432Z" }, + { url = "https://files.pythonhosted.org/packages/f5/1d/0fdd248313425f55223968af04b0a42125466a8d88d21c1d99c6af0a51e8/ruff-0.15.17-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:382fc0521025f5a8ad447d8bdd523545d0d7646adb718eb1c2dac5065ec27c0f", size = 10659573, upload-time = "2026-06-11T17:54:36.824Z" }, + { url = "https://files.pythonhosted.org/packages/9e/0e/072e8260deb9461062ce9311ced27a8e541229a6ffd483013dd37661e43e/ruff-0.15.17-py3-none-musllinux_1_2_i686.whl", hash = "sha256:456d41fcd1b2777ad63f09a6e7121d43f7b688bbc76a800c10f7f8fb1f912c3f", size = 11127818, upload-time = "2026-06-11T17:55:03.124Z" }, + { url = "https://files.pythonhosted.org/packages/ab/b4/55060a34163121498014696b5f656db5b8c6963768f227dbf0d76b311073/ruff-0.15.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b1a04bcc94ae6194e9db05d16ad31f298a7194bfbcb08258bbe589cee1d587b8", size = 11655901, upload-time = "2026-06-11T17:54:53.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/71/9b29d6b87cef468d697f43c6a91e3fae4a80185779d7d5a4ef27d173439f/ruff-0.15.17-py3-none-win32.whl", hash = "sha256:596065960ab1ff593f744220c9fe6580eda00a95003cffa9f4048bb5b1bf0392", size = 10925574, upload-time = "2026-06-11T17:54:55.723Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b2/8fc77f3723228836fa5d12497eb71c808f83782e10d058d2b15cfa14640b/ruff-0.15.17-py3-none-win_amd64.whl", hash = "sha256:6769e5fa1710b179b92e0bfa5a51735b35baea9013dadb06d5f44cbcf9547084", size = 12058788, upload-time = "2026-06-11T17:54:41.042Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c7/c53e8dbff9c9dc4b7928773421ae294a5d28fcb8dcda1a089579d3a7e510/ruff-0.15.17-py3-none-win_arm64.whl", hash = "sha256:f3be1fbb34bcdfd146240d8fb92a709d4c2c8191348580a3c044ec60fa0b4456", size = 11355275, upload-time = "2026-06-11T17:54:43.635Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, + { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, + { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, + { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, + { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, + { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, + { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, + { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, + { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, + { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, + { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, + { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, + { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, + { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, + { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, + { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, + { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, + { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, + { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, + { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = 
"sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, + { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, + { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", 
+ "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.11'" }, + { name = "narwhals", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = "sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767, upload-time = "2026-06-02T11:54:32.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686, upload-time = "2026-06-02T11:53:05.439Z" }, + { url = "https://files.pythonhosted.org/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782, upload-time = "2026-06-02T11:53:08.904Z" }, + { url = "https://files.pythonhosted.org/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419, upload-time = "2026-06-02T11:53:11.551Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411, upload-time = "2026-06-02T11:53:15.063Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736, upload-time = "2026-06-02T11:53:18.108Z" }, + { url = "https://files.pythonhosted.org/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564, upload-time = "2026-06-02T11:53:21.2Z" }, + { url = "https://files.pythonhosted.org/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122, upload-time = "2026-06-02T11:53:24.08Z" }, + { url = "https://files.pythonhosted.org/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512, upload-time = "2026-06-02T11:53:27.183Z" }, + { url = "https://files.pythonhosted.org/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603, upload-time = "2026-06-02T11:53:30.328Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097, upload-time = "2026-06-02T11:53:33.456Z" }, + { url = "https://files.pythonhosted.org/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173, upload-time = "2026-06-02T11:53:36.602Z" }, + { url = "https://files.pythonhosted.org/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451, upload-time = "2026-06-02T11:53:39.075Z" }, + { url = "https://files.pythonhosted.org/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188, upload-time = "2026-06-02T11:53:41.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299, upload-time = "2026-06-02T11:53:44.817Z" }, + { url = "https://files.pythonhosted.org/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690, upload-time = "2026-06-02T11:53:47.461Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723, upload-time = "2026-06-02T11:53:50.702Z" }, + { url = "https://files.pythonhosted.org/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330, upload-time = "2026-06-02T11:53:53.396Z" }, + { url = "https://files.pythonhosted.org/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653, upload-time = "2026-06-02T11:53:56.035Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289, upload-time = "2026-06-02T11:53:58.788Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141, upload-time = "2026-06-02T11:54:01.694Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671, upload-time = "2026-06-02T11:54:04.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104, upload-time = "2026-06-02T11:54:07.205Z" }, + { url = "https://files.pythonhosted.org/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674, upload-time = "2026-06-02T11:54:10.087Z" }, + { url = "https://files.pythonhosted.org/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807, upload-time = "2026-06-02T11:54:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941, upload-time = "2026-06-02T11:54:15.436Z" }, + { url = "https://files.pythonhosted.org/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528, upload-time = "2026-06-02T11:54:18.842Z" }, + { url = "https://files.pythonhosted.org/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050, upload-time = "2026-06-02T11:54:21.699Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190, upload-time = "2026-06-02T11:54:24.454Z" }, + { url = "https://files.pythonhosted.org/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204, upload-time = "2026-06-02T11:54:27.428Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661, upload-time = "2026-06-02T11:54:30.192Z" }, +] + +[[package]] +name = "scipy" +version = "1.15.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/2f/4966032c5f8cc7e6a60f1b2e0ad686293b9474b65246b0c642e3ef3badd0/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c", size = 38702770, upload-time = "2025-05-08T16:04:20.849Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/6e/0c3bf90fae0e910c274db43304ebe25a6b391327f3f10b5dcc638c090795/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253", size = 30094511, upload-time = "2025-05-08T16:04:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/ea/b1/4deb37252311c1acff7f101f6453f0440794f51b6eacb1aad4459a134081/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f", size = 22368151, upload-time = "2025-05-08T16:04:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/38/7d/f457626e3cd3c29b3a49ca115a304cebb8cc6f31b04678f03b216899d3c6/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92", size = 25121732, upload-time = "2025-05-08T16:04:36.596Z" }, + { url = "https://files.pythonhosted.org/packages/db/0a/92b1de4a7adc7a15dcf5bddc6e191f6f29ee663b30511ce20467ef9b82e4/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82", size = 35547617, upload-time = "2025-05-08T16:04:43.546Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40", size = 37662964, upload-time = "2025-05-08T16:04:49.431Z" }, + { url = "https://files.pythonhosted.org/packages/25/e1/3df8f83cb15f3500478c889be8fb18700813b95e9e087328230b98d547ff/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e", size = 37238749, upload-time = "2025-05-08T16:04:55.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/3e/b3257cf446f2a3533ed7809757039016b74cd6f38271de91682aa844cfc5/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c", size = 40022383, upload-time = "2025-05-08T16:05:01.914Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/55bc4881973d3f79b479a5a2e2df61c8c9a04fcb986a213ac9c02cfb659b/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13", size = 41259201, upload-time = "2025-05-08T16:05:08.166Z" }, + { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255, upload-time = "2025-05-08T16:05:14.596Z" }, + { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035, upload-time = "2025-05-08T16:05:20.152Z" }, + { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499, upload-time = "2025-05-08T16:05:24.494Z" }, + { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602, upload-time = "2025-05-08T16:05:29.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415, upload-time = "2025-05-08T16:05:34.699Z" }, + { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622, upload-time = "2025-05-08T16:05:40.762Z" }, + { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796, upload-time = "2025-05-08T16:05:48.119Z" }, + { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684, upload-time = "2025-05-08T16:05:54.22Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504, upload-time = "2025-05-08T16:06:00.437Z" }, + { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, + { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, + { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" }, + { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, + { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, + { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, + { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, + { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, + { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, + { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, + { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, + { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, + { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, + { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, + { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, +] + +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, 
+ { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, ] [[package]] @@ -1747,6 +3295,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = "sse-starlette" version = "3.4.4" @@ -1762,15 +3328,124 @@ wheels = [ [[package]] name = "starlette" -version = "1.0.0" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/e3/7c1dc7381d9f8ab7d854328ebfa884e62cb3f3d8549ddfd37c7814f42afa/starlette-1.3.1.tar.gz", hash = "sha256:05d0213193f2fbaae60e2ecb593b4add4262ad4e46536b54abe36f11a71724e0", size = 2703240, upload-time = "2026-06-12T09:23:11.602Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bb/2799cc2ede3ed41131f8975621e7213dfc7ef4acbbaadfa440f32500c370/starlette-1.3.1-py3-none-any.whl", hash = "sha256:c7372aae11c3c3f26a42df7bd626cec2f47d03483d261d369516a615a53714c6", size = 73632, upload-time = "2026-06-12T09:23:10.017Z" }, +] + +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = 
"sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + +[[package]] +name = "tiktoken" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e4/e5/5f3cb2159769d0f4324c0e9e87f9de3c4b1cd45848a96b2eb3566ad5ca77/tiktoken-0.13.0.tar.gz", hash = "sha256:c9435714c3a84c2319499de9a300c0e604449dd0799ff246458b3bb6a7f433c1", size = 38986, upload-time = "2026-05-15T04:51:27.153Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/e3/03c90dadcf5b3f82b83cee9adee60ef666b329c654f58c066af44eae0287/tiktoken-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:47b1df8d73390a24f94980c75158cdd5c56d256f16d55f30cb49c230caba9ba4", size = 1036627, upload-time = "2026-05-15T04:50:11.229Z" }, + { url = "https://files.pythonhosted.org/packages/5e/30/760463e5b2e8ad2bc229ae0a17ecb06727b6cbc094f08d8f65844315632e/tiktoken-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7d40c6c5aab171dcd6eb8455bc567bde404bb9def60cdb8c1299cc782b242bb9", size = 984699, upload-time = "2026-05-15T04:50:12.874Z" }, + { url = "https://files.pythonhosted.org/packages/de/8a/8895f342a6b6aabd1a358e672f6f077b3ae51d0c63ca605d142db3bcd8ab/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:9b842981fa91accdffd48ff6408a977b7a91c3fbda55d353c3c68114d5c9d69e", size = 1118690, upload-time = "2026-05-15T04:50:14.234Z" }, + { url = "https://files.pythonhosted.org/packages/51/e0/92557768fb0801f0d9dd9243cb9b6d342900b05e4b1006d4771f49ce233e/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed5a30027cb4d8c7ca8b273d4766f3db3cf58fad9e9f3b1a68a351ffb54873d5", size = 1138423, upload-time = "2026-05-15T04:50:15.668Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/b9/a3d99feeedb032ffd09cd6652077f86bdee9a70dd0b990b2b272b445d4c3/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7ab10f4a21c2999846940113f6dbd72e0fa06a24119feddd74cc47e85818e06d", size = 1185077, upload-time = "2026-05-15T04:50:17.19Z" }, + { url = "https://files.pythonhosted.org/packages/cc/93/bab868277d475dc6d2aaacd34cdd239c282f4908dcc8702e0a3311a8e032/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a2937ad042d49d50eac6e1ba07c5661d4bd3942a5b1e0c0d08475c4df83676e1", size = 1241702, upload-time = "2026-05-15T04:50:18.772Z" }, + { url = "https://files.pythonhosted.org/packages/c3/16/27e9f7e0ed76e501cfefc9fb2112df4c7bf70ca96945b15ecb7615aac860/tiktoken-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:44733b99bfd72b590cd0936b1c01b3b4dd73122db2d544bc1ceeb18a7678c910", size = 876565, upload-time = "2026-05-15T04:50:20.268Z" }, + { url = "https://files.pythonhosted.org/packages/1a/4c/1bc81f4cd53e827c4ee67ca951b5935724716049452d8dfa09b8b82372bb/tiktoken-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:7bfe1849caa65d1e1d9871817170ec497bbb7984e182012e1bdce72f66608cdb", size = 1036353, upload-time = "2026-05-15T04:50:21.757Z" }, + { url = "https://files.pythonhosted.org/packages/75/91/10b9c7076bc02c246c853201fdbbe300a4b8c5ed7b84c25f7403f4e32655/tiktoken-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:91c180fe255bd5a86d8316210d2833a1d4d33d026cd86a67812f4773743c8d26", size = 984644, upload-time = "2026-05-15T04:50:23.256Z" }, + { url = "https://files.pythonhosted.org/packages/4e/e4/fceae98015fab47fcd49b8bd7f46145bcd187a47e0add1e5378ed67ef980/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:059c8ecf554eb5b41e6e054ba467b871b03277d267dee7244380aca4359747d4", size = 1119261, upload-time = "2026-05-15T04:50:24.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/39/fe42ad00de01a8c4a49ad8649a2c8a316835a9cad5961b11d21eac0020a5/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:36217497eaffc158607a3b26f065300db2aefd43b115263f3b9688ce38146173", size = 1138253, upload-time = "2026-05-15T04:50:25.505Z" }, + { url = "https://files.pythonhosted.org/packages/03/c4/ccee1ecccca107e9a16efcecdeeb964c325305038554d466ece65b42338f/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:303f7d91b4fce3baddbcde05c139091d4caa5026ac7214c1dc7ff7a71ee429ff", size = 1185747, upload-time = "2026-05-15T04:50:27.02Z" }, + { url = "https://files.pythonhosted.org/packages/9d/03/cd0cba295522b91eb55c6b2704f1df895f8226cfe60ab10d4d51d0cc9e69/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5d48843bee149630eb735a99e1f4a85b47308d21868ea63163f6e87768d3cfed", size = 1241265, upload-time = "2026-05-15T04:50:28.815Z" }, + { url = "https://files.pythonhosted.org/packages/7e/25/a10efd564402d82c2ff50d12057353ace447aa8007deceaa48641f63d35c/tiktoken-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:fc1c44cd37b43fc46bae593129164f4f281e82ea116b57a85aa81bda57eafc94", size = 876509, upload-time = "2026-05-15T04:50:30.026Z" }, + { url = "https://files.pythonhosted.org/packages/85/8e/144bde4e01df66b34bb865557c7cd754ed08b036217ebd79c9db5e9048a9/tiktoken-0.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32ac870a806cfb260a02d0cb70426aef02e038297f8ad50df5040bb5af360791", size = 1034888, upload-time = "2026-05-15T04:50:31.579Z" }, + { url = "https://files.pythonhosted.org/packages/36/18/d4ac9d20956cdebca04841316660ed584c2fecdc2b81722a28bc7ad3b1e4/tiktoken-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d9980f11429ed2d737c463bb1fb78cf330caa026adf002f714aced7849a687b", size = 982970, upload-time = "2026-05-15T04:50:32.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/ed/6bb8d05b9f731f749fee5c6f5ca63e981143c826a5985877330507bd13b7/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3f277ebea5edd7b8bf03c6f9431e1d67d517530115572b2dc1d465326e8f88c7", size = 1115741, upload-time = "2026-05-15T04:50:34.475Z" }, + { url = "https://files.pythonhosted.org/packages/34/de/2ca96b07a82d972b74fe4b46de055b79c904e45c7eab699354a0bfa697dc/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a116178fa7e1b4065bff05214360373a65cac22f965be7b3f73d00a0dbfe7649", size = 1136523, upload-time = "2026-05-15T04:50:35.782Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/9dafec002c2d4424378563cf4cf5c7fb93631d2a55013c8b87554ee4012c/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2c397ddda233208345b01bd30f2fca79ff730e55731d0108a603f9bc57f6af3b", size = 1181954, upload-time = "2026-05-15T04:50:36.99Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d0/1f8578c45b2f24759b46f0b50d31878c63c73e6bf0f2227e10ec5c5408dc/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95097e4f89b06403976e498abf61a0ee73a7497e73fb599cb211d8197a054d91", size = 1240069, upload-time = "2026-05-15T04:50:38.221Z" }, + { url = "https://files.pythonhosted.org/packages/aa/90/28d7f154888610aa9237e541986beb62b479df29d193a5a0617dbb1514d0/tiktoken-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8f2d16e7a7c783ad81f36e457d046d1f1c8af70b22aec8a13238efe531977c41", size = 874748, upload-time = "2026-05-15T04:50:39.587Z" }, + { url = "https://files.pythonhosted.org/packages/9c/83/b096c859c2a47c11731bf2f5885f4028b809dfe2396582883eed9cae372f/tiktoken-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5df5d1507bd245f1ccad4a074698240021239e455eb0bb4ced4e3d7181872154", size = 1034228, upload-time = "2026-05-15T04:50:40.988Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/61/c68e123b6d753e3fc2751e9b18e732c9d8bf1e1926762e736eee935d931c/tiktoken-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fe806a50664e83a6ffd56cbd1e4f5dcc6cd32a3e7538f70dc38b1a271384545", size = 982978, upload-time = "2026-05-15T04:50:42.195Z" }, + { url = "https://files.pythonhosted.org/packages/ef/8b/96cc178cc584e65d363134500f297790b06cd48cdeb1e8fcf7bbe60f4715/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:125bc05005e747f993a83dc67934249932d6e4209854452cd4c0b1d53fba3ba2", size = 1116355, upload-time = "2026-05-15T04:50:43.564Z" }, + { url = "https://files.pythonhosted.org/packages/86/f5/bab735d2c72ea55404b295d02d092644eb5f7cc6205e34d35eb9abfb9ab2/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5e6358911cab4adee6712da27d65573496a4f68cf8a2b5fca6a4ad10fc5748cf", size = 1135772, upload-time = "2026-05-15T04:50:44.782Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b9/6de04ebdf904edfaad87788011b3735087a0c9ea671b9027e1e4e965e8c8/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:975cbd78d085d75d26b59660e262736dcaed1e35f8f142cd6291025c01d25486", size = 1182415, upload-time = "2026-05-15T04:50:46.422Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9c/470a05f3b1caf038f44880e334d47ab674e0c80d514c66b375d14d5afa10/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ab9bc99fa020a4c283424590ecd7f3afd70c1c281cb3fa3192a6c3af9f9615", size = 1239879, upload-time = "2026-05-15T04:50:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/42/a6/c1936d16055436cb32e6c6128d68629622e00f4768562f55653752d34768/tiktoken-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:6b1615f0ff71953d19729ceb18865429c185b0a23c5353f1bbca34a394bf60f7", size = 874829, upload-time = "2026-05-15T04:50:49.202Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/07/acb5992c3772b5a36284f742cfb7a5895aa4471d1848ac31464ad50d7fdf/tiktoken-0.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6eb4a5bfbc6426938026b1a334e898ac53541360d62d8c689870160cc80abd67", size = 1033600, upload-time = "2026-05-15T04:50:50.4Z" }, + { url = "https://files.pythonhosted.org/packages/14/e9/742e9aec30f59b9f161f7ff7cd072e02ea836c9e1c0854a8076dfcd40d5c/tiktoken-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:43cee3e5400573b2046fbf092cc7a5bc30164f9e4c95ce20714da929df48737a", size = 982516, upload-time = "2026-05-15T04:50:52.03Z" }, + { url = "https://files.pythonhosted.org/packages/72/74/ca1541b053e7648254d2e4b42a253e1bb4359f2c91a0a8d49228c794e1a0/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7de52e3f566d19b3b11bd37eea552c6c305ad74081f736882bd44d148ed4c48d", size = 1115518, upload-time = "2026-05-15T04:50:53.543Z" }, + { url = "https://files.pythonhosted.org/packages/46/e3/93825eaf5a4a504795b787e5d5dea07fbeb3dabf97aa7b450be8bde59c89/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:51384448aa508e4df84c0f7c1dc3211c7f7b8096325660ee5fc82f3e11b381ce", size = 1136867, upload-time = "2026-05-15T04:50:55.191Z" }, + { url = "https://files.pythonhosted.org/packages/8c/46/002b68de6827091d5ae90b048f326e8aad8d953520950e5ce1508879414f/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e28157350f7ebf35008dd8e9e0fdb621f976e4230c881099c85e8cf07eaa50e2", size = 1181826, upload-time = "2026-05-15T04:50:56.296Z" }, + { url = "https://files.pythonhosted.org/packages/db/c6/d393e3185a276505182f7abd93fe714f3c444a2be9180798fa052347504e/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:165cf1820ea4a354985c2490a5205d4cc74661c934aca79dd0368232fff94e0f", size = 1239489, upload-time = "2026-05-15T04:50:57.918Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/4d/bc07d1f1635d4897a202acc0ae11c2886eaa7325c359ba4741b47bf8e225/tiktoken-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6c43a675ca14f6f2749ba7f12075d37456015a24b859f2517b9beb4ef30807ec", size = 873820, upload-time = "2026-05-15T04:50:59.528Z" }, + { url = "https://files.pythonhosted.org/packages/8c/93/0dd6adca026a616c3a92974566b43381eea4b475ce1f36c062b8271a9ac5/tiktoken-0.13.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaaaef47c2406277181d2086484c317bf7fc433e2d5d03ff94f56b0dcec87471", size = 1034977, upload-time = "2026-05-15T04:51:00.957Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5ec6e6bc5b30bed6d93f7f2162d8f6b32437b3ba27cb527cfe004f6109c9/tiktoken-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ca8b310bd93b3772cb1b7922d915446864860f562bdfe4825c63a0aed3fb28cd", size = 983635, upload-time = "2026-05-15T04:51:02.629Z" }, + { url = "https://files.pythonhosted.org/packages/94/b0/c8ae9aff00d625c50659b4513e707a0462c4bf5d4d6cc1b802103225c02e/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:32e0c12305105002c047b3bb1070b0dd9a73b0cb3b2856a8972b810e7a4f5881", size = 1116036, upload-time = "2026-05-15T04:51:04.082Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ac/6a5dddd1d0a6018ecb389bd0353e6b4a515eb4d2286611bd0ace1937b9e1/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5ba5fd62507a932d1241346179e3b39bc7bf7408f03c272652d93b3bedf5db24", size = 1135544, upload-time = "2026-05-15T04:51:05.229Z" }, + { url = "https://files.pythonhosted.org/packages/f4/b8/585032b4384b2f7dcdaddcb52865c83a701a420d09e3c2b4a2be1c450c57/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d108bc2d470fc53c8ecd24f2c0fd2b5f98c33e87cdb6aa2e9b8c5dced703d273", size = 1182217, upload-time = "2026-05-15T04:51:06.517Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/b6/993ff1ded3958215fd341a847b8e5ffeb5de473f435296870d314fc91ac4/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb99cb5127449f58d0a2d5f5ccfb390d8dbdfd919c221246caaee29d8725ed51", size = 1239404, upload-time = "2026-05-15T04:51:07.843Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3d/fef7e06e3b33e7538db0ced734cf9fe23b6832d2ac4990c119c377aec55e/tiktoken-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:115c4f26ffa11caac8b54eea35c2ad38c612c20a48d35dd15d70a02ac6f51f58", size = 918686, upload-time = "2026-05-15T04:51:08.925Z" }, + { url = "https://files.pythonhosted.org/packages/c1/82/a7fc44582bc32ab00de988a2299bf77c077f59068b233109e34b7d6ca7e6/tiktoken-0.13.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:472527e9132952f2fbf77cd290658bacf003d4d5a3fabc18e5fbd407cbae4d9b", size = 1034454, upload-time = "2026-05-15T04:51:10.035Z" }, + { url = "https://files.pythonhosted.org/packages/37/d0/24d8a890c14f432a05cea669c17bebeaa99f96a7c79523b590f564246411/tiktoken-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e2f67d27c9626cdd25fe33d9313c5cdb3d8d82da646b68d6eb8e7e9c20e6448", size = 982976, upload-time = "2026-05-15T04:51:11.23Z" }, + { url = "https://files.pythonhosted.org/packages/49/b7/2ab43f62788a9266187a9bfc1d3af99ad83e5eaa25fbef168a69cd5ad14f/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2b920b35805cd64585a37c3dc7ce65fba4d2d36016be01e1d7942482ca29093a", size = 1115526, upload-time = "2026-05-15T04:51:12.608Z" }, + { url = "https://files.pythonhosted.org/packages/64/39/1494321ed323ce7a14d88e3cd6cb9058625977df1c6961ddc492bd10a9f3/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:493af3aa28a4aaf2e3d2600a2ee717252c9bf5ab38fff94eb5a02db5ab77e5ad", size = 1136466, upload-time = "2026-05-15T04:51:13.926Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/d9/dfd086aa2d918c563a140720e0ce296cada1634efd2783d5cf51e05f984e/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6644c9c2b5cf3916f5a3641d7d12fdb3f006a7b3d9ff6acdaec44e29ab1ff91e", size = 1181863, upload-time = "2026-05-15T04:51:15.025Z" }, + { url = "https://files.pythonhosted.org/packages/2f/68/a18b4f307086954fdae32714cb4f85562e34f9d34ab206e61f1816aa6018/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5cb65b60b9408563676d874a3a4ee573370066f0dc4e29d84e82e989c6517424", size = 1239218, upload-time = "2026-05-15T04:51:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/16/5b/f2aa703a4fc5d2dff73460a7d46cc2f3f44aa0f3dd8eeb20d2a0ecf68862/tiktoken-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:85b78cc3a2c3d48723ca751fa981f1fedccd54194ca0471b957364353a898b07", size = 918110, upload-time = "2026-05-15T04:51:17.237Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c1/60/21f715d9faba5f5407ff759472ade058ec4a507ad62bcea47cb847239a73/tokenizers-0.23.1.tar.gz", hash = "sha256:1feeeadf865a7915adc25445dea30e9933e593c31bb96c277cee36de227c8bfa", size = 365748, upload-time = "2026-04-27T14:43:25.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/39/b87a87d5bb9470610b80a2d31df42fcffeaf35118b8b97952b2aff598cc7/tokenizers-0.23.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e03d6ffcbe0d56ee9c1ccd070e70a13fa750727c0277e138152acbc0252c2224", size = 3146732, upload-time = "2026-04-27T14:43:15.427Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6a/068ed9f6e444c9d7e9d55ce134181325700f3d7f30410721bdc8f848d727/tokenizers-0.23.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:e0948bbb1ac1d7cdfc9fb6d62c596e3b7550036ad60ecd654a66ad273326324e", size = 3054954, upload-time = 
"2026-04-27T14:43:13.745Z" }, + { url = "https://files.pythonhosted.org/packages/6c/36/e006edf031154cba92b8416057d92c3abe3635e4c4b0aa0b5b9bb39dde70/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf13402aff9bc533c89cb849ec3b412dc3fbeacc9744840e423d7bf3f7dc0e3", size = 3374081, upload-time = "2026-04-27T14:43:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/a2/ef/7735d226f9c7f874a6bee5e3f27fb25ecabdf207d37b8cf45286d0795893/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f836ca703b89ae07919a309f9651f7a88fd5a33d5f718ba5ad0870ec0256bad6", size = 3247641, upload-time = "2026-04-27T14:43:03.856Z" }, + { url = "https://files.pythonhosted.org/packages/b9/d9/24827036f6e21297bfffda0768e58eb6096a4f411e932964a01707857931/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae848657742035523fdf261773630cb819a26995fcd3d9ecae0c1daf6e5a4959", size = 3585624, upload-time = "2026-04-27T14:43:10.664Z" }, + { url = "https://files.pythonhosted.org/packages/0c/9a/22f3582b3a4f49358293a5206e25317621ee4526bfe9cdaa0f07a12e770e/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53b09e85775d5187941e7bab30e941b4134ab4a7dd8c68e783d231fb7ca27c51", size = 3844062, upload-time = "2026-04-27T14:43:05.643Z" }, + { url = "https://files.pythonhosted.org/packages/7e/65/b8f8814eef95800f20721384136d9a1d22241d50b2874357cb70542c392f/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea5a0ce170074329faaa8ea3f6400ecde604b6678192688533af80980daae71a", size = 3460098, upload-time = "2026-04-27T14:43:08.854Z" }, + { url = "https://files.pythonhosted.org/packages/0d/d5/1353e5f677ec27c2494fb6a6725e82d56c985f53e90ec511369e7e4f02c6/tokenizers-0.23.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5075b405006415ea148a992d093699c66eb01952bf59f4d5727089a98bda45a4", 
size = 3346235, upload-time = "2026-04-27T14:43:12.377Z" }, + { url = "https://files.pythonhosted.org/packages/71/89/39b6b8fc073fb6d413d0147aa333dc7eff7be65639ac9d19930a0b21bf33/tokenizers-0.23.1-cp310-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:56f3a77de629917652f876294dc9fe6bad4a0c43bc229dc72e59bb23a0f4729a", size = 3426398, upload-time = "2026-04-27T14:43:07.264Z" }, + { url = "https://files.pythonhosted.org/packages/0f/80/127c854da64827e5b79264ce524993a90dddcb320e5cd42412c5c02f9e8a/tokenizers-0.23.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d10a6d957ef01896dc274e890eee27d41bd0e74ef31e60616f0fc311345184e", size = 9823279, upload-time = "2026-04-27T14:43:17.222Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ba/44c2502feb1a058f096ddfb4e0996ef3225a01a388e1a9b094e91689fe93/tokenizers-0.23.1-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1974288a609c343774f1b897c8b482c791ab17b75ab5c8c2b1737565c1d82288", size = 9644986, upload-time = "2026-04-27T14:43:19.45Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c1/464019a9fb059870bfe4eebb4ba12208f3042035e258bf5e782906bd3847/tokenizers-0.23.1-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:120468fb4c24faf0543c835a4fabafa4deb3f20a035c9b6e83d0b553a97615d4", size = 9976181, upload-time = "2026-04-27T14:43:21.463Z" }, + { url = "https://files.pythonhosted.org/packages/79/94/3ac1432bda31626071e9b6a12709b97ae05131c804b94c8f3ac622c5da32/tokenizers-0.23.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e3d8f40ea6268047de7046906326abed5134f27d4e8447b23763afe5808c8a96", size = 10113853, upload-time = "2026-04-27T14:43:23.617Z" }, + { url = "https://files.pythonhosted.org/packages/6a/dd/631b21433c771b1382535326f0eca80b9c9cee2e64961dd993bc9ac4669e/tokenizers-0.23.1-cp310-abi3-win32.whl", hash = "sha256:93120a930b919416da7cd10a2f606ac9919cc69cacae7980fa2140e277660948", size = 2536263, upload-time = "2026-04-27T14:43:29.888Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/c9/2553f72aaf65a2797d4229e37fa7fbe38ffbf3e32912d31bdd78b3323e59/tokenizers-0.23.1-cp310-abi3-win_amd64.whl", hash = "sha256:e7bfaf995c1bdbbd21d13539decb6650967013759318627d85daeb7881af16b7", size = 2798223, upload-time = "2026-04-27T14:43:28.51Z" }, + { url = "https://files.pythonhosted.org/packages/cd/2b/2be299bab55fc595e3d38567edb1a87f86e594842968fa9515a07bdcf422/tokenizers-0.23.1-cp310-abi3-win_arm64.whl", hash = "sha256:a26197957d8e4425dfba746315f3c425ea00cfa8367c5fbc4ec73447893dcea9", size = 2664127, upload-time = "2026-04-27T14:43:26.949Z" }, ] [[package]] @@ -1827,6 +3502,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, ] +[[package]] +name = "tqdm" +version = "4.68.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/d7/0535a28b1f5f24f6612fb3ff1e89fb1a8d160fee0f976e0aa6803862134b/tqdm-4.68.3.tar.gz", hash = "sha256:00dfa48452b6b6cfae3dd9885636c23d3422d1ec97c66d96818cbd5e0821d482", size = 170596, upload-time = "2026-06-17T07:36:52.105Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/8e/bb97bb0c71802080bfc8952937d174e49cfc50de5c951dd47b2496f0dcdb/tqdm-4.68.3-py3-none-any.whl", hash = "sha256:39832cc2def2789a6f29df83f172db7416cea70052c0907a57801c5f2fdccb03", size = 78337, upload-time = "2026-06-17T07:36:50.132Z" }, +] + [[package]] name = "twine" version = "6.2.0" @@ -1847,6 +3534,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/7a/882d99539b19b1490cac5d77c67338d126e4122c8276bf640e411650c830/twine-6.2.0-py3-none-any.whl", hash = 
"sha256:418ebf08ccda9a8caaebe414433b0ba5e25eb5e4a927667122fbe8f829f985d8", size = 42727, upload-time = "2025-09-04T15:43:15.994Z" }, ] +[[package]] +name = "typer" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e4/51/9aed62104cea109b820bbd6c14245af756112017d309da813ef107d42e7e/typer-0.25.1.tar.gz", hash = "sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc", size = 122276, upload-time = "2026-04-30T19:32:16.964Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl", hash = "sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size = 58409, upload-time = "2026-04-30T19:32:18.271Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1868,6 +3570,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "umap-learn" +version = "0.5.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numba" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pynndescent" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.9.0", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/ee/af4171241117f85c74b5ca6448ea1033cc28d599c13651d67289bacd4083/umap_learn-0.5.12.tar.gz", hash = "sha256:6aff02ecac5f2aad9f3c65ee518d7ae93e1a985ae38721fdcffceee4232c33c7", size = 96672, upload-time = "2026-04-08T20:03:54.012Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/98/f63318ccbe75c810011fe9233884c5d348d94d90005de1b79e5f93bef9c0/umap_learn-0.5.12-py3-none-any.whl", hash = "sha256:f2a85d2a2adcb52b541bed9b27a23ca169b56bb1b23283abeebfb8dfb8a42fe5", size = 91849, upload-time = "2026-04-08T20:03:52.561Z" }, +] + [[package]] name = "urllib3" version = "2.7.0" @@ -1879,21 +3601,21 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.47.0" +version = "0.49.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/b1/8e7077a8641086aea449e1b5752a570f1b5906c64e0a33cd6d93b63a066b/uvicorn-0.47.0.tar.gz", hash = "sha256:7c9a0ea1a9414106bbab7324609c162d8fa0cdcdcb703060987269d77c7bb533", size = 90582, upload-time = "2026-05-14T18:16:54.455Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/1f/fa18009dea8469069cca78a4e877a008ab78f08b064bfc9ab891579077ff/uvicorn-0.49.0.tar.gz", hash = "sha256:ebf4271aa580d9de97f93192d4595176df6e91f9aae919ca73e4fc07df1e66a3", size = 91284, upload-time = "2026-06-03T22:01:30.448Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/15/41/ac2dfdbc1f60c7af4f994c7a335cfa7040c01642b605d65f611cecc2a1e4/uvicorn-0.47.0-py3-none-any.whl", hash = "sha256:2c5715bc12d1892d84752049f400cd1c3cb018514967fdfeb97640443a6a9432", size = 71301, upload-time = "2026-05-14T18:16:51.762Z" }, + { url = "https://files.pythonhosted.org/packages/88/fa/e1388bbcf24ef3274f45c0c1c7b501fd14971037c1b6ee23610553307497/uvicorn-0.49.0-py3-none-any.whl", hash = "sha256:ba3d14c3ee7e41c6c654c46c9eb489d33213cdd30aa1696eab1374337c13f68f", size = 71376, upload-time = "2026-06-03T22:01:29.037Z" }, ] [[package]] name = "virtualenv" -version = "21.3.3" +version = "21.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, @@ -1902,9 +3624,18 @@ dependencies = [ { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/15/ba/1f6e8c957e4932be060dcdc482d339c12e0216351478add3645cdaa53c05/virtualenv-21.3.3.tar.gz", hash = "sha256:f5bda277e553b1c2b3c1a8debfc30496e1288cc93ce6b7b71b3280047e317328", size = 7613784, upload-time = "2026-05-13T18:01:30.19Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/a5/81f987504738e6defeed61ec1c47e2aefab3c35d8eeb87e1b3f38cf28254/virtualenv-21.5.1.tar.gz", hash = "sha256:dca3bf98275a59c652b69d68e73433e597d977c2da9198882479d1a7188009c8", size = 4578798, upload-time = "2026-06-16T16:23:58.603Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/02/3623e6169bed617ed1e2d372f7c69f92ec28d54c4dfc997055c8578ec148/virtualenv-21.5.1-py3-none-any.whl", hash = "sha256:55aa670b67bbfb991b03fda39bd3276d92c419d702376e98c5df1c9989a26783", size = 4558820, upload-time = "2026-06-16T16:23:56.963Z" }, +] + +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/34/a9dbe051de88a63eb7408ea66630bac38e72f7f6077d4be58737106860d9/virtualenv-21.3.3-py3-none-any.whl", hash = "sha256:7d5987d8369e098e41406efb780a3d4ca79280097293899e351a6407ee153ab3", size = 7594554, upload-time = "2026-05-13T18:01:27.815Z" }, + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, ] [[package]] diff --git a/zensical.toml b/zensical.toml new file mode 100644 index 00000000..36689c0a --- /dev/null +++ b/zensical.toml @@ -0,0 +1,244 @@ +# Zensical configuration for CodeClone docs site. +# Docs: https://zensical.org/docs/ + +[project] +site_name = "CodeClone" +site_description = "Structural review layer for Python" +site_url = "https://orenlab.github.io/codeclone/" +repo_url = "https://github.com/orenlab/codeclone" +repo_name = "orenlab/codeclone" + +docs_dir = "docs" +edit_uri = "blob/main/docs/" + +exclude_docs = ["README-pypi.md"] + +extra_css = ["assets/stylesheets/extra.css"] + +nav = [ + { "Home" = "index.md" }, + { "Get started" = [ + { "1. Install & first run" = "getting-started.md" }, + { "2. 
Your first governed edit" = "start/first-governed-edit.md" }, + { "Example report" = "examples/report.md" }, + ] }, + { "Guide" = [ + { "Overview" = "guide/README.md" }, + { "Explanation" = [ + { "How CodeClone works" = "guide/explanation/how-it-works.md" }, + ] }, + { "Change control" = [ + { "Overview" = "guide/change-control/overview.md" }, + { "Agent edit cycle" = "guide/change-control/agent-cycle.md" }, + { "Queue & recovery" = "guide/change-control/queue-and-recovery.md" }, + { "Atomic debug path" = "guide/change-control/atomic-debug.md" }, + ] }, + { "Engineering Memory" = [ + { "Overview" = "guide/memory/overview.md" }, + { "Trajectories & Experiences" = "guide/memory/trajectories-and-experiences.md" }, + ] }, + { "MCP" = [ + { "Overview" = "guide/mcp/README.md" }, + { "Client setup" = "guide/mcp/client-setup.md" }, + { "Server & transport" = "guide/mcp/server-and-transport.md" }, + { "Architecture" = "guide/mcp/architecture.md" }, + { "Workflows" = [ + { "Analyze & triage" = "guide/mcp/workflows/analyze-and-triage.md" }, + { "Drill down & checks" = "guide/mcp/workflows/drill-down-and-checks.md" }, + { "Change control" = "guide/mcp/workflows/change-control.md" }, + { "Memory recipes" = "guide/mcp/workflows/memory-recipes.md" }, + { "Coverage join & session markers" = "guide/mcp/workflows/session-and-coverage.md" }, + { "Observability (maintainer)" = "guide/mcp/workflows/observability-recipes.md" }, + ] }, + { "Prompt patterns" = "guide/mcp/prompts.md" }, + { "Payload cheat sheet" = "guide/mcp/payload-cheatsheet.md" }, + { "Troubleshooting" = "guide/mcp/troubleshooting.md" }, + ] }, + { "Integrations" = [ + { "VS Code" = "guide/integrations/vscode/setup.md" }, + { "Cursor" = "guide/integrations/cursor/install-and-skills.md" }, + { "Claude Code" = "guide/integrations/claude-code/setup.md" }, + { "Codex" = "guide/integrations/codex/setup.md" }, + { "Claude Desktop" = "guide/integrations/claude-desktop/setup.md" }, + { "SARIF export" = 
"guide/integrations/sarif/export.md" }, + { "GitHub Action" = "book/integrations/github-action.md" }, + ] }, + ] }, + { "Contracts" = [ + { "Overview" = "book/README.md" }, + { "Foundations" = [ + { "Intro" = "book/00-intro.md" }, + { "Terminology" = "book/01-terminology.md" }, + { "Architecture Map" = "book/02-architecture-map.md" }, + ] }, + { "Pipeline & data" = [ + { "Core Pipeline" = "book/03-core-pipeline.md" }, + { "CFG Semantics" = "book/04-cfg-semantics.md" }, + { "Report" = "book/05-report.md" }, + { "HTML Render" = "book/06-html-render.md" }, + { "Baseline" = "book/07-baseline.md" }, + { "Cache" = "book/08-cache.md" }, + ] }, + { "Contracts & config" = [ + { "Exit Codes" = "book/09-exit-codes.md" }, + { "Config and Defaults" = "book/10-config-and-defaults.md" }, + { "CLI" = "book/11-cli.md" }, + ] }, + { "Change control" = [ + { "Overview" = "book/12-structural-change-controller/index.md" }, + { "CLI queries" = "book/12-structural-change-controller/cli-controller-queries.md" }, + { "Blast radius & receipt" = "book/12-structural-change-controller/blast-radius-and-receipt.md" }, + { "Intent registry & queue" = "book/12-structural-change-controller/intent-registry-and-queue.md" }, + { "Verification profiles" = "book/12-structural-change-controller/verification-profiles.md" }, + { "Patch contract verify" = "book/12-structural-change-controller/patch-contract-verify.md" }, + { "Workflow tools" = "book/12-structural-change-controller/workflow-tools.md" }, + { "Finish controlled change" = "book/12-structural-change-controller/finish-controlled-change.md" }, + { "Finish hygiene" = "book/12-structural-change-controller/finish-hygiene.md" }, + { "Patch Trail" = "book/12-structural-change-controller/patch-trail.md" }, + { "Payload semantics" = "book/12-structural-change-controller/payload-semantics.md" }, + { "Token budget" = "book/12-structural-change-controller/token-budget.md" }, + { "Claim Guard" = "book/14-claim-guard.md" }, + ] }, + { "Engineering Memory" = [ 
+ { "Overview" = "book/13-engineering-memory/index.md" }, + { "Trust & lifecycle" = "book/13-engineering-memory/trust-and-lifecycle.md" }, + { "Bootstrap & config" = "book/13-engineering-memory/bootstrap-and-config.md" }, + { "CLI surface" = "book/13-engineering-memory/cli-surface.md" }, + { "MCP surface" = "book/13-engineering-memory/mcp-surface.md" }, + { "Agent contracts" = "book/13-engineering-memory/agent-contracts.md" }, + { "Staleness" = "book/13-engineering-memory/staleness-and-anchors.md" }, + { "FTS search" = "book/13-engineering-memory/search-fts.md" }, + { "Semantic search" = "book/13-engineering-memory/search-semantic.md" }, + { "Trajectory" = "book/13-engineering-memory/trajectory-and-patch-trail.md" }, + { "Trajectory quality & passport" = "book/13-engineering-memory/trajectory-quality-and-passport.md" }, + { "Trajectory labels" = "book/13-engineering-memory/trajectory-labels.md" }, + { "Experience Layer" = "book/13-engineering-memory/experience-layer.md" }, + { "Projection jobs" = "book/13-engineering-memory/projection-jobs.md" }, + { "Scope & invariants" = "book/13-engineering-memory/scope-and-invariants.md" }, + ] }, + { "Quality signals" = [ + { "Health Score" = "book/15-health-score.md" }, + { "Metrics and Gates" = "book/16-metrics-and-quality-gates.md" }, + { "Dead Code" = "book/17-dead-code-contract.md" }, + { "Suggestions and Clone Typing" = "book/18-suggestions-and-clone-typing.md" }, + { "Inline Suppressions" = "book/19-inline-suppressions.md" }, + { "Benchmarking" = "book/20-benchmarking.md" }, + ] }, + { "System properties" = [ + { "Security Model" = "book/21-security-model.md" }, + { "Determinism" = "book/22-determinism.md" }, + { "Testing as Spec" = "book/23-testing-as-spec.md" }, + { "Compatibility and Versioning" = "book/24-compatibility-and-versioning.md" }, + { "Platform Observability" = "book/26-platform-observability.md" }, + { "Corpus Analytics" = "book/27-corpus-analytics.md" }, + ] }, + { "MCP interface" = [ + { "Overview" = 
"book/25-mcp-interface/index.md" }, + { "Tools" = [ + { "Analysis" = "book/25-mcp-interface/tools/analysis.md" }, + { "Implementation context" = "book/25-mcp-interface/tools/implementation-context.md" }, + { "Help topics" = "book/25-mcp-interface/tools/help-and-topics.md" }, + { "Report & findings" = "book/25-mcp-interface/tools/report-and-findings.md" }, + { "Checks" = "book/25-mcp-interface/tools/checks.md" }, + { "Workflow" = "book/25-mcp-interface/tools/workflow.md" }, + { "Atomic change control" = "book/25-mcp-interface/tools/atomic-change-control.md" }, + { "Session & memory" = "book/25-mcp-interface/tools/session-and-memory.md" }, + { "Platform observability" = "book/25-mcp-interface/tools/platform-observability.md" }, + { "IDE governance" = "book/25-mcp-interface/tools/ide-governance.md" }, + ] }, + { "Resources" = "book/25-mcp-interface/resources.md" }, + { "Payload conventions" = "book/25-mcp-interface/payload-conventions.md" }, + { "Determinism & tests" = "book/25-mcp-interface/determinism-and-tests.md" }, + ] }, + { "Integrations" = [ + { "VS Code" = "book/integrations/vs-code-extension.md" }, + { "Cursor plugin" = "book/integrations/cursor-plugin.md" }, + { "Claude Code plugin" = "book/integrations/claude-code-plugin.md" }, + { "Codex plugin" = "book/integrations/codex-plugin.md" }, + { "Claude Desktop" = "book/integrations/claude-desktop-bundle.md" }, + { "GitHub Action" = "book/integrations/github-action.md" }, + { "SARIF" = "book/integrations/sarif.md" }, + ] }, + { "Appendix" = [ + { "Status Enums" = "book/appendix/a-status-enums.md" }, + { "Schema Layouts" = "book/appendix/b-schema-layouts.md" }, + { "Error Catalog" = "book/appendix/c-error-catalog.md" }, + ] }, + ] }, + { "Legal & plans" = [ + { "Privacy Policy" = "privacy-policy.md" }, + { "Terms of Use" = "terms-of-use.md" }, + { "Plans and Retention" = "plans-and-retention.md" }, + ] }, + { "Maintainers" = [ + { "Diagnostics" = [ + { "Maintainer workflow" = 
"guide/observability/maintainer-workflow.md" }, + { "Quick diagnostics" = "guide/observability/diagnostics.md" }, + { "Corpus Analytics" = "guide/analytics/overview.md" }, + ] }, + { "Docs site" = [ + { "Publishing the docs site" = "publishing.md" }, + { "Releasing & storefront sync" = "releasing.md" }, + ] }, + ] }, +] + +[project.theme] +logo = "assets/codeclone-docs-wordmark.svg" +favicon = "assets/favicon.svg" + +features = [ + "navigation.tabs", + "navigation.sections", + "navigation.top", + "navigation.instant", + "navigation.tracking", + "search.highlight", + "content.code.copy", + "content.tabs.link", +] + +[project.theme.icon] +repo = "fontawesome/brands/github" + +[project.theme.font] +text = "Inter" +code = "JetBrains Mono" + +[[project.theme.palette]] +scheme = "default" +primary = "white" +accent = "indigo" +toggle.icon = "material/weather-night" +toggle.name = "Switch to dark mode" + +[[project.theme.palette]] +scheme = "slate" +primary = "black" +accent = "indigo" +toggle.icon = "material/weather-sunny" +toggle.name = "Switch to light mode" + +[project.markdown_extensions.admonition] +[project.markdown_extensions.attr_list] +[project.markdown_extensions.def_list] +[project.markdown_extensions.footnotes] +[project.markdown_extensions.tables] + +[project.markdown_extensions.toc] +permalink = true + +[project.markdown_extensions.pymdownx.details] + +[project.markdown_extensions.pymdownx.highlight] +anchor_linenums = true + +[project.markdown_extensions.pymdownx.inlinehilite] + +[project.markdown_extensions.pymdownx.superfences] +custom_fences = [ + { name = "mermaid", class = "mermaid", format = "pymdownx.superfences.fence_code_format" }, +] + +[project.markdown_extensions.pymdownx.tabbed] +alternate_style = true