From 4aa40eca136b5a89e8a2211d91f942e56a518574 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 14:36:36 +0500 Subject: [PATCH 001/113] feat(metrics): extract shared deterministic dep-graph sampler --- codeclone/metrics/dependencies.py | 80 +++++++++++++++++- .../report/html/sections/_dependencies.py | 57 +++---------- tests/test_metrics_modules.py | 81 +++++++++++++++++++ 3 files changed, 170 insertions(+), 48 deletions(-) diff --git a/codeclone/metrics/dependencies.py b/codeclone/metrics/dependencies.py index 573cc9e2..f73d3ded 100644 --- a/codeclone/metrics/dependencies.py +++ b/codeclone/metrics/dependencies.py @@ -10,9 +10,10 @@ from typing import TYPE_CHECKING from ..models import DepGraph, ModuleDep +from ..utils import coerce if TYPE_CHECKING: - from collections.abc import Iterable, Sequence + from collections.abc import Callable, Iterable, Sequence DepAdjacency = dict[str, set[str]] @@ -270,3 +271,80 @@ def build_dep_graph(*, modules: Iterable[str], deps: Sequence[ModuleDep]) -> Dep p95_depth=p95_depth, longest_chains=chains, ) + + +def select_dependency_graph_nodes( + edges: Sequence[tuple[str, str]], + *, + dep_cycles: Sequence[object], + longest_chains: Sequence[object], + max_nodes: int, + max_edges: int, + node_id_fn: Callable[[str], str] | None = None, +) -> tuple[list[str], list[tuple[str, str]], dict[str, object]]: + """Deterministic subgraph sample over directed import edges. + + Seeds cycle members, then longest-chain members, then fills remaining slots + by descending node degree (tie-break id ascending), so structurally + important nodes survive downsampling. ``node_id_fn`` maps a module name to + the active zoom id (package prefix or identity) before membership checks + when seeding from cycles and chains. Returns the shown nodes, the induced + (edge-capped) edges, and a truncation metadata mapping. + """ + all_nodes = sorted({part for edge in edges for part in edge}) + node_universe_count = len(all_nodes) + edge_universe_count = len(edges) + if node_universe_count > max_nodes: + degree_count: dict[str, int] = dict.fromkeys(all_nodes, 0) + for source, target in edges: + degree_count[source] = degree_count.get(source, 0) + 1 + degree_count[target] = degree_count.get(target, 0) + 1 + all_node_set = set(all_nodes) + nodes: list[str] = [] + node_set: set[str] = set() + + def _seed_node(node: object) -> None: + node_name = str(node).strip() + if node_id_fn is not None: + node_name = node_id_fn(node_name) + if ( + not node_name + or node_name not in all_node_set + or node_name in node_set + or len(nodes) >= max_nodes + ): + return + nodes.append(node_name) + node_set.add(node_name) + + for cycle in dep_cycles: + for node in coerce.as_sequence(cycle): + _seed_node(node) + for chain in longest_chains: + for node in coerce.as_sequence(chain): + _seed_node(node) + for node in sorted( + all_nodes, key=lambda item: (-degree_count.get(item, 0), item) + ): + _seed_node(node) + if len(nodes) >= max_nodes: + break + nodes.sort() + else: + nodes = list(all_nodes) + node_set = set(nodes) + filtered = [ + (source, target) + for source, target in edges + if source in node_set and target in node_set + ][:max_edges] + truncation: dict[str, object] = { + "truncated": len(nodes) < node_universe_count + or len(filtered) < edge_universe_count, + "node_universe_count": node_universe_count, + "node_shown_count": len(nodes), + "edge_universe_count": edge_universe_count, + "edge_shown_count": len(filtered), + "seed_policy": "cycles_then_chains_then_degree", + } + return nodes, filtered, truncation diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index c3fbbfbe..ebbeb465 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -12,6 +12,7 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING +from codeclone.metrics.dependencies import select_dependency_graph_nodes from codeclone.utils import coerce as _coerce from ..primitives.escape import _escape_html @@ -41,53 +42,15 @@ def _select_dep_nodes( dep_cycles: Sequence[object], longest_chains: Sequence[object], ) -> tuple[list[str], list[tuple[str, str]]]: - all_nodes = sorted({part for edge in edges for part in edge}) - if len(all_nodes) > 20: - degree_count: dict[str, int] = dict.fromkeys(all_nodes, 0) - for source, target in edges: - degree_count[source] = degree_count.get(source, 0) + 1 - degree_count[target] = degree_count.get(target, 0) + 1 - all_node_set = set(all_nodes) - nodes: list[str] = [] - node_set: set[str] = set() - - def _seed_node(node: object) -> None: - node_name = str(node).strip() - if ( - not node_name - or node_name not in all_node_set - or node_name in node_set - or len(nodes) >= 20 - ): - return - nodes.append(node_name) - node_set.add(node_name) - - # Keep the visual graph aligned with the dependency tables. When we - # downsample a large graph, cycle members and longest-chain nodes must - # remain visible instead of being dropped behind high-degree hubs. - for cycle in dep_cycles: - for node in _as_sequence(cycle): - _seed_node(node) - for chain in longest_chains: - for node in _as_sequence(chain): - _seed_node(node) - - for node in sorted( - all_nodes, key=lambda item: (-degree_count.get(item, 0), item) - ): - _seed_node(node) - if len(nodes) >= 20: - break - nodes.sort() - else: - nodes = all_nodes - node_set = set(nodes) - filtered = [ - (source, target) - for source, target in edges - if source in node_set and target in node_set - ][:100] + # Shared deterministic sampler (metrics.dependencies). Dependencies tab keeps + # its historical caps (20 nodes / 100 edges) and module-level identity zoom. + nodes, filtered, _truncation = select_dependency_graph_nodes( + edges, + dep_cycles=dep_cycles, + longest_chains=longest_chains, + max_nodes=20, + max_edges=100, + ) return nodes, filtered diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index b26ef890..278e443f 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -36,6 +36,7 @@ find_cycles, longest_chains, max_depth, + select_dependency_graph_nodes, ) from codeclone.metrics.health import HealthInputs, compute_health from codeclone.models import DeadCandidate, DeadItem, ModuleDep @@ -747,3 +748,83 @@ def _health_inputs( assert safe.dimensions["dependencies"] == 100 assert warn.dimensions["dependencies"] == 96 + + +def test_select_dependency_graph_nodes_small_graph_keeps_all() -> None: + nodes, filtered, truncation = select_dependency_graph_nodes( + [("a", "b"), ("b", "c")], + dep_cycles=[], + longest_chains=[], + max_nodes=20, + max_edges=100, + ) + assert nodes == ["a", "b", "c"] + assert filtered == [("a", "b"), ("b", "c")] + assert truncation == { + "truncated": False, + "node_universe_count": 3, + "node_shown_count": 3, + "edge_universe_count": 2, + "edge_shown_count": 2, + "seed_policy": "cycles_then_chains_then_degree", + } + + +def test_select_dependency_graph_nodes_seeds_cycles_chains_then_degree() -> None: + edges = [ + ("h", "a"), + ("h", "b"), + ("h", "c"), + ("h", "d"), + ("x", "y"), + ("y", "x"), + ] + nodes, filtered, truncation = select_dependency_graph_nodes( + edges, + dep_cycles=[["x", "y"]], + longest_chains=[["d"]], + max_nodes=4, + max_edges=100, + ) + # Cycle members and the chain member survive downsampling even though they + # are lower-degree than the hub; the hub fills the last slot by degree. + assert nodes == ["d", "h", "x", "y"] + assert truncation["truncated"] is True + assert truncation["node_universe_count"] == 7 + assert truncation["node_shown_count"] == 4 + assert ("h", "d") in filtered + assert ("x", "y") in filtered and ("y", "x") in filtered + + +def test_select_dependency_graph_nodes_node_id_fn_seeds_by_zoom_id() -> None: + def to_package(module: str) -> str: + return module.split(":", 1)[0] + + nodes, _filtered, truncation = select_dependency_graph_nodes( + [("p0", "p1"), ("p0", "p2"), ("p0", "p3"), ("p4", "p5")], + dep_cycles=[["p5:m1", "p5:m2"]], + longest_chains=[], + max_nodes=2, + max_edges=100, + node_id_fn=to_package, + ) + # The module-level cycle node "p5:m1" maps to package id "p5" before the + # membership check, so the package survives the seed pass. + assert nodes == ["p0", "p5"] + assert truncation["truncated"] is True + + +def test_select_dependency_graph_nodes_caps_edges_without_node_truncation() -> None: + nodes, filtered, truncation = select_dependency_graph_nodes( + [("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")], + dep_cycles=[], + longest_chains=[], + max_nodes=20, + max_edges=2, + ) + assert nodes == ["a", "b", "c"] + assert filtered == [("a", "b"), ("b", "c")] + assert truncation["truncated"] is True + assert truncation["node_shown_count"] == 3 + assert truncation["edge_universe_count"] == 4 + assert truncation["edge_shown_count"] == 2 From f01bd02dad744569efb4f4361479b0c3220ac9bb Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 15:15:02 +0500 Subject: [PATCH 002/113] feat(report): derive module_map projection in derived report --- codeclone/report/document/builder.py | 7 +- codeclone/report/document/derived.py | 408 ++++++++++++++++++++++++++- tests/test_module_map.py | 219 ++++++++++++++ 3 files changed, 631 insertions(+), 3 deletions(-) create mode 100644 tests/test_module_map.py diff --git a/codeclone/report/document/builder.py b/codeclone/report/document/builder.py index 9d22dfaa..27e36cbc 100644 --- a/codeclone/report/document/builder.py +++ b/codeclone/report/document/builder.py @@ -23,7 +23,11 @@ ) from ._common import _collect_report_file_list -from .derived import _build_derived_overview, _build_derived_suggestions +from .derived import ( + _build_derived_module_map, + _build_derived_overview, + _build_derived_suggestions, +) from .findings import _build_findings_payload from .integrity import _build_integrity_payload from .inventory import ( @@ -95,6 +99,7 @@ def build_report_document( "suggestions": _build_derived_suggestions(suggestions), "overview": overview_payload, "hotlists": hotlists_payload, + "module_map": _build_derived_module_map(metrics_payload), } integrity_payload = _build_integrity_payload( report_schema_version=report_schema_version, diff --git a/codeclone/report/document/derived.py b/codeclone/report/document/derived.py index add20423..3461ea90 100644 --- a/codeclone/report/document/derived.py +++ b/codeclone/report/document/derived.py @@ -7,8 +7,8 @@ from __future__ import annotations from collections import Counter -from collections.abc import Mapping, Sequence -from typing import TYPE_CHECKING +from collections.abc import Callable, Mapping, Sequence +from typing import TYPE_CHECKING, Final from ...domain.findings import ( CATEGORY_COHESION, @@ -45,6 +45,8 @@ design_group_id, structural_group_id, ) +from ...metrics.dependencies import select_dependency_graph_nodes +from ...metrics.overloaded_modules import _score_quantile from ...utils.coerce import as_float as _as_float from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping @@ -423,3 +425,405 @@ def _build_derived_suggestions( } for suggestion in suggestion_rows ] + + +_MODULE_MAP_SCHEMA_VERSION: Final = "1" +_MODULE_MAP_MAX_PACKAGE_NODES: Final = 28 +_MODULE_MAP_MAX_MODULE_NODES: Final = 40 +_MODULE_MAP_MAX_EDGES: Final = 120 +_MODULE_MAP_UNWIND_CANDIDATE_CAP: Final = 25 +_MODULE_MAP_OVERMERGE_MODULE_FLOOR: Final = 80 +_MODULE_MAP_MONOLITH_PACKAGE_CEILING: Final = 2 +_MODULE_MAP_OVERMERGE_PACKAGE_CEILING: Final = 3 +_MODULE_MAP_CANDIDATE: Final = "candidate" +_MODULE_MAP_RANKED_ONLY: Final = "ranked_only" +_MODULE_MAP_NON_CANDIDATE: Final = "non_candidate" +_MODULE_MAP_SEED_POLICY: Final = "cycles_then_chains_then_degree" + + +def _module_prefix(module: str, depth: int) -> str: + parts = module.split(".") + if len(parts) <= depth: + return module + return ".".join(parts[:depth]) + + +def _package_node_id(depth: int) -> Callable[[str], str]: + def _to_package(module: str) -> str: + return _module_prefix(module, depth) + + return _to_package + + +def _module_edges_from_items(edge_items: Sequence[object]) -> list[tuple[str, str]]: + edges: list[tuple[str, str]] = [] + for item in edge_items: + mapping = _as_mapping(item) + source = str(mapping.get("source", "")).strip() + target = str(mapping.get("target", "")).strip() + if source and target: + edges.append((source, target)) + return edges + + +def _string_paths(raw: Sequence[object]) -> list[list[str]]: + return [[str(node) for node in _as_sequence(path)] for path in raw] + + +def _module_map_unavailable_shell(reason: str) -> dict[str, object]: + def _empty_truncation() -> dict[str, object]: + return { + "truncated": False, + "node_universe_count": 0, + "node_shown_count": 0, + "edge_universe_count": 0, + "edge_shown_count": 0, + "seed_policy": _MODULE_MAP_SEED_POLICY, + } + + return { + "schema_version": _MODULE_MAP_SCHEMA_VERSION, + "scope": "report_only", + "default_zoom": "packages", + "summary": { + "available": False, + "reason": reason, + "module_count": 0, + "package_count_depth2": 0, + "edge_count": 0, + "unwind_candidate_count": 0, + "overloaded_candidate_count": 0, + "overloaded_population_status": "limited", + }, + "graph_packages": { + "zoom": "packages", + "package_depth": None, + "truncation": _empty_truncation(), + "nodes": [], + "edges": [], + }, + "graph_modules": { + "zoom": "modules", + "package_depth": None, + "truncation": _empty_truncation(), + "nodes": [], + "edges": [], + }, + "unwind_candidates": [], + } + + +def _module_map_zoom_decision( + modules: Sequence[str], module_count: int +) -> tuple[str, int]: + if module_count <= _MODULE_MAP_MAX_MODULE_NODES: + return "modules", 2 + p1 = len({_module_prefix(module, 1) for module in modules}) + p2 = len({_module_prefix(module, 2) for module in modules}) + if p1 <= _MODULE_MAP_MONOLITH_PACKAGE_CEILING: + return "packages", 2 + if ( + p2 <= _MODULE_MAP_OVERMERGE_PACKAGE_CEILING + and module_count > _MODULE_MAP_OVERMERGE_MODULE_FLOOR + ): + return "packages", 3 + if p2 <= _MODULE_MAP_MAX_PACKAGE_NODES: + return "packages", 2 + if p1 <= _MODULE_MAP_MAX_PACKAGE_NODES: + return "packages", 1 + return "packages", 2 + + +def _aggregate_node_overlay( + members: Sequence[str], + *, + overloaded_by_module: Mapping[str, Mapping[str, object]], + cycle_modules: frozenset[str], +) -> dict[str, object]: + scores: list[float] = [] + statuses: set[str] = set() + reasons: set[str] = set() + source_kinds: set[str] = set() + fan_in = 0 + fan_out = 0 + in_cycle = False + for module in members: + item = overloaded_by_module.get(module, {}) + scores.append(_as_float(item.get("score"))) + statuses.add(str(item.get("candidate_status", _MODULE_MAP_NON_CANDIDATE))) + reasons.update( + str(reason) for reason in _as_sequence(item.get("candidate_reasons")) + ) + source_kinds.add(str(item.get("source_kind", ""))) + fan_in += _as_int(item.get("fan_in")) + fan_out += _as_int(item.get("fan_out")) + in_cycle = in_cycle or module in cycle_modules + if _MODULE_MAP_CANDIDATE in statuses: + candidate_status = _MODULE_MAP_CANDIDATE + elif _MODULE_MAP_RANKED_ONLY in statuses: + candidate_status = _MODULE_MAP_RANKED_ONLY + else: + candidate_status = _MODULE_MAP_NON_CANDIDATE + return { + "fan_in": fan_in, + "fan_out": fan_out, + "source_kinds": sorted(source_kinds), + "in_cycle": in_cycle, + "overloaded": { + "score": max(scores) if scores else 0.0, + "candidate_status": candidate_status, + "candidate_reasons": sorted(reasons), + }, + } + + +def _module_map_node( + node_id: str, + *, + package_depth: int | None, + overloaded_by_module: Mapping[str, Mapping[str, object]], + cycle_modules: frozenset[str], +) -> dict[str, object]: + if package_depth is not None: + members = sorted( + module + for module in overloaded_by_module + if _module_prefix(module, package_depth) == node_id + ) + overlay = _aggregate_node_overlay( + members, + overloaded_by_module=overloaded_by_module, + cycle_modules=cycle_modules, + ) + fan_in = _as_int(overlay["fan_in"]) + fan_out = _as_int(overlay["fan_out"]) + source_kinds: object = overlay["source_kinds"] + in_cycle = bool(overlay["in_cycle"]) + overloaded: object = overlay["overloaded"] + else: + item = overloaded_by_module.get(node_id, {}) + fan_in = _as_int(item.get("fan_in")) + fan_out = _as_int(item.get("fan_out")) + source_kinds = sorted({str(item.get("source_kind", ""))}) if item else [] + in_cycle = node_id in cycle_modules + overloaded = { + "score": _as_float(item.get("score")), + "candidate_status": str( + item.get("candidate_status", _MODULE_MAP_NON_CANDIDATE) + ), + "candidate_reasons": sorted( + str(reason) for reason in _as_sequence(item.get("candidate_reasons")) + ), + } + return { + "id": node_id, + "label": node_id, + "fan_in": fan_in, + "fan_out": fan_out, + "total_degree": fan_in + fan_out, + "source_kinds": source_kinds, + "in_cycle": in_cycle, + "overloaded": overloaded, + } + + +def _build_module_graph_view( + module_edges: Sequence[tuple[str, str]], + *, + zoom: str, + package_depth: int | None, + dep_cycles: Sequence[Sequence[str]], + longest_chains: Sequence[Sequence[str]], + max_nodes: int, + overloaded_by_module: Mapping[str, Mapping[str, object]], + cycle_modules: frozenset[str], +) -> dict[str, object]: + weights: Counter[tuple[str, str]] = Counter() + node_id_fn: Callable[[str], str] | None + if package_depth is not None: + node_id_fn = _package_node_id(package_depth) + for source, target in module_edges: + edge = ( + _module_prefix(source, package_depth), + _module_prefix(target, package_depth), + ) + if edge[0] != edge[1]: + weights[edge] += 1 + else: + node_id_fn = None + for source, target in module_edges: + if source != target: + weights[(source, target)] += 1 + nodes, sampled_edges, truncation = select_dependency_graph_nodes( + sorted(weights), + dep_cycles=dep_cycles, + longest_chains=longest_chains, + max_nodes=max_nodes, + max_edges=_MODULE_MAP_MAX_EDGES, + node_id_fn=node_id_fn, + ) + return { + "zoom": zoom, + "package_depth": package_depth, + "truncation": truncation, + "nodes": [ + _module_map_node( + node_id, + package_depth=package_depth, + overloaded_by_module=overloaded_by_module, + cycle_modules=cycle_modules, + ) + for node_id in nodes + ], + "edges": [ + {"source": source, "target": target, "weight": weights[(source, target)]} + for source, target in sampled_edges + ], + } + + +def _unwind_signals( + item: Mapping[str, object], + *, + chain_modules: frozenset[str], + p90_fan_in: float, +) -> list[str]: + reasons = {str(reason) for reason in _as_sequence(item.get("candidate_reasons"))} + fan_in = _as_int(item.get("fan_in")) + fan_out = _as_int(item.get("fan_out")) + instability = _as_float(item.get("instability")) + signals: list[str] = [] + if "dependency_pressure" in reasons: + signals.append("dependency_pressure") + if "hub_like_shape" in reasons: + signals.append("hub_like_shape") + if "repeated_import_pressure" in reasons: + signals.append("repeated_import_pressure") + if str(item.get("module")) in chain_modules: + signals.append("chain_bottleneck") + if instability >= 0.75 and fan_out >= 3: + signals.append("high_instability") + if fan_in >= p90_fan_in and fan_in > 2 * fan_out + 1: + signals.append("central_sink") + return signals + + +def _module_map_unwind_candidates( + overloaded_items: Sequence[Mapping[str, object]], + *, + longest_chains: Sequence[Sequence[str]], +) -> list[dict[str, object]]: + chain_modules = frozenset(str(node) for chain in longest_chains for node in chain) + fan_in_sorted = sorted(_as_int(item.get("fan_in")) for item in overloaded_items) + p90_fan_in = ( + _score_quantile([float(value) for value in fan_in_sorted], 0.9) + if fan_in_sorted + else 0.0 + ) + rows: list[dict[str, object]] = [] + for item in overloaded_items: + signals = _unwind_signals( + item, chain_modules=chain_modules, p90_fan_in=p90_fan_in + ) + candidate_status = str(item.get("candidate_status", _MODULE_MAP_NON_CANDIDATE)) + emit = bool(signals) and ( + candidate_status == _MODULE_MAP_CANDIDATE + or "chain_bottleneck" in signals + or "high_instability" in signals + or "central_sink" in signals + ) + if not emit: + continue + rows.append( + { + "module": str(item.get("module")), + "filepath": str(item.get("filepath", "")), + "source_kind": str(item.get("source_kind", "")), + "fan_in": _as_int(item.get("fan_in")), + "fan_out": _as_int(item.get("fan_out")), + "score": _as_float(item.get("score")), + "dependency_score": _as_float(item.get("dependency_score")), + "candidate_status": candidate_status, + "signals": signals, + } + ) + rows.sort( + key=lambda row: ( + -len(_as_sequence(row["signals"])), + -_as_float(row["dependency_score"]), + -_as_int(row["fan_in"]), + -_as_int(row["fan_out"]), + str(row["module"]), + ) + ) + return rows[:_MODULE_MAP_UNWIND_CANDIDATE_CAP] + + +def _build_derived_module_map( + metrics_payload: Mapping[str, object], +) -> dict[str, object]: + families = _as_mapping(metrics_payload.get("families")) + dependencies = _as_mapping(families.get("dependencies")) + module_edges = _module_edges_from_items(_as_sequence(dependencies.get("items"))) + if not dependencies or not module_edges: + return _module_map_unavailable_shell("dependencies_skipped") + modules = sorted({node for edge in module_edges for node in edge}) + module_count = len(modules) + dep_cycles = _string_paths(_as_sequence(dependencies.get("cycles"))) + longest_chains = _string_paths(_as_sequence(dependencies.get("longest_chains"))) + cycle_modules = frozenset(node for cycle in dep_cycles for node in cycle) + overloaded = _as_mapping(families.get("overloaded_modules")) + overloaded_items = [ + _as_mapping(item) for item in _as_sequence(overloaded.get("items")) + ] + overloaded_summary = _as_mapping(overloaded.get("summary")) + population_status = str(overloaded_summary.get("population_status") or "ok") + overloaded_by_module: dict[str, Mapping[str, object]] = { + str(item.get("module")): item for item in overloaded_items + } + zoom, package_depth = _module_map_zoom_decision(modules, module_count) + unwind = _module_map_unwind_candidates( + overloaded_items, longest_chains=longest_chains + ) + overloaded_candidate_count = sum( + 1 + for item in overloaded_items + if str(item.get("candidate_status")) == _MODULE_MAP_CANDIDATE + ) + return { + "schema_version": _MODULE_MAP_SCHEMA_VERSION, + "scope": "report_only", + "default_zoom": zoom, + "summary": { + "available": True, + "module_count": module_count, + "package_count_depth2": len( + {_module_prefix(module, 2) for module in modules} + ), + "edge_count": len(set(module_edges)), + "unwind_candidate_count": len(unwind), + "overloaded_candidate_count": overloaded_candidate_count, + "overloaded_population_status": population_status, + }, + "graph_packages": _build_module_graph_view( + module_edges, + zoom="packages", + package_depth=package_depth, + dep_cycles=dep_cycles, + longest_chains=longest_chains, + max_nodes=_MODULE_MAP_MAX_PACKAGE_NODES, + overloaded_by_module=overloaded_by_module, + cycle_modules=cycle_modules, + ), + "graph_modules": _build_module_graph_view( + module_edges, + zoom="modules", + package_depth=None, + dep_cycles=dep_cycles, + longest_chains=longest_chains, + max_nodes=_MODULE_MAP_MAX_MODULE_NODES, + overloaded_by_module=overloaded_by_module, + cycle_modules=cycle_modules, + ), + "unwind_candidates": unwind, + } diff --git a/tests/test_module_map.py b/tests/test_module_map.py new file mode 100644 index 00000000..4dc5f7eb --- /dev/null +++ b/tests/test_module_map.py @@ -0,0 +1,219 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + +from codeclone.report.document.derived import _build_derived_module_map + + +def _payload( + *, + edges: Sequence[tuple[str, str]], + cycles: Sequence[Sequence[str]] = (), + chains: Sequence[Sequence[str]] = (), + overloaded: Sequence[dict[str, object]] = (), + population_status: str = "ok", +) -> dict[str, object]: + modules = {node for edge in edges for node in edge} + return { + "families": { + "dependencies": { + "summary": {"modules": len(modules), "edges": len(edges)}, + "items": [ + { + "source": source, + "target": target, + "import_type": "import", + "line": index + 1, + } + for index, (source, target) in enumerate(edges) + ], + "cycles": [list(cycle) for cycle in cycles], + "longest_chains": [list(chain) for chain in chains], + }, + "overloaded_modules": { + "summary": {"population_status": population_status}, + "items": [dict(item) for item in overloaded], + }, + } + } + + +def _overloaded( + module: str, + *, + fan_in: int = 0, + fan_out: int = 0, + score: float = 0.0, + dependency_score: float = 0.0, + candidate_status: str = "non_candidate", + candidate_reasons: Sequence[str] = (), + instability: float = 0.0, + source_kind: str = "production", +) -> dict[str, object]: + return { + "module": module, + "filepath": module.replace(".", "/") + ".py", + "source_kind": source_kind, + "fan_in": fan_in, + "fan_out": fan_out, + "score": score, + "dependency_score": dependency_score, + "candidate_status": candidate_status, + "candidate_reasons": list(candidate_reasons), + "instability": instability, + } + + +def test_small_repo_default_zoom_modules_not_truncated() -> None: + module_map: Any = _build_derived_module_map( + _payload(edges=[("pkg.a", "pkg.b"), ("pkg.b", "pkg.c")]) + ) + assert module_map["schema_version"] == "1" + assert module_map["scope"] == "report_only" + assert module_map["summary"]["available"] is True + assert module_map["default_zoom"] == "modules" + assert module_map["summary"]["module_count"] == 3 + assert module_map["graph_modules"]["truncation"]["truncated"] is False + + +def test_dependencies_skipped_shell() -> None: + module_map: Any = _build_derived_module_map({"families": {}}) + assert module_map["summary"]["available"] is False + assert module_map["summary"]["reason"] == "dependencies_skipped" + assert module_map["graph_packages"]["nodes"] == [] + assert module_map["graph_modules"]["nodes"] == [] + assert module_map["unwind_candidates"] == [] + + +def test_monolith_avoids_depth_one() -> None: + mods = [f"app.m{index}" for index in range(50)] + edges = [(mods[index], mods[index + 1]) for index in range(len(mods) - 1)] + module_map: Any = _build_derived_module_map(_payload(edges=edges)) + assert module_map["summary"]["module_count"] == 50 + assert module_map["default_zoom"] == "packages" + assert module_map["graph_packages"]["package_depth"] == 2 + + +def test_overmerge_uses_depth_three() -> None: + mods = [f"{root}.x.m{index}" for root in "abc" for index in range(30)] + edges = [(mods[index], mods[index + 1]) for index in range(len(mods) - 1)] + module_map: Any = _build_derived_module_map(_payload(edges=edges)) + assert module_map["summary"]["module_count"] == 90 + assert module_map["graph_packages"]["package_depth"] == 3 + + +def test_medium_repo_full_package_graph_depth_two() -> None: + mods = [f"p{pkg}.sub.m{index}" for pkg in range(10) for index in range(5)] + edges = [(mods[index], mods[index + 1]) for index in range(len(mods) - 1)] + module_map: Any = _build_derived_module_map(_payload(edges=edges)) + assert module_map["summary"]["module_count"] == 50 + assert module_map["default_zoom"] == "packages" + assert module_map["graph_packages"]["package_depth"] == 2 + assert module_map["graph_packages"]["truncation"]["truncated"] is False + + +def test_flat_namespace_uses_depth_one() -> None: + # >40 modules (past row B), 15 roots (P1<=28), 45 depth-2 prefixes (P2>28) -> row F. + mods = [ + f"r{root}.s{sub}.x{leaf}" + for root in range(15) + for sub in range(3) + for leaf in range(2) + ] + edges = [(mods[index], mods[index + 1]) for index in range(len(mods) - 1)] + module_map: Any = _build_derived_module_map(_payload(edges=edges)) + assert module_map["summary"]["module_count"] == 90 + assert module_map["graph_packages"]["package_depth"] == 1 + + +def test_edge_aggregation_weights() -> None: + module_map: Any = _build_derived_module_map( + _payload(edges=[("pkg.a", "pkg.b"), ("pkg.a", "pkg.b"), ("pkg.b", "pkg.c")]) + ) + weights = { + (edge["source"], edge["target"]): edge["weight"] + for edge in module_map["graph_modules"]["edges"] + } + assert weights[("pkg.a", "pkg.b")] == 2 + assert weights[("pkg.b", "pkg.c")] == 1 + assert module_map["summary"]["edge_count"] == 2 + + +def test_truncation_preserves_cycle_nodes_at_package_zoom() -> None: + mods = [f"pkg.m{index}" for index in range(45)] + edges = [(mods[0], mods[index]) for index in range(1, 43)] + edges += [(mods[43], mods[44]), (mods[44], mods[43])] + module_map: Any = _build_derived_module_map( + _payload(edges=edges, cycles=[[mods[43], mods[44]]]) + ) + assert module_map["default_zoom"] == "packages" + assert module_map["graph_packages"]["truncation"]["truncated"] is True + package_nodes = {node["id"] for node in module_map["graph_packages"]["nodes"]} + assert "pkg.m43" in package_nodes + assert "pkg.m44" in package_nodes + + +def test_unwind_candidate_signals_and_ignore_graph_truncation() -> None: + overloaded = [ + _overloaded( + "a.b", + fan_in=10, + fan_out=5, + dependency_score=0.9, + candidate_status="candidate", + candidate_reasons=["dependency_pressure", "hub_like_shape"], + ), + _overloaded("z.z", candidate_status="non_candidate"), + ] + module_map: Any = _build_derived_module_map( + _payload( + edges=[("a.b", "c.d")], + chains=[["a.b", "c.d"]], + overloaded=overloaded, + ) + ) + rows = {row["module"]: row for row in module_map["unwind_candidates"]} + assert "z.z" not in rows + assert rows["a.b"]["signals"] == [ + "dependency_pressure", + "hub_like_shape", + "chain_bottleneck", + ] + + +def test_payload_is_order_independent() -> None: + edges = [("a.b", "c.d"), ("c.d", "e.f"), ("a.b", "e.f")] + overloaded = [_overloaded(module, fan_in=2, fan_out=1) for module in "ab"] + first = _build_derived_module_map(_payload(edges=edges, overloaded=overloaded)) + second = _build_derived_module_map( + _payload(edges=list(reversed(edges)), overloaded=list(reversed(overloaded))) + ) + assert first == second + + +def test_ranked_only_population_has_no_candidate_overlay() -> None: + overloaded = [ + _overloaded("a.b", candidate_status="ranked_only"), + _overloaded("c.d", candidate_status="ranked_only"), + ] + module_map: Any = _build_derived_module_map( + _payload( + edges=[("a.b", "c.d")], + overloaded=overloaded, + population_status="limited", + ) + ) + assert module_map["summary"]["overloaded_population_status"] == "limited" + assert module_map["summary"]["overloaded_candidate_count"] == 0 + statuses = { + node["overloaded"]["candidate_status"] + for node in module_map["graph_modules"]["nodes"] + } + assert "candidate" not in statuses From 3b60b737e5896b04724d412e81a09d09fb50e20b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 19:51:14 +0500 Subject: [PATCH 003/113] feat(mcp): expose module_map report section in get_report_section --- codeclone/surfaces/mcp/_session_shared.py | 2 + .../surfaces/mcp/_session_state_mixin.py | 7 +++ codeclone/surfaces/mcp/messages/params.py | 2 +- .../contract_snapshots/mcp_tool_schemas.json | 2 +- tests/test_mcp_service.py | 47 ++++++++++++++++++- 5 files changed, 57 insertions(+), 3 deletions(-) diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py index ba8960e8..5cd25a2a 100644 --- a/codeclone/surfaces/mcp/_session_shared.py +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -168,6 +168,7 @@ "metrics", "metrics_detail", "derived", + "module_map", "changed", "integrity", ] @@ -276,6 +277,7 @@ "metrics", "metrics_detail", "derived", + "module_map", "changed", "integrity", } diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py index da11545e..6104cdf9 100644 --- a/codeclone/surfaces/mcp/_session_state_mixin.py +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -968,6 +968,13 @@ def get_report_section( ) if validated_section == "derived": return self._derived_section_payload(record) + if validated_section == "module_map": + derived = _helpers._as_mapping(report_document.get("derived")) + if not derived: + raise MCPServiceContractError( + "Report section 'module_map' is not available in this run." + ) + return dict(_helpers._as_mapping(derived.get("module_map"))) payload = report_document.get(validated_section) if not isinstance(payload, Mapping): raise MCPServiceContractError( diff --git a/codeclone/surfaces/mcp/messages/params.py b/codeclone/surfaces/mcp/messages/params.py index 270b1ed1..2e13579e 100644 --- a/codeclone/surfaces/mcp/messages/params.py +++ b/codeclone/surfaces/mcp/messages/params.py @@ -266,7 +266,7 @@ Field( description=( "meta, inventory, findings, metrics, metrics_detail, changed, " - "derived, integrity, or all." + "derived, module_map, integrity, or all." ) ), ] diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index 56c68e1d..cca5be7c 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -1898,7 +1898,7 @@ }, "section": { "default": "all", - "description": "meta, inventory, findings, metrics, metrics_detail, changed, derived, integrity, or all.", + "description": "meta, inventory, findings, metrics, metrics_detail, changed, derived, module_map, integrity, or all.", "title": "Section", "type": "string" }, diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index e01b17da..5c6f21f1 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -18,7 +18,7 @@ from datetime import timedelta from pathlib import Path from types import SimpleNamespace -from typing import Any, cast +from typing import Any, Literal, cast from unittest.mock import patch import pytest @@ -11557,3 +11557,48 @@ def test_implementation_context_public_surface_full_detail( assert rows[0]["record_kind"] == "symbol" assert rows[0]["module"] == "pkg.mod" assert rows[0]["params"] == [{"name": "value"}] + + +def _module_map_service( + tmp_path: Path, *, analysis_mode: Literal["full", "clones_only"] = "full" +) -> CodeCloneMCPService: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + analysis_mode=analysis_mode, + ) + ) + return service + + +def test_get_report_section_module_map_matches_derived(tmp_path: Path) -> None: + service = _module_map_service(tmp_path) + module_map = service.get_report_section(section="module_map") + derived = cast( + "dict[str, object]", service.get_report_section(section="all")["derived"] + ) + assert module_map == derived["module_map"] + assert set(module_map) >= { + "schema_version", + "scope", + "default_zoom", + "summary", + "graph_packages", + "graph_modules", + "unwind_candidates", + } + assert module_map["schema_version"] == "1" + + +def test_get_report_section_module_map_clones_only_returns_shell( + tmp_path: Path, +) -> None: + module_map = _module_map_service( + tmp_path, analysis_mode="clones_only" + ).get_report_section(section="module_map") + assert cast("dict[str, object]", module_map["summary"])["available"] is False + assert module_map["unwind_candidates"] == [] From cd779be9033ad192c31b7299a7f968bb903924b6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 21:03:19 +0500 Subject: [PATCH 004/113] feat(html): add Module map tab and extract shared dep-graph layout --- codeclone/report/html/assemble.py | 14 + codeclone/report/html/assets/css.py | 4 + .../report/html/sections/_dependencies.py | 285 +----------- codeclone/report/html/sections/_module_map.py | 409 ++++++++++++++++++ .../report/html/widgets/dep_graph_layout.py | 302 +++++++++++++ codeclone/report/html/widgets/icons.py | 7 + codeclone/report/messages/chrome.py | 1 + tests/test_html_report.py | 297 ++++++++++++- tests/test_html_report_helpers.py | 12 +- 9 files changed, 1050 insertions(+), 281 deletions(-) create mode 100644 codeclone/report/html/sections/_module_map.py create mode 100644 codeclone/report/html/widgets/dep_graph_layout.py diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index dacfd710..253f9aec 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -40,6 +40,7 @@ TAB_DEAD_CODE, TAB_DEPENDENCIES, TAB_FINDINGS, + TAB_MODULE_MAP, TAB_OVERVIEW, TAB_QUALITY, TAB_SUGGESTIONS, @@ -56,6 +57,7 @@ from .sections._dead_code import render_dead_code_panel from .sections._dependencies import render_dependencies_panel from .sections._meta import build_topbar_provenance_summary, render_meta_panel +from .sections._module_map import render_module_map_panel from .sections._overview import render_overview_panel from .sections._structural import render_structural_panel from .sections._suggestions import render_suggestions_panel @@ -113,6 +115,7 @@ def build_html_report( overview_html = render_overview_panel(ctx) clones_html, _novelty_enabled, _total_new, _total_known = render_clones_panel(ctx) quality_html = render_quality_panel(ctx) + module_map_html = render_module_map_panel(ctx) dependencies_html = render_dependencies_panel(ctx) dead_code_html = render_dead_code_panel(ctx) suggestions_html = render_suggestions_panel(ctx) @@ -136,6 +139,10 @@ def build_html_report( == CONFIDENCE_HIGH ) dep_cycles = len(_as_sequence(ctx.dependencies_map.get("cycles"))) + module_map_summary = _as_mapping( + _as_mapping(ctx.derived_map.get("module_map")).get("summary") + ) + module_map_unwind = _as_int(module_map_summary.get("unwind_candidate_count")) structural_count = len( tuple(normalize_structural_findings(ctx.structural_findings)) ) @@ -169,6 +176,7 @@ def _tab_badge(count: int) -> str: "overview": "overview", "clones": "clones", "quality": "quality", + "module-map": "module-map", "dependencies": "dependencies", "dead-code": "dead-code", "suggestions": "suggestions", @@ -178,6 +186,12 @@ def _tab_badge(count: int) -> str: ("overview", TAB_OVERVIEW, overview_html, ""), ("clones", TAB_CLONES, clones_html, _tab_badge(ctx.clone_groups_total)), ("quality", TAB_QUALITY, quality_html, _tab_badge(quality_issues)), + ( + "module-map", + TAB_MODULE_MAP, + module_map_html, + _tab_badge(module_map_unwind), + ), ("dependencies", TAB_DEPENDENCIES, dependencies_html, _tab_badge(dep_cycles)), ("dead-code", TAB_DEAD_CODE, dead_code_html, _tab_badge(dead_high_conf)), ( diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index af4776dd..4b5664e8 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -854,6 +854,10 @@ .dep-node{transition:fill-opacity var(--dur-fast) var(--ease)} .dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease)} .dep-label{transition:fill var(--dur-fast) var(--ease)} +.mm-candidate-ring{fill:none;stroke:var(--warning);stroke-width:1.5;stroke-opacity:.9} +.mm-truncation-notice{margin-bottom:var(--sp-4);padding:var(--sp-2) var(--sp-4); + font-size:.8rem;color:var(--text-muted);background:var(--bg-raised); + border:1px solid var(--border);border-radius:var(--radius-lg)} /* Hub bar */ .dep-hub-bar{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap; diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index ebbeb465..3648e6f7 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -8,8 +8,7 @@ from __future__ import annotations -import math -from collections.abc import Mapping, Sequence +from collections.abc import Sequence from typing import TYPE_CHECKING from codeclone.metrics.dependencies import select_dependency_graph_nodes @@ -24,6 +23,17 @@ _tab_empty, ) from ..widgets.components import Tone, insight_block +from ..widgets.dep_graph_layout import ( + _build_cycle_edges, + _build_degree_maps, + _build_layer_groups, + _build_node_radii, + _build_svg_defs, + _hub_threshold, + _layout_dep_graph, + _render_dep_edges, + _render_dep_nodes_and_labels, +) from ..widgets.glossary import glossary_tip from ..widgets.tables import render_rows_table @@ -54,277 +64,6 @@ def _select_dep_nodes( return nodes, filtered -def _build_degree_maps( - nodes: Sequence[str], - edges: Sequence[tuple[str, str]], -) -> tuple[dict[str, int], dict[str, int]]: - in_degree: dict[str, int] = dict.fromkeys(nodes, 0) - out_degree: dict[str, int] = dict.fromkeys(nodes, 0) - for source, target in edges: - in_degree[target] += 1 - out_degree[source] += 1 - return in_degree, out_degree - - -def _build_layer_groups( - nodes: Sequence[str], - edges: Sequence[tuple[str, str]], - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], -) -> dict[int, list[str]]: - children: dict[str, list[str]] = {node: [] for node in nodes} - for source, target in edges: - children[source].append(target) - - layers: dict[str, int] = {} - roots = sorted(node for node in nodes if in_degree[node] == 0) - if not roots: - roots = sorted(nodes, key=lambda node: -out_degree.get(node, 0))[:1] - queue = list(roots) - for node in queue: - layers.setdefault(node, 0) - while queue: - node = queue.pop(0) - for child in children.get(node, []): - if child in layers: - continue - layers[child] = layers[node] + 1 - queue.append(child) - - max_layer = max(layers.values(), default=0) - for node in nodes: - if node not in layers: - layers[node] = max_layer + 1 - - layer_groups: dict[int, list[str]] = {} - for node, layer in layers.items(): - layer_groups.setdefault(layer, []).append(node) - for layer in layer_groups: - layer_groups[layer].sort() - return layer_groups - - -def _layout_dep_graph( - layer_groups: Mapping[int, Sequence[str]], - *, - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], -) -> tuple[int, int, int, dict[str, tuple[float, float]]]: - num_layers = max(layer_groups.keys(), default=0) + 1 - max_per_layer = max((len(members) for members in layer_groups.values()), default=1) - pad_x, pad_y = 56.0, 36.0 - prefer_horizontal = num_layers >= 6 and num_layers > max_per_layer + 2 - - def _ordered_members(members: Sequence[str]) -> list[str]: - if not prefer_horizontal or len(members) < 3: - return list(members) - ranked = sorted( - members, - key=lambda node: ( - -(in_degree.get(node, 0) + out_degree.get(node, 0)), - node, - ), - ) - center = (len(ranked) - 1) / 2 - slot_order = sorted( - range(len(ranked)), - key=lambda index: (abs(index - center), index), - ) - ordered = [""] * len(ranked) - for node, slot in zip(ranked, slot_order, strict=False): - ordered[slot] = node - return ordered - - if prefer_horizontal: - width = max(920, min(1600, num_layers * 118 + max_per_layer * 28 + 180)) - height = max(300, max_per_layer * 84 + 104) - else: - width = max(600, min(1200, max_per_layer * 70 + 140)) - height = max(260, num_layers * 80 + 80) - - positions: dict[str, tuple[float, float]] = {} - for layer_index in range(num_layers): - members = layer_groups.get(layer_index, []) - count = len(members) - if prefer_horizontal: - members = _ordered_members(members) - layer_step = (width - 2 * pad_x) / max(1, num_layers - 1) - x = pad_x + layer_index * layer_step - fan = min(14.0, layer_step * 0.12) - offset_unit = fan / max(1, count - 1) - center = (count - 1) / 2 - for index, node in enumerate(members): - y = pad_y + (index + 0.5) * ((height - 2 * pad_y) / max(1, count)) - positions[node] = (x + (index - center) * offset_unit, y) - continue - - y = pad_y + layer_index * ((height - 2 * pad_y) / max(1, num_layers - 1)) - for index, node in enumerate(members): - x = pad_x + (index + 0.5) * ((width - 2 * pad_x) / max(1, count)) - positions[node] = (x, y) - return width, height, max_per_layer, positions - - -def _hub_threshold( - nodes: Sequence[str], in_degree: Mapping[str, int], out_degree: Mapping[str, int] -) -> int: - degrees = [in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes] - if not degrees: - return 99 - degrees_sorted = sorted(degrees, reverse=True) - return int(degrees_sorted[max(0, len(degrees_sorted) // 5)]) - - -def _build_node_radii( - nodes: Sequence[str], - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], - cycle_node_set: set[str], - hub_threshold: int, -) -> dict[str, float]: - node_radii: dict[str, float] = {} - for node in nodes: - degree = in_degree.get(node, 0) + out_degree.get(node, 0) - if node in cycle_node_set: - node_radii[node] = min(8.0, max(5.0, 3.5 + degree * 0.4)) - elif degree >= hub_threshold and degree > 2: - node_radii[node] = min(10.0, max(6.0, 4.0 + degree * 0.5)) - elif degree <= 1: - node_radii[node] = 3.0 - else: - node_radii[node] = min(6.0, max(3.5, 3.0 + degree * 0.3)) - return node_radii - - -def _build_svg_defs() -> str: - return ( - "" - '' - '' - '' - '' - '' - '' - "" - ) - - -def _build_cycle_edges(dep_cycles: Sequence[object]) -> set[tuple[str, str]]: - cycle_edges: set[tuple[str, str]] = set() - for cycle in dep_cycles: - parts = [str(part) for part in _as_sequence(cycle)] - for index in range(len(parts)): - cycle_edges.add((parts[index], parts[(index + 1) % len(parts)])) - return cycle_edges - - -def _render_dep_edges( - edges: Sequence[tuple[str, str]], - positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - cycle_edges: set[tuple[str, str]], -) -> list[str]: - rendered: list[str] = [] - for source, target in edges: - x1, y1 = positions[source] - x2, y2 = positions[target] - source_radius, target_radius = node_radii[source], node_radii[target] - dx, dy = x2 - x1, y2 - y1 - distance = math.sqrt(dx * dx + dy * dy) or 1.0 - ux, uy = dx / distance, dy / distance - x1a, y1a = x1 + ux * (source_radius + 2), y1 + uy * (source_radius + 2) - x2a, y2a = x2 - ux * (target_radius + 4), y2 - uy * (target_radius + 4) - mx = (x1a + x2a) / 2 - (y2a - y1a) * 0.06 - my = (y1a + y2a) / 2 + (x2a - x1a) * 0.06 - is_cycle = (source, target) in cycle_edges - stroke = "var(--danger)" if is_cycle else "var(--border-strong)" - opacity = "0.6" if is_cycle else "0.3" - marker = "dep-arrow-cycle" if is_cycle else "dep-arrow" - rendered.append( - f'' - ) - return rendered - - -def _render_dep_nodes_and_labels( - nodes: Sequence[str], - *, - positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], - cycle_node_set: set[str], - hub_threshold: int, - max_per_layer: int, - prefer_horizontal: bool, -) -> tuple[list[str], list[str]]: - nodes_svg: list[str] = [] - labels_svg: list[str] = [] - rotate_labels = prefer_horizontal or max_per_layer > 6 - - for node in nodes: - x, y = positions[node] - radius = node_radii[node] - degree = in_degree.get(node, 0) + out_degree.get(node, 0) - label = _short_label(node) - is_cycle = node in cycle_node_set - is_hub = degree >= hub_threshold and degree > 2 - is_secondary = not is_hub and not is_cycle - - if is_cycle: - fill, fill_opacity, extra = ( - "var(--danger)", - "0.85", - 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', - ) - elif is_hub: - fill, fill_opacity, extra = ( - "var(--accent-primary)", - "1", - 'filter="url(#glow)"', - ) - elif degree <= 1: - fill, fill_opacity, extra = "var(--text-muted)", "0.4", "" - else: - fill, fill_opacity, extra = "var(--accent-primary)", "0.7", "" - - nodes_svg.append( - f'' - ) - - font_size = "10" if is_hub else ("8" if is_secondary else "9") - if rotate_labels: - label_x = ( - x + radius + (4 if is_secondary else 6 if prefer_horizontal else 0) - ) - label_y = ( - y - radius - (1 if is_secondary else 2 if prefer_horizontal else 6) - ) - labels_svg.append( - f'' - f"{_escape_html(node)}{_escape_html(label)}" - ) - continue - - labels_svg.append( - f'' - f"{_escape_html(node)}{_escape_html(label)}" - ) - - return nodes_svg, labels_svg - - def _render_dep_svg( edges: Sequence[tuple[str, str]], cycle_node_set: set[str], diff --git a/codeclone/report/html/sections/_module_map.py b/codeclone/report/html/sections/_module_map.py new file mode 100644 index 00000000..b4f71c52 --- /dev/null +++ b/codeclone/report/html/sections/_module_map.py @@ -0,0 +1,409 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Module map panel renderer. + +Render-only: draws the precomputed ``derived.module_map`` graph (sampled +packages/modules), unwind-candidate triage, and a top-overloaded slice. No +projection math lives here — the graph, truncation, and unwind rows are +computed once in ``report.document.derived``. +""" + +from __future__ import annotations + +import math +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from codeclone.utils import coerce as _coerce + +from ..primitives.escape import _escape_html +from ..widgets.badges import _micro_badges, _short_label, _stat_card, _tab_empty +from ..widgets.components import Tone, insight_block +from ..widgets.dep_graph_layout import ( + _build_degree_maps, + _build_layer_groups, + _build_node_radii, + _build_svg_defs, + _hub_threshold, + _layout_dep_graph, +) +from ..widgets.glossary import glossary_tip +from ..widgets.tables import render_rows_table + +if TYPE_CHECKING: + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + +_CANDIDATE = "candidate" +_OVERLOADED_TOP_CAP = 10 +_EMPTY_GRAPH_MESSAGE = "Dependency graph is not available." +_METRICS_SKIPPED = "Metrics are skipped for this run." + +# Mandatory honesty copy (spec §11): report-only, sampled SVG, full tables. +_MODULE_MAP_INSIGHT = ( + "Report-only import-graph signals for refactor triage. Not CI gates. The SVG " + "may show a deterministic sample of packages/modules on large repos; unwind " + "and overload tables list module-level facts for the full codebase. Verify in " + "source before editing." +) + +_MM_LEGEND = ( + '
' + '' + ' Hub' + '' + '' + ' ' + "Overload candidate" + '' + ' ' + "In cycle" + '' + ' Leaf
' +) + + +def _mm_edge_stroke_width(weight: int) -> int: + if weight <= 1: + return 1 + return 1 + min(3, math.floor(math.log2(weight))) + + +def _render_mm_edges( + edges: Sequence[tuple[str, str]], + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + weights: Mapping[tuple[str, str], int], +) -> list[str]: + rendered: list[str] = [] + for source, target in edges: + x1, y1 = positions[source] + x2, y2 = positions[target] + ux, uy = _unit_vector(x1, y1, x2, y2) + sx, sy = x1 + ux * (node_radii[source] + 2), y1 + uy * (node_radii[source] + 2) + tx, ty = x2 - ux * (node_radii[target] + 4), y2 - uy * (node_radii[target] + 4) + stroke_width = _mm_edge_stroke_width(weights.get((source, target), 1)) + rendered.append( + f'' + ) + return rendered + + +def _unit_vector(x1: float, y1: float, x2: float, y2: float) -> tuple[float, float]: + dx, dy = x2 - x1, y2 - y1 + distance = math.hypot(dx, dy) or 1.0 + return dx / distance, dy / distance + + +def _mm_node_fill( + *, in_cycle: bool, is_hub: bool, total_degree: int, is_tests: bool +) -> tuple[str, str, str]: + if in_cycle: + return ( + "var(--danger)", + "0.85", + 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', + ) + if is_hub: + return "var(--accent-primary)", "1", 'filter="url(#glow)"' + if total_degree <= 1: + return "var(--text-muted)", "0.4", "" + extra = ( + 'stroke="var(--border-strong)" stroke-width="1" stroke-dasharray="2,2"' + if is_tests + else "" + ) + return "var(--accent-primary)", "0.7", extra + + +def _mm_node_title(node: Mapping[str, object], overloaded: Mapping[str, object]) -> str: + reasons = ", ".join( + str(reason) for reason in _as_sequence(overloaded.get("candidate_reasons")) + ) + title = ( + f"{node.get('id')} · in {_as_int(node.get('fan_in'))} · " + f"out {_as_int(node.get('fan_out'))} · " + f"score {_as_float(overloaded.get('score')):.2f}" + ) + if reasons: + title = f"{title} · {reasons}" + return title + + +def _render_mm_nodes( + nodes: Sequence[Mapping[str, object]], + *, + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + hub_threshold: int, +) -> tuple[list[str], list[str]]: + nodes_svg: list[str] = [] + labels_svg: list[str] = [] + for node in nodes: + node_id = str(node.get("id")) + x, y = positions[node_id] + radius = node_radii[node_id] + total_degree = _as_int(node.get("total_degree")) + overloaded = _as_mapping(node.get("overloaded")) + is_hub = total_degree >= hub_threshold and total_degree > 2 + is_tests = [str(k) for k in _as_sequence(node.get("source_kinds"))] == ["tests"] + fill, opacity, extra = _mm_node_fill( + in_cycle=bool(node.get("in_cycle")), + is_hub=is_hub, + total_degree=total_degree, + is_tests=is_tests, + ) + nodes_svg.append( + f'' + ) + if str(overloaded.get("candidate_status")) == _CANDIDATE: + nodes_svg.append( + f'' + ) + labels_svg.append( + f'' + f"{_escape_html(_mm_node_title(node, overloaded))}" + f"{_escape_html(_short_label(node_id))}" + ) + return nodes_svg, labels_svg + + +def _render_module_map_svg(graph: Mapping[str, object]) -> str: + nodes = [_as_mapping(node) for node in _as_sequence(graph.get("nodes"))] + if not nodes: + return _tab_empty(_EMPTY_GRAPH_MESSAGE) + node_ids = [str(node.get("id")) for node in nodes] + edge_rows = [_as_mapping(edge) for edge in _as_sequence(graph.get("edges"))] + edges = [(str(e.get("source")), str(e.get("target"))) for e in edge_rows] + weights = { + (str(e.get("source")), str(e.get("target"))): _as_int(e.get("weight")) + for e in edge_rows + } + cycle_node_set = { + str(node.get("id")) for node in nodes if bool(node.get("in_cycle")) + } + total_in = {str(n.get("id")): _as_int(n.get("total_degree")) for n in nodes} + total_out = dict.fromkeys(node_ids, 0) + + layout_in, layout_out = _build_degree_maps(node_ids, edges) + layer_groups = _build_layer_groups(node_ids, edges, layout_in, layout_out) + width, height, _max_per_layer, positions = _layout_dep_graph( + layer_groups, in_degree=layout_in, out_degree=layout_out + ) + hub_threshold = _hub_threshold(node_ids, total_in, total_out) + node_radii = _build_node_radii( + node_ids, total_in, total_out, cycle_node_set, hub_threshold + ) + + defs = _build_svg_defs() + edge_svg = _render_mm_edges(edges, positions, node_radii, weights) + node_svg, label_svg = _render_mm_nodes( + nodes, positions=positions, node_radii=node_radii, hub_threshold=hub_threshold + ) + + pad = 60 + return ( + '
' + f'' + f"{defs}{''.join(edge_svg)}{''.join(node_svg)}{''.join(label_svg)}" + "
" + ) + + +def _mm_stat_cards( + summary: Mapping[str, object], active_graph: Mapping[str, object] +) -> str: + truncation = _as_mapping(active_graph.get("truncation")) + cards = [ + _stat_card( + "Nodes shown", + _as_int(truncation.get("node_shown_count")), + detail=_micro_badges( + ("of", _as_int(truncation.get("node_universe_count"))) + ), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Edges shown", + _as_int(truncation.get("edge_shown_count")), + detail=_micro_badges( + ("of", _as_int(truncation.get("edge_universe_count"))) + ), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Unwind candidates", + _as_int(summary.get("unwind_candidate_count")), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Overload candidates", + _as_int(summary.get("overloaded_candidate_count")), + detail=_micro_badges( + ("modules", _as_int(summary.get("module_count"))), + ("packages", _as_int(summary.get("package_count_depth2"))), + ), + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ), + ] + if str(summary.get("overloaded_population_status")) == "limited": + cards.append( + _stat_card( + "Overload population", + "limited", + detail=_micro_badges(("rings", "off")), + value_tone="muted", + css_class="meta-item", + glossary_tip_fn=glossary_tip, + ) + ) + return "".join(cards) + + +def _mm_truncation_notice(active_graph: Mapping[str, object]) -> str: + truncation = _as_mapping(active_graph.get("truncation")) + if not bool(truncation.get("truncated")): + return "" + return ( + '
' + f"Showing {_as_int(truncation.get('node_shown_count'))} of " + f"{_as_int(truncation.get('node_universe_count'))} nodes and " + f"{_as_int(truncation.get('edge_shown_count'))} of " + f"{_as_int(truncation.get('edge_universe_count'))} edges — a deterministic " + "sample seeded by cycles, then chains, then degree. Tables below are full." + "
" + ) + + +def _mm_zoom_toggle( + default_zoom: str, + graph_packages: Mapping[str, object], + graph_modules: Mapping[str, object], +) -> str: + packages_svg = _render_module_map_svg(graph_packages) + modules_svg = _render_module_map_svg(graph_modules) + package_count = len(_as_sequence(graph_packages.get("nodes"))) + module_count = len(_as_sequence(graph_modules.get("nodes"))) + packages_active = "active" if default_zoom == "packages" else "" + modules_active = "" if default_zoom == "packages" else "active" + return ( + '" + f'
{packages_svg}
' + f'
{modules_svg}
' + ) + + +def _mm_unwind_table(unwind_candidates: Sequence[object], ctx: ReportContext) -> str: + rows = [ + ( + str(_as_mapping(row).get("module")), + str(_as_int(_as_mapping(row).get("fan_in"))), + str(_as_int(_as_mapping(row).get("fan_out"))), + f"{_as_float(_as_mapping(row).get('score')):.2f}", + str(_as_mapping(row).get("candidate_status")), + ", ".join(str(s) for s in _as_sequence(_as_mapping(row).get("signals"))), + ) + for row in unwind_candidates + ] + return render_rows_table( + headers=("Module", "Fan-in", "Fan-out", "Score", "Status", "Signals"), + rows=rows, + empty_message="No unwind candidates detected.", + ctx=ctx, + ) + + +def _mm_overloaded_table(ctx: ReportContext) -> str: + items = [ + _as_mapping(item) + for item in _as_sequence(ctx.overloaded_modules_map.get("items")) + ] + ranked = sorted(items, key=lambda item: -_as_float(item.get("score"))) + rows = [ + ( + str(item.get("module")), + f"{_as_float(item.get('score')):.2f}", + str(_as_int(item.get("fan_in"))), + str(_as_int(item.get("fan_out"))), + str(item.get("candidate_status")), + ) + for item in ranked[:_OVERLOADED_TOP_CAP] + ] + return render_rows_table( + headers=("Module", "Score", "Fan-in", "Fan-out", "Status"), + rows=rows, + empty_message="No overloaded modules detected.", + ctx=ctx, + ) + + +def render_module_map_panel(ctx: ReportContext) -> str: + module_map = _as_mapping(ctx.derived_map.get("module_map")) + summary = _as_mapping(module_map.get("summary")) + + answer = _MODULE_MAP_INSIGHT if ctx.metrics_available else _METRICS_SKIPPED + tone: Tone = "info" + insight = insight_block( + question="Where should refactoring unwind dependencies?", + answer=answer, + tone=tone, + ) + + if not module_map or not bool(summary.get("available")): + return insight + _tab_empty(_EMPTY_GRAPH_MESSAGE) + + default_zoom = str(module_map.get("default_zoom") or "packages") + graph_packages = _as_mapping(module_map.get("graph_packages")) + graph_modules = _as_mapping(module_map.get("graph_modules")) + active_graph = graph_packages if default_zoom == "packages" else graph_modules + + return ( + insight + + _mm_truncation_notice(active_graph) + + f'
{_mm_stat_cards(summary, active_graph)}
' + + _mm_zoom_toggle(default_zoom, graph_packages, graph_modules) + + _MM_LEGEND + + '

Unwind candidates

' + + _mm_unwind_table(_as_sequence(module_map.get("unwind_candidates")), ctx) + + '

Top overloaded modules

' + + _mm_overloaded_table(ctx) + ) diff --git a/codeclone/report/html/widgets/dep_graph_layout.py b/codeclone/report/html/widgets/dep_graph_layout.py new file mode 100644 index 00000000..b5284726 --- /dev/null +++ b/codeclone/report/html/widgets/dep_graph_layout.py @@ -0,0 +1,302 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared dependency-graph SVG layout primitives. + +Layout = topological depth; arrows = import direction (``source`` → ``target``). +Both the Dependencies tab (``sections/_dependencies.py``) and the Module map tab +(``sections/_module_map.py``) draw precomputed nodes/edges through these helpers, +so the SVG geometry stays identical across panels. +""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +from codeclone.utils.coerce import as_sequence + +from ..primitives.escape import _escape_html +from .badges import _short_label + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + +def _build_degree_maps( + nodes: Sequence[str], + edges: Sequence[tuple[str, str]], +) -> tuple[dict[str, int], dict[str, int]]: + in_degree: dict[str, int] = dict.fromkeys(nodes, 0) + out_degree: dict[str, int] = dict.fromkeys(nodes, 0) + for source, target in edges: + in_degree[target] += 1 + out_degree[source] += 1 + return in_degree, out_degree + + +def _build_layer_groups( + nodes: Sequence[str], + edges: Sequence[tuple[str, str]], + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], +) -> dict[int, list[str]]: + children: dict[str, list[str]] = {node: [] for node in nodes} + for source, target in edges: + children[source].append(target) + + layers: dict[str, int] = {} + roots = sorted(node for node in nodes if in_degree[node] == 0) + if not roots: + roots = sorted(nodes, key=lambda node: -out_degree.get(node, 0))[:1] + queue = list(roots) + for node in queue: + layers.setdefault(node, 0) + while queue: + node = queue.pop(0) + for child in children.get(node, []): + if child in layers: + continue + layers[child] = layers[node] + 1 + queue.append(child) + + max_layer = max(layers.values(), default=0) + for node in nodes: + if node not in layers: + layers[node] = max_layer + 1 + + layer_groups: dict[int, list[str]] = {} + for node, layer in layers.items(): + layer_groups.setdefault(layer, []).append(node) + for layer in layer_groups: + layer_groups[layer].sort() + return layer_groups + + +def _layout_dep_graph( + layer_groups: Mapping[int, Sequence[str]], + *, + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], +) -> tuple[int, int, int, dict[str, tuple[float, float]]]: + num_layers = max(layer_groups.keys(), default=0) + 1 + max_per_layer = max((len(members) for members in layer_groups.values()), default=1) + pad_x, pad_y = 56.0, 36.0 + prefer_horizontal = num_layers >= 6 and num_layers > max_per_layer + 2 + + def _ordered_members(members: Sequence[str]) -> list[str]: + if not prefer_horizontal or len(members) < 3: + return list(members) + ranked = sorted( + members, + key=lambda node: ( + -(in_degree.get(node, 0) + out_degree.get(node, 0)), + node, + ), + ) + center = (len(ranked) - 1) / 2 + slot_order = sorted( + range(len(ranked)), + key=lambda index: (abs(index - center), index), + ) + ordered = [""] * len(ranked) + for node, slot in zip(ranked, slot_order, strict=False): + ordered[slot] = node + return ordered + + if prefer_horizontal: + width = max(920, min(1600, num_layers * 118 + max_per_layer * 28 + 180)) + height = max(300, max_per_layer * 84 + 104) + else: + width = max(600, min(1200, max_per_layer * 70 + 140)) + height = max(260, num_layers * 80 + 80) + + positions: dict[str, tuple[float, float]] = {} + for layer_index in range(num_layers): + members = layer_groups.get(layer_index, []) + count = len(members) + if prefer_horizontal: + members = _ordered_members(members) + layer_step = (width - 2 * pad_x) / max(1, num_layers - 1) + x = pad_x + layer_index * layer_step + fan = min(14.0, layer_step * 0.12) + offset_unit = fan / max(1, count - 1) + center = (count - 1) / 2 + for index, node in enumerate(members): + y = pad_y + (index + 0.5) * ((height - 2 * pad_y) / max(1, count)) + positions[node] = (x + (index - center) * offset_unit, y) + continue + + y = pad_y + layer_index * ((height - 2 * pad_y) / max(1, num_layers - 1)) + for index, node in enumerate(members): + x = pad_x + (index + 0.5) * ((width - 2 * pad_x) / max(1, count)) + positions[node] = (x, y) + return width, height, max_per_layer, positions + + +def _hub_threshold( + nodes: Sequence[str], in_degree: Mapping[str, int], out_degree: Mapping[str, int] +) -> int: + degrees = [in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes] + if not degrees: + return 99 + degrees_sorted = sorted(degrees, reverse=True) + return int(degrees_sorted[max(0, len(degrees_sorted) // 5)]) + + +def _build_node_radii( + nodes: Sequence[str], + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], + cycle_node_set: set[str], + hub_threshold: int, +) -> dict[str, float]: + node_radii: dict[str, float] = {} + for node in nodes: + degree = in_degree.get(node, 0) + out_degree.get(node, 0) + if node in cycle_node_set: + node_radii[node] = min(8.0, max(5.0, 3.5 + degree * 0.4)) + elif degree >= hub_threshold and degree > 2: + node_radii[node] = min(10.0, max(6.0, 4.0 + degree * 0.5)) + elif degree <= 1: + node_radii[node] = 3.0 + else: + node_radii[node] = min(6.0, max(3.5, 3.0 + degree * 0.3)) + return node_radii + + +def _build_svg_defs() -> str: + return ( + "" + '' + '' + '' + '' + '' + '' + '' + "" + ) + + +def _build_cycle_edges(dep_cycles: Sequence[object]) -> set[tuple[str, str]]: + cycle_edges: set[tuple[str, str]] = set() + for cycle in dep_cycles: + parts = [str(part) for part in as_sequence(cycle)] + for index in range(len(parts)): + cycle_edges.add((parts[index], parts[(index + 1) % len(parts)])) + return cycle_edges + + +def _render_dep_edges( + edges: Sequence[tuple[str, str]], + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + cycle_edges: set[tuple[str, str]], +) -> list[str]: + rendered: list[str] = [] + for source, target in edges: + x1, y1 = positions[source] + x2, y2 = positions[target] + source_radius, target_radius = node_radii[source], node_radii[target] + dx, dy = x2 - x1, y2 - y1 + distance = math.sqrt(dx * dx + dy * dy) or 1.0 + ux, uy = dx / distance, dy / distance + x1a, y1a = x1 + ux * (source_radius + 2), y1 + uy * (source_radius + 2) + x2a, y2a = x2 - ux * (target_radius + 4), y2 - uy * (target_radius + 4) + mx = (x1a + x2a) / 2 - (y2a - y1a) * 0.06 + my = (y1a + y2a) / 2 + (x2a - x1a) * 0.06 + is_cycle = (source, target) in cycle_edges + stroke = "var(--danger)" if is_cycle else "var(--border-strong)" + opacity = "0.6" if is_cycle else "0.3" + marker = "dep-arrow-cycle" if is_cycle else "dep-arrow" + rendered.append( + f'' + ) + return rendered + + +def _render_dep_nodes_and_labels( + nodes: Sequence[str], + *, + positions: Mapping[str, tuple[float, float]], + node_radii: Mapping[str, float], + in_degree: Mapping[str, int], + out_degree: Mapping[str, int], + cycle_node_set: set[str], + hub_threshold: int, + max_per_layer: int, + prefer_horizontal: bool, +) -> tuple[list[str], list[str]]: + nodes_svg: list[str] = [] + labels_svg: list[str] = [] + rotate_labels = prefer_horizontal or max_per_layer > 6 + + for node in nodes: + x, y = positions[node] + radius = node_radii[node] + degree = in_degree.get(node, 0) + out_degree.get(node, 0) + label = _short_label(node) + is_cycle = node in cycle_node_set + is_hub = degree >= hub_threshold and degree > 2 + is_secondary = not is_hub and not is_cycle + + if is_cycle: + fill, fill_opacity, extra = ( + "var(--danger)", + "0.85", + 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', + ) + elif is_hub: + fill, fill_opacity, extra = ( + "var(--accent-primary)", + "1", + 'filter="url(#glow)"', + ) + elif degree <= 1: + fill, fill_opacity, extra = "var(--text-muted)", "0.4", "" + else: + fill, fill_opacity, extra = "var(--accent-primary)", "0.7", "" + + nodes_svg.append( + f'' + ) + + font_size = "10" if is_hub else ("8" if is_secondary else "9") + if rotate_labels: + label_x = ( + x + radius + (4 if is_secondary else 6 if prefer_horizontal else 0) + ) + label_y = ( + y - radius - (1 if is_secondary else 2 if prefer_horizontal else 6) + ) + labels_svg.append( + f'' + f"{_escape_html(node)}{_escape_html(label)}" + ) + continue + + labels_svg.append( + f'' + f"{_escape_html(node)}{_escape_html(label)}" + ) + + return nodes_svg, labels_svg diff --git a/codeclone/report/html/widgets/icons.py b/codeclone/report/html/widgets/icons.py index 12dde5c1..cb0b9a68 100644 --- a/codeclone/report/html/widgets/icons.py +++ b/codeclone/report/html/widgets/icons.py @@ -132,6 +132,13 @@ def _svg_with_class(size: int, sw: str, body: str, *, class_name: str = "") -> s '' '', ), + "module-map": ( + "2", + '' + '' + '' + '', + ), "dead-code": ( "2", '' diff --git a/codeclone/report/messages/chrome.py b/codeclone/report/messages/chrome.py index dc7d5c78..2da24adf 100644 --- a/codeclone/report/messages/chrome.py +++ b/codeclone/report/messages/chrome.py @@ -16,6 +16,7 @@ TAB_OVERVIEW: Final = "Overview" TAB_CLONES: Final = "Clones" TAB_QUALITY: Final = "Quality" +TAB_MODULE_MAP: Final = "Module map" TAB_DEPENDENCIES: Final = "Dependencies" TAB_DEAD_CODE: Final = "Dead Code" TAB_SUGGESTIONS: Final = "Suggestions" diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 5d4aa6f5..ed78a356 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -40,6 +40,7 @@ location_file_target, relative_location_path, ) +from codeclone.report.html.sections._module_map import render_module_map_panel from codeclone.report.html.sections._security_surfaces import ( _coverage_join_review_text, _coverage_review_cues, @@ -2705,7 +2706,7 @@ def test_html_report_quality_coverage_join_empty_and_invalid_states() -> None: ) invalid_panel = invalid_html.split('data-clone-panel="coverage-join"', 1)[1] invalid_panel = invalid_panel.split( - '
None: invalid_html = _render_metrics_html(metrics) coverage_join_panel = invalid_html.split('data-clone-panel="coverage-join"', 1)[1] coverage_join_panel = coverage_join_panel.split( - '
None: assert '
n/a
' not in html # Issue breakdown replaces old hotspot sections assert "Issue breakdown" in html + + +# --------------------------------------------------------------------------- +# Module map panel (Phase 32) +# --------------------------------------------------------------------------- + + +def _mm_node( + node_id: str, + fan_in: int, + fan_out: int, + *, + in_cycle: bool = False, + status: str = "non_candidate", + reasons: tuple[str, ...] = (), + kinds: tuple[str, ...] = ("production",), +) -> dict[str, object]: + return { + "id": node_id, + "label": node_id, + "fan_in": fan_in, + "fan_out": fan_out, + "total_degree": fan_in + fan_out, + "source_kinds": list(kinds), + "in_cycle": in_cycle, + "overloaded": { + "score": 0.9, + "candidate_status": status, + "candidate_reasons": list(reasons), + }, + } + + +def _mm_graph( + *, + zoom: str, + package_depth: object, + nodes: list[dict[str, object]], + edges: list[dict[str, object]], + truncated: bool, +) -> dict[str, object]: + return { + "zoom": zoom, + "package_depth": package_depth, + "truncation": { + "truncated": truncated, + "node_universe_count": 40 if truncated else len(nodes), + "node_shown_count": len(nodes), + "edge_universe_count": 30 if truncated else len(edges), + "edge_shown_count": len(edges), + "seed_policy": "cycles_then_chains_then_degree", + }, + "nodes": nodes, + "edges": edges, + } + + +def _module_map_payload( + *, + truncated: bool = False, + default_zoom: str = "packages", + population_status: str = "ok", + with_unwind: bool = True, + packages_nodes: bool = True, +) -> dict[str, object]: + nodes = [ + _mm_node( + "pkg.core", 12, 8, status="candidate", reasons=("dependency_pressure",) + ), + _mm_node("pkg.api", 6, 2, in_cycle=True), + _mm_node("pkg.svc", 2, 2), + _mm_node("pkg.util", 2, 1, kinds=("tests",)), + _mm_node("pkg.leaf", 0, 1), + ] + edges = [ + {"source": "pkg.api", "target": "pkg.core", "weight": 5}, + {"source": "pkg.core", "target": "pkg.util", "weight": 1}, + {"source": "pkg.leaf", "target": "pkg.core", "weight": 2}, + {"source": "pkg.svc", "target": "pkg.core", "weight": 3}, + ] + unwind = ( + [ + { + "module": "pkg.core", + "filepath": "pkg/core.py", + "source_kind": "production", + "fan_in": 12, + "fan_out": 8, + "score": 0.9, + "dependency_score": 0.95, + "candidate_status": "candidate", + "signals": ["dependency_pressure", "chain_bottleneck"], + } + ] + if with_unwind + else [] + ) + return { + "schema_version": "1", + "scope": "report_only", + "default_zoom": default_zoom, + "summary": { + "available": True, + "module_count": 5, + "package_count_depth2": 5, + "edge_count": 4, + "unwind_candidate_count": len(unwind), + "overloaded_candidate_count": 1, + "overloaded_population_status": population_status, + }, + "graph_packages": _mm_graph( + zoom="packages", + package_depth=2, + nodes=nodes if packages_nodes else [], + edges=edges if packages_nodes else [], + truncated=truncated, + ), + "graph_modules": _mm_graph( + zoom="modules", + package_depth=None, + nodes=nodes, + edges=edges, + truncated=False, + ), + "unwind_candidates": unwind, + } + + +def _module_map_unavailable_payload() -> dict[str, object]: + empty = { + "truncated": False, + "node_universe_count": 0, + "node_shown_count": 0, + "edge_universe_count": 0, + "edge_shown_count": 0, + "seed_policy": "cycles_then_chains_then_degree", + } + graph: dict[str, object] = { + "zoom": "packages", + "package_depth": None, + "truncation": empty, + "nodes": [], + "edges": [], + } + return { + "schema_version": "1", + "scope": "report_only", + "default_zoom": "packages", + "summary": { + "available": False, + "reason": "dependencies_skipped", + "module_count": 0, + "package_count_depth2": 0, + "edge_count": 0, + "unwind_candidate_count": 0, + "overloaded_candidate_count": 0, + "overloaded_population_status": "limited", + }, + "graph_packages": graph, + "graph_modules": {**graph, "zoom": "modules"}, + "unwind_candidates": [], + } + + +def _module_map_base_metrics() -> dict[str, object]: + return _metrics_payload( + health_score=80, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=2, + dead_total=0, + dead_critical=0, + ) + + +def _module_map_metrics() -> dict[str, object]: + metrics = _module_map_base_metrics() + metrics["overloaded_modules"] = { + "summary": {"candidates": 1, "population_status": "ok"}, + "items": [ + { + "module": "pkg.core", + "score": 0.9, + "fan_in": 12, + "fan_out": 8, + "candidate_status": "candidate", + }, + { + "module": "pkg.api", + "score": 0.4, + "fan_in": 6, + "fan_out": 2, + "candidate_status": "ranked_only", + }, + ], + } + return metrics + + +def _render_module_map_report( + module_map: dict[str, object], + *, + metrics: dict[str, object] | None = None, +) -> str: + return build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=metrics if metrics is not None else _module_map_metrics(), + report_document={"derived": {"module_map": module_map}}, + ) + + +def _module_map_panel_slice(html: str) -> str: + panel = html.split('id="panel-module-map"', 1)[1] + return panel.split('id="panel-dependencies"', 1)[0] + + +def test_html_report_renders_module_map_panel() -> None: + html = _render_module_map_report(_module_map_payload()) + _assert_html_contains( + html, + "Module map", + 'id="panel-module-map"', + "dep-graph-svg", + "mm-candidate-ring", + 'data-subtab-group="module-map-zoom"', + "Report-only import-graph signals for refactor triage.", + "Unwind candidates", + "Top overloaded modules", + "dependency_pressure", + ) + panel = _module_map_panel_slice(html) + assert "pkg.core" in panel + assert 'stroke-width="3"' in panel # weight=5 edge -> 1+floor(log2 5)=3 + assert 'stroke-dasharray="2,2"' in panel # tests-only node dashed stroke + assert 'stroke-dasharray="3,2"' in panel # in-cycle node dashed stroke + # overloaded slice ordered by -score: pkg.core (0.90) row before pkg.api (0.40) + overloaded = panel.split("Top overloaded modules", 1)[1] + assert overloaded.index("pkg.core") < overloaded.index("pkg.api") + + +def test_module_map_truncation_notice_when_sampled() -> None: + html = _render_module_map_report(_module_map_payload(truncated=True)) + panel = _module_map_panel_slice(html) + _assert_html_contains( + panel, "mm-truncation-notice", "Showing", "deterministic sample" + ) + + +def test_module_map_panel_unavailable_when_skipped() -> None: + html = _render_module_map_report(_module_map_unavailable_payload()) + panel = _module_map_panel_slice(html) + assert "Dependency graph is not available." in panel + assert "dep-graph-svg" not in panel + assert 'data-subtab-group="module-map-zoom"' not in panel + + +def test_module_map_panel_metrics_skipped_insight() -> None: + ctx = cast( + Any, + SimpleNamespace( + derived_map={}, + overloaded_modules_map={}, + metrics_available=False, + ), + ) + html = render_module_map_panel(ctx) + assert "Metrics are skipped for this run." in html + assert "Dependency graph is not available." in html + + +def test_module_map_panel_modules_zoom_and_limited_population() -> None: + payload = _module_map_payload( + default_zoom="modules", + population_status="limited", + with_unwind=False, + packages_nodes=False, + ) + html = _render_module_map_report(payload, metrics=_module_map_base_metrics()) + panel = _module_map_panel_slice(html) + assert "Overload population" in panel + assert "No unwind candidates detected." in panel + assert "No overloaded modules detected." in panel + # modules graph (active) renders an SVG; empty packages graph shows the message + assert "dep-graph-svg" in panel + assert "Dependency graph is not available." in panel diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py index 915a8963..f0e486fc 100644 --- a/tests/test_html_report_helpers.py +++ b/tests/test_html_report_helpers.py @@ -21,12 +21,7 @@ _render_group_explanation, ) from codeclone.report.html.sections._dead_code import render_dead_code_panel -from codeclone.report.html.sections._dependencies import ( - _hub_threshold, - _layout_dep_graph, - _render_dep_nodes_and_labels, - _select_dep_nodes, -) +from codeclone.report.html.sections._dependencies import _select_dep_nodes from codeclone.report.html.sections._meta import _path_basename, render_meta_panel from codeclone.report.html.sections._overview import ( _directory_hotspot_bucket_body, @@ -48,6 +43,11 @@ overview_source_breakdown_html, overview_summary_item_html, ) +from codeclone.report.html.widgets.dep_graph_layout import ( + _hub_threshold, + _layout_dep_graph, + _render_dep_nodes_and_labels, +) from codeclone.report.html.widgets.icons import section_icon_html from codeclone.report.html.widgets.snippets import _FileCache from codeclone.report.html.widgets.tabs import render_split_tabs From aa8d7dc2956e40b740d82a349d7972a6b3d4612a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 21:03:44 +0500 Subject: [PATCH 005/113] docs: document Phase 32 Module map --- CHANGELOG.md | 9 +++++ docs/book/05-report.md | 36 +++++++++++++++++++ docs/book/06-html-render.md | 13 ++++++- .../tools/report-and-findings.md | 10 ++++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5ae6599..f2370c85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -133,6 +133,15 @@ Added schema is now `1.2`; JSON export schema `1.3` adds control-plane contract `1.0`, profile context/summary, and active selection without changing technical-validity semantics. +* **Module map.** A default-on, report-only `derived.module_map` projection + reprojects existing dependency and overloaded-module facts into deterministic + package/module graph views and unwind-candidate triage rows for refactor + scoping. A new `Module map` HTML tab (between Quality and Dependencies) renders + the precomputed graph with a Packages/Modules zoom toggle, candidate/cycle/hub + cues, a truncation notice on sampled graphs, and full-size unwind and + top-overloaded tables. `get_report_section(section="module_map")` returns the + projection directly. No new analysis pass, metrics family, or report schema + bump — `derived` stays excluded from the integrity digest. Changed diff --git a/docs/book/05-report.md b/docs/book/05-report.md index 7c29cf5f..d0fcb3f5 100644 --- a/docs/book/05-report.md +++ b/docs/book/05-report.md @@ -78,6 +78,42 @@ Refs: - `codeclone/report/document/_findings_groups.py:_build_clone_groups` - `codeclone/report/document/_findings_groups.py:_build_structural_groups` +### Module map (`derived.module_map`) + +`derived.module_map` is a report-only projection for refactor triage. It does +not re-scan sources or add a metrics family — it reprojects the existing +`metrics.families.dependencies` (import edges, cycles, longest chains) and +`metrics.families.overloaded_modules` facts into graph views and unwind-candidate +rows. It carries its own `schema_version: "1"` and `scope: "report_only"`; like +the rest of `derived` it is excluded from the integrity digest, so adding it does +not bump `report_schema_version`. + +Shape: + +- `summary` — `available`, `module_count`, `package_count_depth2`, `edge_count`, + `unwind_candidate_count`, `overloaded_candidate_count`, + `overloaded_population_status` (`reason` is added when `available` is false). +- `default_zoom` — `"packages"` or `"modules"`, chosen by a deterministic + decision table over module/package counts (monolith and over-merge guards + included). +- `graph_packages` / `graph_modules` — precomputed `nodes`, `edges`, and a + `truncation` block (`truncated`, universe/shown node and edge counts, + `seed_policy: "cycles_then_chains_then_degree"`). Both views are always emitted + when `available` is true so consumers can swap zoom without recomputation. The + SVG may show a deterministic sample on large repositories; the tables stay + full-size. +- `unwind_candidates[]` — report-only refactor-triage rows over the full + `overloaded_modules` set (capped at 25), each with derived `signals` ids. + +When the dependencies family is skipped or empty the projection emits an +unavailable shell (`summary.available: false`, `reason: "dependencies_skipped"`, +empty graphs and `unwind_candidates`). + +Refs: + +- `codeclone/report/document/derived.py:_build_derived_module_map` +- `codeclone/metrics/dependencies.py:select_dependency_graph_nodes` + ## Contracts - JSON is the source of truth for report semantics. diff --git a/docs/book/06-html-render.md b/docs/book/06-html-render.md index 65b44dda..d3a347dc 100644 --- a/docs/book/06-html-render.md +++ b/docs/book/06-html-render.md @@ -36,10 +36,19 @@ Output: - HTML must not recompute detection semantics; it renders facts from report/core layers. - Provenance panels mirror canonical report/meta facts. -- Overview, Quality, Suggestions, Dead Code, and Clones tabs are projections over canonical report sections. +- Overview, Quality, Module map, Suggestions, Dead Code, Dependencies, and + Clones tabs are projections over canonical report sections. - Quality may include report-only subtabs such as `Coverage Join` and `Security Surfaces`; these remain factual projections over canonical metrics families rather than HTML-only analysis. +- The `Module map` tab (between Quality and Dependencies) is render-only: it + draws the precomputed `derived.module_map` graph views, unwind-candidate + triage, and a top-overloaded slice. It re-samples nothing and adds no health + dimension — the SVG may show a deterministic sample on large repositories while + the unwind and overload tables stay full-size. The `Packages`/`Modules` toggle + swaps the two precomputed graphs without recomputation. When the dependencies + family is skipped it shows the same "Dependency graph is not available." copy + as the Dependencies tab. - IDE deep links are HTML-only UX over canonical path/line facts. - Missing snippets or optional meta fields render safe factual fallbacks rather than invented data. @@ -47,6 +56,8 @@ Refs: - `codeclone/report/html/assemble.py:build_html_report` - `codeclone/report/html/sections/_clones.py:_render_group_explanation` +- `codeclone/report/html/sections/_module_map.py:render_module_map_panel` +- `codeclone/report/html/widgets/dep_graph_layout.py` - `codeclone/report/html/sections/_meta.py:render_meta_panel` - `codeclone/report/html/assets/js.py:_IDE_LINKS` - `codeclone/report/overview.py:materialize_report_overview` diff --git a/docs/book/25-mcp-interface/tools/report-and-findings.md b/docs/book/25-mcp-interface/tools/report-and-findings.md index df35cb34..6ac908f5 100644 --- a/docs/book/25-mcp-interface/tools/report-and-findings.md +++ b/docs/book/25-mcp-interface/tools/report-and-findings.md @@ -8,3 +8,13 @@ | `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation/explainability for one finding | | `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, `limit`, `max_results` | Priority-ranked hotspot views by kind | | `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented markdown or JSON summary | + +`get_report_section` `section` accepts `meta`, `inventory`, `findings`, +`metrics`, `metrics_detail`, `changed`, `derived`, `integrity`, `module_map`, +or `all`. `section="module_map"` returns the exact +`report_document["derived"]["module_map"]` projection (graph views, +`unwind_candidates`, truncation, and `summary.available`) so agents read the +module map directly without `section="all"` or manual metrics-family joins. When +the run skipped the dependencies family the call returns the unavailable shell +(`summary.available: false`) rather than an error; only a run with no `derived` +section at all raises `MCPServiceContractError`. From 0aba2783ff13ba9f55bf768d6b2f007c2c719e6f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 18 Jun 2026 22:11:32 +0500 Subject: [PATCH 006/113] fix(html): replace dead Critical card in overloaded Quality stats --- codeclone/report/html/sections/_coupling.py | 35 +++++++++++++-------- codeclone/report/messages/glossary.py | 4 +++ tests/test_html_report.py | 34 +++++++++++++++++++- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/codeclone/report/html/sections/_coupling.py b/codeclone/report/html/sections/_coupling.py index 23a860d3..99cc1383 100644 --- a/codeclone/report/html/sections/_coupling.py +++ b/codeclone/report/html/sections/_coupling.py @@ -32,6 +32,7 @@ from .._context import ReportContext +_as_float = _coerce.as_float _as_int = _coerce.as_int _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence @@ -218,17 +219,18 @@ def _overloaded_cards( ) -> str: candidates = _as_int(summary.get("candidates")) total_modules = _as_int(summary.get("total")) - critical = sum( + ranked_only = sum( 1 for r in rows_data - if str(_as_mapping(r).get("candidate_status", "")).strip().lower() == "critical" + if str(_as_mapping(r).get("candidate_status", "")).strip().lower() + == "ranked_only" ) - scores = [ - _as_int(_as_mapping(r).get("score")) - for r in rows_data - if _as_int(_as_mapping(r).get("score")) > 0 - ] - max_score = max(scores) if scores else 0 + population_status = str(summary.get("population_status", "")).strip().lower() + max_score = _as_float(summary.get("top_score")) + if max_score <= 0.0: + row_scores = [_as_float(_as_mapping(r).get("score")) for r in rows_data] + max_score = max(row_scores) if row_scores else 0.0 + cutoff = _as_float(summary.get("candidate_score_cutoff")) locs = [ _as_int(_as_mapping(r).get("loc")) for r in rows_data @@ -244,15 +246,22 @@ def _overloaded_cards( glossary_tip_fn=glossary_tip, ), _stat_card( - "Critical", - critical, - value_tone="bad" if critical > 0 else "good", + "Ranked only", + ranked_only, + detail=_micro_badges(("population", population_status)) + if population_status + else "", + value_tone=( + "warn" + if population_status == "limited" + else ("muted" if ranked_only else "good") + ), glossary_tip_fn=glossary_tip, ), _stat_card( "Max score", - max_score, - detail=_micro_badges(("threshold", summary.get("threshold", "n/a"))), + f"{max_score:.2f}", + detail=_micro_badges(("cutoff", f"{cutoff:.2f}")) if cutoff > 0.0 else "", value_tone="warn" if max_score > 0 else "muted", glossary_tip_fn=glossary_tip, ), diff --git a/codeclone/report/messages/glossary.py b/codeclone/report/messages/glossary.py index 3f0461c5..06cfeeb0 100644 --- a/codeclone/report/messages/glossary.py +++ b/codeclone/report/messages/glossary.py @@ -72,6 +72,10 @@ "overloaded": ( "Modules exceeding acceptable thresholds for size, complexity, or coupling" ), + "ranked only": ( + "Modules ranked by overload score but not flagged as candidates " + "(e.g. small repo population)" + ), "critical": "Items with critical status requiring immediate attention", "max score": "Highest overload score among all modules", "avg loc": "Average lines of code per module", diff --git a/tests/test_html_report.py b/tests/test_html_report.py index ed78a356..0fc1e1ca 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1801,7 +1801,33 @@ def test_html_report_renders_overloaded_modules_in_quality_and_overview() -> Non "dependency_pressure", "hub_like_shape", ], - } + }, + { + "module": "pkg.util", + "relative_path": "pkg/util.py", + "source_kind": "production", + "loc": 120, + "functions": 2, + "methods": 0, + "classes": 0, + "callable_count": 2, + "complexity_total": 8, + "complexity_max": 5, + "fan_in": 1, + "fan_out": 2, + "total_deps": 3, + "import_edges": 3, + "reimport_edges": 0, + "reimport_ratio": 0.0, + "instability": 0.6667, + "hub_balance": 0.5, + "size_score": 0.4, + "dependency_score": 0.35, + "shape_score": 0.3, + "score": 0.75, + "candidate_status": "ranked_only", + "candidate_reasons": [], + }, ] html = build_html_report( @@ -1818,11 +1844,17 @@ def test_html_report_renders_overloaded_modules_in_quality_and_overview() -> Non "pkg.hub", "Top candidates", "0.93", + "Ranked only", "overloaded-modules", ) assert "hub-like shape" not in html assert "Candidate cutoff" not in html assert "Ranked modules" not in html + overloaded_panel = html.split('data-clone-panel="overloaded-modules"', 1)[1].split( + "data-clone-panel=", 1 + )[0] + assert "Critical" not in overloaded_panel + assert "0.88" in overloaded_panel def test_html_report_overloaded_modules_fallback_module_path_in_overview() -> None: From 519a55c964edb1566c2115beda05da8af3498b0d Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 19 Jun 2026 16:16:26 +0500 Subject: [PATCH 007/113] refactor(html): move overloaded-modules profile from Quality into Module map Co-Authored-By: Claude Opus 4.8 --- codeclone/report/html/assemble.py | 3 - codeclone/report/html/sections/_coupling.py | 111 +---------- codeclone/report/html/sections/_module_map.py | 181 +++++++++++++----- docs/book/06-html-render.md | 25 ++- tests/test_html_report.py | 36 ++-- 5 files changed, 171 insertions(+), 185 deletions(-) diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index 253f9aec..3cd200f4 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -159,9 +159,6 @@ def build_html_report( _as_int(_as_mapping(ctx.complexity_map.get("summary")).get("high_risk")) + _as_int(_as_mapping(ctx.coupling_map.get("summary")).get("high_risk")) + _as_int(_as_mapping(ctx.cohesion_map.get("summary")).get("low_cohesion")) - + _as_int( - _as_mapping(ctx.overloaded_modules_map.get("summary")).get("candidates") - ) + coverage_review_items + _as_int(_as_mapping(ctx.security_surfaces_map.get("summary")).get("items")) ) diff --git a/codeclone/report/html/sections/_coupling.py b/codeclone/report/html/sections/_coupling.py index 99cc1383..3db20927 100644 --- a/codeclone/report/html/sections/_coupling.py +++ b/codeclone/report/html/sections/_coupling.py @@ -32,7 +32,6 @@ from .._context import ReportContext -_as_float = _coerce.as_float _as_int = _coerce.as_int _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence @@ -213,80 +212,15 @@ def _cohesion_cards(summary: Mapping[str, object]) -> str: return f'
{"".join(cards)}
' -def _overloaded_cards( - summary: Mapping[str, object], - rows_data: Sequence[object], -) -> str: - candidates = _as_int(summary.get("candidates")) - total_modules = _as_int(summary.get("total")) - ranked_only = sum( - 1 - for r in rows_data - if str(_as_mapping(r).get("candidate_status", "")).strip().lower() - == "ranked_only" - ) - population_status = str(summary.get("population_status", "")).strip().lower() - max_score = _as_float(summary.get("top_score")) - if max_score <= 0.0: - row_scores = [_as_float(_as_mapping(r).get("score")) for r in rows_data] - max_score = max(row_scores) if row_scores else 0.0 - cutoff = _as_float(summary.get("candidate_score_cutoff")) - locs = [ - _as_int(_as_mapping(r).get("loc")) - for r in rows_data - if _as_int(_as_mapping(r).get("loc")) > 0 - ] - avg_loc = int(sum(locs) / len(locs)) if locs else 0 - cards = [ - _stat_card( - "Overloaded", - candidates, - detail=_micro_badges(("total analyzed", total_modules)), - value_tone="bad" if candidates > 0 else "good", - glossary_tip_fn=glossary_tip, - ), - _stat_card( - "Ranked only", - ranked_only, - detail=_micro_badges(("population", population_status)) - if population_status - else "", - value_tone=( - "warn" - if population_status == "limited" - else ("muted" if ranked_only else "good") - ), - glossary_tip_fn=glossary_tip, - ), - _stat_card( - "Max score", - f"{max_score:.2f}", - detail=_micro_badges(("cutoff", f"{cutoff:.2f}")) if cutoff > 0.0 else "", - value_tone="warn" if max_score > 0 else "muted", - glossary_tip_fn=glossary_tip, - ), - _stat_card( - "Avg LOC", - avg_loc, - detail=_micro_badges(("modules", len(locs))), - value_tone="muted", - glossary_tip_fn=glossary_tip, - ), - ] - return f'
{"".join(cards)}
' - - def render_quality_panel(ctx: ReportContext) -> str: """Build the unified Quality tab (Complexity + Coupling + Cohesion sub-tabs).""" coupling_summary = _as_mapping(ctx.coupling_map.get("summary")) cohesion_summary = _as_mapping(ctx.cohesion_map.get("summary")) complexity_summary = _as_mapping(ctx.complexity_map.get("summary")) - overloaded_modules_summary = _as_mapping(ctx.overloaded_modules_map.get("summary")) coverage_join_summary = coverage_join_quality_summary(ctx) coupling_high_risk = _as_int(coupling_summary.get("high_risk")) cohesion_low = _as_int(cohesion_summary.get("low_cohesion")) complexity_high_risk = _as_int(complexity_summary.get("high_risk")) - overloaded_module_candidates = _as_int(overloaded_modules_summary.get("candidates")) coverage_review_items = coverage_join_quality_count(ctx) security_surface_items = security_surfaces_quality_count(ctx) coverage_hotspots = _as_int(coverage_join_summary.get("coverage_hotspots")) @@ -305,7 +239,6 @@ def render_quality_panel(ctx: ReportContext) -> str: f"High-complexity: {complexity_high_risk}; " f"high-coupling: {coupling_high_risk}; " f"low-cohesion: {cohesion_low}; " - f"overloaded modules: {overloaded_module_candidates}; " f"security surfaces: {security_surface_items}; " f"max CC {cc_max}; " f"max CBO {coupling_summary.get('max', 'n/a')}; " @@ -319,9 +252,7 @@ def render_quality_panel(ctx: ReportContext) -> str: ) else: answer += " Coverage join unavailable." - if overloaded_module_candidates > 0 or ( - coupling_high_risk > 0 and cohesion_low > 0 - ): + if coupling_high_risk > 0 and cohesion_low > 0: tone = "risk" elif ( coupling_high_risk > 0 @@ -401,50 +332,10 @@ def render_quality_panel(ctx: ReportContext) -> str: ctx=ctx, ) - gm_rows_data = _as_sequence(ctx.overloaded_modules_map.get("items")) - gm_rows = [ - ( - str(_as_mapping(r).get("module", "")), - str( - _as_mapping(r).get("relative_path") - or _as_mapping(r).get("filepath") - or "" - ), - str(_as_mapping(r).get("score", "")), - str(_as_mapping(r).get("candidate_status", "")), - str(_as_mapping(r).get("loc", "")), - f"{_as_mapping(r).get('fan_in', '')}/{_as_mapping(r).get('fan_out', '')}", - str(_as_mapping(r).get("complexity_total", "")), - ) - for r in gm_rows_data[:50] - ] - gm_panel = _overloaded_cards( - overloaded_modules_summary, gm_rows_data - ) + render_rows_table( - headers=( - "Module", - "File", - "Score", - "Status", - "LOC", - "Fan-in/out", - "Complexity total", - ), - rows=gm_rows, - empty_message="Overloaded-module profiling is not available.", - ctx=ctx, - ) - sub_tabs: list[tuple[str, str, int, str]] = [ ("complexity", "Complexity", complexity_high_risk, cx_panel), ("coupling", "Coupling (CBO)", coupling_high_risk, cp_panel), ("cohesion", "Cohesion (LCOM4)", cohesion_low, ch_panel), - ( - "overloaded-modules", - "Overloaded Modules", - overloaded_module_candidates, - gm_panel, - ), ] coverage_join_panel = render_coverage_join_panel(ctx) if coverage_join_panel: diff --git a/codeclone/report/html/sections/_module_map.py b/codeclone/report/html/sections/_module_map.py index b4f71c52..16a567c4 100644 --- a/codeclone/report/html/sections/_module_map.py +++ b/codeclone/report/html/sections/_module_map.py @@ -43,8 +43,10 @@ _as_sequence = _coerce.as_sequence _CANDIDATE = "candidate" -_OVERLOADED_TOP_CAP = 10 +_OVERLOADED_TABLE_CAP = 50 +_OVERLOADED_HEADING = "Overloaded Modules" _EMPTY_GRAPH_MESSAGE = "Dependency graph is not available." +_OVERLOADED_EMPTY_MESSAGE = "Overloaded-module profiling is not available." _METRICS_SKIPPED = "Metrics are skipped for this run." # Mandatory honesty copy (spec §11): report-only, sampled SVG, full tables. @@ -261,12 +263,6 @@ def _mm_stat_cards( _stat_card( "Unwind candidates", _as_int(summary.get("unwind_candidate_count")), - css_class="meta-item", - glossary_tip_fn=glossary_tip, - ), - _stat_card( - "Overload candidates", - _as_int(summary.get("overloaded_candidate_count")), detail=_micro_badges( ("modules", _as_int(summary.get("module_count"))), ("packages", _as_int(summary.get("package_count_depth2"))), @@ -275,17 +271,6 @@ def _mm_stat_cards( glossary_tip_fn=glossary_tip, ), ] - if str(summary.get("overloaded_population_status")) == "limited": - cards.append( - _stat_card( - "Overload population", - "limited", - detail=_micro_badges(("rings", "off")), - value_tone="muted", - css_class="meta-item", - glossary_tip_fn=glossary_tip, - ) - ) return "".join(cards) @@ -352,44 +337,121 @@ def _mm_unwind_table(unwind_candidates: Sequence[object], ctx: ReportContext) -> ) -def _mm_overloaded_table(ctx: ReportContext) -> str: - items = [ - _as_mapping(item) - for item in _as_sequence(ctx.overloaded_modules_map.get("items")) +def _overloaded_cards( + summary: Mapping[str, object], + rows_data: Sequence[object], +) -> str: + candidates = _as_int(summary.get("candidates")) + total_modules = _as_int(summary.get("total")) + ranked_only = sum( + 1 + for r in rows_data + if str(_as_mapping(r).get("candidate_status", "")).strip().lower() + == "ranked_only" + ) + population_status = str(summary.get("population_status", "")).strip().lower() + max_score = _as_float(summary.get("top_score")) + if max_score <= 0.0: + row_scores = [_as_float(_as_mapping(r).get("score")) for r in rows_data] + max_score = max(row_scores) if row_scores else 0.0 + cutoff = _as_float(summary.get("candidate_score_cutoff")) + locs = [ + _as_int(_as_mapping(r).get("loc")) + for r in rows_data + if _as_int(_as_mapping(r).get("loc")) > 0 + ] + avg_loc = int(sum(locs) / len(locs)) if locs else 0 + cards = [ + _stat_card( + "Overloaded", + candidates, + detail=_micro_badges(("total analyzed", total_modules)), + value_tone="bad" if candidates > 0 else "good", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Ranked only", + ranked_only, + detail=_micro_badges(("population", population_status)) + if population_status + else "", + value_tone=( + "warn" + if population_status == "limited" + else ("muted" if ranked_only else "good") + ), + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Max score", + f"{max_score:.2f}", + detail=_micro_badges(("cutoff", f"{cutoff:.2f}")) if cutoff > 0.0 else "", + value_tone="warn" if max_score > 0 else "muted", + glossary_tip_fn=glossary_tip, + ), + _stat_card( + "Avg LOC", + avg_loc, + detail=_micro_badges(("modules", len(locs))), + value_tone="muted", + glossary_tip_fn=glossary_tip, + ), ] - ranked = sorted(items, key=lambda item: -_as_float(item.get("score"))) + return f'
{"".join(cards)}
' + + +def _render_overloaded_modules_section(ctx: ReportContext) -> str: + """Render the full overloaded-modules profile (cards + table). + + Driven by ``metrics.families.overloaded_modules`` directly, so it renders + independently of dependency-graph availability — overloaded responsibility + is module-level and belongs in the Module map regardless of graph sampling. + """ + overloaded = _as_mapping(ctx.overloaded_modules_map) + if not overloaded: + return "" + summary = _as_mapping(overloaded.get("summary")) + rows_data = _as_sequence(overloaded.get("items")) rows = [ ( - str(item.get("module")), - f"{_as_float(item.get('score')):.2f}", - str(_as_int(item.get("fan_in"))), - str(_as_int(item.get("fan_out"))), - str(item.get("candidate_status")), + str(_as_mapping(r).get("module", "")), + str( + _as_mapping(r).get("relative_path") + or _as_mapping(r).get("filepath") + or "" + ), + str(_as_mapping(r).get("score", "")), + str(_as_mapping(r).get("candidate_status", "")), + str(_as_mapping(r).get("loc", "")), + f"{_as_mapping(r).get('fan_in', '')}/{_as_mapping(r).get('fan_out', '')}", + str(_as_mapping(r).get("complexity_total", "")), ) - for item in ranked[:_OVERLOADED_TOP_CAP] + for r in rows_data[:_OVERLOADED_TABLE_CAP] ] - return render_rows_table( - headers=("Module", "Score", "Fan-in", "Fan-out", "Status"), - rows=rows, - empty_message="No overloaded modules detected.", - ctx=ctx, + return ( + f'

{_OVERLOADED_HEADING}

' + + _overloaded_cards(summary, rows_data) + + render_rows_table( + headers=( + "Module", + "File", + "Score", + "Status", + "LOC", + "Fan-in/out", + "Complexity total", + ), + rows=rows, + empty_message=_OVERLOADED_EMPTY_MESSAGE, + ctx=ctx, + ) ) -def render_module_map_panel(ctx: ReportContext) -> str: - module_map = _as_mapping(ctx.derived_map.get("module_map")) +def _render_graph_block(ctx: ReportContext, module_map: Mapping[str, object]) -> str: summary = _as_mapping(module_map.get("summary")) - - answer = _MODULE_MAP_INSIGHT if ctx.metrics_available else _METRICS_SKIPPED - tone: Tone = "info" - insight = insight_block( - question="Where should refactoring unwind dependencies?", - answer=answer, - tone=tone, - ) - if not module_map or not bool(summary.get("available")): - return insight + _tab_empty(_EMPTY_GRAPH_MESSAGE) + return _tab_empty(_EMPTY_GRAPH_MESSAGE) default_zoom = str(module_map.get("default_zoom") or "packages") graph_packages = _as_mapping(module_map.get("graph_packages")) @@ -397,13 +459,32 @@ def render_module_map_panel(ctx: ReportContext) -> str: active_graph = graph_packages if default_zoom == "packages" else graph_modules return ( - insight - + _mm_truncation_notice(active_graph) + _mm_truncation_notice(active_graph) + f'
{_mm_stat_cards(summary, active_graph)}
' + _mm_zoom_toggle(default_zoom, graph_packages, graph_modules) + _MM_LEGEND + '

Unwind candidates

' + _mm_unwind_table(_as_sequence(module_map.get("unwind_candidates")), ctx) - + '

Top overloaded modules

' - + _mm_overloaded_table(ctx) + ) + + +def render_module_map_panel(ctx: ReportContext) -> str: + module_map = _as_mapping(ctx.derived_map.get("module_map")) + + answer = _MODULE_MAP_INSIGHT if ctx.metrics_available else _METRICS_SKIPPED + tone: Tone = "info" + insight = insight_block( + question="Where should refactoring unwind dependencies?", + answer=answer, + tone=tone, + ) + + # The import graph + unwind triage need the derived projection; the + # overloaded-modules profile is a module-level metrics view that renders + # independently (it moved here from the Quality tab — single home for + # module responsibility). + return ( + insight + + _render_graph_block(ctx, module_map) + + _render_overloaded_modules_section(ctx) ) diff --git a/docs/book/06-html-render.md b/docs/book/06-html-render.md index d3a347dc..815ca93f 100644 --- a/docs/book/06-html-render.md +++ b/docs/book/06-html-render.md @@ -38,17 +38,22 @@ Output: - Provenance panels mirror canonical report/meta facts. - Overview, Quality, Module map, Suggestions, Dead Code, Dependencies, and Clones tabs are projections over canonical report sections. -- Quality may include report-only subtabs such as `Coverage Join` and - `Security Surfaces`; these remain factual projections over canonical metrics - families rather than HTML-only analysis. -- The `Module map` tab (between Quality and Dependencies) is render-only: it - draws the precomputed `derived.module_map` graph views, unwind-candidate - triage, and a top-overloaded slice. It re-samples nothing and adds no health +- Quality covers per-function/class metrics (Complexity, Coupling, Cohesion) plus + report-only subtabs such as `Coverage Join` and `Security Surfaces`; these + remain factual projections over canonical metrics families rather than + HTML-only analysis. Quality does not host the overloaded-modules profile — that + module-level view lives in the `Module map` tab. +- The `Module map` tab (between Quality and Dependencies) is render-only and is + the single home for module-level responsibility. It draws the precomputed + `derived.module_map` graph views and unwind-candidate triage, plus the full + `overloaded_modules` profile (stat cards + table) read directly from + `metrics.families.overloaded_modules`. It re-samples nothing and adds no health dimension — the SVG may show a deterministic sample on large repositories while - the unwind and overload tables stay full-size. The `Packages`/`Modules` toggle - swaps the two precomputed graphs without recomputation. When the dependencies - family is skipped it shows the same "Dependency graph is not available." copy - as the Dependencies tab. + the unwind and overloaded tables stay full-size. The `Packages`/`Modules` + toggle swaps the two precomputed graphs without recomputation. The graph block + shows "Dependency graph is not available." when the dependencies family is + skipped; the overloaded-modules section renders whenever that metrics family is + present, independent of graph availability. - IDE deep links are HTML-only UX over canonical path/line facts. - Missing snippets or optional meta fields render safe factual fallbacks rather than invented data. diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 0fc1e1ca..31dbd53a 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1748,7 +1748,7 @@ def test_html_report_metrics_risk_branches() -> None: ) -def test_html_report_renders_overloaded_modules_in_quality_and_overview() -> None: +def test_html_report_renders_overloaded_modules_in_module_map_and_overview() -> None: payload = _metrics_payload( health_score=72, health_grade="B", @@ -1845,16 +1845,22 @@ def test_html_report_renders_overloaded_modules_in_quality_and_overview() -> Non "Top candidates", "0.93", "Ranked only", - "overloaded-modules", ) assert "hub-like shape" not in html assert "Candidate cutoff" not in html assert "Ranked modules" not in html - overloaded_panel = html.split('data-clone-panel="overloaded-modules"', 1)[1].split( - "data-clone-panel=", 1 + # Overloaded profile now lives in the Module map tab, not Quality. + module_map_panel = html.split('id="panel-module-map"', 1)[1].split( + 'id="panel-dependencies"', 1 )[0] - assert "Critical" not in overloaded_panel - assert "0.88" in overloaded_panel + assert "Overloaded Modules" in module_map_panel + assert "0.88" in module_map_panel # candidate score cutoff + assert "Critical" not in module_map_panel + quality_panel = html.split('id="panel-quality"', 1)[1].split( + 'id="panel-module-map"', 1 + )[0] + assert "Overloaded Modules" not in quality_panel + assert "overloaded-modules" not in quality_panel def test_html_report_overloaded_modules_fallback_module_path_in_overview() -> None: @@ -4236,7 +4242,8 @@ def test_html_report_renders_module_map_panel() -> None: 'data-subtab-group="module-map-zoom"', "Report-only import-graph signals for refactor triage.", "Unwind candidates", - "Top overloaded modules", + "Overloaded Modules", + "Complexity total", "dependency_pressure", ) panel = _module_map_panel_slice(html) @@ -4244,8 +4251,8 @@ def test_html_report_renders_module_map_panel() -> None: assert 'stroke-width="3"' in panel # weight=5 edge -> 1+floor(log2 5)=3 assert 'stroke-dasharray="2,2"' in panel # tests-only node dashed stroke assert 'stroke-dasharray="3,2"' in panel # in-cycle node dashed stroke - # overloaded slice ordered by -score: pkg.core (0.90) row before pkg.api (0.40) - overloaded = panel.split("Top overloaded modules", 1)[1] + # overloaded table rows in items order: pkg.core before pkg.api + overloaded = panel.split("Overloaded Modules", 1)[1] assert overloaded.index("pkg.core") < overloaded.index("pkg.api") @@ -4258,11 +4265,15 @@ def test_module_map_truncation_notice_when_sampled() -> None: def test_module_map_panel_unavailable_when_skipped() -> None: - html = _render_module_map_report(_module_map_unavailable_payload()) + # Real skip-dependencies runs drop both the graph and the overloaded family. + metrics = _module_map_base_metrics() + metrics.pop("overloaded_modules", None) + html = _render_module_map_report(_module_map_unavailable_payload(), metrics=metrics) panel = _module_map_panel_slice(html) assert "Dependency graph is not available." in panel assert "dep-graph-svg" not in panel assert 'data-subtab-group="module-map-zoom"' not in panel + assert "Overloaded Modules" not in panel def test_module_map_panel_metrics_skipped_insight() -> None: @@ -4288,9 +4299,10 @@ def test_module_map_panel_modules_zoom_and_limited_population() -> None: ) html = _render_module_map_report(payload, metrics=_module_map_base_metrics()) panel = _module_map_panel_slice(html) - assert "Overload population" in panel + # overloaded family present but empty -> section heading + empty-profile message + assert "Overloaded Modules" in panel + assert "Overloaded-module profiling is not available." in panel assert "No unwind candidates detected." in panel - assert "No overloaded modules detected." in panel # modules graph (active) renders an SVG; empty packages graph shows the message assert "dep-graph-svg" in panel assert "Dependency graph is not available." in panel From 30c9b6557c431cbde8399b7aa332e868c92f57de Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Fri, 19 Jun 2026 22:26:12 +0500 Subject: [PATCH 008/113] feat(html): unify module map and dependency graphs on shared block diagram --- codeclone/report/html/assemble.py | 34 +- codeclone/report/html/assets/css.py | 48 +- codeclone/report/html/assets/js.py | 64 +- .../report/html/sections/_dependencies.py | 90 ++- codeclone/report/html/sections/_module_map.py | 212 ++---- codeclone/report/html/widgets/badges.py | 64 +- .../report/html/widgets/dep_graph_layout.py | 649 +++++++++++++----- codeclone/report/html/widgets/tables.py | 37 +- tests/test_html_report.py | 31 +- tests/test_html_report_helpers.py | 268 +++++--- 10 files changed, 963 insertions(+), 534 deletions(-) diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index 3cd200f4..c7af8a16 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -163,10 +163,11 @@ def build_html_report( + _as_int(_as_mapping(ctx.security_surfaces_map.get("summary")).get("items")) ) - def _tab_badge(count: int) -> str: + def _tab_badge(count: int, unit: str) -> str: if count == 0: return "" - return f'{count}' + title = f"{count} {unit}" + return f'{count}' # -- Main tab navigation -- tab_icon_keys: dict[str, str] = { @@ -181,27 +182,42 @@ def _tab_badge(count: int) -> str: } tab_defs = [ ("overview", TAB_OVERVIEW, overview_html, ""), - ("clones", TAB_CLONES, clones_html, _tab_badge(ctx.clone_groups_total)), - ("quality", TAB_QUALITY, quality_html, _tab_badge(quality_issues)), + ( + "clones", + TAB_CLONES, + clones_html, + _tab_badge(ctx.clone_groups_total, "clone groups"), + ), + ("quality", TAB_QUALITY, quality_html, _tab_badge(quality_issues, "issues")), ( "module-map", TAB_MODULE_MAP, module_map_html, - _tab_badge(module_map_unwind), + _tab_badge(module_map_unwind, "unwind candidates"), + ), + ( + "dependencies", + TAB_DEPENDENCIES, + dependencies_html, + _tab_badge(dep_cycles, "dependency cycles"), + ), + ( + "dead-code", + TAB_DEAD_CODE, + dead_code_html, + _tab_badge(dead_high_conf, "high-confidence dead-code items"), ), - ("dependencies", TAB_DEPENDENCIES, dependencies_html, _tab_badge(dep_cycles)), - ("dead-code", TAB_DEAD_CODE, dead_code_html, _tab_badge(dead_high_conf)), ( "suggestions", TAB_SUGGESTIONS, suggestions_html, - _tab_badge(len(ctx.suggestions)), + _tab_badge(len(ctx.suggestions), "suggestions"), ), ( "structural-findings", TAB_FINDINGS, structural_html, - _tab_badge(structural_count), + _tab_badge(structural_count, "structural findings"), ), ] diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 4b5664e8..c85c8d1f 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -401,7 +401,27 @@ .table .col-risk,.table .col-badge,.table .col-cat{white-space:nowrap} .table .col-steps{max-width:120px;word-break:break-word} .table .col-wide{max-width:320px;word-break:break-all} +.table .col-score{min-width:130px;white-space:nowrap} +.table .col-chips{max-width:300px} .table-empty{padding:var(--sp-8);text-align:center;color:var(--text-muted);font-size:.9rem} + +/* Typed table cells: score bar, status pill, chips (shared badge vocabulary) */ +.score-bar{display:inline-flex;align-items:center;gap:7px;min-width:110px} +.score-bar-track{flex:1;height:5px;border-radius:3px;background:var(--accent-muted);overflow:hidden} +.score-bar-fill{display:block;height:100%;border-radius:3px;background:var(--accent-primary)} +.score-bar--strong .score-bar-fill{background:var(--accent-hover)} +.score-bar-val{font-family:var(--font-numeric);font-variant-numeric:tabular-nums; + font-size:.78rem;color:var(--text-secondary)} +.score-bar--strong .score-bar-val{color:var(--accent-primary);font-weight:600} +.status-pill{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; + padding:2px 9px;border-radius:999px;white-space:nowrap;letter-spacing:.01em; + font-family:var(--font-sans)} +.status-pill--candidate{background:var(--accent-muted);color:var(--accent-primary)} +.status-pill--ranked{background:var(--bg-overlay);color:var(--text-secondary)} +.status-pill--neutral{background:var(--bg-overlay);color:var(--text-muted)} +.chip{display:inline-flex;align-items:center;font-size:.66rem;font-family:var(--font-mono); + padding:2px 7px;margin:1px 3px 1px 0;border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-secondary);border:1px solid var(--border)} """ # --------------------------------------------------------------------------- @@ -664,6 +684,13 @@ .meta-item .meta-value--bad{color:var(--error)} .meta-item .meta-value--warn{color:var(--warning)} .meta-item .meta-value--muted{color:var(--text-muted)} +.meta-item .meta-value--accent{color:var(--accent-primary)} +.meta-item .meta-value-sec{font-family:var(--font-numeric);font-size:.9rem;font-weight:500; + color:var(--text-muted);margin-left:5px;letter-spacing:0} +.meta-item .meta-subtext{font-family:var(--font-sans);font-size:.7rem;color:var(--text-muted); + margin-top:3px;line-height:1.35} +.meta-item--accent{border-color:var(--accent-primary)} +.meta-item--accent:hover{border-color:var(--accent-primary)} .kpi-detail{display:flex;flex-wrap:wrap;gap:3px;margin-top:2px} .kpi-detail code{font-size:.78rem} .kpi-micro{display:inline-flex;align-items:center;gap:3px;font-size:.62rem; @@ -848,13 +875,20 @@ .stat-cards .meta-item .meta-value,.dep-stats .meta-item .meta-value{display:flex;align-items:center} .stat-cards .kpi-detail,.dep-stats .kpi-detail{margin-top:0;align-self:end} .dep-graph-wrap{overflow:hidden;margin-bottom:var(--sp-4);border:1px solid var(--border); - border-radius:var(--radius-lg);background:var(--bg-surface);padding:var(--sp-4)} -.dep-graph-svg{display:block;width:100%;height:auto;max-height:680px;margin:0 auto} -.dep-graph-svg text{fill:var(--text-secondary);font-family:var(--font-mono)} -.dep-node{transition:fill-opacity var(--dur-fast) var(--ease)} -.dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease)} -.dep-label{transition:fill var(--dur-fast) var(--ease)} -.mm-candidate-ring{fill:none;stroke:var(--warning);stroke-width:1.5;stroke-opacity:.9} + border-radius:var(--radius-lg); + background:linear-gradient(180deg,var(--bg-surface),var(--bg-raised)); + padding:var(--sp-5)} +.dep-graph-svg{display:block;height:auto;margin:0 auto;overflow:visible} +.dep-graph-svg text{font-family:var(--font-mono)} +.dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease),stroke-width var(--dur-fast) var(--ease)} +.block-node{transition:opacity var(--dur-fast) var(--ease),filter var(--dur-fast) var(--ease); + vector-effect:non-scaling-stroke} +.block-node-label{font-size:12px;font-weight:600;pointer-events:none; + letter-spacing:.01em;transition:opacity var(--dur-fast) var(--ease)} +.block-node-ring{pointer-events:none;transition:opacity var(--dur-fast) var(--ease); + vector-effect:non-scaling-stroke} +.dep-graph-svg[data-graph-density="wide"] .block-node-label{font-size:12.5px} +.dep-graph-svg .block-node:hover{filter:brightness(1.08) drop-shadow(0 2px 6px rgb(79 70 229 / .18))} .mm-truncation-notice{margin-bottom:var(--sp-4);padding:var(--sp-2) var(--sp-4); font-size:.8rem;color:var(--text-muted);background:var(--bg-raised); border:1px solid var(--border);border-radius:var(--radius-lg)} diff --git a/codeclone/report/html/assets/js.py b/codeclone/report/html/assets/js.py index e4c948e5..569501a8 100644 --- a/codeclone/report/html/assets/js.py +++ b/codeclone/report/html/assets/js.py @@ -463,33 +463,51 @@ _DEP_GRAPH = """\ (function initDepGraph(){ - const svg=$('.dep-graph-svg'); - if(!svg)return; - const nodes=$$('.dep-node'); - const labels=$$('.dep-label'); - const edges=$$('.dep-edge'); - - function highlight(name){ - nodes.forEach(n=>{n.style.fillOpacity=n.dataset.node===name?'1':'0.15'}); - labels.forEach(l=>{l.style.fill=l.dataset.node===name?'var(--text-primary)':'var(--text-muted)'; - l.style.fillOpacity=l.dataset.node===name?'1':'0.3'}); + $$('.dep-graph-svg').forEach(svg=>{ + const q=s=>[...svg.querySelectorAll(s)]; + const nodes=q('.block-node'); + const labels=q('.block-node-label'); + const rings=q('.block-node-ring'); + const edges=q('.dep-edge'); + if(!nodes.length)return; + + const adj={}; edges.forEach(e=>{ - const connected=e.dataset.source===name||e.dataset.target===name; - e.style.strokeOpacity=connected?'0.8':'0.05'; - e.style.strokeWidth=connected?'2':'1'; + const s=e.dataset.source,t=e.dataset.target; + e.dataset.baseWidth=e.getAttribute('stroke-width')||'1'; + e.dataset.baseMarker=e.getAttribute('marker-end')||''; + (adj[s]=adj[s]||new Set()).add(t); + (adj[t]=adj[t]||new Set()).add(s); }); - } - function reset(){ - nodes.forEach(n=>{n.style.fillOpacity=''}); - labels.forEach(l=>{l.style.fill='';l.style.fillOpacity=''}); - edges.forEach(e=>{e.style.strokeOpacity='';e.style.strokeWidth=''}); - } + function highlight(name){ + const near=adj[name]||new Set(); + const on=n=>n===name||near.has(n); + [...nodes,...labels,...rings].forEach(el=>{ + el.style.opacity=on(el.dataset.node)?'1':'0.16'; + }); + edges.forEach(e=>{ + const connected=e.dataset.source===name||e.dataset.target===name; + e.style.strokeOpacity=connected?'0.9':'0.06'; + e.style.strokeWidth=connected?String(Number(e.dataset.baseWidth||1)+0.7):e.dataset.baseWidth; + e.setAttribute('marker-end',connected?e.dataset.baseMarker:'none'); + }); + } - [...nodes,...labels].forEach(el=>{ - el.addEventListener('mouseenter',()=>highlight(el.dataset.node)); - el.addEventListener('mouseleave',reset); - el.style.cursor='pointer'; + function reset(){ + [...nodes,...labels,...rings].forEach(el=>{el.style.opacity=''}); + edges.forEach(e=>{ + e.style.strokeOpacity=''; + e.style.strokeWidth=e.dataset.baseWidth||''; + e.setAttribute('marker-end',e.dataset.baseMarker||''); + }); + } + + [...nodes,...labels].forEach(el=>{ + el.addEventListener('mouseenter',()=>highlight(el.dataset.node)); + el.addEventListener('mouseleave',reset); + el.style.cursor='pointer'; + }); }); })(); """ diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index 3648e6f7..48afb04e 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -24,15 +24,12 @@ ) from ..widgets.components import Tone, insight_block from ..widgets.dep_graph_layout import ( + BlockNodeStyle, _build_cycle_edges, _build_degree_maps, - _build_layer_groups, - _build_node_radii, - _build_svg_defs, _hub_threshold, - _layout_dep_graph, - _render_dep_edges, - _render_dep_nodes_and_labels, + block_node_style_for, + render_block_diagram, ) from ..widgets.glossary import glossary_tip from ..widgets.tables import render_rows_table @@ -64,6 +61,21 @@ def _select_dep_nodes( return nodes, filtered +def _dep_node_style( + node: str, + *, + degree: int, + hub_threshold: int, + in_cycle: bool, +) -> BlockNodeStyle: + return block_node_style_for( + in_cycle=in_cycle, + is_hub=degree >= hub_threshold and degree > 2, + is_leaf=degree <= 1, + title=node, + ) + + def _render_dep_svg( edges: Sequence[tuple[str, str]], cycle_node_set: set[str], @@ -79,50 +91,23 @@ def _render_dep_svg( longest_chains=longest_chains, ) in_degree, out_degree = _build_degree_maps(nodes, filtered_edges) - layer_groups = _build_layer_groups(nodes, filtered_edges, in_degree, out_degree) - width, height, max_per_layer, positions = _layout_dep_graph( - layer_groups, - in_degree=in_degree, - out_degree=out_degree, - ) - prefer_horizontal = width > height hub_threshold = _hub_threshold(nodes, in_degree, out_degree) - node_radii = _build_node_radii( - nodes, - in_degree, - out_degree, - cycle_node_set, - hub_threshold, - ) cycle_edges = _build_cycle_edges(dep_cycles) - defs = _build_svg_defs() - edge_svg = _render_dep_edges(filtered_edges, positions, node_radii, cycle_edges) - node_svg, label_svg = _render_dep_nodes_and_labels( - nodes, - positions=positions, - node_radii=node_radii, - in_degree=in_degree, - out_degree=out_degree, - cycle_node_set=cycle_node_set, - hub_threshold=hub_threshold, - max_per_layer=max_per_layer, - prefer_horizontal=prefer_horizontal, - ) - label_pad = 44 if prefer_horizontal else (50 if max_per_layer > 6 else 0) - label_pad_x = 52 if prefer_horizontal else (28 if max_per_layer > 6 else 0) - vb_x = -label_pad_x - vb_y = -label_pad - vb_w = width + label_pad_x * 2 - vb_h = height + label_pad + def _style(node: str) -> BlockNodeStyle: + return _dep_node_style( + node, + degree=in_degree.get(node, 0) + out_degree.get(node, 0), + hub_threshold=hub_threshold, + in_cycle=node in cycle_node_set, + ) - return ( - '
' - f'' - f"{defs}{''.join(edge_svg)}{''.join(node_svg)}{''.join(label_svg)}" - "
" + return render_block_diagram( + nodes, + filtered_edges, + style_fn=_style, + aria_label="Module dependency graph", + danger_edges=cycle_edges, ) @@ -232,16 +217,19 @@ def render_dependencies_panel(ctx: ReportContext) -> str: else "" ) - # Legend + # Legend (box swatches matching the block-diagram nodes) legend = ( '
' '' - ' Hub' + ' Hub' '' - ' Leaf' + ' Leaf' '' - ' Cycle
' + ' ' + "Cycle
" ) # Tables diff --git a/codeclone/report/html/sections/_module_map.py b/codeclone/report/html/sections/_module_map.py index 16a567c4..02ebe21b 100644 --- a/codeclone/report/html/sections/_module_map.py +++ b/codeclone/report/html/sections/_module_map.py @@ -14,22 +14,18 @@ from __future__ import annotations -import math from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING from codeclone.utils import coerce as _coerce -from ..primitives.escape import _escape_html -from ..widgets.badges import _micro_badges, _short_label, _stat_card, _tab_empty +from ..widgets.badges import _micro_badges, _stat_card, _tab_empty from ..widgets.components import Tone, insight_block from ..widgets.dep_graph_layout import ( - _build_degree_maps, - _build_layer_groups, - _build_node_radii, - _build_svg_defs, + BlockNodeStyle, _hub_threshold, - _layout_dep_graph, + block_node_style_for, + render_block_diagram, ) from ..widgets.glossary import glossary_tip from ..widgets.tables import render_rows_table @@ -60,81 +56,23 @@ _MM_LEGEND = ( '
' '' - ' Hub' '' - '' - ' ' - "Overload candidate" + '' + ' Overload candidate' '' - ' ' + ' ' "In cycle" '' - ' Leaf
' + ' Leaf
' ) -def _mm_edge_stroke_width(weight: int) -> int: - if weight <= 1: - return 1 - return 1 + min(3, math.floor(math.log2(weight))) - - -def _render_mm_edges( - edges: Sequence[tuple[str, str]], - positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - weights: Mapping[tuple[str, str], int], -) -> list[str]: - rendered: list[str] = [] - for source, target in edges: - x1, y1 = positions[source] - x2, y2 = positions[target] - ux, uy = _unit_vector(x1, y1, x2, y2) - sx, sy = x1 + ux * (node_radii[source] + 2), y1 + uy * (node_radii[source] + 2) - tx, ty = x2 - ux * (node_radii[target] + 4), y2 - uy * (node_radii[target] + 4) - stroke_width = _mm_edge_stroke_width(weights.get((source, target), 1)) - rendered.append( - f'' - ) - return rendered - - -def _unit_vector(x1: float, y1: float, x2: float, y2: float) -> tuple[float, float]: - dx, dy = x2 - x1, y2 - y1 - distance = math.hypot(dx, dy) or 1.0 - return dx / distance, dy / distance - - -def _mm_node_fill( - *, in_cycle: bool, is_hub: bool, total_degree: int, is_tests: bool -) -> tuple[str, str, str]: - if in_cycle: - return ( - "var(--danger)", - "0.85", - 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', - ) - if is_hub: - return "var(--accent-primary)", "1", 'filter="url(#glow)"' - if total_degree <= 1: - return "var(--text-muted)", "0.4", "" - extra = ( - 'stroke="var(--border-strong)" stroke-width="1" stroke-dasharray="2,2"' - if is_tests - else "" - ) - return "var(--accent-primary)", "0.7", extra - - def _mm_node_title(node: Mapping[str, object], overloaded: Mapping[str, object]) -> str: reasons = ", ".join( str(reason) for reason in _as_sequence(overloaded.get("candidate_reasons")) @@ -149,48 +87,19 @@ def _mm_node_title(node: Mapping[str, object], overloaded: Mapping[str, object]) return title -def _render_mm_nodes( - nodes: Sequence[Mapping[str, object]], - *, - positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - hub_threshold: int, -) -> tuple[list[str], list[str]]: - nodes_svg: list[str] = [] - labels_svg: list[str] = [] - for node in nodes: - node_id = str(node.get("id")) - x, y = positions[node_id] - radius = node_radii[node_id] - total_degree = _as_int(node.get("total_degree")) - overloaded = _as_mapping(node.get("overloaded")) - is_hub = total_degree >= hub_threshold and total_degree > 2 - is_tests = [str(k) for k in _as_sequence(node.get("source_kinds"))] == ["tests"] - fill, opacity, extra = _mm_node_fill( - in_cycle=bool(node.get("in_cycle")), - is_hub=is_hub, - total_degree=total_degree, - is_tests=is_tests, - ) - nodes_svg.append( - f'' - ) - if str(overloaded.get("candidate_status")) == _CANDIDATE: - nodes_svg.append( - f'' - ) - labels_svg.append( - f'' - f"{_escape_html(_mm_node_title(node, overloaded))}" - f"{_escape_html(_short_label(node_id))}" - ) - return nodes_svg, labels_svg +def _mm_node_style(node: Mapping[str, object], *, hub_threshold: int) -> BlockNodeStyle: + total_degree = _as_int(node.get("total_degree")) + overloaded = _as_mapping(node.get("overloaded")) + is_candidate = str(overloaded.get("candidate_status")) == _CANDIDATE + is_tests = [str(k) for k in _as_sequence(node.get("source_kinds"))] == ["tests"] + return block_node_style_for( + in_cycle=bool(node.get("in_cycle")), + is_hub=total_degree >= hub_threshold and total_degree > 2, + is_leaf=total_degree <= 1, + ring="var(--warning)" if is_candidate else "", + dashed=is_tests, + title=_mm_node_title(node, overloaded), + ) def _render_module_map_svg(graph: Mapping[str, object]) -> str: @@ -198,42 +107,25 @@ def _render_module_map_svg(graph: Mapping[str, object]) -> str: if not nodes: return _tab_empty(_EMPTY_GRAPH_MESSAGE) node_ids = [str(node.get("id")) for node in nodes] + by_id = {str(node.get("id")): node for node in nodes} edge_rows = [_as_mapping(edge) for edge in _as_sequence(graph.get("edges"))] edges = [(str(e.get("source")), str(e.get("target"))) for e in edge_rows] weights = { (str(e.get("source")), str(e.get("target"))): _as_int(e.get("weight")) for e in edge_rows } - cycle_node_set = { - str(node.get("id")) for node in nodes if bool(node.get("in_cycle")) - } - total_in = {str(n.get("id")): _as_int(n.get("total_degree")) for n in nodes} - total_out = dict.fromkeys(node_ids, 0) - - layout_in, layout_out = _build_degree_maps(node_ids, edges) - layer_groups = _build_layer_groups(node_ids, edges, layout_in, layout_out) - width, height, _max_per_layer, positions = _layout_dep_graph( - layer_groups, in_degree=layout_in, out_degree=layout_out - ) - hub_threshold = _hub_threshold(node_ids, total_in, total_out) - node_radii = _build_node_radii( - node_ids, total_in, total_out, cycle_node_set, hub_threshold - ) - - defs = _build_svg_defs() - edge_svg = _render_mm_edges(edges, positions, node_radii, weights) - node_svg, label_svg = _render_mm_nodes( - nodes, positions=positions, node_radii=node_radii, hub_threshold=hub_threshold - ) - - pad = 60 - return ( - '
' - f'' - f"{defs}{''.join(edge_svg)}{''.join(node_svg)}{''.join(label_svg)}" - "
" + total_degree = {nid: _as_int(by_id[nid].get("total_degree")) for nid in node_ids} + hub_threshold = _hub_threshold(node_ids, total_degree, dict.fromkeys(node_ids, 0)) + + def _style(node_id: str) -> BlockNodeStyle: + return _mm_node_style(by_id[node_id], hub_threshold=hub_threshold) + + return render_block_diagram( + node_ids, + edges, + style_fn=_style, + aria_label="Module map graph", + edge_weight_fn=lambda edge: weights.get(edge, 1), ) @@ -241,33 +133,37 @@ def _mm_stat_cards( summary: Mapping[str, object], active_graph: Mapping[str, object] ) -> str: truncation = _as_mapping(active_graph.get("truncation")) + node_total = _as_int(truncation.get("node_universe_count")) + edge_total = _as_int(truncation.get("edge_universe_count")) + graph_subtext = ( + "deterministic sample" if bool(truncation.get("truncated")) else "full graph" + ) cards = [ _stat_card( "Nodes shown", _as_int(truncation.get("node_shown_count")), - detail=_micro_badges( - ("of", _as_int(truncation.get("node_universe_count"))) - ), + secondary=f"/ {node_total}", + subtext=graph_subtext, css_class="meta-item", glossary_tip_fn=glossary_tip, ), _stat_card( "Edges shown", _as_int(truncation.get("edge_shown_count")), - detail=_micro_badges( - ("of", _as_int(truncation.get("edge_universe_count"))) - ), + secondary=f"/ {edge_total}", + subtext=graph_subtext, css_class="meta-item", glossary_tip_fn=glossary_tip, ), _stat_card( "Unwind candidates", _as_int(summary.get("unwind_candidate_count")), - detail=_micro_badges( - ("modules", _as_int(summary.get("module_count"))), - ("packages", _as_int(summary.get("package_count_depth2"))), + subtext=( + f"of {_as_int(summary.get('module_count'))} modules · " + f"{_as_int(summary.get('package_count_depth2'))} packages" ), - css_class="meta-item", + value_tone="accent", + css_class="meta-item meta-item--accent", glossary_tip_fn=glossary_tip, ), ] @@ -333,6 +229,7 @@ def _mm_unwind_table(unwind_candidates: Sequence[object], ctx: ReportContext) -> headers=("Module", "Fan-in", "Fan-out", "Score", "Status", "Signals"), rows=rows, empty_message="No unwind candidates detected.", + column_types={"Score": "score", "Status": "status", "Signals": "chips"}, ctx=ctx, ) @@ -443,6 +340,7 @@ def _render_overloaded_modules_section(ctx: ReportContext) -> str: ), rows=rows, empty_message=_OVERLOADED_EMPTY_MESSAGE, + column_types={"Score": "score", "Status": "status"}, ctx=ctx, ) ) diff --git a/codeclone/report/html/widgets/badges.py b/codeclone/report/html/widgets/badges.py index ba77e0a3..d9600bd4 100644 --- a/codeclone/report/html/widgets/badges.py +++ b/codeclone/report/html/widgets/badges.py @@ -36,13 +36,16 @@ __all__ = [ "CHECK_CIRCLE_SVG", "INFO_CIRCLE_SVG", + "_chips_html", "_inline_empty", "_micro_badges", "_quality_badge_html", "_render_chain_flow", + "_score_bar_html", "_short_label", "_source_kind_badge_html", "_stat_card", + "_status_pill_html", "_tab_empty", "_tab_empty_info", ] @@ -111,6 +114,46 @@ def _source_kind_badge_html(source_kind: str) -> str: ) +_STATUS_PILL_CLASSES: dict[str, str] = { + "candidate": "status-pill--candidate", + "ranked_only": "status-pill--ranked", + "non_candidate": "status-pill--neutral", +} + + +def _status_pill_html(status: str) -> str: + """Render a candidate-status value as a coloured pill.""" + key = status.strip().lower() + if not key: + return "" + cls = _STATUS_PILL_CLASSES.get(key, "status-pill--neutral") + return ( + f'{_escape_html(key.replace("_", " "))}' + ) + + +def _score_bar_html(value: str) -> str: + """Render a 0..1 score as an indigo progress bar plus its rounded value.""" + try: + score = float(value) + except (TypeError, ValueError): + return _escape_html(str(value)) + pct = max(0, min(100, round(score * 100))) + strong = " score-bar--strong" if score >= 0.8 else "" + return ( + f'' + f'' + f'' + f'{score:.2f}' + ) + + +def _chips_html(text: str) -> str: + """Render a comma-separated string as a row of compact chips.""" + parts = [part.strip() for part in str(text).split(",") if part.strip()] + return "".join(f'{_escape_html(part)}' for part in parts) + + _INLINE_EMPTY_ICONS: dict[str, str] = { "good": ( '?' + secondary_html = ( + f'{_escape_html(secondary)}' + if secondary + else "" + ) + subtext_html = ( + f'
{_escape_html(subtext)}
' if subtext else "" + ) detail_html = "" if detail: detail_html = f'
{detail}
' @@ -267,7 +326,8 @@ def _stat_card( return ( f'
' f'
{_escape_html(label)}{tip_html}{delta_html}
' - f'
{_escape_html(str(value))}
' - f"{detail_html}" + f'
' + f"{_escape_html(str(value))}{secondary_html}
" + f"{subtext_html}{detail_html}" "
" ) diff --git a/codeclone/report/html/widgets/dep_graph_layout.py b/codeclone/report/html/widgets/dep_graph_layout.py index b5284726..8e9dac32 100644 --- a/codeclone/report/html/widgets/dep_graph_layout.py +++ b/codeclone/report/html/widgets/dep_graph_layout.py @@ -4,17 +4,22 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -"""Shared dependency-graph SVG layout primitives. - -Layout = topological depth; arrows = import direction (``source`` → ``target``). -Both the Dependencies tab (``sections/_dependencies.py``) and the Module map tab -(``sections/_module_map.py``) draw precomputed nodes/edges through these helpers, -so the SVG geometry stays identical across panels. +"""Shared block-diagram SVG layout for module/dependency graphs. + +Renders a layered flowchart: rectangular nodes with the label inside, stacked +top→bottom by topological depth, joined by lane-aware curved connectors whose +arrows point in import direction (``source`` → ``target``). Both the +Dependencies tab and the Module map tab draw through +:func:`render_block_diagram`, passing a per-node :class:`BlockNodeStyle` +callback, so the geometry stays identical and each tab only owns its own node +accents. """ from __future__ import annotations import math +from dataclasses import dataclass +from hashlib import sha1 from typing import TYPE_CHECKING from codeclone.utils.coerce import as_sequence @@ -23,7 +28,97 @@ from .badges import _short_label if TYPE_CHECKING: - from collections.abc import Mapping, Sequence + from collections.abc import Callable, Mapping, Sequence + from collections.abc import Set as AbstractSet + +_BOX_H = 32 +_BOX_W_MIN = 76 +_BOX_W_MAX = 184 +_BOX_CHAR_W = 8 +_BOX_PAD_X = 30 +_LABEL_PAD_X = 28 +_ROW_GAP = 92 +_COL_GAP = 30 +_BLOCK_PAD = 34 +_LABEL_MAX = 20 +_MAX_ROW_WIDTH = 980 +_WRAPPED_ROW_GAP = 54 +# Fan endpoints spread across this fraction of a box edge so converging arrows +# enter/leave at distinct points instead of clumping at the centre. +_FAN_SPREAD_FRAC = 0.70 +_FAN_SPREAD_STEP = 17.0 +_LANE_STEP = 10.0 +_COMPACT_NODE_LIMIT = 8 +_WIDE_NODE_LIMIT = 18 +_COMPACT_RENDER_MAX = 820 +_COMFORTABLE_RENDER_MAX = 1320 +_WIDE_RENDER_MAX = 1180 + + +@dataclass(frozen=True, slots=True) +class BlockNodeStyle: + """Per-node visual accent for a block-diagram node. + + ``ring`` draws an outer halo (overload candidate); ``dashed`` dashes the box + border (test-only modules); empty strings/False mean "no accent". + """ + + fill: str + text_fill: str + stroke: str = "var(--border)" + ring: str = "" + dashed: bool = False + title: str = "" + + +def block_node_style_for( + *, + in_cycle: bool, + is_hub: bool, + is_leaf: bool, + ring: str = "", + dashed: bool = False, + title: str = "", +) -> BlockNodeStyle: + """Shared node palette for both graph tabs (single visual vocabulary). + + Precedence: cycle (danger, dashed) → hub (indigo fill) → leaf (muted) → + ordinary. ``ring`` (overload candidate) and ``dashed`` (test-only modules) + are independent accents the caller opts into. + """ + if in_cycle: + return BlockNodeStyle( + fill="var(--bg-surface)", + text_fill="var(--danger)", + stroke="var(--danger)", + ring=ring, + dashed=True, + title=title, + ) + if is_hub: + return BlockNodeStyle( + fill="var(--accent-primary)", + text_fill="#fff", + stroke="var(--accent-primary)", + ring=ring, + title=title, + ) + if is_leaf: + return BlockNodeStyle( + fill="var(--bg-surface)", + text_fill="var(--text-muted)", + stroke="var(--border)", + ring=ring, + title=title, + ) + return BlockNodeStyle( + fill="var(--bg-overlay)", + text_fill="var(--text-secondary)", + stroke="var(--border-strong)", + ring=ring, + dashed=dashed, + title=title, + ) def _build_degree_maps( @@ -76,227 +171,407 @@ def _build_layer_groups( return layer_groups -def _layout_dep_graph( +def _hub_threshold( + nodes: Sequence[str], in_degree: Mapping[str, int], out_degree: Mapping[str, int] +) -> int: + degrees = [in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes] + if not degrees: + return 99 + degrees_sorted = sorted(degrees, reverse=True) + return int(degrees_sorted[max(0, len(degrees_sorted) // 5)]) + + +def _build_cycle_edges(dep_cycles: Sequence[object]) -> set[tuple[str, str]]: + cycle_edges: set[tuple[str, str]] = set() + for cycle in dep_cycles: + parts = [str(part) for part in as_sequence(cycle)] + for index in range(len(parts)): + cycle_edges.add((parts[index], parts[(index + 1) % len(parts)])) + return cycle_edges + + +def _box_width(label: str) -> int: + return min(_BOX_W_MAX, max(_BOX_W_MIN, len(label) * _BOX_CHAR_W + _BOX_PAD_X)) + + +def _label_fit_attrs(label: str, width: int) -> str: + """Clamp long SVG text to the node's inner width across browser fonts.""" + max_text_width = max(18.0, width - _LABEL_PAD_X) + if len(label) * _BOX_CHAR_W <= max_text_width: + return "" + return f' textLength="{max_text_width:.1f}" lengthAdjust="spacingAndGlyphs"' + + +def _edge_stroke_width(weight: int) -> int: + if weight <= 1: + return 1 + return 1 + min(2, math.floor(math.log2(weight))) + + +def _layout_block_diagram( layer_groups: Mapping[int, Sequence[str]], - *, - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], -) -> tuple[int, int, int, dict[str, tuple[float, float]]]: + box_widths: Mapping[str, int], + degree: Mapping[str, int] | None = None, +) -> tuple[int, int, dict[str, tuple[float, float]]]: + """Place each node box centre-aligned per topological layer (top→bottom).""" + degree = degree or {} num_layers = max(layer_groups.keys(), default=0) + 1 - max_per_layer = max((len(members) for members in layer_groups.values()), default=1) - pad_x, pad_y = 56.0, 36.0 - prefer_horizontal = num_layers >= 6 and num_layers > max_per_layer + 2 def _ordered_members(members: Sequence[str]) -> list[str]: - if not prefer_horizontal or len(members) < 3: + if len(members) < 3: return list(members) - ranked = sorted( - members, - key=lambda node: ( - -(in_degree.get(node, 0) + out_degree.get(node, 0)), - node, - ), - ) + ranked = sorted(members, key=lambda node: (-degree.get(node, 0), node)) center = (len(ranked) - 1) / 2 - slot_order = sorted( - range(len(ranked)), - key=lambda index: (abs(index - center), index), - ) + slots = sorted(range(len(ranked)), key=lambda idx: (abs(idx - center), idx)) ordered = [""] * len(ranked) - for node, slot in zip(ranked, slot_order, strict=False): + for node, slot in zip(ranked, slots, strict=False): ordered[slot] = node return ordered - if prefer_horizontal: - width = max(920, min(1600, num_layers * 118 + max_per_layer * 28 + 180)) - height = max(300, max_per_layer * 84 + 104) - else: - width = max(600, min(1200, max_per_layer * 70 + 140)) - height = max(260, num_layers * 80 + 80) + def _row_width(members: Sequence[str]) -> int: + if not members: + return 0 + return sum(box_widths[m] for m in members) + _COL_GAP * (len(members) - 1) + + def _wrapped_rows(members: Sequence[str]) -> list[list[str]]: + rows: list[list[str]] = [] + current: list[str] = [] + current_width = 0 + for member in members: + member_width = box_widths[member] + next_width = ( + member_width if not current else current_width + _COL_GAP + member_width + ) + if current and next_width > _MAX_ROW_WIDTH: + rows.append(current) + current = [member] + current_width = member_width + continue + current.append(member) + current_width = next_width + if current: + rows.append(current) + return rows or [[]] + + visual_rows: list[list[str]] = [] + row_logical_layers: list[int] = [] + for layer in range(num_layers): + rows = _wrapped_rows(_ordered_members(layer_groups.get(layer, []))) + visual_rows.extend(rows) + row_logical_layers.extend([layer] * len(rows)) + row_widths = [_row_width(row) for row in visual_rows] + canvas_width = max(row_widths, default=0) positions: dict[str, tuple[float, float]] = {} - for layer_index in range(num_layers): - members = layer_groups.get(layer_index, []) - count = len(members) - if prefer_horizontal: - members = _ordered_members(members) - layer_step = (width - 2 * pad_x) / max(1, num_layers - 1) - x = pad_x + layer_index * layer_step - fan = min(14.0, layer_step * 0.12) - offset_unit = fan / max(1, count - 1) - center = (count - 1) / 2 - for index, node in enumerate(members): - y = pad_y + (index + 0.5) * ((height - 2 * pad_y) / max(1, count)) - positions[node] = (x + (index - center) * offset_unit, y) - continue - - y = pad_y + layer_index * ((height - 2 * pad_y) / max(1, num_layers - 1)) - for index, node in enumerate(members): - x = pad_x + (index + 0.5) * ((width - 2 * pad_x) / max(1, count)) - positions[node] = (x, y) - return width, height, max_per_layer, positions - - -def _hub_threshold( - nodes: Sequence[str], in_degree: Mapping[str, int], out_degree: Mapping[str, int] -) -> int: - degrees = [in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes] - if not degrees: - return 99 - degrees_sorted = sorted(degrees, reverse=True) - return int(degrees_sorted[max(0, len(degrees_sorted) // 5)]) + current_y = _BOX_H / 2 + previous_layer: int | None = None + for visual_index, members in enumerate(visual_rows): + layer = row_logical_layers[visual_index] + if visual_index > 0: + current_y += _WRAPPED_ROW_GAP if previous_layer == layer else _ROW_GAP + cursor = (canvas_width - row_widths[visual_index]) / 2 + for member in members: + width = box_widths[member] + positions[member] = (cursor + width / 2, current_y) + cursor += width + _COL_GAP + previous_layer = layer + canvas_height = int( + (max((pos[1] for pos in positions.values()), default=_BOX_H / 2)) + _BOX_H / 2 + ) + return canvas_width, canvas_height, positions -def _build_node_radii( - nodes: Sequence[str], - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], - cycle_node_set: set[str], - hub_threshold: int, -) -> dict[str, float]: - node_radii: dict[str, float] = {} - for node in nodes: - degree = in_degree.get(node, 0) + out_degree.get(node, 0) - if node in cycle_node_set: - node_radii[node] = min(8.0, max(5.0, 3.5 + degree * 0.4)) - elif degree >= hub_threshold and degree > 2: - node_radii[node] = min(10.0, max(6.0, 4.0 + degree * 0.5)) - elif degree <= 1: - node_radii[node] = 3.0 - else: - node_radii[node] = min(6.0, max(3.5, 3.0 + degree * 0.3)) - return node_radii +def _marker_suffix( + nodes: Sequence[str], edges: Sequence[tuple[str, str]], aria_label: str +) -> str: + payload = "\n".join( + [aria_label, *nodes, *[f"{source}->{target}" for source, target in edges]] + ) + return sha1(payload.encode("utf-8")).hexdigest()[:10] -def _build_svg_defs() -> str: +def _block_diagram_defs(marker_suffix: str) -> str: + arrow_id = f"block-arrow-{marker_suffix}" + danger_id = f"block-arrow-danger-{marker_suffix}" return ( "" - '' - '' - '' - '' - '' - '' - '' + f'' + '' + f'' + '' "" ) -def _build_cycle_edges(dep_cycles: Sequence[object]) -> set[tuple[str, str]]: - cycle_edges: set[tuple[str, str]] = set() - for cycle in dep_cycles: - parts = [str(part) for part in as_sequence(cycle)] - for index in range(len(parts)): - cycle_edges.add((parts[index], parts[(index + 1) % len(parts)])) - return cycle_edges +def _marker_url(*, marker_suffix: str, danger: bool) -> str: + marker = "block-arrow-danger" if danger else "block-arrow" + return f"url(#{marker}-{marker_suffix})" + + +def _spread_x(center_x: float, box_width: int, rank: int, count: int) -> float: + """Distribute *count* edge endpoints across a box edge, ordered by *rank*.""" + if count <= 1: + return center_x + span = min(box_width * _FAN_SPREAD_FRAC, _FAN_SPREAD_STEP * (count - 1)) + return center_x - span / 2 + rank * (span / (count - 1)) + + +def _rank_endpoints( + edges: Sequence[tuple[str, str]], + positions: Mapping[str, tuple[float, float]], + *, + by_key: int, + sort_key: int, +) -> tuple[dict[tuple[str, str], int], dict[str, int]]: + groups: dict[str, list[tuple[str, str]]] = {} + for edge in edges: + groups.setdefault(edge[by_key], []).append(edge) + for group in groups.values(): + group.sort(key=lambda edge: positions[edge[sort_key]][0]) + rank = { + edge: index for group in groups.values() for index, edge in enumerate(group) + } + count = {node: len(group) for node, group in groups.items()} + return rank, count + + +def _rank_lanes( + edges: Sequence[tuple[str, str]], + positions: Mapping[str, tuple[float, float]], +) -> tuple[dict[tuple[str, str], int], dict[tuple[int, int], int]]: + groups: dict[tuple[int, int], list[tuple[str, str]]] = {} + for edge in edges: + sy = round(positions[edge[0]][1]) + ty = round(positions[edge[1]][1]) + groups.setdefault((sy, ty), []).append(edge) + for group in groups.values(): + group.sort( + key=lambda edge: (positions[edge[0]][0], positions[edge[1]][0], edge) + ) + rank = { + edge: index for group in groups.values() for index, edge in enumerate(group) + } + count = {key: len(group) for key, group in groups.items()} + return rank, count + +def _lane_offset(rank: int, count: int) -> float: + if count <= 1: + return 0.0 + return (rank - (count - 1) / 2) * _LANE_STEP -def _render_dep_edges( + +def _curved_vertical_path( + exit_x: float, + exit_y: float, + entry_x: float, + entry_y: float, + *, + lane: float, +) -> str: + mid = (exit_y + entry_y) / 2 + lane + return ( + f"M{exit_x:.1f},{exit_y:.1f} " + f"C{exit_x:.1f},{mid:.1f} {entry_x:.1f},{mid:.1f} " + f"{entry_x:.1f},{entry_y:.1f}" + ) + + +def _same_layer_path( + source_x: float, + source_y: float, + target_x: float, + target_y: float, + source_width: int, + target_width: int, + *, + lane: float, +) -> str: + side = 1 if target_x >= source_x else -1 + exit_x = source_x + side * source_width / 2 + entry_x = target_x - side * target_width / 2 + lift = _BOX_H * 1.75 + abs(lane) + bend_y = min(source_y, target_y) - lift + return ( + f"M{exit_x:.1f},{source_y:.1f} " + f"C{exit_x + side * 24:.1f},{bend_y:.1f} " + f"{entry_x - side * 24:.1f},{bend_y:.1f} " + f"{entry_x:.1f},{target_y:.1f}" + ) + + +def _render_block_edges( edges: Sequence[tuple[str, str]], positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - cycle_edges: set[tuple[str, str]], + box_widths: Mapping[str, int], + box_heights: Mapping[str, int], + *, + danger_edges: AbstractSet[tuple[str, str]], + weight_fn: Callable[[tuple[str, str]], int] | None, + marker_suffix: str, ) -> list[str]: + out_rank, out_count = _rank_endpoints(edges, positions, by_key=0, sort_key=1) + in_rank, in_count = _rank_endpoints(edges, positions, by_key=1, sort_key=0) + lane_rank, lane_count = _rank_lanes(edges, positions) rendered: list[str] = [] for source, target in edges: - x1, y1 = positions[source] - x2, y2 = positions[target] - source_radius, target_radius = node_radii[source], node_radii[target] - dx, dy = x2 - x1, y2 - y1 - distance = math.sqrt(dx * dx + dy * dy) or 1.0 - ux, uy = dx / distance, dy / distance - x1a, y1a = x1 + ux * (source_radius + 2), y1 + uy * (source_radius + 2) - x2a, y2a = x2 - ux * (target_radius + 4), y2 - uy * (target_radius + 4) - mx = (x1a + x2a) / 2 - (y2a - y1a) * 0.06 - my = (y1a + y2a) / 2 + (x2a - x1a) * 0.06 - is_cycle = (source, target) in cycle_edges - stroke = "var(--danger)" if is_cycle else "var(--border-strong)" - opacity = "0.6" if is_cycle else "0.3" - marker = "dep-arrow-cycle" if is_cycle else "dep-arrow" + sx, sy = positions[source] + tx, ty = positions[target] + lane_key = (round(sy), round(ty)) + lane = _lane_offset(lane_rank[(source, target)], lane_count[lane_key]) + if ty > sy + box_heights[source]: + exit_x = _spread_x( + sx, box_widths[source], out_rank[(source, target)], out_count[source] + ) + entry_x = _spread_x( + tx, box_widths[target], in_rank[(source, target)], in_count[target] + ) + path = _curved_vertical_path( + exit_x, + sy + box_heights[source] / 2, + entry_x, + ty - box_heights[target] / 2, + lane=lane, + ) + elif ty < sy - box_heights[source]: + exit_x = _spread_x( + sx, box_widths[source], out_rank[(source, target)], out_count[source] + ) + entry_x = _spread_x( + tx, box_widths[target], in_rank[(source, target)], in_count[target] + ) + path = _curved_vertical_path( + exit_x, + sy - box_heights[source] / 2, + entry_x, + ty + box_heights[target] / 2, + lane=lane, + ) + else: + path = _same_layer_path( + sx, + sy, + tx, + ty, + box_widths[source], + box_widths[target], + lane=lane, + ) + is_danger = (source, target) in danger_edges + stroke = "var(--danger)" if is_danger else "var(--border-strong)" + opacity = "0.66" if is_danger else "0.34" + weight = weight_fn((source, target)) if weight_fn is not None else 1 + marker_url = _marker_url(marker_suffix=marker_suffix, danger=is_danger) rendered.append( f'' + f'd="{path}" fill="none" stroke="{stroke}" stroke-opacity="{opacity}" ' + 'stroke-linecap="round" stroke-linejoin="round" ' + f'stroke-width="{_edge_stroke_width(weight)}" ' + f'marker-end="{marker_url}">' + "" + f"{_escape_html(source)} → {_escape_html(target)}" ) return rendered -def _render_dep_nodes_and_labels( +def _render_block_nodes( nodes: Sequence[str], - *, positions: Mapping[str, tuple[float, float]], - node_radii: Mapping[str, float], - in_degree: Mapping[str, int], - out_degree: Mapping[str, int], - cycle_node_set: set[str], - hub_threshold: int, - max_per_layer: int, - prefer_horizontal: bool, -) -> tuple[list[str], list[str]]: - nodes_svg: list[str] = [] - labels_svg: list[str] = [] - rotate_labels = prefer_horizontal or max_per_layer > 6 - + box_widths: Mapping[str, int], + style_fn: Callable[[str], BlockNodeStyle], +) -> list[str]: + rendered: list[str] = [] for node in nodes: - x, y = positions[node] - radius = node_radii[node] - degree = in_degree.get(node, 0) + out_degree.get(node, 0) - label = _short_label(node) - is_cycle = node in cycle_node_set - is_hub = degree >= hub_threshold and degree > 2 - is_secondary = not is_hub and not is_cycle - - if is_cycle: - fill, fill_opacity, extra = ( - "var(--danger)", - "0.85", - 'stroke="var(--danger)" stroke-width="1.5" stroke-dasharray="3,2"', - ) - elif is_hub: - fill, fill_opacity, extra = ( - "var(--accent-primary)", - "1", - 'filter="url(#glow)"', + cx, cy = positions[node] + width = box_widths[node] + x = cx - width / 2 + y = cy - _BOX_H / 2 + style = style_fn(node) + label = _short_label(node, _LABEL_MAX) + parts: list[str] = [] + if style.ring: + parts.append( + f'' ) - elif degree <= 1: - fill, fill_opacity, extra = "var(--text-muted)", "0.4", "" - else: - fill, fill_opacity, extra = "var(--accent-primary)", "0.7", "" - - nodes_svg.append( - f'' + dash = ' stroke-dasharray="4,3"' if style.dashed else "" + parts.append( + f'' ) - - font_size = "10" if is_hub else ("8" if is_secondary else "9") - if rotate_labels: - label_x = ( - x + radius + (4 if is_secondary else 6 if prefer_horizontal else 0) - ) - label_y = ( - y - radius - (1 if is_secondary else 2 if prefer_horizontal else 6) - ) - labels_svg.append( - f'' - f"{_escape_html(node)}{_escape_html(label)}" - ) - continue - - labels_svg.append( - f'' - f"{_escape_html(node)}{_escape_html(label)}" + parts.append( + f'" + f"{_escape_html(style.title or node)}" + f"{_escape_html(label)}" ) + rendered.append("".join(parts)) + return rendered + - return nodes_svg, labels_svg +def render_block_diagram( + nodes: Sequence[str], + edges: Sequence[tuple[str, str]], + *, + style_fn: Callable[[str], BlockNodeStyle], + aria_label: str, + danger_edges: AbstractSet[tuple[str, str]] = frozenset(), + edge_weight_fn: Callable[[tuple[str, str]], int] | None = None, +) -> str: + """Render a layered block diagram for *nodes* / *edges* as a single SVG.""" + in_degree, out_degree = _build_degree_maps(nodes, edges) + layer_groups = _build_layer_groups(nodes, edges, in_degree, out_degree) + box_widths = {node: _box_width(_short_label(node, _LABEL_MAX)) for node in nodes} + box_heights = dict.fromkeys(nodes, _BOX_H) + degree = {node: in_degree.get(node, 0) + out_degree.get(node, 0) for node in nodes} + width, height, positions = _layout_block_diagram( + layer_groups, box_widths, degree=degree + ) + marker_suffix = _marker_suffix(nodes, edges, aria_label) + + edge_svg = _render_block_edges( + edges, + positions, + box_widths, + box_heights, + danger_edges=danger_edges, + weight_fn=edge_weight_fn, + marker_suffix=marker_suffix, + ) + node_svg = _render_block_nodes(nodes, positions, box_widths, style_fn) + vb_w = width + _BLOCK_PAD * 2 + vb_h = height + _BLOCK_PAD * 2 + if len(nodes) >= _WIDE_NODE_LIMIT or vb_w >= 980: + density = "wide" + render_width = min(max(round(vb_w * 1.08), 1040), _WIDE_RENDER_MAX) + svg_style = f"width:100%;max-width:{render_width}px" + elif len(nodes) > _COMPACT_NODE_LIMIT: + density = "comfortable" + render_width = min(max(round(vb_w * 1.18), 900), _COMFORTABLE_RENDER_MAX) + svg_style = f"width:100%;max-width:{render_width}px" + else: + density = "compact" + render_width = min(round(vb_w * 1.45), _COMPACT_RENDER_MAX) + svg_style = f"width:100%;max-width:{render_width}px" + return ( + '
' + f'' + f"{_block_diagram_defs(marker_suffix)}{''.join(edge_svg)}{''.join(node_svg)}" + "
" + ) diff --git a/codeclone/report/html/widgets/tables.py b/codeclone/report/html/widgets/tables.py index 59cbf0e6..2e270823 100644 --- a/codeclone/report/html/widgets/tables.py +++ b/codeclone/report/html/widgets/tables.py @@ -8,11 +8,17 @@ from __future__ import annotations -from collections.abc import Collection, Sequence +from collections.abc import Collection, Mapping, Sequence from typing import TYPE_CHECKING from ..primitives.escape import _escape_html -from .badges import _quality_badge_html, _tab_empty +from .badges import ( + _chips_html, + _quality_badge_html, + _score_bar_html, + _status_pill_html, + _tab_empty, +) from .glossary import glossary_tip if TYPE_CHECKING: @@ -67,6 +73,19 @@ _COL_CLS["steps"] = "col-steps" +_CELL_RENDERERS = { + "score": _score_bar_html, + "status": _status_pill_html, + "chips": _chips_html, +} + +_CELL_TYPE_CLS = { + "score": "col-score", + "status": "col-badge", + "chips": "col-chips", +} + + def render_rows_table( *, headers: Sequence[str], @@ -74,14 +93,22 @@ def render_rows_table( empty_message: str, empty_description: str | None = "Nothing to report - keep up the good work.", raw_html_headers: Collection[str] = (), + column_types: Mapping[str, str] | None = None, ctx: ReportContext | None = None, ) -> str: - """Render a data table with badges, tooltips, and col sizing.""" + """Render a data table with badges, tooltips, and col sizing. + + *column_types* maps a header to a typed cell renderer: ``"score"`` (indigo + progress bar + value), ``"status"`` (candidate-status pill), or ``"chips"`` + (comma-separated values as compact chips). Typed columns own their own + badge markup, so the table stays the single rendering authority. + """ if not rows: return _tab_empty(empty_message, description=empty_description) lower_headers = [h.lower() for h in headers] raw_html_set = {h.lower() for h in raw_html_headers} + typed_cols = {h.lower(): t for h, t in (column_types or {}).items()} # colgroup cg = [""] @@ -98,6 +125,10 @@ def render_rows_table( # tbody def _td(col_idx: int, cell: str) -> str: h = lower_headers[col_idx] if col_idx < len(lower_headers) else "" + cell_type = typed_cols.get(h) + if cell_type in _CELL_RENDERERS: + cls = _CELL_TYPE_CLS[cell_type] + return f'{_CELL_RENDERERS[cell_type](cell)}' cls = _COL_CLS.get(h, "") cls_attr = f' class="{cls}"' if cls else "" if h in raw_html_set: diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 31dbd53a..e5041c20 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1744,7 +1744,8 @@ def test_html_report_metrics_risk_branches() -> None: "5 candidates total; 2 high-confidence items; 0 suppressed.", '", + card_class="review-card", + data_attrs=' data-id="x"', + ) + assert_all_contained( + card, + 'class="finding-card finding-card--warning review-card" data-id="x"', + ' Date: Sat, 20 Jun 2026 21:38:37 +0500 Subject: [PATCH 010/113] feat(report): add derived review queue projection for guided finding review --- codeclone/report/document/builder.py | 2 + codeclone/report/document/derived.py | 74 ++++++++++++++++++++-- tests/test_module_map.py | 92 +++++++++++++++++++++++++++- 3 files changed, 162 insertions(+), 6 deletions(-) diff --git a/codeclone/report/document/builder.py b/codeclone/report/document/builder.py index 27e36cbc..8b80ab9c 100644 --- a/codeclone/report/document/builder.py +++ b/codeclone/report/document/builder.py @@ -26,6 +26,7 @@ from .derived import ( _build_derived_module_map, _build_derived_overview, + _build_derived_review_queue, _build_derived_suggestions, ) from .findings import _build_findings_payload @@ -100,6 +101,7 @@ def build_report_document( "overview": overview_payload, "hotlists": hotlists_payload, "module_map": _build_derived_module_map(metrics_payload), + "review_queue": _build_derived_review_queue(suggestions), } integrity_payload = _build_integrity_payload( report_schema_version=report_schema_version, diff --git a/codeclone/report/document/derived.py b/codeclone/report/document/derived.py index 3461ea90..205661d8 100644 --- a/codeclone/report/document/derived.py +++ b/codeclone/report/document/derived.py @@ -398,11 +398,12 @@ def _suggestion_finding_id(suggestion: Suggestion) -> str: ) -def _build_derived_suggestions( +def _sorted_suggestions( suggestions: Sequence[Suggestion] | None, -) -> list[dict[str, object]]: - suggestion_rows = list(suggestions or ()) - suggestion_rows.sort( +) -> list[Suggestion]: + """Deterministic priority order shared by every suggestion-derived view.""" + rows = list(suggestions or ()) + rows.sort( key=lambda suggestion: ( -suggestion.priority, SEVERITY_ORDER.get(suggestion.severity, 9), @@ -410,6 +411,12 @@ def _build_derived_suggestions( _suggestion_finding_id(suggestion), ) ) + return rows + + +def _build_derived_suggestions( + suggestions: Sequence[Suggestion] | None, +) -> list[dict[str, object]]: return [ { "id": f"suggestion:{_suggestion_finding_id(suggestion)}", @@ -423,10 +430,67 @@ def _build_derived_suggestions( "steps": list(suggestion.steps), }, } - for suggestion in suggestion_rows + for suggestion in _sorted_suggestions(suggestions) ] +_REVIEW_QUEUE_SCHEMA_VERSION: Final = "1" +_REVIEW_SEVERITIES: Final = ("critical", "warning", "info") + + +def _review_item_row(suggestion: Suggestion) -> dict[str, object]: + finding_id = _suggestion_finding_id(suggestion) + return { + "id": f"suggestion:{finding_id}", + "finding_id": finding_id, + "family": suggestion.finding_family, + "category": suggestion.category, + "severity": suggestion.severity, + "priority": suggestion.priority, + "source_kind": suggestion.source_kind, + "title": suggestion.title, + "summary": suggestion.fact_summary, + "location": suggestion.location_label or suggestion.location, + "representative_locations": _representative_location_rows(suggestion), + "effort": suggestion.effort, + "steps": list(suggestion.steps), + } + + +def _build_derived_review_queue( + suggestions: Sequence[Suggestion] | None, +) -> dict[str, object]: + """Prioritised cross-family actionable queue that drives the review hub. + + Items are the actionable suggestions (clones, structural, dead-code, design + all normalise into suggestions), ordered by priority; the summary carries the + counts the UI needs for progress and filters. ``reviewed`` starts at 0 — the + HTML tracks per-finding review state client-side. + """ + rows = _sorted_suggestions(suggestions) + by_severity = dict.fromkeys(_REVIEW_SEVERITIES, 0) + by_family: dict[str, int] = {} + for suggestion in rows: + by_severity[suggestion.severity] += 1 + by_family[suggestion.finding_family] = ( + by_family.get(suggestion.finding_family, 0) + 1 + ) + return { + "schema_version": _REVIEW_QUEUE_SCHEMA_VERSION, + "scope": "report_only", + "summary": { + "total": len(rows), + "reviewed": 0, + "by_severity": by_severity, + "by_family": dict(sorted(by_family.items())), + "top_priority": max( + (suggestion.priority for suggestion in rows), default=0.0 + ), + }, + "items": [_review_item_row(suggestion) for suggestion in rows], + } + + _MODULE_MAP_SCHEMA_VERSION: Final = "1" _MODULE_MAP_MAX_PACKAGE_NODES: Final = 28 _MODULE_MAP_MAX_MODULE_NODES: Final = 40 diff --git a/tests/test_module_map.py b/tests/test_module_map.py index 4dc5f7eb..6e383806 100644 --- a/tests/test_module_map.py +++ b/tests/test_module_map.py @@ -7,8 +7,9 @@ from __future__ import annotations from collections.abc import Sequence -from typing import Any +from typing import Any, cast +from codeclone.models import Suggestion from codeclone.report.document.derived import _build_derived_module_map @@ -217,3 +218,92 @@ def test_ranked_only_population_has_no_candidate_overlay() -> None: for node in module_map["graph_modules"]["nodes"] } assert "candidate" not in statuses + + +def _review_suggestion( + *, + severity: str, + category: str, + family: str, + title: str, + priority: float, + effort: str, + subject_key: str, +) -> Suggestion: + return Suggestion( + severity=cast("Any", severity), + category=cast("Any", category), + title=title, + location=f"pkg/{subject_key}.py:1", + steps=("do the thing",), + effort=cast("Any", effort), + priority=priority, + finding_family=cast("Any", family), + subject_key=subject_key, + fact_summary=f"{title} summary", + ) + + +def test_build_derived_review_queue_orders_and_summarizes() -> None: + from codeclone.report.document.derived import _build_derived_review_queue + + suggestions = [ + _review_suggestion( + severity="warning", + category="structural", + family="structural", + title="B structural", + priority=0.5, + effort="moderate", + subject_key="b", + ), + _review_suggestion( + severity="critical", + category="clone", + family="clones", + title="A duplicated", + priority=0.9, + effort="hard", + subject_key="a", + ), + _review_suggestion( + severity="info", + category="dead_code", + family="metrics", + title="C unused", + priority=0.2, + effort="easy", + subject_key="c", + ), + ] + queue: Any = _build_derived_review_queue(suggestions) + assert queue["schema_version"] == "1" + assert queue["scope"] == "report_only" + summary = queue["summary"] + assert summary["total"] == 3 + assert summary["reviewed"] == 0 + assert summary["by_severity"] == {"critical": 1, "warning": 1, "info": 1} + assert summary["by_family"] == {"clones": 1, "metrics": 1, "structural": 1} + assert summary["top_priority"] == 0.9 + # priority-ordered: A(0.9) -> B(0.5) -> C(0.2) + assert [item["title"] for item in queue["items"]] == [ + "A duplicated", + "B structural", + "C unused", + ] + first = queue["items"][0] + assert first["family"] == "clones" + assert first["severity"] == "critical" + assert first["effort"] == "hard" + assert first["location"] == "pkg/a.py:1" + assert str(first["id"]).startswith("suggestion:") + + +def test_build_derived_review_queue_empty_shell() -> None: + from codeclone.report.document.derived import _build_derived_review_queue + + queue: Any = _build_derived_review_queue(None) + assert queue["items"] == [] + assert queue["summary"]["total"] == 0 + assert queue["summary"]["top_priority"] == 0.0 + assert queue["summary"]["by_severity"] == {"critical": 0, "warning": 0, "info": 0} From 7ddfb5b7f6502f66137066ef6d262470f52bb3cd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 20 Jun 2026 21:52:26 +0500 Subject: [PATCH 011/113] feat(html): add Review hub tab with prioritized finding queue --- codeclone/report/html/assemble.py | 15 +++ codeclone/report/html/assets/css.py | 37 ++++++ codeclone/report/html/assets/js.py | 60 +++++++++ codeclone/report/html/sections/_review.py | 149 ++++++++++++++++++++++ codeclone/report/html/widgets/icons.py | 5 + codeclone/report/messages/chrome.py | 1 + tests/test_html_report.py | 114 +++++++++++++++++ 7 files changed, 381 insertions(+) create mode 100644 codeclone/report/html/sections/_review.py diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index c7af8a16..b0573d25 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -43,6 +43,7 @@ TAB_MODULE_MAP, TAB_OVERVIEW, TAB_QUALITY, + TAB_REVIEW, TAB_SUGGESTIONS, TABLIST_ARIA_LABEL, THEME_BUTTON_TEXT, @@ -59,6 +60,7 @@ from .sections._meta import build_topbar_provenance_summary, render_meta_panel from .sections._module_map import render_module_map_panel from .sections._overview import render_overview_panel +from .sections._review import render_review_panel from .sections._structural import render_structural_panel from .sections._suggestions import render_suggestions_panel from .template import FONT_CSS_URL, REPORT_TEMPLATE @@ -113,6 +115,7 @@ def build_html_report( # -- Render sections -- overview_html = render_overview_panel(ctx) + review_html = render_review_panel(ctx) clones_html, _novelty_enabled, _total_new, _total_known = render_clones_panel(ctx) quality_html = render_quality_panel(ctx) module_map_html = render_module_map_panel(ctx) @@ -143,6 +146,11 @@ def build_html_report( _as_mapping(ctx.derived_map.get("module_map")).get("summary") ) module_map_unwind = _as_int(module_map_summary.get("unwind_candidate_count")) + review_total = _as_int( + _as_mapping( + _as_mapping(ctx.derived_map.get("review_queue")).get("summary") + ).get("total") + ) structural_count = len( tuple(normalize_structural_findings(ctx.structural_findings)) ) @@ -172,6 +180,7 @@ def _tab_badge(count: int, unit: str) -> str: # -- Main tab navigation -- tab_icon_keys: dict[str, str] = { "overview": "overview", + "review": "review", "clones": "clones", "quality": "quality", "module-map": "module-map", @@ -182,6 +191,12 @@ def _tab_badge(count: int, unit: str) -> str: } tab_defs = [ ("overview", TAB_OVERVIEW, overview_html, ""), + ( + "review", + TAB_REVIEW, + review_html, + _tab_badge(review_total, "findings to review"), + ), ( "clones", TAB_CLONES, diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 983632a5..b37e97df 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -992,6 +992,43 @@ /* Body — context + summary */ .finding-card-body{margin-top:9px;display:flex;flex-direction:column;gap:var(--sp-1)} + +/* Review hub: progress · filters · queue · per-item reviewed toggle */ +.review-progress{background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg);padding:var(--sp-3) var(--sp-4);margin-bottom:var(--sp-4)} +.review-progress-head{display:flex;justify-content:space-between;align-items:baseline; + font-size:.78rem;color:var(--text-secondary);margin-bottom:7px} +.review-progress-title{font-weight:500} +.review-progress-label b{color:var(--text-primary);font-variant-numeric:tabular-nums} +.review-progress-track{height:7px;border-radius:4px;background:var(--bg-overlay);overflow:hidden} +.review-progress-bar{height:100%;border-radius:4px;background:var(--accent-primary); + transition:width var(--dur-base) var(--ease)} +.review-filters{display:flex;flex-wrap:wrap;align-items:center;gap:6px;margin-bottom:var(--sp-4)} +.review-filter-sep{width:1px;align-self:stretch;background:var(--border);margin:2px var(--sp-1)} +.review-chip{display:inline-flex;align-items:center;gap:5px;font-size:.72rem; + font-family:var(--font-sans);padding:4px 10px;border-radius:999px;cursor:pointer; + background:var(--bg-overlay);color:var(--text-secondary);border:1px solid var(--border); + transition:border-color var(--dur-fast) var(--ease),color var(--dur-fast) var(--ease)} +.review-chip:hover{border-color:var(--border-strong)} +.review-chip.is-active{border-color:var(--accent-primary);color:var(--accent-primary); + background:var(--accent-muted)} +.review-chip--critical.is-active{border-color:var(--danger);color:var(--danger); + background:color-mix(in oklch,var(--danger) 16%,transparent)} +.review-chip--warning.is-active{border-color:var(--warning);color:var(--warning); + background:color-mix(in oklch,var(--warning) 16%,transparent)} +.review-chip-count{font-family:var(--font-numeric);font-variant-numeric:tabular-nums; + font-size:.66rem;opacity:.85} +.review-queue{display:flex;flex-direction:column;gap:9px} +.review-toggle{display:inline-flex;align-items:center;justify-content:center; + width:30px;height:30px;border-radius:8px;cursor:pointer;color:var(--text-muted); + background:transparent;border:1px solid var(--border); + transition:border-color var(--dur-fast) var(--ease),color var(--dur-fast) var(--ease)} +.review-toggle:hover{border-color:var(--accent-primary);color:var(--accent-primary)} +.review-card.is-reviewed{opacity:.55} +.review-card.is-reviewed .finding-card-title-text{text-decoration:line-through; + text-decoration-color:var(--text-muted)} +.review-card.is-reviewed .review-toggle{background:var(--accent-primary); + border-color:var(--accent-primary);color:#fff} .suggestion-context{display:flex;gap:var(--sp-1);flex-wrap:wrap} .suggestion-chip{font-size:.68rem;font-weight:500;padding:2px var(--sp-2);border-radius:var(--radius-sm); background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} diff --git a/codeclone/report/html/assets/js.py b/codeclone/report/html/assets/js.py index 569501a8..9d969197 100644 --- a/codeclone/report/html/assets/js.py +++ b/codeclone/report/html/assets/js.py @@ -868,6 +868,65 @@ })(); """ +# --------------------------------------------------------------------------- +# Review hub: per-finding reviewed state (localStorage) + progress + filters +# --------------------------------------------------------------------------- + +_REVIEW = """\ +(function initReview(){ + const panel=$('[data-review-panel]'); + if(!panel)return; + const KEY='codeclone-reviewed'; + function load(){try{return new Set(JSON.parse(localStorage.getItem(KEY)||'[]'))}catch(e){return new Set()}} + function save(s){try{localStorage.setItem(KEY,JSON.stringify([...s]))}catch(e){}} + const reviewed=load(); + const cards=$$('[data-review-card]'); + const total=cards.length; + const bar=$('[data-review-progress-bar]'); + const label=$('[data-review-progress-label]'); + function refresh(){ + let done=0; + cards.forEach(c=>{ + const on=reviewed.has(c.dataset.findingId); + c.classList.toggle('is-reviewed',on); + const btn=c.querySelector('[data-review-toggle]'); + if(btn)btn.setAttribute('aria-pressed',on?'true':'false'); + if(on)done++; + }); + if(bar)bar.style.width=(total?Math.round(done/total*100):0)+'%'; + if(label)label.textContent=done+' / '+total; + } + panel.addEventListener('click',function(e){ + const btn=e.target.closest('[data-review-toggle]'); + if(!btn)return; + const card=btn.closest('[data-review-card]'); + if(!card)return; + const id=card.dataset.findingId; + if(reviewed.has(id))reviewed.delete(id);else reviewed.add(id); + save(reviewed);refresh(); + }); + const active={severity:'',family:''}; + function applyFilters(){ + cards.forEach(c=>{ + const okS=!active.severity||c.dataset.severity===active.severity; + const okF=!active.family||c.dataset.family===active.family; + c.style.display=(okS&&okF)?'':'none'; + }); + } + $$('[data-review-filter]').forEach(function(btn){ + btn.addEventListener('click',function(){ + const dim=btn.dataset.reviewFilter,val=btn.dataset.reviewValue; + active[dim]=active[dim]===val?'':val; + $$('[data-review-filter="'+dim+'"]').forEach(function(b){ + b.classList.toggle('is-active',b.dataset.reviewValue===active[dim]); + }); + applyFilters(); + }); + }); + refresh(); +})(); +""" + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @@ -883,6 +942,7 @@ _MODALS, _SUGGESTIONS, _DEP_GRAPH, + _REVIEW, _META_PANEL, _EXPORT, _CMD_PALETTE, diff --git a/codeclone/report/html/sections/_review.py b/codeclone/report/html/sections/_review.py new file mode 100644 index 00000000..d4ce7f86 --- /dev/null +++ b/codeclone/report/html/sections/_review.py @@ -0,0 +1,149 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Review hub panel — the prioritized, cross-family finding-review queue. + +Render-only: reads the precomputed ``derived.review_queue`` and draws each +actionable item with the shared :func:`finding_card`. A per-item reviewed toggle +and the progress bar are wired client-side (localStorage keyed by finding id); +no projection logic lives here. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from codeclone.utils import coerce as _coerce + +from ..primitives.escape import _escape_html +from ..widgets.badges import _tab_empty +from ..widgets.cards import finding_card, meta_badge_html +from ..widgets.components import Tone, insight_block + +if TYPE_CHECKING: + from collections.abc import Mapping + + from .._context import ReportContext + +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + +_EMPTY_MESSAGE = "No findings to review." +_METRICS_SKIPPED = "Metrics are skipped for this run." +_REVIEW_INSIGHT = ( + "Findings to review, highest priority first. Mark items reviewed as you go — " + "progress is saved in your browser. Report-only triage: verify in source " + "before editing." +) +_FAMILY_LABELS = {"clones": "Clones", "structural": "Structural", "metrics": "Quality"} +_SEVERITIES = ("critical", "warning", "info") + +_REVIEW_TOGGLE = ( + '" +) + + +def _family_label(family: str) -> str: + return _FAMILY_LABELS.get(family, family or "other") + + +def _render_review_item(item: Mapping[str, object]) -> str: + finding_id = str(item.get("finding_id")) + family = str(item.get("family")) + severity = str(item.get("severity")) + effort = str(item.get("effort")) + meta_badges = ( + meta_badge_html(f"priority {_as_float(item.get('priority')):.2f}"), + meta_badge_html(effort, tone=effort), + meta_badge_html(_family_label(family)), + ) + data_attrs = ( + ' data-review-card="true" ' + f'data-finding-id="{_escape_html(finding_id)}" ' + f'data-severity="{_escape_html(severity)}" ' + f'data-family="{_escape_html(family)}"' + ) + return finding_card( + severity=severity, + title=str(item.get("title")), + eyebrow=f"{_family_label(family)} · {item.get('source_kind')}", + location=str(item.get("location")), + meta_badges=meta_badges, + body_html=_escape_html(str(item.get("summary"))), + actions_html=_REVIEW_TOGGLE, + card_class="review-card", + data_attrs=data_attrs, + ) + + +def _review_progress(total: int) -> str: + return ( + '
' + '
' + 'Progress' + '' + f"0 / {total} reviewed
" + '
' + '
' + ) + + +def _review_filter_chips(summary: Mapping[str, object]) -> str: + by_severity = _as_mapping(summary.get("by_severity")) + by_family = _as_mapping(summary.get("by_family")) + severity_chips = "".join( + f'" + for severity in _SEVERITIES + if _as_int(by_severity.get(severity)) > 0 + ) + family_chips = "".join( + f'' + for family, count in sorted(by_family.items()) + ) + return ( + '
' + f'{severity_chips}{family_chips}
' + ) + + +def render_review_panel(ctx: ReportContext) -> str: + queue = _as_mapping(ctx.derived_map.get("review_queue")) + summary = _as_mapping(queue.get("summary")) + items = [_as_mapping(item) for item in _as_sequence(queue.get("items"))] + + answer = _REVIEW_INSIGHT if ctx.metrics_available else _METRICS_SKIPPED + tone: Tone = "info" + insight = insight_block( + question="What needs review, and in what order?", + answer=answer, + tone=tone, + ) + if not items: + return insight + _tab_empty(_EMPTY_MESSAGE) + + cards = "".join(_render_review_item(item) for item in items) + return ( + "
" + + insight + + _review_progress(len(items)) + + _review_filter_chips(summary) + + f'
{cards}
' + ) diff --git a/codeclone/report/html/widgets/icons.py b/codeclone/report/html/widgets/icons.py index cb0b9a68..49dbbaa5 100644 --- a/codeclone/report/html/widgets/icons.py +++ b/codeclone/report/html/widgets/icons.py @@ -115,6 +115,11 @@ def _svg_with_class(size: int, sw: str, body: str, *, class_name: str = "") -> s '' '', ), + "review": ( + "2", + '' + '', + ), "clones": ( "2", '' diff --git a/codeclone/report/messages/chrome.py b/codeclone/report/messages/chrome.py index 2da24adf..b6e02e9d 100644 --- a/codeclone/report/messages/chrome.py +++ b/codeclone/report/messages/chrome.py @@ -14,6 +14,7 @@ BRAND_TITLE: Final = "CodeClone Report" TAB_OVERVIEW: Final = "Overview" +TAB_REVIEW: Final = "Review" TAB_CLONES: Final = "Clones" TAB_QUALITY: Final = "Quality" TAB_MODULE_MAP: Final = "Module map" diff --git a/tests/test_html_report.py b/tests/test_html_report.py index e5041c20..8325e169 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -4307,3 +4307,117 @@ def test_module_map_panel_modules_zoom_and_limited_population() -> None: # modules graph (active) renders an SVG; empty packages graph shows the message assert "dep-graph-svg" in panel assert "Dependency graph is not available." in panel + + +def _review_queue_payload(*, with_items: bool = True) -> dict[str, object]: + items: list[dict[str, object]] = [] + if with_items: + items = [ + { + "id": "suggestion:clone-a", + "finding_id": "clone:a", + "family": "clones", + "category": "clone", + "severity": "critical", + "priority": 0.91, + "source_kind": "production", + "title": "Duplicated branch logic", + "summary": "3 near-identical branches", + "location": "pkg/a.py:12", + "representative_locations": [], + "effort": "hard", + "steps": ["extract a handler"], + }, + { + "id": "suggestion:struct-b", + "finding_id": "struct:b", + "family": "structural", + "category": "structural", + "severity": "warning", + "priority": 0.6, + "source_kind": "tests", + "title": "Repeated assertions", + "summary": "repeated assert template", + "location": "tests/test_b.py:30", + "representative_locations": [], + "effort": "easy", + "steps": ["collapse"], + }, + ] + return { + "schema_version": "1", + "scope": "report_only", + "summary": { + "total": len(items), + "reviewed": 0, + "by_severity": { + "critical": 1 if with_items else 0, + "warning": 1 if with_items else 0, + "info": 0, + }, + "by_family": {"clones": 1, "structural": 1} if with_items else {}, + "top_priority": 0.91 if with_items else 0.0, + }, + "items": items, + } + + +def _render_review_report(review_queue: dict[str, object]) -> str: + metrics = _metrics_payload( + health_score=80, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=2, + dead_total=0, + dead_critical=0, + ) + return build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=metrics, + report_document={"derived": {"review_queue": review_queue}}, + ) + + +def test_html_report_renders_review_panel() -> None: + html = _render_review_report(_review_queue_payload()) + _assert_html_contains( + html, + 'Review', + 'id="panel-review"', + "data-review-panel", + "data-review-progress-bar", + "What needs review, and in what order?", + ) + panel = html.split('id="panel-review"', 1)[1].split('id="panel-clones"', 1)[0] + _assert_html_contains( + panel, + 'data-review-card="true"', + 'data-finding-id="clone:a"', + 'data-severity="critical"', + 'data-family="clones"', + "finding-card--critical", + "data-review-toggle", + "Duplicated branch logic", + 'data-review-filter="severity"', + 'data-review-value="critical"', + ) + # priority order preserved (input order): critical clone before warning struct + assert panel.index("Duplicated branch logic") < panel.index("Repeated assertions") + + +def test_review_panel_empty_when_no_items() -> None: + html = _render_review_report(_review_queue_payload(with_items=False)) + panel = html.split('id="panel-review"', 1)[1].split('id="panel-clones"', 1)[0] + assert "No findings to review." in panel + assert "data-review-card" not in panel + # tab badge hidden when zero + assert ( + 'Review Date: Sat, 20 Jun 2026 21:56:47 +0500 Subject: [PATCH 012/113] feat(html): add Overview launchpad linking into the Review hub --- codeclone/report/html/assets/css.py | 20 +++++++++++ codeclone/report/html/assets/js.py | 8 +++++ codeclone/report/html/sections/_overview.py | 40 +++++++++++++++++++++ tests/test_html_report.py | 22 ++++++++++++ 4 files changed, 90 insertions(+) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index b37e97df..c6e180b9 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -993,6 +993,26 @@ /* Body — context + summary */ .finding-card-body{margin-top:9px;display:flex;flex-direction:column;gap:var(--sp-1)} +/* Overview launchpad: entry banner into the Review hub */ +.review-launchpad{display:flex;align-items:center;justify-content:space-between;gap:var(--sp-4); + flex-wrap:wrap;margin-bottom:var(--sp-4);padding:var(--sp-3) var(--sp-4); + border:1px solid var(--accent-primary);border-radius:var(--radius-lg); + background:var(--accent-muted)} +.review-launchpad-title{font-size:.95rem;font-weight:600;color:var(--text-primary)} +.review-launchpad-sevs{display:flex;flex-wrap:wrap;gap:6px;margin-top:5px} +.launchpad-sev{font-size:.7rem;font-weight:500;padding:2px 9px;border-radius:999px; + font-family:var(--font-numeric);color:var(--text-secondary);background:var(--bg-overlay)} +.launchpad-sev--critical{color:var(--danger); + background:color-mix(in oklch,var(--danger) 14%,transparent)} +.launchpad-sev--warning{color:var(--warning); + background:color-mix(in oklch,var(--warning) 14%,transparent)} +.review-launchpad-cta{display:inline-flex;align-items:center;gap:7px;flex-shrink:0; + font-size:.82rem;font-weight:600;font-family:var(--font-sans);cursor:pointer; + padding:9px 16px;border-radius:var(--radius-md);border:0; + color:#fff;background:var(--accent-primary); + transition:background var(--dur-fast) var(--ease)} +.review-launchpad-cta:hover{background:var(--accent-hover)} + /* Review hub: progress · filters · queue · per-item reviewed toggle */ .review-progress{background:var(--bg-surface);border:1px solid var(--border); border-radius:var(--radius-lg);padding:var(--sp-3) var(--sp-4);margin-bottom:var(--sp-4)} diff --git a/codeclone/report/html/assets/js.py b/codeclone/report/html/assets/js.py index 9d969197..a54ac310 100644 --- a/codeclone/report/html/assets/js.py +++ b/codeclone/report/html/assets/js.py @@ -91,6 +91,14 @@ tabs.forEach(t=>t.addEventListener('click',()=>activate(t.dataset.tab))); + // Cross-tab jump buttons (e.g. Overview launchpad -> Review) + $$('[data-goto-tab]').forEach(el=>{ + el.addEventListener('click',()=>{ + const id=el.dataset.gotoTab; + if(tabs.some(t=>t.dataset.tab===id)){activate(id);window.scrollTo(0,0)} + }); + }); + // Keyboard: arrow left/right const tabList=$('[role="tablist"].main-tabs'); if(tabList){ diff --git a/codeclone/report/html/sections/_overview.py b/codeclone/report/html/sections/_overview.py index 26a78b23..b7e7978d 100644 --- a/codeclone/report/html/sections/_overview.py +++ b/codeclone/report/html/sections/_overview.py @@ -786,6 +786,45 @@ def _overloaded_modules_section(ctx: ReportContext) -> str: ) +_LAUNCHPAD_SEVERITIES = ( + ("critical", "critical"), + ("warning", "warning"), + ("info", "info"), +) +_LAUNCHPAD_ARROW = ( + '' +) + + +def _review_launchpad_html(ctx: ReportContext) -> str: + """Entry banner: surface the review queue and jump into the Review tab.""" + derived = _as_mapping(getattr(ctx, "derived_map", {})) + summary = _as_mapping(_as_mapping(derived.get("review_queue")).get("summary")) + total = _as_int(summary.get("total")) + if total <= 0: + return "" + by_severity = _as_mapping(summary.get("by_severity")) + chips = "".join( + f'' + f"{_as_int(by_severity.get(key))} {label}" + for key, label in _LAUNCHPAD_SEVERITIES + if _as_int(by_severity.get(key)) > 0 + ) + noun = "finding" if total == 1 else "findings" + return ( + '
' + '
' + f'
{total} {noun} ready to review
' + f'
{chips}
' + "
" + '" + "
" + ) + + def render_overview_panel(ctx: ReportContext) -> str: """Build the Overview tab panel HTML.""" complexity_summary = _as_mapping(ctx.complexity_map.get("summary")) @@ -1042,6 +1081,7 @@ def _baselined_detail( answer=overview_answer, tone=overview_tone, ) + + _review_launchpad_html(ctx) + '
' + health_gauge + '
' diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 8325e169..94dd0eb6 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -4421,3 +4421,25 @@ def test_review_panel_empty_when_no_items() -> None: assert ( 'Review None: + html = _render_review_report(_review_queue_payload()) + overview = html.split('id="panel-overview"', 1)[1].split('id="panel-review"', 1)[0] + _assert_html_contains( + overview, + "review-launchpad", + 'data-goto-tab="review"', + "2 findings ready to review", + "Start review", + "launchpad-sev--critical", + ) + # JS cross-tab jump handler shipped + assert "gotoTab" in html + + +def test_overview_launchpad_absent_when_queue_empty() -> None: + html = _render_review_report(_review_queue_payload(with_items=False)) + overview = html.split('id="panel-overview"', 1)[1].split('id="panel-review"', 1)[0] + assert "review-launchpad" not in overview + assert "data-goto-tab" not in overview From 7a0cfe93269398406f508190114a5e9e5b6cb9fa Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 20 Jun 2026 21:59:18 +0500 Subject: [PATCH 013/113] Add analysis phase observability --- codeclone/analysis/fingerprint.py | 17 +- codeclone/analysis/phase_ledger.py | 199 ++++++++++++++++ codeclone/analysis/units.py | 213 +++++++++++------- codeclone/core/_types.py | 16 +- codeclone/core/parallelism.py | 17 ++ codeclone/core/worker.py | 15 ++ codeclone/observability/analysis_phases.py | 34 +++ codeclone/observability/query.py | 70 +++++- codeclone/observability/render_html.py | 74 ++++++ codeclone/observability/store/reader.py | 84 +++++++ codeclone/observability/views.py | 15 ++ codeclone/surfaces/cli/execution.py | 5 + .../surfaces/mcp/messages/help_topics.py | 9 +- codeclone/surfaces/mcp/messages/params.py | 2 +- codeclone/surfaces/mcp/messages/tools.py | 7 +- codeclone/surfaces/mcp/session.py | 18 +- .../tools/platform-observability.md | 7 + docs/book/26-platform-observability.md | 16 +- .../contract_snapshots/mcp_tool_schemas.json | 2 +- tests/test_analysis_phase_ledger.py | 97 ++++++++ tests/test_architecture.py | 1 + tests/test_mcp_service.py | 1 + tests/test_observability_analysis_phases.py | 124 ++++++++++ tests/test_observability_cli_pipeline.py | 133 +++++++++-- tests/test_observability_query.py | 74 ++++++ tests/test_observability_render.py | 48 ++++ tests/test_observability_runtime.py | 4 + tests/test_observability_store.py | 71 ++++++ 28 files changed, 1258 insertions(+), 115 deletions(-) create mode 100644 codeclone/analysis/phase_ledger.py create mode 100644 codeclone/observability/analysis_phases.py create mode 100644 tests/test_analysis_phase_ledger.py create mode 100644 tests/test_observability_analysis_phases.py diff --git a/codeclone/analysis/fingerprint.py b/codeclone/analysis/fingerprint.py index dff7dbcc..ee4638b3 100644 --- a/codeclone/analysis/fingerprint.py +++ b/codeclone/analysis/fingerprint.py @@ -16,6 +16,7 @@ NormalizationConfig, normalized_ast_dump_from_list, ) +from .phase_ledger import INERT_PHASE_LEDGER, AnalysisPhaseKey, PhaseLedger def sha1(s: str) -> str: @@ -37,6 +38,8 @@ def _cfg_fingerprint_and_complexity( node: _qualnames.FunctionNode, cfg: NormalizationConfig, qualname: str, + *, + phase_ledger: PhaseLedger = INERT_PHASE_LEDGER, ) -> tuple[str, int]: """ Generate a structural fingerprint for a function using CFG analysis. @@ -60,7 +63,8 @@ def _cfg_fingerprint_and_complexity( 40-character hex SHA-1 hash of the normalized CFG """ builder = CFGBuilder() - graph = builder.build(qualname, node) + with phase_ledger.phase(AnalysisPhaseKey.UNIT_CFG): + graph = builder.build(qualname, node) cfg_normalizer = AstNormalizer(cfg) # Use generator to avoid building large list of strings @@ -69,11 +73,12 @@ def _cfg_fingerprint_and_complexity( succ_ids = ",".join( str(s.id) for s in sorted(block.successors, key=lambda s: s.id) ) - block_dump = normalized_ast_dump_from_list( - block.statements, - cfg, - normalizer=cfg_normalizer, - ) + with phase_ledger.phase(AnalysisPhaseKey.UNIT_NORMALIZE_CFG): + block_dump = normalized_ast_dump_from_list( + block.statements, + cfg, + normalizer=cfg_normalizer, + ) parts.append(f"BLOCK[{block.id}]:{block_dump}|SUCCESSORS:{succ_ids}") return sha1("|".join(parts)), cyclomatic_complexity(graph) diff --git a/codeclone/analysis/phase_ledger.py b/codeclone/analysis/phase_ledger.py new file mode 100644 index 00000000..96ff1979 --- /dev/null +++ b/codeclone/analysis/phase_ledger.py @@ -0,0 +1,199 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass, fields +from enum import Enum +from time import perf_counter_ns +from types import TracebackType +from typing import Literal + + +class AnalysisPhaseKey(str, Enum): + PARSE = "parse" + QUALNAME = "qualname" + MODULE_WALK = "module_walk" + RELATIONSHIP = "relationship" + SUPPRESSIONS = "suppressions" + UNIT_CFG = "unit_cfg" + UNIT_NORMALIZE_CFG = "unit_normalize_cfg" + UNIT_STRUCTURAL = "unit_structural" + UNIT_NORMALIZE_STMT = "unit_normalize_stmt" + UNIT_BLOCKS = "unit_blocks" + UNIT_SEGMENTS = "unit_segments" + CLASS_METRICS = "class_metrics" + DEAD_CODE = "dead_code" + MODULE_PASSES = "module_passes" + + +class AnalysisVolumeKey(str, Enum): + FILES_TIMED = "files_timed" + UNITS_SEEN = "units_seen" + UNITS_ELIGIBLE = "units_eligible" + UNITS_FINGERPRINTED = "units_fingerprinted" + BLOCKS_EMITTED = "blocks_emitted" + SEGMENTS_EMITTED = "segments_emitted" + + +PHASE_US_COUNTER_SUFFIXES: tuple[str, ...] = tuple( + f"phase_{key.value}_us" for key in AnalysisPhaseKey +) +PHASE_VOLUME_COUNTER_SUFFIXES: tuple[str, ...] = tuple( + key.value for key in AnalysisVolumeKey +) + + +@dataclass(frozen=True, slots=True) +class PhaseTotals: + parse_ns: int = 0 + qualname_ns: int = 0 + module_walk_ns: int = 0 + relationship_ns: int = 0 + suppressions_ns: int = 0 + unit_cfg_ns: int = 0 + unit_normalize_cfg_ns: int = 0 + unit_structural_ns: int = 0 + unit_normalize_stmt_ns: int = 0 + unit_blocks_ns: int = 0 + unit_segments_ns: int = 0 + class_metrics_ns: int = 0 + dead_code_ns: int = 0 + module_passes_ns: int = 0 + + def merge(self, other: PhaseTotals) -> PhaseTotals: + return PhaseTotals( + **{ + field.name: getattr(self, field.name) + getattr(other, field.name) + for field in fields(self) + } + ) + + def counter_map_us(self) -> dict[str, int]: + return { + f"phase_{key.value}_us": getattr(self, f"{key.value}_ns") // 1000 + for key in AnalysisPhaseKey + } + + +@dataclass(frozen=True, slots=True) +class PhaseSnapshot: + totals: PhaseTotals + volumes: tuple[tuple[str, int], ...] + + @classmethod + def empty(cls) -> PhaseSnapshot: + return cls(totals=PhaseTotals(), volumes=()) + + def merge(self, other: PhaseSnapshot) -> PhaseSnapshot: + merged_volumes = self.volume_map() + for key, value in other.volumes: + merged_volumes[key] = merged_volumes.get(key, 0) + value + return PhaseSnapshot( + totals=self.totals.merge(other.totals), + volumes=tuple(sorted(merged_volumes.items())), + ) + + def volume_map(self) -> dict[str, int]: + return dict(self.volumes) + + +class _InertPhaseContext: + __slots__ = () + + def __enter__(self) -> None: + return None + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> Literal[False]: + return False + + +_INERT_PHASE_CONTEXT = _InertPhaseContext() + + +class _ActivePhaseContext: + __slots__ = ("_key", "_ledger", "_started_ns") + + def __init__(self, ledger: PhaseLedger, key: AnalysisPhaseKey) -> None: + self._ledger = ledger + self._key = key + self._started_ns: int | None = None + + def __enter__(self) -> None: + self._started_ns = perf_counter_ns() + return None + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> Literal[False]: + started = self._started_ns + if started is not None: + self._ledger._add_elapsed(self._key, perf_counter_ns() - started) + return False + + +class PhaseLedger: + __slots__ = ("_active", "_totals", "_volumes") + + def __init__(self, *, active: bool) -> None: + self._active = active + self._totals: dict[AnalysisPhaseKey, int] = {} + self._volumes: dict[AnalysisVolumeKey, int] = {} + + @property + def active(self) -> bool: + return self._active + + def phase(self, key: AnalysisPhaseKey) -> _InertPhaseContext | _ActivePhaseContext: + if not isinstance(key, AnalysisPhaseKey): + raise TypeError("phase key must be an AnalysisPhaseKey") + if not self._active: + return _INERT_PHASE_CONTEXT + return _ActivePhaseContext(self, key) + + def add_volume(self, key: AnalysisVolumeKey, value: int = 1) -> None: + if not isinstance(key, AnalysisVolumeKey): + raise TypeError("volume key must be an AnalysisVolumeKey") + if not self._active: + return + self._volumes[key] = self._volumes.get(key, 0) + value + + def snapshot(self) -> PhaseSnapshot: + totals = PhaseTotals( + **{f"{key.value}_ns": self._totals.get(key, 0) for key in AnalysisPhaseKey} + ) + return PhaseSnapshot( + totals=totals, + volumes=tuple( + sorted((key.value, value) for key, value in self._volumes.items()) + ), + ) + + def _add_elapsed(self, key: AnalysisPhaseKey, elapsed_ns: int) -> None: + self._totals[key] = self._totals.get(key, 0) + elapsed_ns + + +INERT_PHASE_LEDGER = PhaseLedger(active=False) + + +__all__ = [ + "INERT_PHASE_LEDGER", + "PHASE_US_COUNTER_SUFFIXES", + "PHASE_VOLUME_COUNTER_SUFFIXES", + "AnalysisPhaseKey", + "AnalysisVolumeKey", + "PhaseLedger", + "PhaseSnapshot", + "PhaseTotals", +] diff --git a/codeclone/analysis/units.py b/codeclone/analysis/units.py index b4fcf7d2..e6f2b791 100644 --- a/codeclone/analysis/units.py +++ b/codeclone/analysis/units.py @@ -7,7 +7,10 @@ from __future__ import annotations import ast +from collections.abc import Callable +from functools import partial from hashlib import sha1 as _sha1 +from typing import TypeVar from .. import qualnames as _qualnames from ..blocks import extract_blocks, extract_segments @@ -43,11 +46,19 @@ from .fingerprint import _cfg_fingerprint_and_complexity, bucket_loc from .normalizer import NormalizationConfig, stmt_hashes from .parser import PARSE_TIMEOUT_SECONDS, _parse_with_limits +from .phase_ledger import ( + INERT_PHASE_LEDGER, + AnalysisPhaseKey, + AnalysisVolumeKey, + PhaseLedger, +) from .reachability import collect_runtime_reachability from .security_surfaces import collect_security_surfaces __all__ = ["extract_units_and_stats_from_source"] +_TCloneUnit = TypeVar("_TCloneUnit", BlockUnit, SegmentUnit) + def _stmt_count(node: ast.AST) -> int: body = getattr(node, "body", None) @@ -86,6 +97,19 @@ def _eligible_unit_shape( return start, end, loc, stmt_count +def _collect_timed_clone_units( + *, + phase_ledger: PhaseLedger, + phase_key: AnalysisPhaseKey, + volume_key: AnalysisVolumeKey, + collect: Callable[[], list[_TCloneUnit]], +) -> list[_TCloneUnit]: + with phase_ledger.phase(phase_key): + items = collect() + phase_ledger.add_volume(volume_key, len(items)) + return items + + def extract_units_and_stats_from_source( source: str, filepath: str, @@ -101,6 +125,7 @@ def extract_units_and_stats_from_source( collect_structural_findings: bool = True, collect_api_surface: bool = False, api_include_private_modules: bool = False, + phase_ledger: PhaseLedger = INERT_PHASE_LEDGER, ) -> tuple[ list[Unit], list[BlockUnit], @@ -110,26 +135,29 @@ def extract_units_and_stats_from_source( list[StructuralFindingGroup], ]: try: - tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS) + with phase_ledger.phase(AnalysisPhaseKey.PARSE): + tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS) except SyntaxError as e: raise ParseError(f"Failed to parse {filepath}: {e}") from e if not isinstance(tree, ast.Module): raise ParseError(f"Failed to parse {filepath}: expected module AST root") collector = _qualnames.QualnameCollector() - collector.visit(tree) + with phase_ledger.phase(AnalysisPhaseKey.QUALNAME): + collector.visit(tree) source_lines = source.splitlines() source_line_count = len(source_lines) is_test_file = is_test_filepath(filepath) # Single-pass AST walk replaces 3 separate functions / 4 walks. - _walk = _collect_module_walk_data( - tree=tree, - module_name=module_name, - collector=collector, - collect_referenced_names=not is_test_file, - ) + with phase_ledger.phase(AnalysisPhaseKey.MODULE_WALK): + _walk = _collect_module_walk_data( + tree=tree, + module_name=module_name, + collector=collector, + collect_referenced_names=not is_test_file, + ) import_names = _walk.import_names module_deps = _walk.module_deps referenced_names = _walk.referenced_names @@ -139,20 +167,22 @@ def extract_units_and_stats_from_source( non_runtime_decorator_aliases = _walk.non_runtime_decorator_aliases pydantic_module_aliases = _walk.pydantic_module_aliases cohesion_ignored_decorator_aliases = _walk.cohesion_ignored_decorator_aliases - function_relationship_facts = _collect_function_relationship_facts( - tree=tree, - module_name=module_name, - filepath=filepath, - collector=collector, - origin_lane="test" if is_test_file else "production", - ) + with phase_ledger.phase(AnalysisPhaseKey.RELATIONSHIP): + function_relationship_facts = _collect_function_relationship_facts( + tree=tree, + module_name=module_name, + filepath=filepath, + collector=collector, + origin_lane="test" if is_test_file else "production", + ) - suppression_index = _build_suppression_index_for_source( - source=source, - filepath=filepath, - module_name=module_name, - collector=collector, - ) + with phase_ledger.phase(AnalysisPhaseKey.SUPPRESSIONS): + suppression_index = _build_suppression_index_for_source( + source=source, + filepath=filepath, + module_name=module_name, + collector=collector, + ) class_names = frozenset(class_node.name for _, class_node in collector.class_nodes) module_import_names = set(import_names) module_class_names = set(class_names) @@ -164,6 +194,7 @@ def extract_units_and_stats_from_source( structural_findings: list[StructuralFindingGroup] = [] for local_name, node in collector.units: + phase_ledger.add_volume(AnalysisVolumeKey.UNITS_SEEN) unit_shape = _eligible_unit_shape( node, min_loc=min_loc, @@ -171,16 +202,24 @@ def extract_units_and_stats_from_source( ) if unit_shape is None: continue + phase_ledger.add_volume(AnalysisVolumeKey.UNITS_ELIGIBLE) start, end, loc, stmt_count = unit_shape qualname = f"{module_name}:{local_name}" - fingerprint, complexity = _cfg_fingerprint_and_complexity(node, cfg, qualname) - structure_facts = scan_function_structure( + fingerprint, complexity = _cfg_fingerprint_and_complexity( node, - filepath, + cfg, qualname, - collect_findings=collect_structural_findings, + phase_ledger=phase_ledger, ) + phase_ledger.add_volume(AnalysisVolumeKey.UNITS_FINGERPRINTED) + with phase_ledger.phase(AnalysisPhaseKey.UNIT_STRUCTURAL): + structure_facts = scan_function_structure( + node, + filepath, + qualname, + collect_findings=collect_structural_findings, + ) depth = structure_facts.nesting_depth risk = risk_level(complexity) raw_hash = _raw_source_hash_for_range(source_lines, start, end) @@ -223,11 +262,16 @@ def extract_units_and_stats_from_source( body = getattr(node, "body", None) hashes: list[str] | None = None if isinstance(body, list): - hashes = stmt_hashes(body, cfg) + with phase_ledger.phase(AnalysisPhaseKey.UNIT_NORMALIZE_STMT): + hashes = stmt_hashes(body, cfg) if needs_blocks: - block_units.extend( - extract_blocks( + blocks = _collect_timed_clone_units( + phase_ledger=phase_ledger, + phase_key=AnalysisPhaseKey.UNIT_BLOCKS, + volume_key=AnalysisVolumeKey.BLOCKS_EMITTED, + collect=partial( + extract_blocks, node, filepath=filepath, qualname=qualname, @@ -235,12 +279,17 @@ def extract_units_and_stats_from_source( block_size=4, max_blocks=15, precomputed_hashes=hashes, - ) + ), ) + block_units.extend(blocks) if needs_segments: - segment_units.extend( - extract_segments( + segments = _collect_timed_clone_units( + phase_ledger=phase_ledger, + phase_key=AnalysisPhaseKey.UNIT_SEGMENTS, + volume_key=AnalysisVolumeKey.SEGMENTS_EMITTED, + collect=partial( + extract_segments, node, filepath=filepath, qualname=qualname, @@ -248,42 +297,45 @@ def extract_units_and_stats_from_source( window_size=6, max_segments=60, precomputed_hashes=hashes, - ) + ), ) + segment_units.extend(segments) if collect_structural_findings: structural_findings.extend(structure_facts.structural_findings) - for class_qualname, class_node in collector.class_nodes: - cohesion_ignored_methods = _cohesion_ignored_method_names( - class_node, + with phase_ledger.phase(AnalysisPhaseKey.CLASS_METRICS): + for class_qualname, class_node in collector.class_nodes: + cohesion_ignored_methods = _cohesion_ignored_method_names( + class_node, + protocol_symbol_aliases=protocol_symbol_aliases, + protocol_module_aliases=protocol_module_aliases, + pydantic_module_aliases=pydantic_module_aliases, + cohesion_ignored_decorator_aliases=cohesion_ignored_decorator_aliases, + ) + class_metric = _class_metrics_for_node( + module_name=module_name, + class_qualname=class_qualname, + class_node=class_node, + filepath=filepath, + module_import_names=module_import_names, + module_class_names=module_class_names, + cohesion_ignored_methods=cohesion_ignored_methods, + ) + if class_metric is not None: + class_metrics.append(class_metric) + + with phase_ledger.phase(AnalysisPhaseKey.DEAD_CODE): + dead_candidates = _collect_dead_candidates( + filepath=filepath, + module_name=module_name, + collector=collector, protocol_symbol_aliases=protocol_symbol_aliases, protocol_module_aliases=protocol_module_aliases, + non_runtime_decorator_aliases=non_runtime_decorator_aliases, pydantic_module_aliases=pydantic_module_aliases, - cohesion_ignored_decorator_aliases=cohesion_ignored_decorator_aliases, - ) - class_metric = _class_metrics_for_node( - module_name=module_name, - class_qualname=class_qualname, - class_node=class_node, - filepath=filepath, - module_import_names=module_import_names, - module_class_names=module_class_names, - cohesion_ignored_methods=cohesion_ignored_methods, + suppression_rules_by_target=suppression_index, ) - if class_metric is not None: - class_metrics.append(class_metric) - - dead_candidates = _collect_dead_candidates( - filepath=filepath, - module_name=module_name, - collector=collector, - protocol_symbol_aliases=protocol_symbol_aliases, - protocol_module_aliases=protocol_module_aliases, - non_runtime_decorator_aliases=non_runtime_decorator_aliases, - pydantic_module_aliases=pydantic_module_aliases, - suppression_rules_by_target=suppression_index, - ) sorted_class_metrics = tuple( sorted( @@ -296,34 +348,35 @@ def extract_units_and_stats_from_source( ), ) ) - typing_coverage, docstring_coverage = collect_module_adoption( - tree=tree, - module_name=module_name, - filepath=filepath, - collector=collector, - imported_names=import_names, - ) - api_surface = None - if collect_api_surface: - api_surface = collect_module_api_surface( + with phase_ledger.phase(AnalysisPhaseKey.MODULE_PASSES): + typing_coverage, docstring_coverage = collect_module_adoption( tree=tree, module_name=module_name, filepath=filepath, collector=collector, imported_names=import_names, - include_private_modules=api_include_private_modules, ) - security_surfaces = collect_security_surfaces( - tree=tree, - module_name=module_name, - filepath=filepath, - ) - runtime_reachability = collect_runtime_reachability( - tree=tree, - module_name=module_name, - filepath=filepath, - collector=collector, - ) + api_surface = None + if collect_api_surface: + api_surface = collect_module_api_surface( + tree=tree, + module_name=module_name, + filepath=filepath, + collector=collector, + imported_names=import_names, + include_private_modules=api_include_private_modules, + ) + security_surfaces = collect_security_surfaces( + tree=tree, + module_name=module_name, + filepath=filepath, + ) + runtime_reachability = collect_runtime_reachability( + tree=tree, + module_name=module_name, + filepath=filepath, + collector=collector, + ) return ( units, diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py index 6b7dd25a..5660085c 100644 --- a/codeclone/core/_types.py +++ b/codeclone/core/_types.py @@ -8,9 +8,10 @@ from argparse import Namespace from collections.abc import Mapping -from dataclasses import dataclass +from dataclasses import dataclass, field from hashlib import sha256 from pathlib import Path +from typing import TYPE_CHECKING import orjson @@ -43,6 +44,9 @@ ) from ..utils.coerce import as_int, as_mapping, as_str +if TYPE_CHECKING: + from ..analysis.phase_ledger import PhaseSnapshot + MAX_FILE_SIZE = 10 * 1024 * 1024 DEFAULT_BATCH_SIZE = 100 PARALLEL_MIN_FILES_PER_WORKER = 8 @@ -115,6 +119,11 @@ class FileProcessResult: error_kind: str | None = None file_metrics: FileMetrics | None = None structural_findings: list[StructuralFindingGroup] | None = None + phase_snapshot: PhaseSnapshot | None = field( + default=None, + compare=False, + repr=False, + ) @dataclass(frozen=True, slots=True) @@ -143,6 +152,11 @@ class ProcessingResult: structural_findings: tuple[StructuralFindingGroup, ...] = () function_relationship_facts: tuple[FunctionRelationshipFacts, ...] = () source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] = () + phase_snapshot: PhaseSnapshot | None = field( + default=None, + compare=False, + repr=False, + ) @dataclass(frozen=True, slots=True) diff --git a/codeclone/core/parallelism.py b/codeclone/core/parallelism.py index b3bcbfe1..4e969d02 100644 --- a/codeclone/core/parallelism.py +++ b/codeclone/core/parallelism.py @@ -9,6 +9,7 @@ from collections.abc import Callable, Sequence from concurrent.futures import ProcessPoolExecutor, as_completed +from ..analysis.phase_ledger import PhaseLedger, PhaseSnapshot from ..cache.entries import SourceStatsDict from ..cache.store import Cache from ..models import ( @@ -161,8 +162,16 @@ def process( block_min_stmt = int(boot.args.block_min_stmt) segment_min_loc = int(boot.args.segment_min_loc) segment_min_stmt = int(boot.args.segment_min_stmt) + from codeclone.observability.runtime import is_observability_enabled + + collect_phases = is_observability_enabled() + batch_snapshot = PhaseSnapshot.empty() + + def _phase_ledger_for_file() -> PhaseLedger | None: + return PhaseLedger(active=True) if collect_phases else None def _accept_result(result: FileProcessResult) -> None: + nonlocal batch_snapshot nonlocal files_analyzed nonlocal files_skipped nonlocal analyzed_lines @@ -171,6 +180,8 @@ def _accept_result(result: FileProcessResult) -> None: nonlocal analyzed_classes if result.success and result.stat is not None: + if result.phase_snapshot is not None: + batch_snapshot = batch_snapshot.merge(result.phase_snapshot) source_stats_payload = SourceStatsDict( lines=result.lines, functions=result.functions, @@ -271,6 +282,7 @@ def _run_sequential(files: Sequence[str]) -> None: block_min_stmt=block_min_stmt, segment_min_loc=segment_min_loc, segment_min_stmt=segment_min_stmt, + phase_ledger=_phase_ledger_for_file(), ) ) if on_advance is not None: @@ -296,6 +308,7 @@ def _run_sequential(files: Sequence[str]) -> None: block_min_stmt=block_min_stmt, segment_min_loc=segment_min_loc, segment_min_stmt=segment_min_stmt, + phase_ledger=_phase_ledger_for_file(), ) for filepath in batch ] @@ -321,6 +334,9 @@ def _run_sequential(files: Sequence[str]) -> None: else: _run_sequential(files_to_process) + volumes = batch_snapshot.volume_map() + phase_snapshot = batch_snapshot if volumes.get("files_timed", 0) > 0 else None + return ProcessingResult( units=tuple(sorted(all_units, key=_group_item_sort_key)), blocks=tuple(sorted(all_blocks, key=_group_item_sort_key)), @@ -388,4 +404,5 @@ def _run_sequential(files: Sequence[str]) -> None: (filepath, *stats) for filepath, stats in sorted(source_stats_by_file.items()) ), + phase_snapshot=phase_snapshot, ) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index 4e45a0ec..39474be0 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -12,6 +12,11 @@ from functools import lru_cache from ..analysis.normalizer import NormalizationConfig +from ..analysis.phase_ledger import ( + INERT_PHASE_LEDGER, + AnalysisVolumeKey, + PhaseLedger, +) from ..analysis.units import extract_units_and_stats_from_source from ..cache.entries import FileStat from ..contracts import ( @@ -37,6 +42,7 @@ def process_file( block_min_stmt: int = DEFAULT_BLOCK_MIN_STMT, segment_min_loc: int = DEFAULT_SEGMENT_MIN_LOC, segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, + phase_ledger: PhaseLedger = INERT_PHASE_LEDGER, ) -> FileProcessResult: try: resolved = resolved_path_under_root(filepath, root) @@ -102,8 +108,13 @@ def process_file( collect_structural_findings=collect_structural_findings, collect_api_surface=collect_api_surface, api_include_private_modules=api_include_private_modules, + phase_ledger=phase_ledger, ) ) + phase_snapshot = None + if phase_ledger.active: + phase_ledger.add_volume(AnalysisVolumeKey.FILES_TIMED) + phase_snapshot = phase_ledger.snapshot() return FileProcessResult( filepath=filepath, success=True, @@ -117,6 +128,7 @@ def process_file( stat=stat, file_metrics=file_metrics, structural_findings=structural_findings, + phase_snapshot=phase_snapshot, ) except Exception as exc: # pragma: no cover - defensive shell around workers return FileProcessResult( @@ -141,6 +153,7 @@ def _invoke_process_file( block_min_stmt: int, segment_min_loc: int, segment_min_stmt: int, + phase_ledger: PhaseLedger | None = None, ) -> FileProcessResult: optional_kwargs: dict[str, object] = { "collect_structural_findings": collect_structural_findings, @@ -151,6 +164,8 @@ def _invoke_process_file( "segment_min_loc": segment_min_loc, "segment_min_stmt": segment_min_stmt, } + if phase_ledger is not None: + optional_kwargs["phase_ledger"] = phase_ledger process_callable: Callable[..., FileProcessResult] = process_file supported_names = _supported_process_file_kwarg_names(process_callable) if supported_names is None: diff --git a/codeclone/observability/analysis_phases.py b/codeclone/observability/analysis_phases.py new file mode 100644 index 00000000..3732becf --- /dev/null +++ b/codeclone/observability/analysis_phases.py @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ..analysis.phase_ledger import ( + PHASE_US_COUNTER_SUFFIXES, + PHASE_VOLUME_COUNTER_SUFFIXES, + PhaseSnapshot, +) +from .runtime import SpanHandle + + +def apply_pipeline_process_phase_counters( + span: SpanHandle, + *, + phase_snapshot: PhaseSnapshot | None, +) -> None: + if phase_snapshot is None: + return + + phase_counters = phase_snapshot.totals.counter_map_us() + for key in PHASE_US_COUNTER_SUFFIXES: + span.set_counter(key, phase_counters.get(key, 0)) + + volumes = phase_snapshot.volume_map() + for key in PHASE_VOLUME_COUNTER_SUFFIXES: + span.set_counter(key, volumes.get(key, 0)) + + +__all__ = ["apply_pipeline_process_phase_counters"] diff --git a/codeclone/observability/query.py b/codeclone/observability/query.py index 674c0e37..04c1e4dc 100644 --- a/codeclone/observability/query.py +++ b/codeclone/observability/query.py @@ -37,6 +37,7 @@ _CONTEXT_HEAVY_PCT = 25 _MEMORY_HEAVY_MB = 200.0 _CONTEXT_PRESSURE_TOKENS = 8000 +_ANALYSIS_HEAVY_WORKER_MS = 2000.0 _AGGREGATE_SECTIONS = ( "summary", @@ -48,6 +49,7 @@ "correlated_chains", "costly_noops", "pipeline", + "analysis_phase_cost", ) @@ -225,6 +227,32 @@ def _agent_context_body(agg: AggregatesView, cap: int) -> dict[str, object]: return {"total_response_tokens": total, "rows": rows} +def _analysis_phase_body(agg: AggregatesView, cap: int) -> dict[str, object]: + rows = [ + { + "phase": row.phase, + "worker_elapsed_ms": row.worker_elapsed_ms, + "share_permille": row.share_permille, + "verdict": row.verdict, + } + for row in agg.analysis_phases[:cap] + ] + body: dict[str, object] = { + "phase_worker_elapsed_total_ms": (agg.analysis_phase_worker_elapsed_total_ms), + "pipeline_process_wall_ms": agg.analysis_phase_pipeline_wall_ms, + "source_spans": agg.analysis_phase_source_spans, + "files_timed": agg.analysis_phase_files_timed, + "units_eligible": agg.analysis_phase_units_eligible, + "rows": rows, + } + if not rows: + body["message"] = ( + "no analysis phase counters in window; run with " + "CODECLONE_OBSERVABILITY_ENABLED=1 and a full analyze." + ) + return body + + def _chain_descendant_names(op: OperationView) -> list[str]: names: list[str] = [] for child in op.children: @@ -328,18 +356,34 @@ def _context_diagnostic(agg: AggregatesView) -> dict[str, object] | None: } +def _analysis_diagnostic(agg: AggregatesView) -> dict[str, object] | None: + if not agg.analysis_phases: + return None + top = agg.analysis_phases[0] + if top.verdict != "phase_heavy": + return None + return { + "kind": "analysis", + "message": ( + f"{top.phase} consumed {top.share_permille / 10:.0f}% of measured " + f"extract time ({top.worker_elapsed_ms:.0f} ms)." + ), + } + + def _top_diagnostics(agg: AggregatesView) -> list[dict[str, object]]: candidates = ( _memory_diagnostic(agg), _db_diagnostic(agg), _context_diagnostic(agg), + _analysis_diagnostic(agg), ) return [d for d in candidates if d is not None][:_MAX_DIAGNOSTICS] def _summary_body(trace: TraceView) -> dict[str, object]: agg = trace.aggregates - return { + body: dict[str, object] = { "operations": agg.operation_count, "peak_rss_delta_mb": _round1(agg.max_rss_delta_mb), "peak_rss_mb": _round1(agg.max_peak_rss_mb), @@ -347,6 +391,18 @@ def _summary_body(trace: TraceView) -> dict[str, object]: "costly_noops": sum(1 for s in agg.semantic_costs if s.no_op), "top_diagnostics": _top_diagnostics(agg), } + if agg.analysis_phases: + body["analysis_phase_worker_elapsed_total_ms"] = ( + agg.analysis_phase_worker_elapsed_total_ms + ) + body["top_analysis_phases"] = [ + { + "phase": row.phase, + "share_permille": row.share_permille, + } + for row in agg.analysis_phases[:_MAX_DIAGNOSTICS] + ] + return body def _recommended_next_sections( @@ -372,6 +428,16 @@ def _recommended_next_sections( recs.append( {"section": "costly_noops", "reason": "a span ran but produced nothing."} ) + if ( + agg.analysis_phase_worker_elapsed_total_ms is not None + and agg.analysis_phase_worker_elapsed_total_ms >= _ANALYSIS_HEAVY_WORKER_MS + ) or any(row.verdict == "phase_heavy" for row in agg.analysis_phases): + recs.append( + { + "section": "analysis_phase_cost", + "reason": "pipeline.process phase breakdown available.", + } + ) return recs @@ -432,6 +498,8 @@ def query_platform_observability( response.update(_summary_body(trace)) elif section == "agent_context": response.update(_agent_context_body(agg, row_cap)) + elif section == "analysis_phase_cost": + response.update(_analysis_phase_body(agg, row_cap)) elif section == "correlated_chains": response["rows"] = _correlated_chains(trace, row_cap) else: diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 2e09cbc3..033fb87c 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -26,6 +26,7 @@ from .views import ( AgentTokenRow, AggregatesView, + AnalysisPhaseRow, DbCostRow, DbFingerprintRow, McpToolAggregate, @@ -42,6 +43,22 @@ # A no-op span only deserves a "costly" warning once it has actually spent time. _NOOP_COSTLY_MS = 50.0 _KNOWN_SURFACES = frozenset({"mcp", "cli", "memory"}) +_ANALYSIS_PHASE_LABELS = { + "parse": "Parse (ast.parse)", + "qualname": "Qualname index", + "module_walk": "Module walk", + "relationship": "Relationship facts", + "suppressions": "Suppressions", + "unit_cfg": "CFG build", + "unit_normalize_cfg": "Normalize (CFG blocks)", + "unit_structural": "Structural scan", + "unit_normalize_stmt": "Normalize (statements)", + "unit_blocks": "Block extract", + "unit_segments": "Segment extract", + "class_metrics": "Class metrics", + "dead_code": "Dead-code collect", + "module_passes": "Module passes", +} # Reuse of the CodeClone brand mark (report/html/widgets/icons.py:BRAND_LOGO). _LOGO = ( @@ -451,6 +468,20 @@ def _highlights(agg: AggregatesView) -> str: metric_html=f"{_esc(_ms(cpu_ms))} · {ratio:.1f}x wall", ) ) + if agg.analysis_phases: + top = agg.analysis_phases[0] + rows.append( + _highlight_row( + "Hottest extract phase", + badge_html="", + primary=top.phase, + context=_ANALYSIS_PHASE_LABELS.get(top.phase, top.phase), + metric_html=( + f"{_esc(_ms(top.worker_elapsed_ms))} · " + f"{top.share_permille / 10:.1f}%" + ), + ) + ) return f'
{"".join(rows)}
' if rows else "" @@ -825,6 +856,48 @@ def _pipeline_section(agg: AggregatesView) -> str: ) +def _analysis_phase_row(row: AnalysisPhaseRow) -> str: + chip_cls = " warn" if row.verdict == "phase_heavy" else "" + label = _ANALYSIS_PHASE_LABELS.get(row.phase, row.phase) + return ( + f'
{_esc(label)}
' + f'
{_esc(row.phase)}
' + f'{_esc(_ms(row.worker_elapsed_ms))}' + f'{row.share_permille / 10:.1f}%' + f'{_esc(row.verdict)}' + ) + + +def _analysis_phases_section(agg: AggregatesView) -> str: + if not agg.analysis_phases: + return "" + rows = "".join(_analysis_phase_row(row) for row in agg.analysis_phases) + headers = ( + ("Phase", False), + ("Worker elapsed", True), + ("Share", True), + ("Signal", False), + ) + footer = ( + f"Worker elapsed (summed): " + f"{_ms(agg.analysis_phase_worker_elapsed_total_ms or 0.0)} · " + f"pipeline.process wall: {_ms(agg.analysis_phase_pipeline_wall_ms or 0.0)} · " + f"files timed: {agg.analysis_phase_files_timed} · " + f"units eligible: {agg.analysis_phase_units_eligible}" + ) + body = _table(headers, rows) + f'

{_esc(footer)}

' + return _section( + "Analysis extract phases", + body, + subtitle=( + "Summed per-file worker elapsed time inside pipeline.process " + "(parse, walk, CFG, normalize). Dev-only; not repository quality. " + "Under parallel execution, summed worker elapsed may exceed parent " + "pipeline wall time." + ), + ) + + def render_trace_html(trace: TraceView) -> str: """Render a ``TraceView`` as a self-contained, branded diagnosis cockpit.""" foot = f"CodeClone · platform observability · schema {_esc(trace.schema_version)}" @@ -844,6 +917,7 @@ def render_trace_html(trace: TraceView) -> str: + _agent(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + _pipeline_section(trace.aggregates) + + _analysis_phases_section(trace.aggregates) + f'

{foot}

' + "
" ) diff --git a/codeclone/observability/store/reader.py b/codeclone/observability/store/reader.py index 5ef6f15d..6ebe2a04 100644 --- a/codeclone/observability/store/reader.py +++ b/codeclone/observability/store/reader.py @@ -14,18 +14,24 @@ import sqlite3 from collections import defaultdict +from dataclasses import dataclass from datetime import datetime from pathlib import Path from typing import cast import orjson +from ...analysis.phase_ledger import ( + PHASE_US_COUNTER_SUFFIXES, + PHASE_VOLUME_COUNTER_SUFFIXES, +) from ...contracts import PLATFORM_OBSERVABILITY_SCHEMA_VERSION from ..db_fingerprint import describe_fingerprint from ..views import ( AgentTokenRow, AgentView, AggregatesView, + AnalysisPhaseRow, DbCostRow, DbFingerprintRow, McpToolAggregate, @@ -48,6 +54,8 @@ _MEMORY_PIPELINE_PREFIX = "memory." _SEMANTIC_COST_LIMIT = 8 _DB_FINGERPRINT_ROW_LIMIT = 15 +_PIPELINE_PROCESS_SPAN = "pipeline.process" +_PHASE_HEAVY_PERMILLE = 250 # Waste thresholds: a no-op span is only worth flagging once it has spent time; # an MCP response is "heavy" past these payload sizes. @@ -466,6 +474,73 @@ def _db_fingerprints(flat: list[OperationView]) -> tuple[DbFingerprintRow, ...]: return tuple(rows[:_DB_FINGERPRINT_ROW_LIMIT]) +@dataclass(frozen=True, slots=True) +class _AnalysisPhaseBundle: + rows: tuple[AnalysisPhaseRow, ...] + worker_elapsed_total_ms: float | None + pipeline_wall_ms: float | None + source_spans: int + files_timed: int + units_eligible: int + + +def _phase_name_from_counter(counter: str) -> str: + return counter[len("phase_") : -len("_us")] + + +def _analysis_phase_bundle(flat: list[OperationView]) -> _AnalysisPhaseBundle: + pipeline_spans = [ + span for op in flat for span in op.spans if span.name == _PIPELINE_PROCESS_SPAN + ] + contributing_spans = [ + span + for span in pipeline_spans + if any(key in span.counters for key in PHASE_US_COUNTER_SUFFIXES) + ] + if not contributing_spans: + return _AnalysisPhaseBundle( + rows=(), + worker_elapsed_total_ms=None, + pipeline_wall_ms=None, + source_spans=0, + files_timed=0, + units_eligible=0, + ) + + phase_us = { + key: sum(span.counters.get(key, 0) for span in contributing_spans) + for key in PHASE_US_COUNTER_SUFFIXES + } + volume_totals = { + key: sum(span.counters.get(key, 0) for span in contributing_spans) + for key in PHASE_VOLUME_COUNTER_SUFFIXES + } + total_us = sum(phase_us.values()) + rows = [ + AnalysisPhaseRow( + phase=_phase_name_from_counter(key), + worker_elapsed_ms=round(value / 1000, 1), + share_permille=round(1000 * value / total_us) if total_us else 0, + verdict=( + "phase_heavy" + if total_us and round(1000 * value / total_us) >= _PHASE_HEAVY_PERMILLE + else "ok" + ), + ) + for key, value in phase_us.items() + if value + ] + rows.sort(key=lambda row: (-row.worker_elapsed_ms, row.phase)) + return _AnalysisPhaseBundle( + rows=tuple(rows), + worker_elapsed_total_ms=round(total_us / 1000, 1), + pipeline_wall_ms=round(sum(span.duration_ms for span in contributing_spans), 1), + source_spans=len(contributing_spans), + files_timed=volume_totals.get("files_timed", 0), + units_eligible=volume_totals.get("units_eligible", 0), + ) + + def _aggregates( flat: list[OperationView], spans_by_op: dict[str, tuple[SpanView, ...]] ) -> AggregatesView: @@ -532,6 +607,7 @@ def _aggregates( mcp_tools = _mcp_tool_aggregates(flat) cpu_ranked = sorted(flat, key=lambda v: (-_cpu_ms(v), v.operation_id)) heaviest_cpu = cpu_ranked[0] if cpu_ranked and _cpu_ms(cpu_ranked[0]) > 0 else None + analysis_phase_bundle = _analysis_phase_bundle(flat) return AggregatesView( operation_count=len(flat), slowest=slowest, @@ -551,6 +627,14 @@ def _aggregates( heaviest_cpu=heaviest_cpu, pipeline=_pipeline(flat), db_fingerprints=_db_fingerprints(flat), + analysis_phases=analysis_phase_bundle.rows, + analysis_phase_worker_elapsed_total_ms=( + analysis_phase_bundle.worker_elapsed_total_ms + ), + analysis_phase_pipeline_wall_ms=analysis_phase_bundle.pipeline_wall_ms, + analysis_phase_source_spans=analysis_phase_bundle.source_spans, + analysis_phase_files_timed=analysis_phase_bundle.files_timed, + analysis_phase_units_eligible=analysis_phase_bundle.units_eligible, ) diff --git a/codeclone/observability/views.py b/codeclone/observability/views.py index 66b9e5a9..4eb7a99f 100644 --- a/codeclone/observability/views.py +++ b/codeclone/observability/views.py @@ -131,6 +131,14 @@ class DbFingerprintRow: summary: str = "" +@dataclass(frozen=True, slots=True) +class AnalysisPhaseRow: + phase: str + worker_elapsed_ms: float + share_permille: int + verdict: str + + @dataclass(frozen=True, slots=True) class AgentTokenRow: """One MCP tool's cumulative token economics across the window.""" @@ -197,6 +205,12 @@ class AggregatesView: heaviest_cpu: OperationView | None = None pipeline: tuple[PipelineGroup, ...] = () db_fingerprints: tuple[DbFingerprintRow, ...] = () + analysis_phases: tuple[AnalysisPhaseRow, ...] = () + analysis_phase_worker_elapsed_total_ms: float | None = None + analysis_phase_pipeline_wall_ms: float | None = None + analysis_phase_source_spans: int = 0 + analysis_phase_files_timed: int = 0 + analysis_phase_units_eligible: int = 0 @dataclass(frozen=True, slots=True) @@ -244,6 +258,7 @@ class TraceView: "AgentTokenRow", "AgentView", "AggregatesView", + "AnalysisPhaseRow", "DbCostRow", "DbFingerprintRow", "McpToolAggregate", diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py index 6a7318ef..177f7fb7 100644 --- a/codeclone/surfaces/cli/execution.py +++ b/codeclone/surfaces/cli/execution.py @@ -31,6 +31,7 @@ from ...core.reporting import GatingResult from ...models import MetricsDiff from ...observability import SpanHandle, is_observability_enabled, span +from ...observability.analysis_phases import apply_pipeline_process_phase_counters from . import state as cli_state from .attrs import bool_attr from .console import PlainConsole @@ -87,6 +88,10 @@ def _discover_counters(stage_span: SpanHandle, result: DiscoveryResult) -> None: def _process_counters(stage_span: SpanHandle, result: PipelineProcessingResult) -> None: stage_span.set_counter("files_analyzed", result.files_analyzed) stage_span.set_counter("failed_files", len(result.failed_files)) + apply_pipeline_process_phase_counters( + stage_span, + phase_snapshot=result.phase_snapshot, + ) def run_analysis_stages( diff --git a/codeclone/surfaces/mcp/messages/help_topics.py b/codeclone/surfaces/mcp/messages/help_topics.py index c4b35501..48d23157 100644 --- a/codeclone/surfaces/mcp/messages/help_topics.py +++ b/codeclone/surfaces/mcp/messages/help_topics.py @@ -627,8 +627,13 @@ class MCPHelpTopicSpec: ( "Sections: summary | slow_operations | memory_pipeline_cost | " "db_cost | agent_context | mcp_tool_matrix | correlated_chains " - "| costly_noops | pipeline. Start at summary, then follow " - "recommended_next_sections." + "| costly_noops | pipeline | analysis_phase_cost. Start at " + "summary, then follow recommended_next_sections." + ), + ( + "analysis_phase_cost breaks pipeline.process into summed worker " + "elapsed micro-phases (parse, walk, CFG, normalize); under " + "ProcessPool this may exceed parent pipeline wall time." ), ( "detail_level compact|normal; full is reserved for future " diff --git a/codeclone/surfaces/mcp/messages/params.py b/codeclone/surfaces/mcp/messages/params.py index 2e13579e..a136b52f 100644 --- a/codeclone/surfaces/mcp/messages/params.py +++ b/codeclone/surfaces/mcp/messages/params.py @@ -637,7 +637,7 @@ description=( "Telemetry section to project: summary | slow_operations | " "memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | " - "correlated_chains | costly_noops | pipeline." + "correlated_chains | costly_noops | pipeline | analysis_phase_cost." ), ), ] diff --git a/codeclone/surfaces/mcp/messages/tools.py b/codeclone/surfaces/mcp/messages/tools.py index 4b7640de..81753dfd 100644 --- a/codeclone/surfaces/mcp/messages/tools.py +++ b/codeclone/surfaces/mcp/messages/tools.py @@ -131,9 +131,10 @@ "bad; high MCP payload != code quality low; hot semantic reindex != unsafe " "change. Sections: summary, slow_operations, memory_pipeline_cost, " "db_cost, agent_context, mcp_tool_matrix, correlated_chains, costly_noops, " - "pipeline. detail_level compact|normal (full downgrades to normal for " - "aggregate sections). Intended for CodeClone maintainers and development " - "agents; do not use it to make user-facing quality claims about a repo." + "pipeline, analysis_phase_cost. detail_level compact|normal (full " + "downgrades to normal for aggregate sections). Intended for CodeClone " + "maintainers and development agents; do not use it to make user-facing " + "quality claims about a repo." ) EVALUATE_GATES: Final = ( diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index bc17ba71..1adf3c18 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -15,7 +15,8 @@ from ...audit.runtime import open_audit_writer_for_root from ...cache.store import resolve_cache_status from ...memory.ide_governance import IdeGovernanceSessionState -from ...observability import span +from ...observability import is_observability_enabled, span +from ...observability.analysis_phases import apply_pipeline_process_phase_counters from ...report.meta import build_report_meta as _build_report_meta from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc from . import _session_helpers as _helpers @@ -275,10 +276,23 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: root=root_path, filepaths=discovery_result.all_file_paths, ) - with span(name="pipeline.process"): + with span(name="pipeline.process") as process_span: processing_result = process( boot=boot, discovery=discovery_result, cache=cache ) + if is_observability_enabled(): + process_span.set_counter( + "files_analyzed", + processing_result.files_analyzed, + ) + process_span.set_counter( + "failed_files", + len(processing_result.failed_files), + ) + apply_pipeline_process_phase_counters( + process_span, + phase_snapshot=processing_result.phase_snapshot, + ) unit_inventory = build_unit_location_inventory( root=root_path, units=processing_result.units, diff --git a/docs/book/25-mcp-interface/tools/platform-observability.md b/docs/book/25-mcp-interface/tools/platform-observability.md index a85ee6c4..337251d9 100644 --- a/docs/book/25-mcp-interface/tools/platform-observability.md +++ b/docs/book/25-mcp-interface/tools/platform-observability.md @@ -39,10 +39,17 @@ Supported sections: - `correlated_chains` - `costly_noops` - `pipeline` +- `analysis_phase_cost` Each call returns one section only. Compact detail is bounded to five rows; normal detail is bounded by `limit`. +`analysis_phase_cost` reports summed worker elapsed time inside +`pipeline.process`, grouped by analysis micro-phase. The top-level scalar +`phase_worker_elapsed_total_ms` may exceed `pipeline_process_wall_ms` when +analysis ran in a process pool. Treat the section as a CodeClone performance +diagnostic only; it does not indicate repository quality. + ## Inert states When observability is disabled, the tool returns a disabled status. When no diff --git a/docs/book/26-platform-observability.md b/docs/book/26-platform-observability.md index 9c088974..77411bd1 100644 --- a/docs/book/26-platform-observability.md +++ b/docs/book/26-platform-observability.md @@ -132,8 +132,14 @@ store is an informational empty state and exits successfully. The HTML cockpit is self-contained and includes operation chains, a span waterfall, pipeline and Engineering Memory costs, MCP tool aggregates, database -costs, normalized SQL fingerprints, agent context, and costly no-op signals. -It has no external assets or JavaScript dependency. +costs, normalized SQL fingerprints, agent context, analysis extract phases, and +costly no-op signals. It has no external assets or JavaScript dependency. + +When analysis phase counters are present, the cockpit shows **Analysis extract +phases** after the pipeline section. These values are summed per-file worker +elapsed time from `pipeline.process` counters. Under parallel execution the sum +can exceed the parent `pipeline.process` wall time; this is expected and is not +CPU time. ## MCP projection @@ -148,12 +154,18 @@ It has no external assets or JavaScript dependency. - `correlated_chains` - `costly_noops` - `pipeline` +- `analysis_phase_cost` `detail_level=compact` returns at most five rows. `normal` honors `limit`, clamped to `1..50`; `full` currently downgrades to `normal`. `window` accepts `latest` or a correlation ID. `operation_id` and `span_id` are reserved and reported as ignored parameters. +`analysis_phase_cost` projects the same phase rows shown in HTML: parse, +qualname indexing, module walks, CFG build, normalization, block/segment +extraction, and module-level metric passes. It is a CodeClone runtime diagnostic, +not repository quality evidence. + The response explicitly declares a CodeClone-development audience and states that it is not user-facing quality evidence. See [MCP determinism and tests](25-mcp-interface/determinism-and-tests.md) for the diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json index cca5be7c..706744e0 100644 --- a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -2846,7 +2846,7 @@ "type": "string" }, "section": { - "description": "Telemetry section to project: summary | slow_operations | memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | correlated_chains | costly_noops | pipeline.", + "description": "Telemetry section to project: summary | slow_operations | memory_pipeline_cost | db_cost | agent_context | mcp_tool_matrix | correlated_chains | costly_noops | pipeline | analysis_phase_cost.", "title": "Section", "type": "string" }, diff --git a/tests/test_analysis_phase_ledger.py b/tests/test_analysis_phase_ledger.py new file mode 100644 index 00000000..a03084ff --- /dev/null +++ b/tests/test_analysis_phase_ledger.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import fields + +import pytest + +from codeclone.analysis import phase_ledger as phase_mod +from codeclone.analysis.phase_ledger import ( + INERT_PHASE_LEDGER, + PHASE_US_COUNTER_SUFFIXES, + PHASE_VOLUME_COUNTER_SUFFIXES, + AnalysisPhaseKey, + AnalysisVolumeKey, + PhaseLedger, + PhaseSnapshot, + PhaseTotals, +) + + +def test_phase_enum_derived_counter_suffixes() -> None: + assert ( + tuple(f"phase_{key.value}_us" for key in AnalysisPhaseKey) + == PHASE_US_COUNTER_SUFFIXES + ) + assert ( + tuple(key.value for key in AnalysisVolumeKey) == PHASE_VOLUME_COUNTER_SUFFIXES + ) + assert tuple(field.name for field in fields(PhaseTotals)) == tuple( + f"{key.value}_ns" for key in AnalysisPhaseKey + ) + + +def test_phase_ledger_inert_does_not_call_perf_counter( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls = 0 + + def _forbidden() -> int: + nonlocal calls + calls += 1 + raise AssertionError("inert phase must not read the clock") + + monkeypatch.setattr(phase_mod, "perf_counter_ns", _forbidden) + with INERT_PHASE_LEDGER.phase(AnalysisPhaseKey.PARSE): + pass + INERT_PHASE_LEDGER.add_volume(AnalysisVolumeKey.UNITS_SEEN) + assert calls == 0 + + +def test_phase_ledger_records_elapsed_and_volumes( + monkeypatch: pytest.MonkeyPatch, +) -> None: + ticks = iter((1_000, 2_501, 10_000, 13_499)) + monkeypatch.setattr(phase_mod, "perf_counter_ns", lambda: next(ticks)) + + ledger = PhaseLedger(active=True) + with ledger.phase(AnalysisPhaseKey.PARSE): + pass + with ledger.phase(AnalysisPhaseKey.UNIT_CFG): + pass + ledger.add_volume(AnalysisVolumeKey.UNITS_SEEN) + ledger.add_volume(AnalysisVolumeKey.UNITS_SEEN, 2) + + snapshot = ledger.snapshot() + assert snapshot.totals.counter_map_us()["phase_parse_us"] == 1 + assert snapshot.totals.counter_map_us()["phase_unit_cfg_us"] == 3 + assert snapshot.volume_map() == {"units_seen": 3} + + +def test_phase_snapshot_merge_is_deterministic() -> None: + left = PhaseSnapshot( + totals=PhaseTotals(parse_ns=1_000), + volumes=(("units_seen", 1),), + ) + right = PhaseSnapshot( + totals=PhaseTotals(parse_ns=2_000, unit_blocks_ns=3_000), + volumes=(("files_timed", 2), ("units_seen", 3)), + ) + + merged = left.merge(right) + assert merged.totals.counter_map_us()["phase_parse_us"] == 3 + assert merged.totals.counter_map_us()["phase_unit_blocks_us"] == 3 + assert merged.volumes == (("files_timed", 2), ("units_seen", 4)) + + +def test_phase_ledger_rejects_raw_string_keys() -> None: + ledger = PhaseLedger(active=True) + with pytest.raises(TypeError): + ledger.phase("parse") # type: ignore[arg-type] + with pytest.raises(TypeError): + ledger.add_volume("files_timed") # type: ignore[arg-type] diff --git a/tests/test_architecture.py b/tests/test_architecture.py index 944c272c..1246c174 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -96,6 +96,7 @@ def test_architecture_layer_violations() -> None: "codeclone.report", "codeclone.surfaces", "codeclone.config", + "codeclone.observability", ), ), ( diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 5c6f21f1..b76bd329 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -2582,6 +2582,7 @@ def test_help_observability_topic_surfaces_query_tool() -> None: assert "query_platform_observability" in str(payload["recommended_tools"]) text = str(payload["key_points"]).lower() assert "anti-inference" in text and "dev-only" in text + assert "analysis_phase_cost" in text def test_mcp_service_help_validates_topic_and_detail() -> None: diff --git a/tests/test_observability_analysis_phases.py b/tests/test_observability_analysis_phases.py new file mode 100644 index 00000000..bc325c1b --- /dev/null +++ b/tests/test_observability_analysis_phases.py @@ -0,0 +1,124 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import replace + +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.analysis.phase_ledger import ( + PHASE_US_COUNTER_SUFFIXES, + PHASE_VOLUME_COUNTER_SUFFIXES, + AnalysisVolumeKey, + PhaseLedger, + PhaseSnapshot, + PhaseTotals, +) +from codeclone.analysis.units import extract_units_and_stats_from_source +from codeclone.core._types import ProcessingResult +from codeclone.observability.analysis_phases import ( + apply_pipeline_process_phase_counters, +) + + +class _FakeSpan: + def __init__(self) -> None: + self.counters: dict[str, int] = {} + + def set_counter(self, key: str, value: int) -> None: + self.counters[key] = value + + +def _processing_result(snapshot: PhaseSnapshot | None = None) -> ProcessingResult: + return ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=1, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + phase_snapshot=snapshot, + ) + + +def test_apply_pipeline_process_phase_counters_closed_key_set() -> None: + snapshot = PhaseSnapshot( + totals=PhaseTotals(parse_ns=1_500_000, unit_cfg_ns=2_000_000), + volumes=( + (AnalysisVolumeKey.FILES_TIMED.value, 2), + (AnalysisVolumeKey.UNITS_ELIGIBLE.value, 5), + ), + ) + span = _FakeSpan() + + apply_pipeline_process_phase_counters(span, phase_snapshot=snapshot) # type: ignore[arg-type] + + assert frozenset(span.counters) == frozenset( + (*PHASE_US_COUNTER_SUFFIXES, *PHASE_VOLUME_COUNTER_SUFFIXES) + ) + assert span.counters["phase_parse_us"] == 1500 + assert span.counters["phase_unit_cfg_us"] == 2000 + assert span.counters["files_timed"] == 2 + assert span.counters["units_eligible"] == 5 + assert span.counters["blocks_emitted"] == 0 + + +def test_extract_units_records_phase_snapshot_data() -> None: + ledger = PhaseLedger(active=True) + source = """ +def example(value): + total = value + 1 + total += 1 + total += 2 + if total > 2: + total += 3 + else: + total -= 4 + total += 5 + return total +""" + + units, blocks, segments, *_ = extract_units_and_stats_from_source( + source=source, + filepath="pkg/example.py", + module_name="pkg.example", + cfg=NormalizationConfig(), + min_loc=3, + min_stmt=2, + block_min_loc=3, + block_min_stmt=2, + segment_min_loc=3, + segment_min_stmt=2, + phase_ledger=ledger, + ) + + snapshot = ledger.snapshot() + counters = snapshot.totals.counter_map_us() + volumes = snapshot.volume_map() + assert units + assert blocks + assert segments + assert counters["phase_parse_us"] >= 0 + assert volumes["units_seen"] == 1 + assert volumes["units_eligible"] == 1 + assert volumes["units_fingerprinted"] == 1 + assert volumes["blocks_emitted"] == len(blocks) + assert volumes["segments_emitted"] == len(segments) + + +def test_core_result_equality_ignores_phase_snapshot() -> None: + base = _processing_result() + snapshot = PhaseSnapshot(totals=PhaseTotals(parse_ns=1_000), volumes=()) + assert base == replace(base, phase_snapshot=snapshot) diff --git a/tests/test_observability_cli_pipeline.py b/tests/test_observability_cli_pipeline.py index 0d1325cc..6a014ba0 100644 --- a/tests/test_observability_cli_pipeline.py +++ b/tests/test_observability_cli_pipeline.py @@ -9,13 +9,20 @@ from argparse import Namespace from collections.abc import Iterator from pathlib import Path -from typing import cast +from typing import Any, cast import orjson import pytest import codeclone.surfaces.cli.workflow as cli from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.analysis.phase_ledger import ( + PHASE_US_COUNTER_SUFFIXES, + PHASE_VOLUME_COUNTER_SUFFIXES, + AnalysisVolumeKey, + PhaseSnapshot, + PhaseTotals, +) from codeclone.cache.store import Cache from codeclone.config.observability import ObservabilityConfig from codeclone.contracts import ExitCode @@ -77,6 +84,33 @@ def _processing() -> ProcessingResult: analyzed_classes=0, failed_files=(), source_read_failures=(), + phase_snapshot=PhaseSnapshot( + totals=PhaseTotals(parse_ns=1_500_000, unit_cfg_ns=2_000_000), + volumes=( + (AnalysisVolumeKey.FILES_TIMED.value, 2), + (AnalysisVolumeKey.UNITS_ELIGIBLE.value, 3), + ), + ), + ) + + +def _processing_without_phase_snapshot() -> ProcessingResult: + return ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), ) @@ -107,41 +141,70 @@ def save(self) -> None: return None +def _run_observed_pipeline( + tmp_path: Path, + args: Namespace, +) -> None: + cli._run_analysis_stages( + args=args, + boot=_boot(tmp_path, args), + cache=cast(Cache, _FakeCache()), + ) + + +def _read_span_rows(tmp_path: Path) -> tuple[list[Any], dict[str, object]]: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + rows = conn.execute( + "SELECT name, counters_json, operation_id FROM platform_spans" + ).fetchall() + finally: + conn.close() + return ( + rows, + { + "process_counters": orjson.loads( + next(row[1] for row in rows if row[0] == "pipeline.process") + ), + }, + ) + + +def _boot(tmp_path: Path, args: Namespace) -> BootstrapResult: + return BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=args, + output_paths=OutputPaths(), + cache_path=tmp_path / "cache.json", + ) + + def test_cli_pipeline_emits_stage_spans( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: files = ("a.py", "b.py") + processing = _processing() monkeypatch.setattr(cli, "discover", lambda **_kw: _discovery(files)) - monkeypatch.setattr(cli, "process", lambda **_kw: _processing()) + monkeypatch.setattr(cli, "process", lambda **_kw: processing) monkeypatch.setattr(cli, "analyze", lambda **_kw: _analysis()) args = Namespace( quiet=True, no_progress=True, blast_radius=False, patch_verify=False ) - boot = BootstrapResult( - root=tmp_path, - config=NormalizationConfig(), - args=args, - output_paths=OutputPaths(), - cache_path=tmp_path / "cache.json", - ) bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) try: with operation(name="cli.analyze", surface="cli"): - cli._run_analysis_stages( - args=args, boot=boot, cache=cast(Cache, _FakeCache()) - ) + _run_observed_pipeline(tmp_path, args) finally: shutdown() + span_rows, _ = _read_span_rows(tmp_path) conn = open_observability_store(observability_store_path(tmp_path)) try: op_row = conn.execute( "SELECT name, surface FROM platform_operations" ).fetchone() - span_rows = conn.execute( - "SELECT name, counters_json, operation_id FROM platform_spans" - ).fetchall() finally: conn.close() @@ -158,9 +221,47 @@ def test_cli_pipeline_emits_stage_spans( "files_to_process": 2, "cache_hits": 3, } - assert orjson.loads(by_name["pipeline.process"][1]) == { + process_counters = orjson.loads(by_name["pipeline.process"][1]) + expected_keys = frozenset( + {"files_analyzed", "failed_files"} + | set(PHASE_US_COUNTER_SUFFIXES) + | set(PHASE_VOLUME_COUNTER_SUFFIXES) + ) + assert frozenset(process_counters) == expected_keys + expected_values = { "files_analyzed": 2, "failed_files": 0, + "phase_parse_us": 1500, + "phase_unit_cfg_us": 2000, + "files_timed": 2, + "units_eligible": 3, + "blocks_emitted": 0, + } + assert {key: process_counters[key] for key in expected_values} == expected_values + + +def test_cli_pipeline_cache_only_keeps_legacy_process_counters( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + processing = _processing_without_phase_snapshot() + monkeypatch.setattr(cli, "discover", lambda **_kw: _discovery(())) + monkeypatch.setattr(cli, "process", lambda **_kw: processing) + monkeypatch.setattr(cli, "analyze", lambda **_kw: _analysis()) + args = Namespace( + quiet=True, no_progress=True, blast_radius=False, patch_verify=False + ) + + bootstrap(ObservabilityConfig(enabled=True), root=tmp_path) + try: + with operation(name="cli.analyze", surface="cli"): + _run_observed_pipeline(tmp_path, args) + finally: + shutdown() + _, observed = _read_span_rows(tmp_path) + + assert observed["process_counters"] == { + "files_analyzed": 0, + "failed_files": 0, } diff --git a/tests/test_observability_query.py b/tests/test_observability_query.py index e7c6252b..a3b131be 100644 --- a/tests/test_observability_query.py +++ b/tests/test_observability_query.py @@ -117,6 +117,43 @@ def _seed(tmp_path: Path) -> None: conn.close() +def _seed_analysis_phases(tmp_path: Path) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + OperationRecord( + operation_id="P", + correlation_id="P", + surface="cli", + name="cli.analyze", + started_at_utc="2026-06-12T00:00:00Z", + duration_ms=200.0, + status="ok", + spans=( + SpanRecord( + span_id="process", + operation_id="P", + name="pipeline.process", + started_at_utc="2026-06-12T00:00:00Z", + duration_ms=180.0, + status="ok", + counters={ + "files_analyzed": 2, + "failed_files": 0, + "phase_parse_us": 500, + "phase_unit_cfg_us": 2500, + "files_timed": 2, + "units_eligible": 3, + }, + ), + ), + ), + ) + finally: + conn.close() + + def test_summary_returns_envelope_diagnostics_and_routing(tmp_path: Path) -> None: _seed(tmp_path) out = query_platform_observability(root=tmp_path, section="summary") @@ -130,6 +167,42 @@ def test_summary_returns_envelope_diagnostics_and_routing(tmp_path: Path) -> Non assert {"db_cost", "agent_context", "costly_noops"} <= routed +def test_analysis_phase_cost_section_and_summary_routing(tmp_path: Path) -> None: + _seed_analysis_phases(tmp_path) + + out = query_platform_observability( + root=tmp_path, + section="analysis_phase_cost", + detail_level="normal", + ) + + expected_scalars = { + "phase_worker_elapsed_total_ms": 3.0, + "pipeline_process_wall_ms": 180.0, + "source_spans": 1, + "files_timed": 2, + "units_eligible": 3, + } + assert {key: out[key] for key in expected_scalars} == expected_scalars + assert _rows(out["rows"])[0] == { + "phase": "unit_cfg", + "worker_elapsed_ms": 2.5, + "share_permille": 833, + "verdict": "phase_heavy", + } + + summary = query_platform_observability(root=tmp_path, section="summary") + assert summary["analysis_phase_worker_elapsed_total_ms"] == 3.0 + assert _rows(summary["top_analysis_phases"])[0] == { + "phase": "unit_cfg", + "share_permille": 833, + } + routed = {r["section"] for r in _rows(summary["recommended_next_sections"])} + assert "analysis_phase_cost" in routed + diagnostics = {d["kind"] for d in _rows(summary["top_diagnostics"])} + assert "analysis" in diagnostics + + def test_summary_does_not_embed_raw_trace(tmp_path: Path) -> None: _seed(tmp_path) out = query_platform_observability(root=tmp_path, section="summary") @@ -220,6 +293,7 @@ def test_unknown_section_returns_validation_envelope(tmp_path: Path) -> None: assert out["status"] == "invalid_section" assert out["section"] == "bogus" assert "summary" in _texts(out["available_sections"]) + assert "analysis_phase_cost" in _texts(out["available_sections"]) assert out["rows"] == [] diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index 85df0bd7..d38a06e5 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -23,6 +23,7 @@ AgentTokenRow, AgentView, AggregatesView, + AnalysisPhaseRow, DbCostRow, DbFingerprintRow, McpToolAggregate, @@ -136,6 +137,53 @@ def test_render_trace_html_shows_db_query_shapes() -> None: assert needle in html +def test_render_trace_html_shows_analysis_phase_section() -> None: + agg = AggregatesView( + operation_count=1, + analysis_phases=( + AnalysisPhaseRow( + phase="unit_cfg", + worker_elapsed_ms=2.5, + share_permille=833, + verdict="phase_heavy", + ), + AnalysisPhaseRow( + phase="parse", + worker_elapsed_ms=0.5, + share_permille=167, + verdict="ok", + ), + ), + analysis_phase_worker_elapsed_total_ms=3.0, + analysis_phase_pipeline_wall_ms=2.0, + analysis_phase_source_spans=1, + analysis_phase_files_timed=2, + analysis_phase_units_eligible=3, + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="2026-06-10T04:00:00Z", + window_ended_at_utc="2026-06-10T04:00:01Z", + aggregates=agg, + ) + + html = render_trace_html(trace) + _assert_html_contains( + html, + "Hottest extract phase", + "Analysis extract phases", + "CFG build", + "Parse (ast.parse)", + "phase_heavy", + "Worker elapsed (summed): 3ms", + "pipeline.process wall: 2ms", + "files timed: 2", + "units eligible: 3", + ) + payload = json.loads(render_trace_json(trace)) + assert payload["aggregates"]["analysis_phases"][0]["phase"] == "unit_cfg" + + def _cockpit_trace() -> TraceView: reindex = SpanView( span_id="sx", diff --git a/tests/test_observability_runtime.py b/tests/test_observability_runtime.py index 4ba5011c..db9160c8 100644 --- a/tests/test_observability_runtime.py +++ b/tests/test_observability_runtime.py @@ -6,6 +6,7 @@ from __future__ import annotations +import importlib import sys from collections.abc import Iterator from pathlib import Path @@ -51,6 +52,9 @@ def test_disabled_is_inert_and_imports_no_store() -> None: sp.set_counter("skipped", 0) sp.set_reason_kind("model_changed") + importlib.import_module("codeclone.analysis.units") + importlib.import_module("codeclone.core.parallelism") + assert not any(m.startswith("codeclone.observability.store") for m in sys.modules) assert "psutil" not in sys.modules diff --git a/tests/test_observability_store.py b/tests/test_observability_store.py index 29ff7ef4..a78708ef 100644 --- a/tests/test_observability_store.py +++ b/tests/test_observability_store.py @@ -6,6 +6,7 @@ from __future__ import annotations +import sqlite3 from pathlib import Path import pytest @@ -16,6 +17,7 @@ ProfileSample, SpanRecord, ) +from codeclone.observability.store.reader import build_trace_view from codeclone.observability.store.schema import ( observability_store_path, open_observability_store, @@ -177,6 +179,75 @@ def test_observability_span_error_and_sql_classification(tmp_path: Path) -> None assert elapsed_row is not None +def test_reader_derives_analysis_phase_bundle_from_contributing_spans( + tmp_path: Path, +) -> None: + conn = open_observability_store(observability_store_path(tmp_path)) + try: + write_operation( + conn, + _op( + "A", + correlation_id="A", + spans=( + _span( + "legacy", + operation_id="A", + name="pipeline.process", + duration_ms=10.0, + counters={"files_analyzed": 2, "failed_files": 0}, + ), + ), + ), + ) + write_operation( + conn, + _op( + "B", + correlation_id="B", + spans=( + _span( + "phase", + operation_id="B", + name="pipeline.process", + duration_ms=20.0, + counters={ + "files_analyzed": 2, + "failed_files": 0, + "phase_parse_us": 1000, + "phase_unit_cfg_us": 3000, + "files_timed": 2, + "units_eligible": 3, + }, + ), + ), + ), + ) + finally: + conn.close() + conn = open_observability_store(observability_store_path(tmp_path)) + conn.row_factory = sqlite3.Row + try: + trace = build_trace_view(conn) + finally: + conn.close() + + agg = trace.aggregates + expected_scalars = { + "analysis_phase_source_spans": 1, + "analysis_phase_pipeline_wall_ms": 20.0, + "analysis_phase_worker_elapsed_total_ms": 4.0, + "analysis_phase_files_timed": 2, + "analysis_phase_units_eligible": 3, + } + actual_scalars = {key: getattr(agg, key) for key in expected_scalars} + assert actual_scalars == expected_scalars + assert [(row.phase, row.share_permille) for row in agg.analysis_phases] == [ + ("unit_cfg", 750), + ("parse", 250), + ] + + def test_observability_schema_migrates_legacy_span_columns(tmp_path: Path) -> None: import sqlite3 From edb813400c621998bfd76f3eb13e3581ab6539c4 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 13:23:56 +0500 Subject: [PATCH 014/113] refactor(html): render structural findings with the shared finding card --- codeclone/report/html/assets/css.py | 18 +----- codeclone/report/html/sections/_structural.py | 56 ++++++++++--------- tests/test_html_report.py | 6 ++ 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index c6e180b9..68ddc7e2 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -1103,22 +1103,8 @@ _STRUCTURAL = """\ /* Structural findings — list layout */ .sf-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.sf-card{background:var(--bg-surface);border:1px solid var(--border);border-left:3px solid var(--info); - border-radius:var(--radius-lg); - overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} -.sf-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} - -/* Header row */ -.sf-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} -.sf-kind-badge{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.03em; - padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap; - background:var(--info-muted);color:var(--info)} -.sf-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} -.sf-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} +/* Card chrome is the shared .finding-card; only structural content rules below. */ .sf-why-btn{font-size:.72rem;color:var(--accent-primary);font-weight:500} - -/* Body */ -.sf-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-2)} .sf-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1)} .sf-scope-text{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary)} .sf-inline-action{display:flex;align-items:flex-start;gap:var(--sp-2);padding:var(--sp-2) var(--sp-3); @@ -1418,8 +1404,6 @@ .overview-row-spread{margin-left:0;width:100%} .suggestion-head{flex-direction:column;align-items:flex-start} .suggestion-facts{grid-template-columns:1fr} - .sf-head{flex-direction:column;align-items:flex-start} - .sf-meta{width:100%} .dir-hotspot-head{flex-wrap:wrap;align-items:flex-start} .dir-hotspot-detail{flex-wrap:wrap;align-items:flex-start} .dir-hotspot-bar-track{width:min(148px,42%);min-width:96px} diff --git a/codeclone/report/html/sections/_structural.py b/codeclone/report/html/sections/_structural.py index 24b8db63..aa9775dc 100644 --- a/codeclone/report/html/sections/_structural.py +++ b/codeclone/report/html/sections/_structural.py @@ -34,6 +34,7 @@ ) from ..primitives.escape import _escape_html from ..widgets.badges import _source_kind_badge_html, _tab_empty +from ..widgets.cards import finding_card, meta_badge_html from ..widgets.snippets import _FileCache, _render_code_block from ..widgets.tabs import render_split_tabs @@ -401,44 +402,49 @@ def _render_finding_card( func_word = "function" if spread["functions"] == 1 else "functions" file_word = "file" if spread["files"] == 1 else "files" kind_label = _KIND_LABEL.get(group.finding_kind, group.finding_kind) - source_chip = _escape_html(source_kind_label(source_kind)) - finding_kind_chip = _escape_html(group.finding_kind.replace("_", " ")) - context_chips = ( - f'{source_chip}' - f'{finding_kind_chip}' - ) scope_text = _finding_scope_text(deduped_items) finding_id = structural_group_id(group.finding_kind, group.finding_key) chips_html = _signature_chips_html(group.signature) - return ( - f'
' - '
' - 'info' - f'{_escape_html(kind_label)}' - '' - f'' - f"{spread['functions']} {func_word} \u00b7 {spread['files']} {file_word}" - f'' - "
" - '
' - f'
{context_chips}
' + ) + spread_badge = meta_badge_html( + f"{spread['functions']} {func_word} \u00b7 {spread['files']} {file_word}" + ) + body_html = ( f'
{chips_html}
' f'
{_escape_html(scope_text)}
' f"{inline_action_html}" - "
" + ) + details_html = ( '
' f"Occurrences ({count})" f'
{table_html}
' "
" - "
", + ) + data_attrs = ( + f' id="finding-{_escape_html(finding_id)}"' + f' data-finding-id="{_escape_html(finding_id)}"' + ' data-sf-group="true"' + f' data-source-kind="{_escape_html(source_kind)}"' + f' data-spread-bucket="{_escape_html(spread_bucket)}"' + ) + + return ( + finding_card( + severity="info", + title=kind_label, + eyebrow=source_kind_label(source_kind), + meta_badges=(spread_badge,), + body_html=body_html, + details_html=details_html, + actions_html=why_button, + card_class="sf-card", + data_attrs=data_attrs, + ), source_kind, ) diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 94dd0eb6..13fca706 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -4003,8 +4003,14 @@ def test_html_report_overview_uses_canonical_report_overview_hotlists() -> None: "source-kind-badge source-kind-fixtures", "source-kind-badge source-kind-production", 'breakdown-count">1', + # Structural findings use the shared finding_card chrome (Stage 4) + "finding-card finding-card--info sf-card", + 'data-sf-group="true"', + "data-finding-why-btn", ): assert needle in html + # Bespoke sf-card chrome was fully replaced by the shared component + assert '
n/a
' not in html # Issue breakdown replaces old hotspot sections assert "Issue breakdown" in html From 37a6cf7990cf9088f9e7d77003af18c201e2e83c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 13:26:23 +0500 Subject: [PATCH 015/113] docs: document the Review hub, Overview launchpad, and shared finding card --- CHANGELOG.md | 11 +++++++++++ docs/book/06-html-render.md | 19 +++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2370c85..1aa1a7d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -142,6 +142,17 @@ Added top-overloaded tables. `get_report_section(section="module_map")` returns the projection directly. No new analysis pass, metrics family, or report schema bump — `derived` stays excluded from the integrity digest. +* **Guided finding review.** A default-on, report-only `derived.review_queue` + projection orders the existing suggestions into a prioritized, cross-family + actionable queue (severity, priority, family, location, effort) with summary + counts. A new `Review` HTML tab (between Overview and Clones) renders it as a + walkable list of shared finding cards with a per-finding reviewed toggle + (persisted in `localStorage`), a progress bar, and severity/family filters; the + `Overview` tab gains a launchpad banner that links into it. A new shared + `finding_card` component centralizes the card chrome now used by the + Suggestions, Review, and Structural Findings surfaces. No new analysis pass, + metrics family, or report schema bump — `derived` stays excluded from the + integrity digest. Changed diff --git a/docs/book/06-html-render.md b/docs/book/06-html-render.md index 815ca93f..cfb5bfc5 100644 --- a/docs/book/06-html-render.md +++ b/docs/book/06-html-render.md @@ -36,8 +36,21 @@ Output: - HTML must not recompute detection semantics; it renders facts from report/core layers. - Provenance panels mirror canonical report/meta facts. -- Overview, Quality, Module map, Suggestions, Dead Code, Dependencies, and - Clones tabs are projections over canonical report sections. +- Overview, Review, Quality, Module map, Suggestions, Dead Code, Dependencies, + and Clones tabs are projections over canonical report sections. +- The `Review` tab (between Overview and Clones) is render-only and is the guided + entry point for triaging findings. It draws the precomputed + `derived.review_queue` — a prioritized, cross-family actionable queue — as a list + of shared finding cards. The reviewed state (per finding id, persisted in + `localStorage`), the progress bar, and the severity/family filters are + client-side UX over those rendered facts; it recomputes nothing. The `Overview` + launchpad surfaces the queue total plus severity counts and jumps into this tab + via `data-goto-tab`; the queue is empty (and the launchpad hidden) when there are + no suggestions, mirroring `derived.suggestions` exactly. +- `codeclone/report/html/widgets/cards.py:finding_card` is the single card chrome + shared by the Suggestions, Review, and Structural Findings surfaces — one + severity stripe + badge + meta/body/details/actions shell, so per-surface markup + is not duplicated. - Quality covers per-function/class metrics (Complexity, Coupling, Cohesion) plus report-only subtabs such as `Coverage Join` and `Security Surfaces`; these remain factual projections over canonical metrics families rather than @@ -60,6 +73,8 @@ Output: Refs: - `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/report/html/sections/_review.py:render_review_panel` +- `codeclone/report/html/widgets/cards.py:finding_card` - `codeclone/report/html/sections/_clones.py:_render_group_explanation` - `codeclone/report/html/sections/_module_map.py:render_module_map_panel` - `codeclone/report/html/widgets/dep_graph_layout.py` From 06d04ad0ff2189f5cb64480ffd163ba46d819ac7 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 13:48:18 +0500 Subject: [PATCH 016/113] Explain cached analysis phase observability --- codeclone/observability/render_html.py | 67 ++++++++++++++++++++++++-- tests/test_observability_render.py | 37 ++++++++++++++ 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/codeclone/observability/render_html.py b/codeclone/observability/render_html.py index 033fb87c..f0425673 100644 --- a/codeclone/observability/render_html.py +++ b/codeclone/observability/render_html.py @@ -20,7 +20,7 @@ from __future__ import annotations -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from html import escape from .views import ( @@ -868,9 +868,68 @@ def _analysis_phase_row(row: AnalysisPhaseRow) -> str: ) -def _analysis_phases_section(agg: AggregatesView) -> str: - if not agg.analysis_phases: +def _iter_operation_tree(ops: tuple[OperationView, ...]) -> Iterable[OperationView]: + for op in ops: + yield op + yield from _iter_operation_tree(op.children) + + +def _pipeline_process_spans(trace: TraceView) -> tuple[SpanView, ...]: + roots = trace.operation_tree or trace.correlated_operations + spans: list[SpanView] = [] + seen: set[str] = set() + for op in _iter_operation_tree(roots): + for span in op.spans: + if span.name == "pipeline.process" and span.span_id not in seen: + spans.append(span) + seen.add(span.span_id) + return tuple(spans) + + +def _empty_analysis_phase_section(trace: TraceView) -> str: + process_spans = _pipeline_process_spans(trace) + if not process_spans: return "" + files_analyzed = sum( + span.counters.get("files_analyzed", 0) for span in process_spans + ) + failed_files = sum(span.counters.get("failed_files", 0) for span in process_spans) + if files_analyzed == 0: + reason = ( + "No uncached files were processed in this window; the analysis was " + "served from cache, so file extraction micro-stages did not run. " + "Use a cold cache or changed files to capture phase timings." + ) + else: + reason = ( + "pipeline.process ran, but no analysis phase counters were recorded. " + "Restart the producing process with CODECLONE_OBSERVABILITY_ENABLED=1 " + "and Phase 33 instrumentation." + ) + counters = ( + f"pipeline.process files_analyzed={files_analyzed} · " + f"failed_files={failed_files}" + ) + body = ( + '
' + f"{_esc(reason)}" + f'
{_esc(counters)}
' + "
" + ) + return _section( + "Analysis extract phases", + body, + subtitle=( + "Summed per-file worker elapsed time inside pipeline.process " + "(parse, walk, CFG, normalize). Dev-only; not repository quality." + ), + ) + + +def _analysis_phases_section(trace: TraceView) -> str: + agg = trace.aggregates + if not agg.analysis_phases: + return _empty_analysis_phase_section(trace) rows = "".join(_analysis_phase_row(row) for row in agg.analysis_phases) headers = ( ("Phase", False), @@ -917,7 +976,7 @@ def render_trace_html(trace: TraceView) -> str: + _agent(trace.aggregates) + _mcp(trace.aggregates.mcp_tools) + _pipeline_section(trace.aggregates) - + _analysis_phases_section(trace.aggregates) + + _analysis_phases_section(trace) + f'

{foot}

' + "" ) diff --git a/tests/test_observability_render.py b/tests/test_observability_render.py index d38a06e5..5bc74904 100644 --- a/tests/test_observability_render.py +++ b/tests/test_observability_render.py @@ -184,6 +184,43 @@ def test_render_trace_html_shows_analysis_phase_section() -> None: assert payload["aggregates"]["analysis_phases"][0]["phase"] == "unit_cfg" +def test_render_trace_html_explains_cache_only_analysis_phase_window() -> None: + process_span = SpanView( + span_id="sp", + name="pipeline.process", + duration_ms=1.0, + status="ok", + counters={"files_analyzed": 0, "failed_files": 0}, + ) + op = OperationView( + operation_id="op", + correlation_id="op", + surface="cli", + name="cli.analyze", + started_at_utc="2026-06-10T04:00:00Z", + duration_ms=10.0, + status="ok", + spans=(process_span,), + ) + trace = TraceView( + schema_version="1.0", + window_started_at_utc="2026-06-10T04:00:00Z", + window_ended_at_utc="2026-06-10T04:00:01Z", + aggregates=AggregatesView(operation_count=1), + operation_tree=(op,), + ) + + html = render_trace_html(trace) + _assert_html_contains( + html, + "Analysis extract phases", + "No uncached files were processed", + "served from cache", + "files_analyzed=0", + "failed_files=0", + ) + + def _cockpit_trace() -> TraceView: reindex = SpanView( span_id="sx", From 14a5605d61a4520620b61b003ae1d03a6301a2f0 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:21:05 +0500 Subject: [PATCH 017/113] feat(report): source the review queue from findings across all families --- codeclone/report/document/builder.py | 2 +- codeclone/report/document/derived.py | 287 +++++++++++++++++++--- codeclone/report/html/assets/css.py | 2 + codeclone/report/html/sections/_review.py | 25 +- tests/test_html_report.py | 58 ++++- tests/test_module_map.py | 218 ++++++++++++---- 6 files changed, 494 insertions(+), 98 deletions(-) diff --git a/codeclone/report/document/builder.py b/codeclone/report/document/builder.py index 8b80ab9c..b9239c98 100644 --- a/codeclone/report/document/builder.py +++ b/codeclone/report/document/builder.py @@ -101,7 +101,7 @@ def build_report_document( "overview": overview_payload, "hotlists": hotlists_payload, "module_map": _build_derived_module_map(metrics_payload), - "review_queue": _build_derived_review_queue(suggestions), + "review_queue": _build_derived_review_queue(findings_payload, suggestions), } integrity_payload = _build_integrity_payload( report_schema_version=report_schema_version, diff --git a/codeclone/report/document/derived.py b/codeclone/report/document/derived.py index 205661d8..23f209fa 100644 --- a/codeclone/report/document/derived.py +++ b/codeclone/report/document/derived.py @@ -434,19 +434,132 @@ def _build_derived_suggestions( ] -_REVIEW_QUEUE_SCHEMA_VERSION: Final = "1" +_REVIEW_QUEUE_SCHEMA_VERSION: Final = "2" _REVIEW_SEVERITIES: Final = ("critical", "warning", "info") +_REVIEW_FAMILIES: Final = ("clones", "structural", "dead_code", "design") +_REVIEW_FAMILY_BY_FINDING: Final = { + FAMILY_CLONE: "clones", + FAMILY_STRUCTURAL: "structural", + FAMILY_DEAD_CODE: "dead_code", + FAMILY_DESIGN: "design", +} +_REVIEW_FAMILY_BY_SUGGESTION: Final = { + FAMILY_CLONES: "clones", + FAMILY_STRUCTURAL: "structural", +} +_CLONE_REVIEW_TITLES: Final = { + CLONE_KIND_FUNCTION: "Function clone group", + CLONE_KIND_BLOCK: "Block clone group", + CLONE_KIND_SEGMENT: "Segment clone group", +} + + +def _humanize(value: str) -> str: + text = value.replace("_", " ").strip() + return text[:1].upper() + text[1:] if text else text + + +def _flatten_finding_groups( + findings: Mapping[str, object], +) -> list[Mapping[str, object]]: + """Canonical findings across families, flattened (mirrors overview).""" + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get(FAMILY_CLONES)) + flat: list[Mapping[str, object]] = [ + _as_mapping(group) + for key in ("functions", "blocks", "segments") + for group in _as_sequence(clones.get(key)) + ] + for family_key in (FAMILY_STRUCTURAL, FAMILY_DEAD_CODE, "design"): + flat.extend( + _as_mapping(group) + for group in _as_sequence(_as_mapping(groups.get(family_key)).get("groups")) + ) + return flat + + +def _finding_first_item(group: Mapping[str, object]) -> Mapping[str, object]: + items = _as_sequence(group.get("items")) + return _as_mapping(items[0]) if items else {} + + +def _finding_review_title(group: Mapping[str, object]) -> str: + family = str(group.get("family")) + category = str(group.get("category")) + qualname = str(_finding_first_item(group).get("qualname", "")).strip() + if family == FAMILY_CLONE: + base = _CLONE_REVIEW_TITLES.get(category, "Clone group") + return f"{base} ({_as_int(group.get('count'))} occurrences)" + if family == FAMILY_DEAD_CODE: + return f"Unused {category}: {qualname}" if qualname else f"Unused {category}" + if family == FAMILY_DESIGN: + return f"{_humanize(category)}: {qualname}" if qualname else _humanize(category) + return _humanize(category) + + +def _finding_review_location(group: Mapping[str, object]) -> str: + first = _finding_first_item(group) + # `path` is "" for absolute paths, so we never surface an absolute path — + # we fall back to the qualified name instead. + path = _safe_relative_path(first) + qualname = str(first.get("qualname", "")).strip() + line = _as_int(first.get("start_line")) + base = (f"{path}:{line}" if line else path) if path else qualname + extra = _as_int(group.get("count")) - 1 + if extra > 0: + return f"{base} +{extra} more" if base else f"{extra + 1} locations" + return base + + +def _finding_review_summary(group: Mapping[str, object]) -> str: + count = _as_int(group.get("count")) + spread = _as_mapping(group.get("spread")) + files = _as_int(spread.get("files")) + functions = _as_int(spread.get("functions")) + scope = str(_as_mapping(group.get("source_scope")).get("dominant_kind", "")).strip() + parts = [f"{count} occurrence{'s' if count != 1 else ''}"] + if functions or files: + parts.append( + f"{functions} function{'s' if functions != 1 else ''}" + f" / {files} file{'s' if files != 1 else ''}" + ) + if scope: + parts.append(scope) + return " · ".join(parts) -def _review_item_row(suggestion: Suggestion) -> dict[str, object]: - finding_id = _suggestion_finding_id(suggestion) +def _safe_relative_path(item: Mapping[str, object]) -> str: + """Relative path only — never surface an absolute path in the payload.""" + path = str(item.get("relative_path", "")).strip() + return path if path and not _is_absolute_path(path) else "" + + +def _finding_representative_rows( + group: Mapping[str, object], +) -> list[dict[str, object]]: + rows = [ + { + "relative_path": _safe_relative_path(item), + "start_line": _as_int(item.get("start_line")), + "end_line": _as_int(item.get("end_line")), + "qualname": str(item.get("qualname", "")), + "source_kind": str(item.get("source_kind", "")), + } + for item in (_as_mapping(row) for row in _as_sequence(group.get("items"))) + ] + rows.sort( + key=lambda row: ( + str(row["relative_path"]), + _as_int(row["start_line"]), + str(row["qualname"]), + ) + ) + return rows[:3] + + +def _suggestion_review_fields(suggestion: Suggestion) -> dict[str, object]: + """Remediation fields shared by every suggestion-backed review item.""" return { - "id": f"suggestion:{finding_id}", - "finding_id": finding_id, - "family": suggestion.finding_family, - "category": suggestion.category, - "severity": suggestion.severity, - "priority": suggestion.priority, "source_kind": suggestion.source_kind, "title": suggestion.title, "summary": suggestion.fact_summary, @@ -454,40 +567,150 @@ def _review_item_row(suggestion: Suggestion) -> dict[str, object]: "representative_locations": _representative_location_rows(suggestion), "effort": suggestion.effort, "steps": list(suggestion.steps), + "has_action": True, + } + + +def _finding_identity(group: Mapping[str, object]) -> dict[str, object]: + finding_id = str(group.get("id")) + return { + "id": finding_id, + "finding_id": finding_id, + "family": _REVIEW_FAMILY_BY_FINDING.get(str(group.get("family")), "design"), + "category": str(group.get("category", "")), + "severity": str(group.get("severity", SEVERITY_INFO)), + "priority": _as_float(group.get("priority")), + "novelty": str(group.get("novelty") or "known"), + } + + +def _finding_review_item( + group: Mapping[str, object], + suggestion: Suggestion | None, +) -> dict[str, object]: + item = _finding_identity(group) + if suggestion is not None: + item.update(_suggestion_review_fields(suggestion)) + else: + item.update( + { + "source_kind": str( + _as_mapping(group.get("source_scope")).get("dominant_kind", "") + ), + "title": _finding_review_title(group), + "summary": _finding_review_summary(group), + "location": _finding_review_location(group), + "representative_locations": _finding_representative_rows(group), + "effort": "", + "steps": [], + "has_action": False, + } + ) + return item + + +def _suggestion_review_item(suggestion: Suggestion) -> dict[str, object]: + finding_id = _suggestion_finding_id(suggestion) + family = _REVIEW_FAMILY_BY_SUGGESTION.get(suggestion.finding_family) or ( + "dead_code" if suggestion.category == CATEGORY_DEAD_CODE else "design" + ) + return { + "id": finding_id, + "finding_id": finding_id, + "family": family, + "category": suggestion.category, + "severity": suggestion.severity, + "priority": suggestion.priority, + "novelty": "known", + **_suggestion_review_fields(suggestion), + } + + +def _review_sort_key(item: Mapping[str, object]) -> tuple[float, int, str, str]: + return ( + -_as_float(item.get("priority")), + SEVERITY_ORDER.get(str(item.get("severity")), 9), + str(item.get("title")), + str(item.get("finding_id")), + ) + + +def _dedup_append( + items: list[dict[str, object]], + seen: set[str], + finding_id: str, + item: dict[str, object], +) -> None: + """Append a review item once per finding id (first writer wins).""" + if finding_id in seen: + return + seen.add(finding_id) + items.append(item) + + +def _review_summary(items: Sequence[Mapping[str, object]]) -> dict[str, object]: + by_severity = dict.fromkeys(_REVIEW_SEVERITIES, 0) + by_family = dict.fromkeys(_REVIEW_FAMILIES, 0) + by_novelty = {"new": 0, "known": 0} + actionable = 0 + for item in items: + severity = str(item.get("severity")) + if severity in by_severity: + by_severity[severity] += 1 + family = str(item.get("family")) + by_family[family] = by_family.get(family, 0) + 1 + by_novelty["new" if str(item.get("novelty")) == "new" else "known"] += 1 + if item.get("has_action"): + actionable += 1 + return { + "total": len(items), + "reviewed": 0, + "actionable": actionable, + "by_severity": by_severity, + "by_family": {key: count for key, count in sorted(by_family.items()) if count}, + "by_novelty": by_novelty, + "top_priority": max( + (_as_float(item.get("priority")) for item in items), default=0.0 + ), } def _build_derived_review_queue( + findings: Mapping[str, object], suggestions: Sequence[Suggestion] | None, ) -> dict[str, object]: - """Prioritised cross-family actionable queue that drives the review hub. - - Items are the actionable suggestions (clones, structural, dead-code, design - all normalise into suggestions), ordered by priority; the summary carries the - counts the UI needs for progress and filters. ``reviewed`` starts at 0 — the - HTML tracks per-finding review state client-side. + """Prioritised cross-family review queue projected over canonical findings. + + Every finding in ``findings.groups`` (clones, structural, dead-code, design) + becomes one review item, enriched with the matching suggestion's remediation + steps when one exists (the suggestion wins on title/summary/location). + Findings without a suggestion carry ``has_action=False``. The summary carries + the severity/family/novelty counts the review hub needs; ``reviewed`` starts + at 0 — the HTML tracks per-finding review state client-side. """ - rows = _sorted_suggestions(suggestions) - by_severity = dict.fromkeys(_REVIEW_SEVERITIES, 0) - by_family: dict[str, int] = {} - for suggestion in rows: - by_severity[suggestion.severity] += 1 - by_family[suggestion.finding_family] = ( - by_family.get(suggestion.finding_family, 0) + 1 + suggestion_by_id: dict[str, Suggestion] = {} + for suggestion in suggestions or (): + suggestion_by_id.setdefault(_suggestion_finding_id(suggestion), suggestion) + + items: list[dict[str, object]] = [] + seen: set[str] = set() + for group in _flatten_finding_groups(findings): + finding_id = str(group.get("id")) + _dedup_append( + items, + seen, + finding_id, + _finding_review_item(group, suggestion_by_id.get(finding_id)), ) + for finding_id, suggestion in suggestion_by_id.items(): + _dedup_append(items, seen, finding_id, _suggestion_review_item(suggestion)) + + items.sort(key=_review_sort_key) return { "schema_version": _REVIEW_QUEUE_SCHEMA_VERSION, "scope": "report_only", - "summary": { - "total": len(rows), - "reviewed": 0, - "by_severity": by_severity, - "by_family": dict(sorted(by_family.items())), - "top_priority": max( - (suggestion.priority for suggestion in rows), default=0.0 - ), - }, - "items": [_review_item_row(suggestion) for suggestion in rows], + "summary": _review_summary(items), + "items": items, } diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 68ddc7e2..f30ae3f8 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -987,6 +987,8 @@ .finding-meta-badge--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} .finding-meta-badge--moderate{color:var(--warning);background:var(--warning-muted)} .finding-meta-badge--hard{color:var(--error);background:var(--error-muted)} +.finding-meta-badge--new{color:var(--accent-primary);background:var(--accent-muted); + text-transform:uppercase;letter-spacing:.04em} .suggestion-sev-inline{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); border-radius:var(--radius-sm)} diff --git a/codeclone/report/html/sections/_review.py b/codeclone/report/html/sections/_review.py index d4ce7f86..a32488dc 100644 --- a/codeclone/report/html/sections/_review.py +++ b/codeclone/report/html/sections/_review.py @@ -40,7 +40,13 @@ "progress is saved in your browser. Report-only triage: verify in source " "before editing." ) -_FAMILY_LABELS = {"clones": "Clones", "structural": "Structural", "metrics": "Quality"} +_FAMILY_LABELS = { + "clones": "Clones", + "structural": "Structural", + "dead_code": "Dead code", + "design": "Quality", + "metrics": "Quality", +} _SEVERITIES = ("critical", "warning", "info") _REVIEW_TOGGLE = ( @@ -62,23 +68,26 @@ def _render_review_item(item: Mapping[str, object]) -> str: family = str(item.get("family")) severity = str(item.get("severity")) effort = str(item.get("effort")) - meta_badges = ( - meta_badge_html(f"priority {_as_float(item.get('priority')):.2f}"), - meta_badge_html(effort, tone=effort), - meta_badge_html(_family_label(family)), - ) + novelty = str(item.get("novelty")) + meta_badges = [meta_badge_html(f"priority {_as_float(item.get('priority')):.2f}")] + if effort: + meta_badges.append(meta_badge_html(effort, tone=effort)) + meta_badges.append(meta_badge_html(_family_label(family))) + if novelty == "new": + meta_badges.append(meta_badge_html("new", tone="new")) data_attrs = ( ' data-review-card="true" ' f'data-finding-id="{_escape_html(finding_id)}" ' f'data-severity="{_escape_html(severity)}" ' - f'data-family="{_escape_html(family)}"' + f'data-family="{_escape_html(family)}" ' + f'data-novelty="{_escape_html(novelty)}"' ) return finding_card( severity=severity, title=str(item.get("title")), eyebrow=f"{_family_label(family)} · {item.get('source_kind')}", location=str(item.get("location")), - meta_badges=meta_badges, + meta_badges=tuple(meta_badges), body_html=_escape_html(str(item.get("summary"))), actions_html=_REVIEW_TOGGLE, card_class="review-card", diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 13fca706..12ed520c 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -3139,8 +3139,12 @@ def test_html_report_directory_hotspots_use_test_scope_roots() -> None: "Hotspots by Directory", 'title="tests/fixtures">tests/fixtures', ) - assert "golden_project" not in html - assert "clone_metrics_cycle" not in html + # Directory hotspots (in the Overview panel) collapse to scope roots and must + # not leak raw fixture paths. The Review queue legitimately surfaces the + # finding itself, so scope the negative assertion to the Overview panel. + overview = html.split('id="panel-overview"', 1)[1].split('id="panel-review"', 1)[0] + assert "golden_project" not in overview + assert "clone_metrics_cycle" not in overview def test_html_report_metrics_bad_health_score_and_dead_code_ok_tone() -> None: @@ -4320,13 +4324,15 @@ def _review_queue_payload(*, with_items: bool = True) -> dict[str, object]: if with_items: items = [ { - "id": "suggestion:clone-a", + "id": "clone:a", "finding_id": "clone:a", "family": "clones", - "category": "clone", + "category": "function", "severity": "critical", "priority": 0.91, + "novelty": "new", "source_kind": "production", + "has_action": True, "title": "Duplicated branch logic", "summary": "3 near-identical branches", "location": "pkg/a.py:12", @@ -4335,13 +4341,15 @@ def _review_queue_payload(*, with_items: bool = True) -> dict[str, object]: "steps": ["extract a handler"], }, { - "id": "suggestion:struct-b", + "id": "struct:b", "finding_id": "struct:b", "family": "structural", - "category": "structural", + "category": "duplicated_branches", "severity": "warning", "priority": 0.6, + "novelty": "known", "source_kind": "tests", + "has_action": True, "title": "Repeated assertions", "summary": "repeated assert template", "location": "tests/test_b.py:30", @@ -4349,19 +4357,42 @@ def _review_queue_payload(*, with_items: bool = True) -> dict[str, object]: "effort": "easy", "steps": ["collapse"], }, + { + "id": "dead:c", + "finding_id": "dead:c", + "family": "dead_code", + "category": "function", + "severity": "info", + "priority": 0.3, + "novelty": "known", + "source_kind": "production", + "has_action": False, + "title": "Unused function: pkg.mod:helper", + "summary": "1 occurrence · production", + "location": "pkg/mod.py:40", + "representative_locations": [], + "effort": "", + "steps": [], + }, ] return { - "schema_version": "1", + "schema_version": "2", "scope": "report_only", "summary": { "total": len(items), "reviewed": 0, + "actionable": 2 if with_items else 0, "by_severity": { "critical": 1 if with_items else 0, "warning": 1 if with_items else 0, - "info": 0, + "info": 1 if with_items else 0, }, - "by_family": {"clones": 1, "structural": 1} if with_items else {}, + "by_family": ( + {"clones": 1, "dead_code": 1, "structural": 1} if with_items else {} + ), + "by_novelty": {"new": 1, "known": 2} + if with_items + else {"new": 0, "known": 0}, "top_priority": 0.91 if with_items else 0.0, }, "items": items, @@ -4413,6 +4444,13 @@ def test_html_report_renders_review_panel() -> None: "Duplicated branch logic", 'data-review-filter="severity"', 'data-review-value="critical"', + # novelty marker on the new finding + 'data-novelty="new"', + "finding-meta-badge--new", + # cross-family findings (incl. a no-action dead-code item) are reviewable + 'data-family="dead_code"', + 'data-finding-id="dead:c"', + "Unused function: pkg.mod:helper", ) # priority order preserved (input order): critical clone before warning struct assert panel.index("Duplicated branch logic") < panel.index("Repeated assertions") @@ -4436,7 +4474,7 @@ def test_overview_launchpad_links_to_review() -> None: overview, "review-launchpad", 'data-goto-tab="review"', - "2 findings ready to review", + "3 findings ready to review", "Start review", "launchpad-sev--critical", ) diff --git a/tests/test_module_map.py b/tests/test_module_map.py index 6e383806..3db52b36 100644 --- a/tests/test_module_map.py +++ b/tests/test_module_map.py @@ -244,66 +244,190 @@ def _review_suggestion( ) -def test_build_derived_review_queue_orders_and_summarizes() -> None: +def _finding_group( + *, + gid: str, + family: str, + category: str, + severity: str, + priority: float, + novelty: str = "known", + count: int = 1, + source_kind: str = "production", + qualname: str = "pkg.mod:fn", + path: str = "pkg/mod.py", + line: int = 10, +) -> dict[str, Any]: + return { + "id": gid, + "family": family, + "category": category, + "kind": category, + "severity": severity, + "priority": priority, + "novelty": novelty, + "count": count, + "source_scope": {"dominant_kind": source_kind, "impact_scope": "runtime"}, + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": path, + "qualname": qualname, + "start_line": line, + "end_line": line + 3, + "source_kind": source_kind, + } + ], + } + + +def _findings_payload( + *, + clones: tuple[dict[str, Any], ...] = (), + structural: tuple[dict[str, Any], ...] = (), + dead_code: tuple[dict[str, Any], ...] = (), + design: tuple[dict[str, Any], ...] = (), +) -> dict[str, Any]: + from codeclone.domain.findings import ( + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_STRUCTURAL, + ) + + return { + "groups": { + FAMILY_CLONES: {"functions": list(clones), "blocks": [], "segments": []}, + FAMILY_STRUCTURAL: {"groups": list(structural)}, + FAMILY_DEAD_CODE: {"groups": list(dead_code)}, + "design": {"groups": list(design)}, + } + } + + +def test_build_derived_review_queue_projects_findings_across_families() -> None: + from codeclone.domain.findings import ( + CLONE_KIND_FUNCTION, + FAMILY_CLONE, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, + ) from codeclone.report.document.derived import _build_derived_review_queue - suggestions = [ - _review_suggestion( - severity="warning", - category="structural", - family="structural", - title="B structural", - priority=0.5, - effort="moderate", - subject_key="b", + findings = _findings_payload( + clones=( + _finding_group( + gid="clone:a", + family=FAMILY_CLONE, + category=CLONE_KIND_FUNCTION, + severity="critical", + priority=0.9, + count=3, + ), ), - _review_suggestion( - severity="critical", - category="clone", - family="clones", - title="A duplicated", - priority=0.9, - effort="hard", - subject_key="a", + structural=( + _finding_group( + gid="struct:b", + family=FAMILY_STRUCTURAL, + category="duplicated_branches", + severity="info", + priority=0.4, + novelty="new", + ), ), - _review_suggestion( - severity="info", - category="dead_code", - family="metrics", - title="C unused", - priority=0.2, - effort="easy", - subject_key="c", + dead_code=( + _finding_group( + gid="dead:c", + family=FAMILY_DEAD_CODE, + category="function", + severity="warning", + priority=0.6, + ), ), - ] - queue: Any = _build_derived_review_queue(suggestions) - assert queue["schema_version"] == "1" + design=( + _finding_group( + gid="design:d", + family=FAMILY_DESIGN, + category="complexity", + severity="warning", + priority=0.5, + ), + ), + ) + queue: Any = _build_derived_review_queue(findings, None) + assert queue["schema_version"] == "2" assert queue["scope"] == "report_only" - summary = queue["summary"] - assert summary["total"] == 3 - assert summary["reviewed"] == 0 - assert summary["by_severity"] == {"critical": 1, "warning": 1, "info": 1} - assert summary["by_family"] == {"clones": 1, "metrics": 1, "structural": 1} - assert summary["top_priority"] == 0.9 - # priority-ordered: A(0.9) -> B(0.5) -> C(0.2) - assert [item["title"] for item in queue["items"]] == [ - "A duplicated", - "B structural", - "C unused", + assert queue["summary"] == { + "total": 4, + "reviewed": 0, + "actionable": 0, + "by_severity": {"critical": 1, "warning": 2, "info": 1}, + "by_family": {"clones": 1, "dead_code": 1, "design": 1, "structural": 1}, + "by_novelty": {"new": 1, "known": 3}, + "top_priority": 0.9, + } + # priority-ordered: clone(0.9) -> dead(0.6) -> design(0.5) -> struct(0.4) + assert [item["finding_id"] for item in queue["items"]] == [ + "clone:a", + "dead:c", + "design:d", + "struct:b", ] - first = queue["items"][0] - assert first["family"] == "clones" - assert first["severity"] == "critical" - assert first["effort"] == "hard" - assert first["location"] == "pkg/a.py:1" - assert str(first["id"]).startswith("suggestion:") + clone_item = queue["items"][0] + assert clone_item["family"] == "clones" + assert clone_item["has_action"] is False + assert clone_item["title"] == "Function clone group (3 occurrences)" + assert str(clone_item["location"]).startswith("pkg/mod.py:10") + struct_item = queue["items"][-1] + assert struct_item["novelty"] == "new" + assert struct_item["title"] == "Duplicated branches" + + +def test_build_derived_review_queue_enriches_with_matching_suggestion() -> None: + from codeclone.domain.findings import FAMILY_STRUCTURAL + from codeclone.findings.ids import structural_group_id + from codeclone.report.document.derived import _build_derived_review_queue + + gid = structural_group_id("duplicated_branches", "b") + findings = _findings_payload( + structural=( + _finding_group( + gid=gid, + family=FAMILY_STRUCTURAL, + category="duplicated_branches", + severity="warning", + priority=0.5, + ), + ) + ) + suggestion = _review_suggestion( + severity="warning", + category="structural", + family="structural", + title="Refactor duplicated branches", + priority=0.5, + effort="moderate", + subject_key="b", + ) + queue: Any = _build_derived_review_queue(findings, [suggestion]) + assert queue["summary"]["total"] == 1 + assert queue["summary"]["actionable"] == 1 + item = queue["items"][0] + assert item["finding_id"] == gid + assert item["has_action"] is True + # suggestion wins on title + carries remediation steps + assert item["title"] == "Refactor duplicated branches" + assert item["effort"] == "moderate" + assert item["steps"] == ["do the thing"] def test_build_derived_review_queue_empty_shell() -> None: from codeclone.report.document.derived import _build_derived_review_queue - queue: Any = _build_derived_review_queue(None) + queue: Any = _build_derived_review_queue({}, None) assert queue["items"] == [] assert queue["summary"]["total"] == 0 assert queue["summary"]["top_priority"] == 0.0 assert queue["summary"]["by_severity"] == {"critical": 0, "warning": 0, "info": 0} + assert queue["summary"]["by_novelty"] == {"new": 0, "known": 0} + assert queue["summary"]["by_family"] == {} From 852dc68d6613838a4fd6d18aab89aa4baacdcb9b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:32:14 +0500 Subject: [PATCH 018/113] feat(html): solid active-tab pill and refined insight banner --- codeclone/report/html/assets/css.py | 34 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index f30ae3f8..147b8f2a 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -202,13 +202,16 @@ border:none;cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted); white-space:nowrap;border-radius:var(--radius-md);transition:all var(--dur-fast) var(--ease)} .main-tab:hover{color:var(--text-primary);background:var(--bg-raised)} -.main-tab[aria-selected="true"]{color:var(--accent-primary);background:var(--accent-muted)} +.main-tab[aria-selected="true"]{color:#fff;background:var(--accent-primary); + box-shadow:0 1px 4px color-mix(in oklch,var(--accent-primary) 42%,transparent)} +.main-tab[aria-selected="true"]:hover{background:var(--accent-hover)} .main-tab-icon{flex-shrink:0;opacity:.72} +.main-tab[aria-selected="true"] .main-tab-icon{opacity:1} .main-tab-label{display:inline-flex;align-items:center} .tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; height:18px;padding:0 5px;font-size:.68rem;font-weight:700;border-radius:var(--radius-sm); background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} -.main-tab[aria-selected="true"] .tab-count{background:var(--accent-primary); +.main-tab[aria-selected="true"] .tab-count{background:rgba(255,255,255,.24); color:#fff} /* Tab panels */ @@ -350,16 +353,23 @@ # --------------------------------------------------------------------------- _INSIGHT = """\ -.insight-banner{padding:var(--sp-3) var(--sp-4);border-radius:var(--radius-md); - margin-bottom:var(--sp-4);border-left:3px solid var(--border);background:none} -.insight-question{font-size:.78rem;font-weight:500;color:var(--text-muted); - text-transform:uppercase;letter-spacing:.03em;margin-bottom:2px} -.insight-answer{font-size:.82rem;color:var(--text-secondary);line-height:1.5} - -.insight-ok{border-left-color:var(--success);background:var(--success-muted)} -.insight-warn{border-left-color:var(--warning);background:var(--warning-muted)} -.insight-risk{border-left-color:var(--error);background:var(--error-muted)} -.insight-info{border-left-color:var(--info);background:var(--info-muted)} +.insight-banner{position:relative;padding:var(--sp-4) var(--sp-5); + border-radius:var(--radius-lg);margin-bottom:var(--sp-5); + border:1px solid var(--border);background:var(--bg-surface);overflow:hidden} +.insight-banner::before{content:"";position:absolute;inset:0 auto 0 0;width:3px; + background:var(--border-strong)} +.insight-question{font-size:.72rem;font-weight:600;color:var(--text-muted); + text-transform:uppercase;letter-spacing:.06em;margin-bottom:5px} +.insight-answer{font-size:.88rem;color:var(--text-secondary);line-height:1.55} + +.insight-ok::before{background:var(--success)} +.insight-ok{background:color-mix(in oklch,var(--success-muted) 55%,var(--bg-surface))} +.insight-warn::before{background:var(--warning)} +.insight-warn{background:color-mix(in oklch,var(--warning-muted) 55%,var(--bg-surface))} +.insight-risk::before{background:var(--error)} +.insight-risk{background:color-mix(in oklch,var(--error-muted) 55%,var(--bg-surface))} +.insight-info::before{background:var(--info)} +.insight-info{background:color-mix(in oklch,var(--info-muted) 55%,var(--bg-surface))} .insight-banner .overview-summary-grid{margin:0} .insight-banner .overview-summary-item{background:none;border:none;border-radius:0;padding:0} .insight-banner .overview-summary-label{font-size:.76rem;margin-bottom:var(--sp-2); From 00ab3f80953d1dcdfd9dbf3d4a65f2391914b47d Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:37:17 +0500 Subject: [PATCH 019/113] Expand benchmark scenarios and summaries --- .github/workflows/benchmark.yml | 67 ++++- benchmarks/run_benchmark.py | 415 +++++++++++++++++++++++++++-- benchmarks/run_docker_benchmark.sh | 4 + docs/book/20-benchmarking.md | 56 +++- tests/test_benchmark.py | 124 +++++++++ 5 files changed, 632 insertions(+), 34 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ed74cd94..4f12f6d0 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -112,6 +112,8 @@ jobs: env: RUNS: ${{ matrix.runs }} WARMUPS: ${{ matrix.warmups }} + SCENARIO_PROFILE: ${{ matrix.profile }} + STARTUP_RUNS: "3" CPUS: ${{ matrix.cpus }} MEMORY: ${{ matrix.memory }} run: | @@ -125,6 +127,8 @@ jobs: --target . \ --runs "${{ matrix.runs }}" \ --warmups "${{ matrix.warmups }}" \ + --scenario-profile "${{ matrix.profile }}" \ + --startup-runs 3 \ --tmp-dir "/tmp/codeclone-bench-${{ matrix.label }}" \ --output "$BENCH_JSON" @@ -143,21 +147,39 @@ jobs: raise SystemExit(1) payload = json.loads(report_path.read_text(encoding="utf-8")) + startup_probes = payload.get("startup_probes", []) scenarios = payload.get("scenarios", []) comparisons = payload.get("comparisons", {}) print("CodeClone benchmark summary") print(f"label={os.environ.get('RUNNER_OS','unknown').lower()} / {os.environ.get('GITHUB_JOB','benchmark')}") + if startup_probes: + print("startup probes:") + for probe in startup_probes: + name = str(probe.get("name", "unknown")) + stats = probe.get("stats_seconds", {}) + cpu_stats = probe.get("child_cpu_stats_seconds", {}) + print( + f"- {name:22s} median={float(stats.get('median', 0.0)):.4f}s " + f"first={float(probe.get('first_seconds', 0.0)):.4f}s " + f"cpu={float(cpu_stats.get('median', 0.0)):.4f}s" + ) for scenario in scenarios: name = str(scenario.get("name", "unknown")) stats = scenario.get("stats_seconds", {}) + cpu_stats = scenario.get("child_cpu_stats_seconds", {}) + inventory = scenario.get("inventory_sample", {}) median = float(stats.get("median", 0.0)) p95 = float(stats.get("p95", 0.0)) stdev = float(stats.get("stdev", 0.0)) digest = str(scenario.get("digest", "")) print( f"- {name:16s} median={median:.4f}s " - f"p95={p95:.4f}s stdev={stdev:.4f}s digest={digest}" + f"p95={p95:.4f}s stdev={stdev:.4f}s " + f"cpu={float(cpu_stats.get('median', 0.0)):.4f}s " + f"files={inventory.get('analyzed', 0)}/{inventory.get('cached', 0)} " + f"artifacts={float(scenario.get('artifact_total_kib_sample', 0.0)):.1f}KiB " + f"exit={scenario.get('exit_code_counts', {})} digest={digest}" ) if comparisons: @@ -174,24 +196,57 @@ jobs: "", f"- Tool: `{payload['tool']['name']} {payload['tool']['version']}`", f"- Target: `{payload['config']['target']}`", + f"- Scenario profile: `{payload['config'].get('scenario_profile', 'smoke')}`", f"- Runs: `{payload['config']['runs']}`", f"- Warmups: `{payload['config']['warmups']}`", + f"- Startup runs: `{payload['config'].get('startup_runs', 0)}`", f"- Generated: `{payload['generated_at_utc']}`", "", - "### Scenarios", - "", - "| Scenario | Median (s) | p95 (s) | Stdev (s) | Deterministic | Digest |", - "|---|---:|---:|---:|:---:|---|", ] + if startup_probes: + lines.extend( + [ + "### Startup / Import Probes", + "", + "| Probe | Median (s) | First (s) | CPU Median (s) |", + "|---|---:|---:|---:|", + ] + ) + for probe in startup_probes: + stats = probe.get("stats_seconds", {}) + cpu_stats = probe.get("child_cpu_stats_seconds", {}) + lines.append( + "| " + f"{probe.get('name', '')} | " + f"{float(stats.get('median', 0.0)):.4f} | " + f"{float(probe.get('first_seconds', 0.0)):.4f} | " + f"{float(cpu_stats.get('median', 0.0)):.4f} |" + ) + lines.append("") + + lines.extend( + [ + "### Scenarios", + "", + "| Scenario | Median (s) | p95 (s) | CPU Median (s) | Files A/C | Artifacts KiB | Exit | Deterministic | Digest |", + "|---|---:|---:|---:|---:|---:|---|:---:|---|", + ] + ) + for scenario in scenarios: stats = scenario.get("stats_seconds", {}) + cpu_stats = scenario.get("child_cpu_stats_seconds", {}) + inventory = scenario.get("inventory_sample", {}) lines.append( "| " f"{scenario.get('name', '')} | " f"{float(stats.get('median', 0.0)):.4f} | " f"{float(stats.get('p95', 0.0)):.4f} | " - f"{float(stats.get('stdev', 0.0)):.4f} | " + f"{float(cpu_stats.get('median', 0.0)):.4f} | " + f"{inventory.get('analyzed', 0)}/{inventory.get('cached', 0)} | " + f"{float(scenario.get('artifact_total_kib_sample', 0.0)):.1f} | " + f"{scenario.get('exit_code_counts', {})} | " f"{'yes' if bool(scenario.get('deterministic')) else 'no'} | " f"{scenario.get('digest', '')} |" ) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index af04cf8a..c8037e04 100755 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -20,13 +20,21 @@ from datetime import datetime, timezone from pathlib import Path from statistics import fmean, median, pstdev -from typing import Literal +from typing import Literal, cast from codeclone import __version__ as codeclone_version from codeclone.baseline import current_python_tag -BENCHMARK_SCHEMA_VERSION = "1.0" +BENCHMARK_SCHEMA_VERSION = "1.1" BENCHMARK_CLI_MODULE = "codeclone.main" +BenchmarkProfile = Literal["smoke", "extended", "diagnostic"] +ReportFormat = Literal["html", "md", "sarif", "text"] +REPORT_FORMAT_SPECS: Mapping[ReportFormat, tuple[str, str]] = { + "html": ("--html", ".html"), + "md": ("--md", ".md"), + "sarif": ("--sarif", ".sarif"), + "text": ("--text", ".txt"), +} BENCHMARK_NEUTRAL_ARGS: tuple[str, ...] = ( "--no-fail-on-new", "--no-fail-on-new-metrics", @@ -59,17 +67,40 @@ class Scenario: name: str mode: Literal["cold", "warm"] - extra_args: tuple[str, ...] + extra_args: tuple[str, ...] = () + report_formats: tuple[ReportFormat, ...] = () + run_cap: int | None = None + warmup_cap: int | None = None + expected_exit_codes: tuple[int, ...] = (0,) @dataclass(frozen=True) class RunMeasurement: elapsed_seconds: float + child_user_seconds: float + child_system_seconds: float + exit_code: int digest: str files_found: int files_analyzed: int files_cached: int files_skipped: int + artifact_bytes: dict[str, int] + cache_bytes: int + + +@dataclass(frozen=True) +class StartupProbe: + name: str + args: tuple[str, ...] + + +@dataclass(frozen=True) +class ProbeMeasurement: + elapsed_seconds: float + child_user_seconds: float + child_system_seconds: float + exit_code: int def _percentile(sorted_values: list[float], q: float) -> float: @@ -105,6 +136,71 @@ def _stats(values: list[float]) -> dict[str, float]: } +def _as_float(value: object, default: float = 0.0) -> float: + if isinstance(value, bool): + return float(value) + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return default + return default + + +def _resource_usage_seconds() -> tuple[float, float]: + try: + import resource + except ImportError: + return (0.0, 0.0) + usage = resource.getrusage(resource.RUSAGE_CHILDREN) + return (float(usage.ru_utime), float(usage.ru_stime)) + + +def _resource_delta( + before: tuple[float, float], + after: tuple[float, float], +) -> tuple[float, float]: + return ( + max(0.0, after[0] - before[0]), + max(0.0, after[1] - before[1]), + ) + + +def _normalized_env() -> dict[str, str]: + env = dict(os.environ) + env["PYTHONHASHSEED"] = "0" + env["LC_ALL"] = "C.UTF-8" + env["LANG"] = "C.UTF-8" + env["TZ"] = "UTC" + return env + + +def _artifact_paths( + *, + report_path: Path, + report_formats: tuple[ReportFormat, ...], +) -> dict[str, Path]: + paths: dict[str, Path] = {"json": report_path} + for report_format in report_formats: + _flag, suffix = REPORT_FORMAT_SPECS[report_format] + paths[report_format] = report_path.with_suffix(suffix) + return paths + + +def _artifact_size_map(paths: Mapping[str, Path]) -> dict[str, int]: + return { + name: path.stat().st_size + for name, path in sorted(paths.items()) + if path.exists() + } + + +def _file_size(path: Path) -> int: + return path.stat().st_size if path.exists() else 0 + + def _read_report(report_path: Path) -> tuple[str, dict[str, int]]: payload_obj: object = json.loads(report_path.read_text(encoding="utf-8")) if not isinstance(payload_obj, dict): @@ -155,21 +251,25 @@ def _run_cli_once( cache_path: Path, report_path: Path, extra_args: tuple[str, ...], + report_formats: tuple[ReportFormat, ...] = (), + expected_exit_codes: tuple[int, ...] = (0,), ) -> RunMeasurement: - env = dict(os.environ) - env["PYTHONHASHSEED"] = "0" - env["LC_ALL"] = "C.UTF-8" - env["LANG"] = "C.UTF-8" - env["TZ"] = "UTC" - + env = _normalized_env() + artifact_paths = _artifact_paths( + report_path=report_path, + report_formats=report_formats, + ) + report_args: list[str] = ["--json", str(report_path)] + for report_format in report_formats: + flag, _suffix = REPORT_FORMAT_SPECS[report_format] + report_args.extend([flag, str(artifact_paths[report_format])]) cmd = [ python_executable, "-m", BENCHMARK_CLI_MODULE, str(target), *BENCHMARK_NEUTRAL_ARGS, - "--json", - str(report_path), + *report_args, "--cache-path", str(cache_path), "--no-progress", @@ -177,6 +277,7 @@ def _run_cli_once( *extra_args, ] + usage_before = _resource_usage_seconds() start = time.perf_counter() completed = subprocess.run( cmd, @@ -186,7 +287,11 @@ def _run_cli_once( env=env, ) elapsed_seconds = time.perf_counter() - start - if completed.returncode != 0: + child_user_seconds, child_system_seconds = _resource_delta( + usage_before, + _resource_usage_seconds(), + ) + if completed.returncode not in expected_exit_codes: stderr_tail = "\n".join(completed.stderr.splitlines()[-20:]) stdout_tail = "\n".join(completed.stdout.splitlines()[-20:]) raise RuntimeError( @@ -197,11 +302,16 @@ def _run_cli_once( digest, files = _read_report(report_path) return RunMeasurement( elapsed_seconds=elapsed_seconds, + child_user_seconds=child_user_seconds, + child_system_seconds=child_system_seconds, + exit_code=completed.returncode, digest=digest, files_found=files["found"], files_analyzed=files["analyzed"], files_cached=files["cached"], files_skipped=files["skipped"], + artifact_bytes=_artifact_size_map(artifact_paths), + cache_bytes=_file_size(cache_path), ) @@ -252,6 +362,22 @@ def _print_bulleted_lines(header: str, lines: Sequence[str]) -> None: print(f"- {line}") +def _effective_count(requested: int, cap: int | None) -> int: + return min(requested, cap) if cap is not None else requested + + +def _exit_code_counts(measurements: Sequence[RunMeasurement]) -> dict[str, int]: + counts: dict[str, int] = {} + for measurement in measurements: + key = str(measurement.exit_code) + counts[key] = counts.get(key, 0) + 1 + return dict(sorted(counts.items(), key=lambda item: int(item[0]))) + + +def _artifact_total_kib(artifact_bytes: Mapping[str, int]) -> float: + return sum(artifact_bytes.values()) / 1024.0 + + def _scenario_result( *, scenario: Scenario, @@ -266,6 +392,8 @@ def _scenario_result( shutil.rmtree(scenario_dir) scenario_dir.mkdir(parents=True, exist_ok=True) + effective_runs = _effective_count(runs, scenario.run_cap) + effective_warmups = _effective_count(warmups, scenario.warmup_cap) warm_cache_path = scenario_dir / "shared-cache.json" cold_cache_path = scenario_dir / "cold-cache.json" @@ -276,9 +404,11 @@ def _scenario_result( cache_path=warm_cache_path, report_path=scenario_dir / "seed-report.json", extra_args=scenario.extra_args, + report_formats=scenario.report_formats, + expected_exit_codes=scenario.expected_exit_codes, ) - for idx in range(warmups): + for idx in range(effective_warmups): if scenario.mode == "warm": cache_path = warm_cache_path else: @@ -290,10 +420,12 @@ def _scenario_result( cache_path=cache_path, report_path=scenario_dir / f"warmup-report-{idx}.json", extra_args=scenario.extra_args, + report_formats=scenario.report_formats, + expected_exit_codes=scenario.expected_exit_codes, ) measurements: list[RunMeasurement] = [] - for idx in range(runs): + for idx in range(effective_runs): if scenario.mode == "warm": cache_path = warm_cache_path else: @@ -305,6 +437,8 @@ def _scenario_result( cache_path=cache_path, report_path=scenario_dir / f"run-report-{idx}.json", extra_args=scenario.extra_args, + report_formats=scenario.report_formats, + expected_exit_codes=scenario.expected_exit_codes, ) _validate_inventory_sample(scenario=scenario, measurement=measurement) measurements.append(measurement) @@ -318,26 +452,178 @@ def _scenario_result( ) timings = [m.elapsed_seconds for m in measurements] + child_user = [m.child_user_seconds for m in measurements] + child_system = [m.child_system_seconds for m in measurements] + child_cpu = [m.child_user_seconds + m.child_system_seconds for m in measurements] sample = measurements[0] return { "name": scenario.name, "mode": scenario.mode, "extra_args": list(scenario.extra_args), - "warmups": warmups, - "runs": runs, + "report_formats": list(scenario.report_formats), + "warmups": effective_warmups, + "runs": effective_runs, + "requested_warmups": warmups, + "requested_runs": runs, + "run_cap": scenario.run_cap, + "warmup_cap": scenario.warmup_cap, + "expected_exit_codes": list(scenario.expected_exit_codes), + "exit_code_counts": _exit_code_counts(measurements), "deterministic": deterministic, "digest": digests[0], "timings_seconds": timings, "stats_seconds": _stats(timings), + "child_user_stats_seconds": _stats(child_user), + "child_system_stats_seconds": _stats(child_system), + "child_cpu_stats_seconds": _stats(child_cpu), "inventory_sample": { "found": sample.files_found, "analyzed": sample.files_analyzed, "cached": sample.files_cached, "skipped": sample.files_skipped, }, + "artifact_bytes_sample": sample.artifact_bytes, + "artifact_total_kib_sample": _artifact_total_kib(sample.artifact_bytes), + "cache_bytes_sample": sample.cache_bytes, } +def _run_probe_once( + *, + python_executable: str, + probe: StartupProbe, +) -> ProbeMeasurement: + cmd = [python_executable, *probe.args] + usage_before = _resource_usage_seconds() + start = time.perf_counter() + completed = subprocess.run( + cmd, + check=False, + capture_output=True, + text=True, + env=_normalized_env(), + ) + elapsed_seconds = time.perf_counter() - start + child_user_seconds, child_system_seconds = _resource_delta( + usage_before, + _resource_usage_seconds(), + ) + if completed.returncode != 0: + stderr_tail = "\n".join(completed.stderr.splitlines()[-20:]) + stdout_tail = "\n".join(completed.stdout.splitlines()[-20:]) + raise RuntimeError( + f"startup probe {probe.name} failed with exit {completed.returncode}" + f"\nSTDOUT:\n{stdout_tail}\nSTDERR:\n{stderr_tail}" + ) + return ProbeMeasurement( + elapsed_seconds=elapsed_seconds, + child_user_seconds=child_user_seconds, + child_system_seconds=child_system_seconds, + exit_code=completed.returncode, + ) + + +def _startup_probes() -> tuple[StartupProbe, ...]: + return ( + StartupProbe(name="python_empty", args=("-c", "pass")), + StartupProbe(name="import_codeclone", args=("-c", "import codeclone")), + StartupProbe( + name="import_codeclone_main", + args=("-c", "import codeclone.main"), + ), + StartupProbe( + name="cli_version", + args=("-m", BENCHMARK_CLI_MODULE, "--version"), + ), + ) + + +def _probe_result( + *, + probe: StartupProbe, + python_executable: str, + runs: int, +) -> dict[str, object]: + measurements = [ + _run_probe_once(python_executable=python_executable, probe=probe) + for _idx in range(runs) + ] + timings = [m.elapsed_seconds for m in measurements] + child_user = [m.child_user_seconds for m in measurements] + child_system = [m.child_system_seconds for m in measurements] + child_cpu = [m.child_user_seconds + m.child_system_seconds for m in measurements] + return { + "name": probe.name, + "args": list(probe.args), + "runs": runs, + "timings_seconds": timings, + "stats_seconds": _stats(timings), + "first_seconds": timings[0] if timings else 0.0, + "subsequent_stats_seconds": _stats(timings[1:]) if len(timings) > 1 else None, + "child_user_stats_seconds": _stats(child_user), + "child_system_stats_seconds": _stats(child_system), + "child_cpu_stats_seconds": _stats(child_cpu), + "exit_code_counts": { + str(code): sum(1 for item in measurements if item.exit_code == code) + for code in sorted({item.exit_code for item in measurements}) + }, + } + + +def _scenario_profile(profile: BenchmarkProfile) -> tuple[Scenario, ...]: + core = ( + Scenario(name="cold_full", mode="cold"), + Scenario(name="warm_full", mode="warm"), + Scenario(name="warm_clones_only", mode="warm", extra_args=("--skip-metrics",)), + ) + report_scenarios = ( + Scenario( + name="cold_html", + mode="cold", + report_formats=("html",), + run_cap=3, + warmup_cap=1, + ), + Scenario( + name="warm_html", + mode="warm", + report_formats=("html",), + run_cap=5, + warmup_cap=1, + ), + Scenario( + name="cold_all_reports", + mode="cold", + report_formats=("html", "md", "sarif", "text"), + run_cap=3, + warmup_cap=1, + ), + Scenario( + name="warm_all_reports", + mode="warm", + report_formats=("html", "md", "sarif", "text"), + run_cap=5, + warmup_cap=1, + ), + ) + diagnostic_scenarios = ( + Scenario( + name="ci_cold_diagnostic", + mode="cold", + extra_args=("--ci",), + report_formats=("html",), + run_cap=3, + warmup_cap=0, + expected_exit_codes=(0, 2, 3), + ), + ) + if profile == "smoke": + return core + if profile == "extended": + return core + report_scenarios + return core + report_scenarios + diagnostic_scenarios + + def _cgroup_value(path: Path) -> str | None: try: content = path.read_text(encoding="utf-8").strip() @@ -396,12 +682,28 @@ def _median_for(name: str) -> float | None: cold_full = _median_for("cold_full") warm_full = _median_for("warm_full") warm_clones = _median_for("warm_clones_only") + cold_html = _median_for("cold_html") + warm_html = _median_for("warm_html") + cold_all_reports = _median_for("cold_all_reports") + warm_all_reports = _median_for("warm_all_reports") comparisons: dict[str, float] = {} if cold_full and warm_full: comparisons["warm_full_speedup_vs_cold_full"] = cold_full / warm_full if warm_full and warm_clones: comparisons["warm_clones_only_speedup_vs_warm_full"] = warm_full / warm_clones + if cold_full and cold_html: + comparisons["cold_html_overhead_vs_cold_full"] = cold_html / cold_full + if warm_full and warm_html: + comparisons["warm_html_overhead_vs_warm_full"] = warm_html / warm_full + if cold_full and cold_all_reports: + comparisons["cold_all_reports_overhead_vs_cold_full"] = ( + cold_all_reports / cold_full + ) + if warm_full and warm_all_reports: + comparisons["warm_all_reports_overhead_vs_warm_full"] = ( + warm_all_reports / warm_full + ) return comparisons @@ -470,7 +772,7 @@ def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description=( "Deterministic Docker-oriented benchmark for CodeClone CLI " - "(cold/warm cache scenarios)." + "(cold/warm cache, report, and startup scenarios)." ) ) parser.add_argument( @@ -502,6 +804,27 @@ def _parse_args() -> argparse.Namespace: default=int(os.environ.get("CODECLONE_BENCH_WARMUPS", "3")), help="Warmup runs per scenario", ) + parser.add_argument( + "--scenario-profile", + choices=("smoke", "extended", "diagnostic"), + default=os.environ.get("CODECLONE_BENCH_PROFILE", "smoke"), + help=( + "Scenario set: smoke keeps the historical core set; extended adds " + "report-format scenarios with per-scenario caps; diagnostic also " + "adds a CI-gate timing scenario that may exit non-zero." + ), + ) + parser.add_argument( + "--startup-runs", + type=int, + default=int(os.environ.get("CODECLONE_BENCH_STARTUP_RUNS", "3")), + help="Measured runs per startup/import probe", + ) + parser.add_argument( + "--no-startup-probes", + action="store_true", + help="Skip Python/import startup probes.", + ) parser.add_argument( "--tmp-dir", type=Path, @@ -534,6 +857,8 @@ def main() -> int: raise SystemExit("--runs must be > 0") if args.warmups < 0: raise SystemExit("--warmups must be >= 0") + if args.startup_runs <= 0: + raise SystemExit("--startup-runs must be > 0") if args.max_regression_pct < 0: raise SystemExit("--max-regression-pct must be >= 0") target = args.target.resolve() @@ -547,11 +872,10 @@ def main() -> int: shutil.rmtree(workspace) workspace.mkdir(parents=True, exist_ok=True) - scenarios = [ - Scenario(name="cold_full", mode="cold", extra_args=()), - Scenario(name="warm_full", mode="warm", extra_args=()), - Scenario(name="warm_clones_only", mode="warm", extra_args=("--skip-metrics",)), - ] + scenario_profile = str(args.scenario_profile) + if scenario_profile not in {"smoke", "extended", "diagnostic"}: + raise SystemExit(f"unknown scenario profile: {scenario_profile}") + scenarios = _scenario_profile(cast(BenchmarkProfile, scenario_profile)) scenario_results = [ _scenario_result( scenario=scenario, @@ -563,6 +887,18 @@ def main() -> int: ) for scenario in scenarios ] + startup_probe_results = ( + [ + _probe_result( + probe=probe, + python_executable=args.python_executable, + runs=args.startup_runs, + ) + for probe in _startup_probes() + ] + if not args.no_startup_probes + else [] + ) comparisons = _comparison_metrics(scenario_results) @@ -577,9 +913,13 @@ def main() -> int: "target": str(target), "runs": args.runs, "warmups": args.warmups, + "scenario_profile": scenario_profile, + "startup_runs": args.startup_runs, + "startup_probes": not args.no_startup_probes, "python_executable": args.python_executable, }, "environment": _environment(), + "startup_probes": startup_probe_results, "scenarios": scenario_results, "comparisons": comparisons, "generated_at_utc": datetime.now(timezone.utc) @@ -612,17 +952,44 @@ def main() -> int: print("CodeClone Docker benchmark") print(f"target={target}") - print(f"runs={args.runs} warmups={args.warmups}") + print( + f"profile={scenario_profile} runs={args.runs} " + f"warmups={args.warmups} startup_runs={args.startup_runs}" + ) + if startup_probe_results: + print("startup probes:") + for probe in startup_probe_results: + name = str(probe["name"]) + stats = probe["stats_seconds"] + cpu_stats = probe["child_cpu_stats_seconds"] + assert isinstance(stats, dict) + assert isinstance(cpu_stats, dict) + print( + f"- {name:22s} median={_as_float(stats['median']):.4f}s " + f"first={_as_float(probe['first_seconds']):.4f}s " + f"cpu={_as_float(cpu_stats['median']):.4f}s" + ) for scenario in scenario_results: name = str(scenario["name"]) stats = scenario["stats_seconds"] + cpu_stats = scenario["child_cpu_stats_seconds"] + inventory = scenario["inventory_sample"] + exit_counts = scenario["exit_code_counts"] assert isinstance(stats, dict) + assert isinstance(cpu_stats, dict) + assert isinstance(inventory, dict) + assert isinstance(exit_counts, dict) median_s = float(stats["median"]) p95_s = float(stats["p95"]) stdev_s = float(stats["stdev"]) + cpu_median_s = float(cpu_stats["median"]) print( - f"- {name:16s} median={median_s:.4f}s " + f"- {name:20s} median={median_s:.4f}s " f"p95={p95_s:.4f}s stdev={stdev_s:.4f}s " + f"cpu={cpu_median_s:.4f}s " + f"files={inventory.get('analyzed', 0)}/{inventory.get('cached', 0)} " + f"artifacts={_as_float(scenario['artifact_total_kib_sample']):.1f}KiB " + f"exit={exit_counts} " f"digest={scenario['digest']}" ) _print_bulleted_lines( diff --git a/benchmarks/run_docker_benchmark.sh b/benchmarks/run_docker_benchmark.sh index c828a804..f4af6831 100755 --- a/benchmarks/run_docker_benchmark.sh +++ b/benchmarks/run_docker_benchmark.sh @@ -10,6 +10,8 @@ CPUS="${CPUS:-1.0}" MEMORY="${MEMORY:-2g}" RUNS="${RUNS:-12}" WARMUPS="${WARMUPS:-3}" +SCENARIO_PROFILE="${SCENARIO_PROFILE:-smoke}" +STARTUP_RUNS="${STARTUP_RUNS:-3}" HOST_UID="$(id -u)" HOST_GID="$(id -g)" CONTAINER_USER="${CONTAINER_USER:-${HOST_UID}:${HOST_GID}}" @@ -41,6 +43,8 @@ docker run \ --output "/bench-out/$OUTPUT_BASENAME" \ --runs "$RUNS" \ --warmups "$WARMUPS" \ + --scenario-profile "$SCENARIO_PROFILE" \ + --startup-runs "$STARTUP_RUNS" \ "$@" echo "[bench] results: $OUT_DIR/$OUTPUT_BASENAME" diff --git a/docs/book/20-benchmarking.md b/docs/book/20-benchmarking.md index dc5f791b..39306675 100644 --- a/docs/book/20-benchmarking.md +++ b/docs/book/20-benchmarking.md @@ -16,19 +16,40 @@ Define a reproducible, deterministic benchmark workflow for CodeClone in Docker. ## Data model -Benchmark output (`benchmark_schema_version=1.0`) contains: +Benchmark output (`benchmark_schema_version=1.1`) contains: - tool metadata (`name`, `version`, `python_tag`) -- benchmark config (`target`, `runs`, `warmups`) +- benchmark config (`target`, `runs`, `warmups`, `scenario_profile`, + `startup_runs`) - execution environment (platform, cpu limits/affinity, cgroup limits) +- startup/import probes that isolate new-process cost from analysis cost: + - `python_empty` + - `import_codeclone` + - `import_codeclone_main` + - `cli_version` - scenario results: - `cold_full` (cold cache each run) - `warm_full` (shared warm cache) - `warm_clones_only` (shared warm cache with `--skip-metrics`) -- latency stats per scenario (`min`, `max`, `mean`, `median`, `p95`, `stdev`) + - extended profile only: `cold_html`, `warm_html`, `cold_all_reports`, + `warm_all_reports` + - diagnostic profile only: `ci_cold_diagnostic` +- latency stats per scenario and probe (`min`, `max`, `mean`, `median`, `p95`, `stdev`) +- child process CPU stats per scenario/probe (`child_user_stats_seconds`, + `child_system_stats_seconds`, `child_cpu_stats_seconds`) +- per-scenario inventory and artifact samples (`inventory_sample`, + `artifact_bytes_sample`, `cache_bytes_sample`, `exit_code_counts`) - deterministic digest check (`integrity.digest.value` must be stable within scenario) - cross-scenario comparisons (speedup ratios) +Scenario profiles: + +| Profile | Purpose | Default in CI | +|--------------|-------------------------------------------------------------------------|---------------| +| `smoke` | Historical core scenarios only; bounded push/PR signal. | yes | +| `extended` | Adds HTML/all-report scenarios with per-scenario run caps. | manual only | +| `diagnostic` | Adds `ci_cold_diagnostic`, where exit `0`, `2`, or `3` is recorded. | no | + ## Contracts - Benchmark must run in containerized, isolated environment. @@ -37,13 +58,21 @@ Benchmark output (`benchmark_schema_version=1.0`) contains: - Runtime environment is normalized: `PYTHONHASHSEED=0`, `TZ=UTC`, `LC_ALL/LANG=C.UTF-8`. - Each measured run must exit successfully (`exit=0`); any failure aborts the benchmark. + The `diagnostic` profile is the only exception: `ci_cold_diagnostic` records + `0`, `2`, or `3` so gate-failure timing can be measured without treating the + benchmark sample as a product failure. - Determinism guard: if scenario digest diverges across measured runs, benchmark fails. +- Extended report scenarios are intentionally capped below global `runs`/`warmups` + so GitHub-hosted workers do not pay unbounded cold-report CPU. ## Invariants (MUST) - Cold scenario uses a fixed cache path and removes cache file before each run (cold cache with stable canonical metadata path). - Warm scenarios seed one shared cache file before warmups/measured runs. +- Startup/import probes run as fresh Python subprocesses and do not read report + output; they are for process/bootstrap/import cost only. +- Core smoke scenarios remain gate-neutral by passing explicit no-fail flags. - Benchmark JSON write is atomic (`.tmp` + replace). - Benchmark scenario ordering is stable and fixed. @@ -82,6 +111,24 @@ CPUSET=0 CPUS=1.0 MEMORY=2g RUNS=16 WARMUPS=4 \ ./benchmarks/run_docker_benchmark.sh ``` +Extended report-profile run: + +```bash +SCENARIO_PROFILE=extended RUNS=16 WARMUPS=4 STARTUP_RUNS=3 \ + ./benchmarks/run_docker_benchmark.sh +``` + +Local diagnostic run that also measures the CI-gate timing path: + +```bash +uv run python benchmarks/run_benchmark.py \ + --target . \ + --scenario-profile diagnostic \ + --runs 3 \ + --warmups 1 \ + --output /tmp/codeclone-benchmark-diagnostic.json +``` + Permissions note: - The host wrapper runs the container as host `uid:gid` by default @@ -99,7 +146,8 @@ Permissions note: - Job behavior: - runs Docker benchmark with pinned runner limits - uploads `.cache/benchmarks/codeclone-benchmark.json` as artifact - - emits scenario table and ratio table into `GITHUB_STEP_SUMMARY` + - emits startup/import probe, scenario, and ratio tables into + `GITHUB_STEP_SUMMARY` - prints ratios in job logs (important for quick trend checks) ## Non-guarantees diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 05e90108..52c46be0 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -16,7 +16,9 @@ BENCHMARK_NEUTRAL_ARGS, RunMeasurement, Scenario, + _comparison_metrics, _run_cli_once, + _scenario_profile, _timing_regressions, _validate_inventory_sample, ) @@ -31,11 +33,16 @@ def _measurement( ) -> RunMeasurement: return RunMeasurement( elapsed_seconds=0.1, + child_user_seconds=0.08, + child_system_seconds=0.01, + exit_code=0, digest="digest", files_found=found, files_analyzed=analyzed, files_cached=cached, files_skipped=skipped, + artifact_bytes={"json": 128}, + cache_bytes=256, ) @@ -138,6 +145,123 @@ def fake_run( ] +def test_benchmark_runner_can_emit_additional_report_formats( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + captured: dict[str, object] = {} + + def fake_run( + cmd: list[str], + *, + check: bool, + capture_output: bool, + text: bool, + env: dict[str, str], + ) -> CompletedProcess[str]: + captured["cmd"] = cmd + captured["check"] = check + captured["capture_output"] = capture_output + captured["text"] = text + captured["env"] = env + return CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr("benchmarks.run_benchmark.subprocess.run", fake_run) + monkeypatch.setattr( + "benchmarks.run_benchmark._read_report", + lambda _report_path: ( + "digest", + {"found": 10, "analyzed": 10, "cached": 0, "skipped": 0}, + ), + ) + + report_path = tmp_path / "report.json" + _run_cli_once( + target=tmp_path, + python_executable="python3", + cache_path=tmp_path / "cache.json", + report_path=report_path, + extra_args=(), + report_formats=("html", "md", "sarif", "text"), + ) + + assert captured["cmd"] == [ + "python3", + "-m", + BENCHMARK_CLI_MODULE, + str(tmp_path), + *BENCHMARK_NEUTRAL_ARGS, + "--json", + str(report_path), + "--html", + str(tmp_path / "report.html"), + "--md", + str(tmp_path / "report.md"), + "--sarif", + str(tmp_path / "report.sarif"), + "--text", + str(tmp_path / "report.txt"), + "--cache-path", + str(tmp_path / "cache.json"), + "--no-progress", + "--quiet", + ] + + +def test_benchmark_extended_profile_adds_capped_report_scenarios() -> None: + scenarios = {scenario.name: scenario for scenario in _scenario_profile("extended")} + + assert set(scenarios) == { + "cold_full", + "warm_full", + "warm_clones_only", + "cold_html", + "warm_html", + "cold_all_reports", + "warm_all_reports", + } + assert scenarios["cold_html"].report_formats == ("html",) + assert scenarios["cold_html"].run_cap == 3 + assert scenarios["warm_all_reports"].report_formats == ( + "html", + "md", + "sarif", + "text", + ) + assert scenarios["warm_all_reports"].run_cap == 5 + + +def test_benchmark_diagnostic_profile_allows_ci_gate_exit_codes() -> None: + scenarios = { + scenario.name: scenario for scenario in _scenario_profile("diagnostic") + } + + diagnostic = scenarios["ci_cold_diagnostic"] + assert diagnostic.extra_args == ("--ci",) + assert diagnostic.expected_exit_codes == (0, 2, 3) + + +def test_benchmark_comparison_metrics_include_report_overheads() -> None: + scenarios: list[dict[str, object]] = [ + {"name": "cold_full", "stats_seconds": {"median": 2.0}}, + {"name": "warm_full", "stats_seconds": {"median": 1.0}}, + {"name": "warm_clones_only", "stats_seconds": {"median": 0.5}}, + {"name": "cold_html", "stats_seconds": {"median": 2.4}}, + {"name": "warm_html", "stats_seconds": {"median": 1.2}}, + {"name": "cold_all_reports", "stats_seconds": {"median": 3.0}}, + {"name": "warm_all_reports", "stats_seconds": {"median": 1.5}}, + ] + + assert _comparison_metrics(scenarios) == { + "cold_all_reports_overhead_vs_cold_full": 1.5, + "cold_html_overhead_vs_cold_full": 1.2, + "warm_all_reports_overhead_vs_warm_full": 1.5, + "warm_clones_only_speedup_vs_warm_full": 2.0, + "warm_full_speedup_vs_cold_full": 2.0, + "warm_html_overhead_vs_warm_full": 1.2, + } + + @pytest.mark.parametrize( ("scenario", "measurement", "message"), ( From 40231dd5de7b8ffc807913d65ca1a140ce39bc15 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:48:54 +0500 Subject: [PATCH 020/113] feat(html): refresh background depth and table styling --- codeclone/report/html/assets/css.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 147b8f2a..89b62c14 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -131,7 +131,11 @@ html{-webkit-text-size-adjust:100%;text-size-adjust:100%;-webkit-font-smoothing:antialiased; -moz-osx-font-smoothing:grayscale;scroll-behavior:smooth;scrollbar-gutter:stable} body{font-family:var(--font-sans);font-size:14px;line-height:1.6;color:var(--text-primary); - background:var(--bg-body);overflow-x:hidden; + background: + radial-gradient(1200px 520px at 50% -10%, + color-mix(in oklch,var(--accent-primary) 10%,transparent),transparent 72%), + var(--bg-body); + background-attachment:fixed;overflow-x:hidden; /* Inter stylistic alternates: zero — slashed zero (disambiguates 0 from O in metric values) ss02 — disambiguation set (I/l/1/0 clear apart) @@ -390,10 +394,11 @@ linear-gradient(to right,rgba(0,0,0,.15),transparent) left center / 14px 100% no-repeat scroll, linear-gradient(to left,rgba(0,0,0,.15),transparent) right center / 14px 100% no-repeat scroll} .table{inline-size:max-content;min-inline-size:100%;border-collapse:collapse;font-size:.82rem; - font-family:var(--font-mono)} + font-family:var(--font-sans)} .table th{position:sticky;top:0;z-index:2;padding:var(--sp-2) var(--sp-3);text-align:left;font-family:var(--font-sans); - font-weight:600;font-size:.75rem;text-transform:uppercase;letter-spacing:.05em; - color:var(--text-muted);background:var(--bg-overlay);border-bottom:1px solid var(--border); + font-weight:600;font-size:.72rem;text-transform:uppercase;letter-spacing:.06em; + color:var(--text-secondary);background:var(--bg-overlay); + border-bottom:2px solid color-mix(in oklch,var(--accent-primary) 30%,var(--border)); white-space:nowrap;cursor:default;user-select:none} .table th[data-sortable]{cursor:pointer} .table th[data-sortable]:hover{color:var(--text-primary)} @@ -401,13 +406,15 @@ .table th[aria-sort] .sort-icon{opacity:1;color:var(--accent-primary)} .table td{padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border);color:var(--text-secondary); vertical-align:top} -.table tr:last-child td{border-bottom:none} -.table tr:hover td{background:var(--bg-raised)} +.table tbody tr:nth-child(even) td{background:color-mix(in oklch,var(--bg-raised) 45%,transparent)} +.table tbody tr:last-child td{border-bottom:none} +.table tbody tr:hover td{background:var(--accent-muted)} .table .col-name{font-weight:500;color:var(--text-primary);max-width:360px;overflow:hidden; text-overflow:ellipsis;white-space:nowrap} .table .col-file,.table .col-path{color:var(--text-muted);max-width:240px;overflow:hidden; text-overflow:ellipsis;white-space:nowrap} -.table .col-number,.table .col-num{font-variant-numeric:tabular-nums;text-align:right;white-space:nowrap} +.table .col-number,.table .col-num{font-family:var(--font-numeric); + font-variant-numeric:tabular-nums;text-align:right;white-space:nowrap;color:var(--text-primary)} .table .col-risk,.table .col-badge,.table .col-cat{white-space:nowrap} .table .col-steps{max-width:120px;word-break:break-word} .table .col-wide{max-width:320px;word-break:break-all} From cf3af762fd3fbc9385abaf33d563c76ac6cea14c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:56:06 +0500 Subject: [PATCH 021/113] feat(html): visualize numeric table metrics with magnitude meters --- codeclone/report/html/assets/css.py | 12 +++++++ codeclone/report/html/sections/_coupling.py | 3 ++ codeclone/report/html/widgets/badges.py | 27 +++++++++++++++ codeclone/report/html/widgets/tables.py | 22 ++++++++++++ tests/test_html_report.py | 38 ++++++++++++++++++--- 5 files changed, 98 insertions(+), 4 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 89b62c14..957495e3 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -430,6 +430,18 @@ .score-bar-val{font-family:var(--font-numeric);font-variant-numeric:tabular-nums; font-size:.78rem;color:var(--text-secondary)} .score-bar--strong .score-bar-val{color:var(--accent-primary);font-weight:600} +.metric-meter{display:inline-flex;align-items:center;gap:8px;width:100%; + flex-direction:row-reverse;justify-content:flex-start} +.metric-meter-track{flex:1;max-width:60px;height:5px;border-radius:3px; + background:var(--bg-overlay);overflow:hidden} +.metric-meter-fill{display:block;height:100%;border-radius:3px; + background:color-mix(in oklch,var(--accent-primary) 70%,var(--text-muted))} +.metric-meter-val{font-family:var(--font-numeric);font-variant-numeric:tabular-nums; + font-size:.8rem;color:var(--text-primary);min-width:22px;text-align:right} +.metric-meter--mid .metric-meter-fill{background:var(--warning)} +.metric-meter--mid .metric-meter-val{color:var(--warning)} +.metric-meter--high .metric-meter-fill{background:var(--error)} +.metric-meter--high .metric-meter-val{color:var(--error);font-weight:600} .status-pill{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; padding:2px 9px;border-radius:999px;white-space:nowrap;letter-spacing:.01em; font-family:var(--font-sans)} diff --git a/codeclone/report/html/sections/_coupling.py b/codeclone/report/html/sections/_coupling.py index 3db20927..faa2aa1a 100644 --- a/codeclone/report/html/sections/_coupling.py +++ b/codeclone/report/html/sections/_coupling.py @@ -283,6 +283,7 @@ def render_quality_panel(ctx: ReportContext) -> str: headers=("Function", "File", "CC", "Nesting", "Risk"), rows=cx_rows, empty_message="Complexity metrics are not available.", + column_types={"CC": "meter", "Nesting": "meter"}, ctx=ctx, ) @@ -306,6 +307,7 @@ def render_quality_panel(ctx: ReportContext) -> str: rows=cp_rows, empty_message="Coupling metrics are not available.", raw_html_headers=("Coupled classes",), + column_types={"CBO": "meter"}, ctx=ctx, ) @@ -329,6 +331,7 @@ def render_quality_panel(ctx: ReportContext) -> str: headers=("Class", "File", "LCOM4", "Risk", "Methods", "Fields"), rows=ch_rows, empty_message="Cohesion metrics are not available.", + column_types={"LCOM4": "meter", "Methods": "meter", "Fields": "meter"}, ctx=ctx, ) diff --git a/codeclone/report/html/widgets/badges.py b/codeclone/report/html/widgets/badges.py index d9600bd4..aa445ba4 100644 --- a/codeclone/report/html/widgets/badges.py +++ b/codeclone/report/html/widgets/badges.py @@ -148,6 +148,33 @@ def _score_bar_html(value: str) -> str: ) +def _metric_meter_html(value: str, *, fraction: float) -> str: + """Render a numeric metric as its value plus a magnitude bar. + + *fraction* (0..1) is the value's share of the column maximum; the bar fills + to that share and tints by band (low/mid/high) so table magnitudes read at a + glance without altering the underlying number. + """ + text = str(value).strip() + try: + float(text) + except (TypeError, ValueError): + return _escape_html(text) + pct = max(0, min(100, round(fraction * 100))) + if fraction >= 0.66: + band = " metric-meter--high" + elif fraction >= 0.33: + band = " metric-meter--mid" + else: + band = "" + return ( + f'' + f'' + f'' + f'{_escape_html(text)}' + ) + + def _chips_html(text: str) -> str: """Render a comma-separated string as a row of compact chips.""" parts = [part.strip() for part in str(text).split(",") if part.strip()] diff --git a/codeclone/report/html/widgets/tables.py b/codeclone/report/html/widgets/tables.py index 2e270823..8b2d71c0 100644 --- a/codeclone/report/html/widgets/tables.py +++ b/codeclone/report/html/widgets/tables.py @@ -14,6 +14,7 @@ from ..primitives.escape import _escape_html from .badges import ( _chips_html, + _metric_meter_html, _quality_badge_html, _score_bar_html, _status_pill_html, @@ -79,6 +80,14 @@ "chips": _chips_html, } + +def _safe_abs_float(value: object) -> float: + try: + return abs(float(str(value).strip())) + except (TypeError, ValueError): + return 0.0 + + _CELL_TYPE_CLS = { "score": "col-score", "status": "col-badge", @@ -110,6 +119,14 @@ def render_rows_table( raw_html_set = {h.lower() for h in raw_html_headers} typed_cols = {h.lower(): t for h, t in (column_types or {}).items()} + # Meter columns self-scale: each bar fills relative to that column's max. + meter_max: dict[int, float] = {} + for col_idx, header in enumerate(lower_headers): + if typed_cols.get(header) != "meter": + continue + values = [_safe_abs_float(row[col_idx]) for row in rows if col_idx < len(row)] + meter_max[col_idx] = max([*values, 0.0]) + # colgroup cg = [""] for h in lower_headers: @@ -126,6 +143,11 @@ def render_rows_table( def _td(col_idx: int, cell: str) -> str: h = lower_headers[col_idx] if col_idx < len(lower_headers) else "" cell_type = typed_cols.get(h) + if cell_type == "meter": + colmax = meter_max.get(col_idx, 0.0) + fraction = _safe_abs_float(cell) / colmax if colmax > 0 else 0.0 + meter = _metric_meter_html(cell, fraction=fraction) + return f'{meter}' if cell_type in _CELL_RENDERERS: cls = _CELL_TYPE_CLS[cell_type] return f'{_CELL_RENDERERS[cell_type](cell)}' diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 12ed520c..27b3372a 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -906,10 +906,7 @@ def test_html_report_table_css_matches_rendered_column_classes() -> None: ".table .col-file,.table .col-path{color:var(--text-muted);" "max-width:240px;overflow:hidden;" ), - ( - ".table .col-number,.table .col-num{font-variant-numeric:" - "tabular-nums;text-align:right;white-space:nowrap}" - ), + ".table .col-number,.table .col-num{font-family:var(--font-numeric);", ".table .col-risk,.table .col-badge,.table .col-cat{white-space:nowrap}", ) @@ -4487,3 +4484,36 @@ def test_overview_launchpad_absent_when_queue_empty() -> None: overview = html.split('id="panel-overview"', 1)[1].split('id="panel-review"', 1)[0] assert "review-launchpad" not in overview assert "data-goto-tab" not in overview + + +def test_render_rows_table_meter_column_self_scales() -> None: + from codeclone.report.html.widgets.tables import render_rows_table + + html = render_rows_table( + headers=("Name", "CC"), + rows=[("alpha", "20"), ("beta", "10"), ("gamma", "5")], + empty_message="none", + column_types={"CC": "meter"}, + ) + assert "metric-meter" in html + # column max (20) fills 100% and reads as the high band + assert 'style="width:100%"' in html + assert "metric-meter--high" in html + # half the max (10) fills 50% and reads as the mid band + assert 'style="width:50%"' in html + assert "metric-meter--mid" in html + # the underlying numbers are preserved verbatim + assert ">20
" in html and ">5
" in html + + +def test_render_rows_table_meter_handles_non_numeric() -> None: + from codeclone.report.html.widgets.tables import render_rows_table + + html = render_rows_table( + headers=("Name", "CC"), + rows=[("alpha", "n/a")], + empty_message="none", + column_types={"CC": "meter"}, + ) + assert "n/a" in html + assert "metric-meter-fill" not in html From 9ff8dec144c1434edfb5362a585b23748144c3ca Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 14:58:35 +0500 Subject: [PATCH 022/113] fix(tests): fix extractor test phase ledger stub --- tests/test_extractor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_extractor.py b/tests/test_extractor.py index c37bf0a3..466942c9 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -3449,7 +3449,10 @@ def _fake_fingerprint( _node: ast.FunctionDef | ast.AsyncFunctionDef, _cfg: NormalizationConfig, _qualname: str, + *, + phase_ledger: object, ) -> tuple[str, int]: + del phase_ledger return "f" * 40, 1 def _fake_extract_segments( From eefb1a7fc36a449b8f01fd68f3e8641aa0cfabcf Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:02:30 +0500 Subject: [PATCH 023/113] feat(html): add metric meters and source-kind badges to more tables --- codeclone/report/html/sections/_dead_code.py | 1 + codeclone/report/html/sections/_dependencies.py | 1 + codeclone/report/html/sections/_module_map.py | 15 +++++++++++++-- codeclone/report/html/widgets/tables.py | 8 ++++++++ tests/test_html_report.py | 14 ++++++++++++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/codeclone/report/html/sections/_dead_code.py b/codeclone/report/html/sections/_dead_code.py index ffdad1d9..10c4e59b 100644 --- a/codeclone/report/html/sections/_dead_code.py +++ b/codeclone/report/html/sections/_dead_code.py @@ -102,6 +102,7 @@ def render_dead_code_panel(ctx: ReportContext) -> str: headers=("Name", "File", "Line", "Kind", "Confidence", "Rule", "Source"), rows=suppressed_rows, empty_message="No suppressed dead-code candidates.", + column_types={"Source": "source_kind"}, ctx=ctx, ) diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index 48afb04e..483da055 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -271,6 +271,7 @@ def render_dependencies_panel(ctx: ReportContext) -> str: rows=dep_chain_rows, empty_message="No dependency chains detected.", raw_html_headers=("Longest chain",), + column_types={"Length": "meter"}, ctx=ctx, ) + '

Detected cycles

' diff --git a/codeclone/report/html/sections/_module_map.py b/codeclone/report/html/sections/_module_map.py index 02ebe21b..d03692cf 100644 --- a/codeclone/report/html/sections/_module_map.py +++ b/codeclone/report/html/sections/_module_map.py @@ -229,7 +229,13 @@ def _mm_unwind_table(unwind_candidates: Sequence[object], ctx: ReportContext) -> headers=("Module", "Fan-in", "Fan-out", "Score", "Status", "Signals"), rows=rows, empty_message="No unwind candidates detected.", - column_types={"Score": "score", "Status": "status", "Signals": "chips"}, + column_types={ + "Fan-in": "meter", + "Fan-out": "meter", + "Score": "score", + "Status": "status", + "Signals": "chips", + }, ctx=ctx, ) @@ -340,7 +346,12 @@ def _render_overloaded_modules_section(ctx: ReportContext) -> str: ), rows=rows, empty_message=_OVERLOADED_EMPTY_MESSAGE, - column_types={"Score": "score", "Status": "status"}, + column_types={ + "Score": "score", + "Status": "status", + "LOC": "meter", + "Complexity total": "meter", + }, ctx=ctx, ) ) diff --git a/codeclone/report/html/widgets/tables.py b/codeclone/report/html/widgets/tables.py index 8b2d71c0..f8e1090f 100644 --- a/codeclone/report/html/widgets/tables.py +++ b/codeclone/report/html/widgets/tables.py @@ -17,6 +17,7 @@ _metric_meter_html, _quality_badge_html, _score_bar_html, + _source_kind_badge_html, _status_pill_html, _tab_empty, ) @@ -46,6 +47,11 @@ "kind": "76px", "steps": "120px", "coupled classes": "360px", + "fan-in": "96px", + "fan-out": "100px", + "loc": "100px", + "complexity total": "136px", + "source": "104px", } _COL_CLS: dict[str, str] = {} @@ -78,6 +84,7 @@ "score": _score_bar_html, "status": _status_pill_html, "chips": _chips_html, + "source_kind": _source_kind_badge_html, } @@ -92,6 +99,7 @@ def _safe_abs_float(value: object) -> float: "score": "col-score", "status": "col-badge", "chips": "col-chips", + "source_kind": "col-badge", } diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 27b3372a..47b818ea 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -4517,3 +4517,17 @@ def test_render_rows_table_meter_handles_non_numeric() -> None: ) assert "n/a" in html assert "metric-meter-fill" not in html + + +def test_render_rows_table_source_kind_column_renders_badge() -> None: + from codeclone.report.html.widgets.tables import render_rows_table + + html = render_rows_table( + headers=("Name", "Source"), + rows=[("x", "production"), ("y", "tests")], + empty_message="none", + column_types={"Source": "source_kind"}, + ) + assert "source-kind-badge" in html + assert "source-kind-production" in html + assert "source-kind-tests" in html From 5d32b5768fda5e52ee4623d0bbe6feaac77682bd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:06:23 +0500 Subject: [PATCH 024/113] feat(html): add card depth/hover-lift and stronger KPI numeric hierarchy --- codeclone/report/html/assets/css.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 957495e3..5a3fd1ef 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -603,10 +603,14 @@ .overview-kpi-grid--with-health .meta-item{min-width:0} .overview-kpi-grid--with-health .meta-item{min-height:0} .overview-kpi-cards .meta-item{display:grid;grid-template-rows:auto 1fr auto; - align-items:start;padding:var(--sp-3) var(--sp-4);gap:var(--sp-2);min-height:0} -.overview-kpi-cards .meta-item .meta-label{font-size:.75rem;min-height:18px} + align-items:start;padding:var(--sp-3) var(--sp-4);gap:var(--sp-2);min-height:0; + box-shadow:var(--shadow-sm);transition:border-color var(--dur-fast) var(--ease), + box-shadow var(--dur-normal) var(--ease),transform var(--dur-fast) var(--ease)} +.overview-kpi-cards .meta-item:hover{box-shadow:var(--shadow-md);transform:translateY(-1px)} +.overview-kpi-cards .meta-item .meta-label{font-size:.68rem;min-height:18px; + text-transform:uppercase;letter-spacing:.05em;font-weight:600} .overview-kpi-cards .meta-item .meta-value{display:flex;align-items:center; - font-size:1.55rem;line-height:1;padding:var(--sp-1) 0} + font-size:1.85rem;line-height:1;padding:var(--sp-1) 0;letter-spacing:-0.02em} .overview-kpi-cards .kpi-detail{margin-top:0;gap:4px;align-self:end} .overview-kpi-cards .kpi-micro{padding:2px 6px;font-size:.65rem} .overview-kpi-grid--with-health .overview-health-card{padding:var(--sp-2)} @@ -645,10 +649,11 @@ transition:stroke-dashoffset 1s var(--ease)} .health-ring-label{position:absolute;inset:0;display:flex;flex-direction:column; align-items:center;justify-content:center} -.health-ring-score{font-family:var(--font-numeric);font-size:1.85rem;font-weight:680; +.health-ring-score{font-family:var(--font-numeric);font-size:2.15rem;font-weight:700; color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1; - letter-spacing:-0.018em} -.health-ring-grade{font-size:.72rem;font-weight:500;color:var(--text-muted);margin-top:3px} + letter-spacing:-0.022em} +.health-ring-grade{font-size:.7rem;font-weight:600;color:var(--text-muted);margin-top:4px; + text-transform:uppercase;letter-spacing:.06em} .health-ring-delta{font-size:.65rem;font-weight:600;margin-top:3px} .health-ring-delta--up{color:var(--success)} .health-ring-delta--down{color:var(--error)} @@ -991,10 +996,15 @@ One source of truth for findings, suggestions, and the review queue. */ .finding-card{position:relative;display:flex;background:var(--bg-surface); border:1px solid var(--border);border-radius:var(--radius-lg);overflow:hidden; - transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} -.finding-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} + box-shadow:var(--shadow-sm); + transition:border-color var(--dur-fast) var(--ease), + box-shadow var(--dur-normal) var(--ease),transform var(--dur-fast) var(--ease)} +.finding-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-md); + transform:translateY(-1px)} .finding-card-stripe{flex:0 0 4px;align-self:stretch;background:var(--border-strong)} +.finding-card--critical{border-color:color-mix(in oklch,var(--error) 22%,var(--border))} .finding-card--critical .finding-card-stripe{background:var(--error)} +.finding-card--warning{border-color:color-mix(in oklch,var(--warning) 16%,var(--border))} .finding-card--warning .finding-card-stripe{background:var(--warning)} .finding-card--info .finding-card-stripe{background:var(--info)} .finding-card-main{flex:1;min-width:0;padding:var(--sp-3) var(--sp-4)} From 236eacb96068bd65febddc2d25035c0b31d0bf5e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:28:47 +0500 Subject: [PATCH 025/113] feat(html): centralize all label badges onto one design-token system --- codeclone/report/html/assets/css.py | 41 +++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 5a3fd1ef..2e6ec1e2 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -67,6 +67,14 @@ --radius-lg:8px; --radius-xl:12px; + /* badge design code — one scale for every read-only label badge */ + --badge-font:var(--font-sans); + --badge-size:.68rem; + --badge-weight:600; + --badge-tracking:.015em; + --badge-pad:2px var(--sp-2); + --badge-radius:var(--radius-sm); + /* spacing */ --sp-1:4px;--sp-2:8px;--sp-3:12px;--sp-4:16px;--sp-5:20px;--sp-6:24px;--sp-8:32px;--sp-10:40px; @@ -442,15 +450,11 @@ .metric-meter--mid .metric-meter-val{color:var(--warning)} .metric-meter--high .metric-meter-fill{background:var(--error)} .metric-meter--high .metric-meter-val{color:var(--error);font-weight:600} -.status-pill{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; - padding:2px 9px;border-radius:999px;white-space:nowrap;letter-spacing:.01em; - font-family:var(--font-sans)} .status-pill--candidate{background:var(--accent-muted);color:var(--accent-primary)} .status-pill--ranked{background:var(--bg-overlay);color:var(--text-secondary)} .status-pill--neutral{background:var(--bg-overlay);color:var(--text-muted)} -.chip{display:inline-flex;align-items:center;font-size:.66rem;font-family:var(--font-mono); - padding:2px 7px;margin:1px 3px 1px 0;border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-secondary);border:1px solid var(--border)} +.chip{margin:1px 3px 1px 0;background:var(--bg-overlay);color:var(--text-secondary); + border:1px solid var(--border)} """ # --------------------------------------------------------------------------- @@ -563,8 +567,16 @@ # --------------------------------------------------------------------------- _BADGES = """\ -.risk-badge,.severity-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:600; - padding:2px var(--sp-2);border-radius:var(--radius-sm);text-transform:uppercase;letter-spacing:.02em} +/* One typographic scale for every read-only label badge; color/background and + any per-variant tweaks (uppercase, etc.) live in the modifiers below. */ +.risk-badge,.severity-badge,.source-kind-badge,.status-pill, +.finding-meta-badge,.suggestion-chip,.chip,.launchpad-sev{ + display:inline-flex;align-items:center;white-space:nowrap;line-height:1.2; + font-family:var(--badge-font);font-size:var(--badge-size); + font-weight:var(--badge-weight);letter-spacing:var(--badge-tracking); + padding:var(--badge-pad);border-radius:var(--badge-radius); + font-variant-numeric:tabular-nums} +.risk-badge,.severity-badge{text-transform:uppercase} .risk-critical,.severity-critical{background:var(--error-muted);color:var(--error)} .risk-high,.severity-high{background:var(--error-muted);color:var(--error)} .risk-warning,.severity-warning{background:var(--warning-muted);color:var(--warning)} @@ -572,8 +584,7 @@ .risk-low,.severity-low{background:var(--success-muted);color:var(--success)} .risk-info,.severity-info{background:var(--info-muted);color:var(--info)} -.source-kind-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; - padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted)} +.source-kind-badge{background:var(--bg-overlay);color:var(--text-muted)} .source-kind-production{background:var(--error-muted);color:var(--error)} .source-kind-test,.source-kind-test_util{background:var(--info-muted);color:var(--info)} .source-kind-fixture,.source-kind-conftest{background:var(--warning-muted);color:var(--warning)} @@ -1020,9 +1031,7 @@ margin-top:4px;word-break:break-all} .finding-card-actions{flex-shrink:0} .finding-card-meta{display:flex;flex-wrap:wrap;gap:6px;margin-top:9px} -.finding-meta-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); - border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted); - white-space:nowrap;line-height:1.2;font-variant-numeric:tabular-nums} +.finding-meta-badge{background:var(--bg-overlay);color:var(--text-muted)} .finding-meta-badge--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} .finding-meta-badge--moderate{color:var(--warning);background:var(--warning-muted)} .finding-meta-badge--hard{color:var(--error);background:var(--error-muted)} @@ -1041,8 +1050,7 @@ background:var(--accent-muted)} .review-launchpad-title{font-size:.95rem;font-weight:600;color:var(--text-primary)} .review-launchpad-sevs{display:flex;flex-wrap:wrap;gap:6px;margin-top:5px} -.launchpad-sev{font-size:.7rem;font-weight:500;padding:2px 9px;border-radius:999px; - font-family:var(--font-numeric);color:var(--text-secondary);background:var(--bg-overlay)} +.launchpad-sev{color:var(--text-secondary);background:var(--bg-overlay)} .launchpad-sev--critical{color:var(--danger); background:color-mix(in oklch,var(--danger) 14%,transparent)} .launchpad-sev--warning{color:var(--warning); @@ -1091,8 +1099,7 @@ .review-card.is-reviewed .review-toggle{background:var(--accent-primary); border-color:var(--accent-primary);color:#fff} .suggestion-context{display:flex;gap:var(--sp-1);flex-wrap:wrap} -.suggestion-chip{font-size:.68rem;font-weight:500;padding:2px var(--sp-2);border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} +.suggestion-chip{background:var(--bg-overlay);color:var(--text-muted)} .suggestion-summary{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary);line-height:1.5} .suggestion-action{display:flex;align-items:center;gap:var(--sp-1); font-size:.8rem;font-weight:500;color:var(--accent-primary);margin-top:var(--sp-1)} From 8009b1e776b8ac153c12e880637ad827471b7b5a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:35:39 +0500 Subject: [PATCH 026/113] feat(html): badge the Clones suppressed table with chips and code chips --- codeclone/report/html/assets/css.py | 6 ++++++ codeclone/report/html/sections/_clones.py | 6 ++++++ codeclone/report/html/widgets/badges.py | 8 ++++++++ codeclone/report/html/widgets/tables.py | 3 +++ tests/test_html_report.py | 14 ++++++++++++++ 5 files changed, 37 insertions(+) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 2e6ec1e2..9e4e41d7 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -455,6 +455,12 @@ .status-pill--neutral{background:var(--bg-overlay);color:var(--text-muted)} .chip{margin:1px 3px 1px 0;background:var(--bg-overlay);color:var(--text-secondary); border:1px solid var(--border)} +/* Code sort: identifiers / globs in mono, distinct from sans label badges */ +.code-chip{display:inline-flex;align-items:center;max-width:100%;font-family:var(--font-mono); + font-size:.72rem;padding:2px var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-secondary);border:1px solid var(--border); + white-space:nowrap;overflow:hidden;text-overflow:ellipsis} +.table .col-code{max-width:240px} """ # --------------------------------------------------------------------------- diff --git a/codeclone/report/html/sections/_clones.py b/codeclone/report/html/sections/_clones.py index b18624fb..0e0e27c5 100644 --- a/codeclone/report/html/sections/_clones.py +++ b/codeclone/report/html/sections/_clones.py @@ -247,6 +247,12 @@ def _render_suppressed_clone_panel( headers=("Kind", "Group", "File", "Type", "Occurrences", "Rule", "Pattern"), rows=rows, empty_message="No suppressed clone groups.", + column_types={ + "Kind": "chips", + "Type": "chips", + "Rule": "code", + "Pattern": "code", + }, ctx=ctx, ) diff --git a/codeclone/report/html/widgets/badges.py b/codeclone/report/html/widgets/badges.py index aa445ba4..a974ec49 100644 --- a/codeclone/report/html/widgets/badges.py +++ b/codeclone/report/html/widgets/badges.py @@ -181,6 +181,14 @@ def _chips_html(text: str) -> str: return "".join(f'{_escape_html(part)}' for part in parts) +def _code_chip_html(text: str) -> str: + """Render an identifier / glob value as a compact monospace code chip.""" + value = str(text).strip() + if not value or value == "-": + return _escape_html(value) + return f'{_escape_html(value)}' + + _INLINE_EMPTY_ICONS: dict[str, str] = { "good": ( ' float: "status": "col-badge", "chips": "col-chips", "source_kind": "col-badge", + "code": "col-code", } diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 47b818ea..74724b1f 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -4531,3 +4531,17 @@ def test_render_rows_table_source_kind_column_renders_badge() -> None: assert "source-kind-badge" in html assert "source-kind-production" in html assert "source-kind-tests" in html + + +def test_render_rows_table_code_column_renders_code_chip() -> None: + from codeclone.report.html.widgets.tables import render_rows_table + + html = render_rows_table( + headers=("Name", "Rule"), + rows=[("x", "golden_fixture@project_config"), ("y", "-")], + empty_message="none", + column_types={"Rule": "code"}, + ) + assert 'golden_fixture@project_config' in html + # the placeholder dash stays plain, not chipped + assert '-' not in html From d4434b38bb89dcbb5ab8c5aeb9e68c73e8c52135 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:37:40 +0500 Subject: [PATCH 027/113] feat(html): badge the Coverage Join table (CC meter, status chip) --- codeclone/report/html/sections/_coverage_join.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeclone/report/html/sections/_coverage_join.py b/codeclone/report/html/sections/_coverage_join.py index 48218504..f0fba344 100644 --- a/codeclone/report/html/sections/_coverage_join.py +++ b/codeclone/report/html/sections/_coverage_join.py @@ -85,6 +85,7 @@ def render_coverage_join_panel(ctx: ReportContext) -> str: empty_message=_coverage_join_empty_message(), empty_description=_coverage_join_empty_description(), raw_html_headers=("Location",), + column_types={"CC": "meter", "Status": "chips"}, ctx=ctx, ) ) From 009f51979196b4063a530137de4929098352189f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 15:43:31 +0500 Subject: [PATCH 028/113] feat(html): fold count badges onto a shared numeric token scale --- codeclone/report/html/assets/css.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 9e4e41d7..456583f4 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -75,6 +75,11 @@ --badge-pad:2px var(--sp-2); --badge-radius:var(--radius-sm); + /* count sort — tabular numerals shared by counts and micro-stats */ + --count-font:var(--font-numeric); + --count-size:.64rem; + --count-weight:700; + /* spacing */ --sp-1:4px;--sp-2:8px;--sp-3:12px;--sp-4:16px;--sp-5:20px;--sp-6:24px;--sp-8:32px;--sp-10:40px; @@ -221,7 +226,9 @@ .main-tab[aria-selected="true"] .main-tab-icon{opacity:1} .main-tab-label{display:inline-flex;align-items:center} .tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; - height:18px;padding:0 5px;font-size:.68rem;font-weight:700;border-radius:var(--radius-sm); + height:18px;padding:0 5px;border-radius:var(--radius-sm); + font-family:var(--count-font);font-size:var(--count-size);font-weight:var(--count-weight); + font-variant-numeric:tabular-nums; background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} .main-tab[aria-selected="true"] .tab-count{background:rgba(255,255,255,.24); color:#fff} @@ -744,11 +751,11 @@ .meta-item--accent:hover{border-color:var(--accent-primary)} .kpi-detail{display:flex;flex-wrap:wrap;gap:3px;margin-top:2px} .kpi-detail code{font-size:.78rem} -.kpi-micro{display:inline-flex;align-items:center;gap:3px;font-size:.62rem; +.kpi-micro{display:inline-flex;align-items:center;gap:3px;font-size:var(--count-size); padding:1px 5px;border-radius:var(--radius-sm);background:var(--bg-raised); - white-space:nowrap;line-height:1.3;font-family:inherit} -.kpi-micro-val{font-family:inherit;font-weight:500;font-variant-numeric:tabular-nums; - color:var(--text-muted)} + white-space:nowrap;line-height:1.3;font-family:var(--font-sans)} +.kpi-micro-val{font-family:var(--count-font);font-weight:var(--count-weight); + font-variant-numeric:tabular-nums;color:var(--text-muted)} .kpi-micro-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} .kpi-micro--baselined{color:var(--success);font-weight:500;font-size:.6rem} .kpi-delta{font-size:.62rem;font-weight:700;margin-left:auto; @@ -1091,8 +1098,8 @@ background:color-mix(in oklch,var(--danger) 16%,transparent)} .review-chip--warning.is-active{border-color:var(--warning);color:var(--warning); background:color-mix(in oklch,var(--warning) 16%,transparent)} -.review-chip-count{font-family:var(--font-numeric);font-variant-numeric:tabular-nums; - font-size:.66rem;opacity:.85} +.review-chip-count{font-family:var(--count-font);font-size:var(--count-size); + font-weight:var(--count-weight);font-variant-numeric:tabular-nums;opacity:.85} .review-queue{display:flex;flex-direction:column;gap:9px} .review-toggle{display:inline-flex;align-items:center;justify-content:center; width:30px;height:30px;border-radius:8px;cursor:pointer;color:var(--text-muted); From e17f74fca88001bdfef4010b2d65fa13e75f02fa Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 16:00:20 +0500 Subject: [PATCH 029/113] feat(corpus): integrate CodeClone Corpus CI bench --- .github/workflows/validation-corpus.yml | 92 +++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 .github/workflows/validation-corpus.yml diff --git a/.github/workflows/validation-corpus.yml b/.github/workflows/validation-corpus.yml new file mode 100644 index 00000000..540531a2 --- /dev/null +++ b/.github/workflows/validation-corpus.yml @@ -0,0 +1,92 @@ +name: validation-corpus +run-name: validation corpus • ${{ github.event_name }} • ${{ github.ref_name }} + +on: + push: + branches: [ "**" ] + pull_request: + workflow_dispatch: + inputs: + tier: + description: Corpus tier + required: true + default: all + type: choice + options: + - smoke + - gates + - full + - all + corpus-ref: + description: codeclone-validation-corpus ref + required: true + default: main + +permissions: + contents: read + +concurrency: + group: validation-corpus-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + validation-corpus: + name: corpus + runs-on: ubuntu-latest + timeout-minutes: 20 + + steps: + - name: Checkout CodeClone + uses: actions/checkout@v6.0.2 + + - name: Checkout validation corpus + uses: actions/checkout@v6.0.2 + with: + repository: orenlab/codeclone-validation-corpus + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.corpus-ref || 'main' }} + path: validation-corpus + + - name: Set up Python + uses: actions/setup-python@v6.2.0 + with: + python-version: "3.14" + allow-prereleases: true + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install CodeClone from this checkout + run: uv sync --all-extras + + - name: Install validation corpus dependencies + run: uv sync --project validation-corpus + + - name: Resolve corpus tier + shell: bash + run: | + tier="all" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + tier="${{ inputs.tier }}" + fi + echo "CORPUS_TIER=$tier" >> "$GITHUB_ENV" + + - name: Run validation corpus + run: | + uv run --project validation-corpus python -m corpus_tools.cli \ + --tier "$CORPUS_TIER" \ + --codeclone-command "$GITHUB_WORKSPACE/.venv/bin/python -m codeclone.main" \ + --work-root "$RUNNER_TEMP/codeclone-validation-corpus-work" + + - name: Write summary + if: always() + shell: bash + run: | + { + echo "## CodeClone validation corpus" + echo + echo "- Tier: \`${CORPUS_TIER:-unknown}\`" + echo "- Corpus: \`orenlab/codeclone-validation-corpus\`" + echo "- CodeClone source: current checkout" + } >> "$GITHUB_STEP_SUMMARY" From 14488019cf4d35551697491823c831163e54aee7 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 16:32:39 +0500 Subject: [PATCH 030/113] perf(memory): delta experience replace and batch store hydration Skip unchanged distillation projections, delete/insert only digests that changed, and load compare/list paths with three batched SELECTs instead of per-row N+1. Update HTML stat-card CSS snapshot for count-font tokens. --- codeclone/memory/experience/store.py | 285 +++++++++++++++++++------- tests/test_html_report.py | 4 +- tests/test_memory_experience_store.py | 62 ++++++ 3 files changed, 271 insertions(+), 80 deletions(-) diff --git a/codeclone/memory/experience/store.py b/codeclone/memory/experience/store.py index d2132c60..3895ca9e 100644 --- a/codeclone/memory/experience/store.py +++ b/codeclone/memory/experience/store.py @@ -11,8 +11,10 @@ from __future__ import annotations import sqlite3 -from collections.abc import Sequence +from collections.abc import Callable, Sequence +from typing import TypeVar +from ...utils.iterutils import chunked from .models import ( Experience, ExperienceEvidence, @@ -21,6 +23,21 @@ ExperienceStatus, ) +_SQLITE_IN_QUERY_BATCH = 500 +_T = TypeVar("_T") +_FACETS_BATCH_SQL = ( + "SELECT experience_id, facet_kind, facet_value, count " + "FROM memory_experience_facets " + "WHERE experience_id IN ({placeholders}) " + "ORDER BY experience_id ASC, facet_kind ASC, facet_value ASC" +) +_EVIDENCE_BATCH_SQL = ( + "SELECT experience_id, trajectory_id, outcome, finished_at_utc " + "FROM memory_experience_evidence " + "WHERE experience_id IN ({placeholders}) " + "ORDER BY experience_id ASC, finished_at_utc ASC, trajectory_id ASC" +) + def _use_row_factory(conn: sqlite3.Connection) -> None: conn.row_factory = sqlite3.Row @@ -33,15 +50,133 @@ def replace_experiences( experiences: Sequence[Experience], ) -> int: """Replace all experiences for a project with the distilled set.""" - conn.execute("DELETE FROM memory_experiences WHERE project_id=?", (project_id,)) - for experience in experiences: - _insert_experience(conn, experience) + if not experiences: + conn.execute("DELETE FROM memory_experiences WHERE project_id=?", (project_id,)) + conn.commit() + return 0 + + new_by_digest = { + experience.experience_digest: experience for experience in experiences + } + stored_by_digest = _experiences_by_digest(conn, project_id=project_id) + existing_digests = set(stored_by_digest) + new_digests = set(new_by_digest) + + remove_digests = existing_digests - new_digests + refresh: list[Experience] = [] + for digest in sorted(new_digests): + incoming = new_by_digest[digest] + stored = stored_by_digest.get(digest) + if stored is None: + refresh.append(incoming) + continue + if _experience_content_key(stored) != _experience_content_key(incoming): + remove_digests.add(digest) + refresh.append(incoming) + + if not remove_digests and not refresh: + return len(experiences) + + for batch in chunked(tuple(sorted(remove_digests)), _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" for _ in batch) + conn.execute( + f"DELETE FROM memory_experiences WHERE project_id=? " + f"AND experience_digest IN ({placeholders})", + (project_id, *batch), + ) + if refresh: + _batch_insert_experiences(conn, refresh) conn.commit() return len(experiences) -def _insert_experience(conn: sqlite3.Connection, experience: Experience) -> None: - conn.execute( +def _experiences_by_digest( + conn: sqlite3.Connection, + *, + project_id: str, +) -> dict[str, Experience]: + _use_row_factory(conn) + rows = conn.execute( + "SELECT * FROM memory_experiences WHERE project_id=?", + (project_id,), + ).fetchall() + if not rows: + return {} + return { + experience.experience_digest: experience + for experience in _hydrate_experience_rows(conn, rows) + } + + +def _group_rows_by_experience_id( + conn: sqlite3.Connection, + *, + ids: Sequence[str], + sql: str, + build: Callable[[sqlite3.Row], _T], +) -> dict[str, list[_T]]: + grouped: dict[str, list[_T]] = {experience_id: [] for experience_id in ids} + for batch in chunked(tuple(ids), _SQLITE_IN_QUERY_BATCH): + placeholders = ", ".join("?" for _ in batch) + rows = conn.execute(sql.format(placeholders=placeholders), batch).fetchall() + for row in rows: + grouped.setdefault(str(row["experience_id"]), []).append(build(row)) + return grouped + + +def _hydrate_experience_rows( + conn: sqlite3.Connection, + rows: Sequence[sqlite3.Row], +) -> list[Experience]: + experience_ids = [str(row["id"]) for row in rows] + facets_by_id = _group_rows_by_experience_id( + conn, + ids=experience_ids, + sql=_FACETS_BATCH_SQL, + build=_row_to_facet, + ) + evidence_by_id = _group_rows_by_experience_id( + conn, + ids=experience_ids, + sql=_EVIDENCE_BATCH_SQL, + build=_row_to_evidence, + ) + return [ + _row_to_experience( + row, + facets=tuple(facets_by_id.get(str(row["id"]), [])), + evidence=tuple(evidence_by_id.get(str(row["id"]), [])), + ) + for row in rows + ] + + +def _experience_content_key(experience: Experience) -> tuple[object, ...]: + """Comparable payload excluding distill timestamps refreshed every run.""" + return ( + experience.id, + experience.repo_root_digest, + experience.subject_family, + experience.signal, + experience.outcome_class, + experience.support, + experience.quality_min, + experience.information_value, + experience.status, + experience.statement, + experience.distillation_version, + experience.first_observed_at_utc, + experience.last_observed_at_utc, + experience.facets, + experience.evidence, + ) + + +def _batch_insert_experiences( + conn: sqlite3.Connection, + experiences: Sequence[Experience], +) -> None: + conn.executemany( """ INSERT INTO memory_experiences( id, project_id, repo_root_digest, subject_family, signal, @@ -51,42 +186,52 @@ def _insert_experience(conn: sqlite3.Connection, experience: Experience) -> None updated_at_utc ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, - ( - experience.id, - experience.project_id, - experience.repo_root_digest, - experience.subject_family, - experience.signal, - experience.outcome_class, - experience.support, - experience.quality_min, - experience.information_value, - experience.status, - experience.statement, - experience.experience_digest, - experience.distillation_version, - experience.first_observed_at_utc, - experience.last_observed_at_utc, - experience.distilled_at_utc, - experience.updated_at_utc, - ), - ) - conn.executemany( - "INSERT INTO memory_experience_facets(" - "experience_id, facet_kind, facet_value, count) VALUES (?, ?, ?, ?)", [ - (experience.id, facet.facet_kind, facet.facet_value, facet.count) - for facet in experience.facets - ], - ) - conn.executemany( - "INSERT INTO memory_experience_evidence(" - "experience_id, trajectory_id, outcome, finished_at_utc) VALUES (?, ?, ?, ?)", - [ - (experience.id, item.trajectory_id, item.outcome, item.finished_at_utc) - for item in experience.evidence + ( + experience.id, + experience.project_id, + experience.repo_root_digest, + experience.subject_family, + experience.signal, + experience.outcome_class, + experience.support, + experience.quality_min, + experience.information_value, + experience.status, + experience.statement, + experience.experience_digest, + experience.distillation_version, + experience.first_observed_at_utc, + experience.last_observed_at_utc, + experience.distilled_at_utc, + experience.updated_at_utc, + ) + for experience in experiences ], ) + facet_rows = [ + (experience.id, facet.facet_kind, facet.facet_value, facet.count) + for experience in experiences + for facet in experience.facets + ] + if facet_rows: + conn.executemany( + "INSERT INTO memory_experience_facets(" + "experience_id, facet_kind, facet_value, count) VALUES (?, ?, ?, ?)", + facet_rows, + ) + evidence_rows = [ + (experience.id, item.trajectory_id, item.outcome, item.finished_at_utc) + for experience in experiences + for item in experience.evidence + ] + if evidence_rows: + conn.executemany( + "INSERT INTO memory_experience_evidence(" + "experience_id, trajectory_id, outcome, finished_at_utc) " + "VALUES (?, ?, ?, ?)", + evidence_rows, + ) def count_experiences(conn: sqlite3.Connection, *, project_id: str) -> int: @@ -108,7 +253,7 @@ def list_experiences( "ORDER BY subject_family ASC, signal ASC, outcome_class ASC", (project_id,), ).fetchall() - return [_row_to_experience(conn, row) for row in rows] + return _hydrate_experience_rows(conn, rows) def list_experiences_for_subject_family( @@ -123,7 +268,7 @@ def list_experiences_for_subject_family( "ORDER BY signal ASC, outcome_class ASC", (project_id, subject_family), ).fetchall() - return [_row_to_experience(conn, row) for row in rows] + return _hydrate_experience_rows(conn, rows) def find_experience( @@ -136,45 +281,24 @@ def find_experience( "SELECT * FROM memory_experiences WHERE id=?", (experience_id,), ).fetchone() - return _row_to_experience(conn, row) if row is not None else None + if row is None: + return None + return _hydrate_experience_rows(conn, [row])[0] -def _facets_for_experience( - conn: sqlite3.Connection, - experience_id: str, -) -> tuple[ExperienceFacet, ...]: - rows = conn.execute( - "SELECT facet_kind, facet_value, count FROM memory_experience_facets " - "WHERE experience_id=? ORDER BY facet_kind ASC, facet_value ASC", - (experience_id,), - ).fetchall() - return tuple( - ExperienceFacet( - facet_kind=_facet_kind(str(row["facet_kind"])), - facet_value=str(row["facet_value"]), - count=int(row["count"]), - ) - for row in rows +def _row_to_facet(row: sqlite3.Row) -> ExperienceFacet: + return ExperienceFacet( + facet_kind=_facet_kind(str(row["facet_kind"])), + facet_value=str(row["facet_value"]), + count=int(row["count"]), ) -def _evidence_for_experience( - conn: sqlite3.Connection, - experience_id: str, -) -> tuple[ExperienceEvidence, ...]: - rows = conn.execute( - "SELECT trajectory_id, outcome, finished_at_utc " - "FROM memory_experience_evidence WHERE experience_id=? " - "ORDER BY finished_at_utc ASC, trajectory_id ASC", - (experience_id,), - ).fetchall() - return tuple( - ExperienceEvidence( - trajectory_id=str(row["trajectory_id"]), - outcome=str(row["outcome"]), - finished_at_utc=str(row["finished_at_utc"]), - ) - for row in rows +def _row_to_evidence(row: sqlite3.Row) -> ExperienceEvidence: + return ExperienceEvidence( + trajectory_id=str(row["trajectory_id"]), + outcome=str(row["outcome"]), + finished_at_utc=str(row["finished_at_utc"]), ) @@ -185,7 +309,12 @@ def _facet_kind(value: str) -> ExperienceFacetKind: raise ValueError(msg) -def _row_to_experience(conn: sqlite3.Connection, row: sqlite3.Row) -> Experience: +def _row_to_experience( + row: sqlite3.Row, + *, + facets: tuple[ExperienceFacet, ...] | None = None, + evidence: tuple[ExperienceEvidence, ...] | None = None, +) -> Experience: experience_id = str(row["id"]) return Experience( id=experience_id, @@ -205,8 +334,8 @@ def _row_to_experience(conn: sqlite3.Connection, row: sqlite3.Row) -> Experience last_observed_at_utc=str(row["last_observed_at_utc"]), distilled_at_utc=str(row["distilled_at_utc"]), updated_at_utc=str(row["updated_at_utc"]), - facets=_facets_for_experience(conn, experience_id), - evidence=_evidence_for_experience(conn, experience_id), + facets=facets if facets is not None else (), + evidence=evidence if evidence is not None else (), ) diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 74724b1f..e6e4a3be 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -3578,8 +3578,8 @@ def test_html_report_uses_jetbrains_mono_for_stat_card_content() -> None: ".meta-item{padding:var(--sp-3) var(--sp-4);", "font-family:var(--font-mono)}", ".kpi-micro{display:inline-flex;align-items:center;gap:3px;", - "font-family:inherit}", - ".kpi-micro-val{font-family:inherit;font-weight:500;", + "font-family:var(--font-sans)}", + ".kpi-micro-val{font-family:var(--count-font);font-weight:var(--count-weight);", ".overview-summary-item{background:var(--bg-surface);", "border:1px solid color-mix(in srgb,var(--border) 78%,transparent);", "padding:var(--sp-4)}", diff --git a/tests/test_memory_experience_store.py b/tests/test_memory_experience_store.py index 7bd86aec..608664f9 100644 --- a/tests/test_memory_experience_store.py +++ b/tests/test_memory_experience_store.py @@ -8,6 +8,7 @@ import sqlite3 from collections.abc import Iterator +from dataclasses import replace from pathlib import Path import pytest @@ -164,6 +165,67 @@ def test_list_for_subject_family_filters(conn: sqlite3.Connection) -> None: assert [item.id for item in scoped] == ["exp-b"] +def test_replace_refreshes_when_digest_matches_but_content_differs( + conn: sqlite3.Connection, +) -> None: + original = _experience(suffix="a", signal="scope_expanded") + replace_experiences(conn, project_id=_PROJECT_ID, experiences=[original]) + updated = replace( + original, + statement="override statement", + updated_at_utc="2026-06-09T00:00:00Z", + ) + replace_experiences(conn, project_id=_PROJECT_ID, experiences=[updated]) + loaded = list_experiences(conn, project_id=_PROJECT_ID)[0] + assert loaded.statement == "override statement" + + +def test_replace_noop_compare_uses_bounded_selects(conn: sqlite3.Connection) -> None: + experiences = [ + _experience(suffix="a", signal="scope_expanded"), + _experience(suffix="b", signal="recovered"), + ] + replace_experiences(conn, project_id=_PROJECT_ID, experiences=experiences) + query_count = 0 + + def _trace(sql: str) -> None: + nonlocal query_count + stripped = sql.lstrip() + if stripped and stripped.split(None, 1)[0].upper() in { + "SELECT", + "INSERT", + "DELETE", + "UPDATE", + }: + query_count += 1 + + conn.set_trace_callback(_trace) + try: + replace_experiences(conn, project_id=_PROJECT_ID, experiences=experiences) + finally: + conn.set_trace_callback(None) + # Parent row + batched facets + batched evidence; no mutation statements. + assert query_count == 3 + + +def test_replace_skips_write_when_digests_unchanged(conn: sqlite3.Connection) -> None: + experiences = [ + _experience(suffix="a", signal="scope_expanded"), + _experience(suffix="b", signal="recovered"), + ] + replace_experiences(conn, project_id=_PROJECT_ID, experiences=experiences) + before = conn.execute( + "SELECT updated_at_utc FROM memory_experiences WHERE id=?", + ("exp-a",), + ).fetchone()[0] + replace_experiences(conn, project_id=_PROJECT_ID, experiences=experiences) + after = conn.execute( + "SELECT updated_at_utc FROM memory_experiences WHERE id=?", + ("exp-a",), + ).fetchone()[0] + assert before == after + + def test_empty_replace_clears_project(conn: sqlite3.Connection) -> None: replace_experiences( conn, From 403411711b3fbf30051c264e6b228041b93ba34b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 17:13:58 +0500 Subject: [PATCH 031/113] feat(docs): refresh CHANGELOG.md --- CHANGELOG.md | 285 +++++++++++++-------------------------------------- 1 file changed, 73 insertions(+), 212 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aa1a7d3..72a5f150 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,217 +1,78 @@ -Changelog - -[2.1.0a1] - Unreleased - -2.1.0a1 opens the CodeClone 2.1 alpha line with intent-first structural -change control, Engineering Memory, trajectory and experience layers, semantic -retrieval, Platform Observability, native agent integrations, and a reorganized -documentation site. - -Added - -* Structural Change Controller. The new - start_controlled_change / finish_controlled_change workflow reduces the - governed agent edit cycle from 7–11 MCP calls to 3–4. It combines workspace - checks, intent declaration, blast-radius mapping, bounded edit scope, patch - verification, review-claim validation, and deterministic review receipts. - CodeClone now exposes 33 agent-visible MCP tools by default. -* Live Implementation Context. The new read-only - get_implementation_context tool projects bounded structural facts for - repo-relative paths from one existing run. It reports workspace freshness, - cache origin, imports/importers, public surface, blast radius, and test - anchors, with separate deterministic digests for the off-report context - artifact and the exact bounded projection. Active intents add explicit - allowed/review/do-not-touch boundaries, while impact mode adds transitive - dependency context and baseline-sensitive findings. Engineering Memory, - tests, docs, trajectories, and Experiences remain lane-separated evidence. - Exact qualname subjects resolve through an off-report Unit and API-surface - location index, with unknown symbols reported explicitly rather than guessed. - Zero-argument queries now resolve active intent scope or bounded live dirty - paths, related module roles collapse with explicit relation tags, and one - safety-first global budget reports all ordinary and safety omissions. - Cache schema 2.9 adds a separate, rebuildable per-function relationship-fact - projection without changing Unit serialization or canonical report identity. - Cross-module calls and resolved non-call references are now attributed to - their caller with production/test lanes; conservative caller-scope shadow - guards keep ambiguous imported names as unresolved call observations. - Intra-module functions, same-module class methods, and self/cls receiver - methods now resolve against the enclosing module and class (keyed on the - actual first-parameter name, never a hardcoded self, and never for - staticmethods), only when the target definition exists; cache schema 2.10. - Per-function relationship facts now aggregate across files (cold and cached) - onto the analysis result and the MCP run record, off the canonical report. - get_implementation_context now projects call_context (callers, callees, - references, test_callers) from those facts with relation_kind x - resolution_status evidence tags, separate production and test caller lanes, - unresolved call observations, and a complete/partial/unavailable - call_graph_status; relationship records are bound into context_artifact_digest. - contract mode returns a truth-map (definition_sites, version_constants, - contract_tests, memory_conflicts) and persistence/serialization path callers - that are emitted only with a typed or memory-backed anchor and are otherwise - not_available rather than name- or directory-guessed. - Context evidence never authorizes edits; edit_allowed remains authoritative. -* Change-intent lifecycle and multi-agent coordination. - manage_change_intent supports declare, check, clear, queue, promote, and - recover operations. Renewable leases, ownership classification, optional - SQLite coordination, retention, workspace hygiene, and recoverable-intent - handling make concurrent agent work explicit and auditable. -* Engineering Memory. A local SQLite knowledge graph stores typed, - evidence-linked repository facts such as contracts, decisions, risks, test - anchors, prior changes, and git provenance. Agents receive ranked, - scope-aware context through get_relevant_memory and - query_engineering_memory; drafts remain human-governed and can be approved - through the CLI or VS Code Memory view. Memory never authorizes edits or - overrides the canonical report, gates, or Patch Trail. -* Trajectory Memory and Patch Trail. Audit-derived trajectories preserve - agent workflows, declared scope, actual changed paths, verification outcomes, - incidents, citations, and review evidence. The current trajectory-v3 - projection adds quality passports, complexity scoring, anomaly detection, - agent profiles, dashboards, semantic retrieval, and deterministic Patch Trail - summaries. Engineering Memory schema 1.7 persists trajectory and Patch - Trail evidence. -* Experience Layer. Deterministic experience-v1 patterns are distilled - from canonical trajectories across all outcomes and exposed through a - separate advisory retrieval lane. Experiences retain supporting evidence and - agent-diversity facets, but never become authority automatically; - promote_experience creates a human-governed memory draft. -* Semantic memory retrieval. Optional LanceDB-backed hybrid search combines - FTS5/BM25 and vector retrieval using deterministic Reciprocal Rank Fusion. - Local embeddings are available through codeclone[semantic-local] with - BAAI/bge-small-en-v1.5. Semantic indexing is lazy, failure-tolerant, and - eventually consistent rather than synchronously rebuilt after every finish. -* Platform Observability. Opt-in, development-only telemetry traces - CodeClone’s own CLI, MCP, analysis, database, semantic-index, and projection - worker activity. The local observer captures timings, RSS/CPU, MCP payload and - token pressure, DB query counts and shapes, causal worker chains, and costly - no-ops. JSON/HTML views provide a diagnostic cockpit, while - query_platform_observability exposes bounded MCP sections for development - agents. Observability never affects reports, gates, baselines, memory facts, - or edit authorization. -* IDE and agent integrations. The VS Code extension gains Engineering - Memory governance, trajectory dashboards, controller audit views, and - workspace session statistics. Native integrations are available for Claude - Desktop, Claude Code, Codex, and Cursor. Claude Code now has a dedicated - marketplace plugin and storefront, separate from the Desktop `.mcpb` bundle. - The Cursor plugin includes skills, rules, fail-closed preToolUse enforcement, - scoped workspace-intent checks, and a structural-review agent. -* Controller and diagnostic CLI surfaces. Added blast-radius, patch - verification, session statistics, controller audit, memory trajectory, - anomaly, agent-profile, semantic-search, and Platform Observability commands. -* Documentation and edition model. Documentation is reorganized into a - thematic 00–26 contract book with unified integration guides, dedicated - chapters for the Controller, Engineering Memory, trajectories, Experiences, - and Platform Observability, plus explicit Open Source / Team / Enterprise - retention and capability tiers. -* MCP schemas now include parameter-level descriptions and deterministic - next_tool guidance. Workspace hygiene warnings, audit events, token-budget - tracking, and documentation-contract linting were also added. -* **Corpus Analytics (intent lane, Slice 1).** Optional offline clustering of - historical change-control intents via `codeclone analytics …`. - Requires `codeclone[analytics]`. Reads audit + trajectory (+ optional registry - overlay), writes SQLite/LanceDB artifacts under `.codeclone/analytics/`, and - exports inspectable JSON/HTML with sweep comparison, cluster diagnostics, - noise exploration, explicit heuristic recommendation vs maintainer selection, - and runtime observability spans. Analytics embeddings and their lifecycle are - separate from the Engineering Memory semantic index; - `[tool.codeclone.analytics]` configures paths and clustering defaults. -* **Corpus Analytics interpretability (Slice 1.1).** JSON export schema `1.2` - and the self-contained HTML report now separate formal technical validity - from human interpretation. Valid runs expose dominant-cluster ratios, - bounded representative/boundary/noise previews, numeric summaries, - categorical correlations, small-cluster provenance completeness, and - explicit preview disclosure. Invalid and failed runs remain inspectable in a - limited diagnostic mode without partition metrics, previews, score, or rank; - sweep comparison includes every persisted candidate. Representation contract - `3` materializes explicit trajectory, Patch Trail, and registry-overlay - presence facts for new snapshots without adding live registry state to source +# Changelog + +## [2.1.0a1] - Unreleased + +CodeClone 2.1 introduces intent-first structural change control, persistent engineering context, agent workflow +evidence, platform self-observability, and broader IDE/agent integration. + +### Added + +- **Structural Change Controller** with `start_controlled_change` / `finish_controlled_change`, bounded edit scope, + blast-radius checks, patch verification, claim validation, multi-agent intent coordination, and deterministic review + receipts. +- **Live Implementation Context** via `get_implementation_context`, including bounded structural context, call + relationships, contract-oriented truth maps, freshness, test anchors, and active intent boundaries. Context remains + read-only and never authorizes edits. +- **Engineering Memory**, **Trajectory Memory**, **Patch Trail**, and **Experience Layer** for typed repository + knowledge, historical agent workflows, change evidence, reusable patterns, and human-governed promotion. +- **Semantic retrieval** with optional LanceDB hybrid search, FTS5/BM25, vector search, and deterministic Reciprocal + Rank Fusion. +- **Platform Observability** for development-time tracing of CLI, MCP, analysis phases, database activity, semantic + indexing, worker chains, memory/CPU use, MCP payload pressure, and costly no-ops. +- **Corpus Analytics** for offline intent clustering, interpretability, versioned profiles, sweep comparison, maintainer + selection, and inspectable JSON/HTML outputs. +- **Module Map** as a deterministic report-only package/module graph with cycle, hub, overloaded-module, and + unwind-candidate views. +- **Guided Finding Review** as a prioritized report-only review queue with shared finding cards, filters, progress + tracking, and reviewed-state persistence. +- **Native agent and IDE integrations** for VS Code, Claude Desktop, Claude Code, Codex, and Cursor, including + governance, audit, memory, trajectory, and structural-review workflows. +- Expanded controller, memory, trajectory, analytics, semantic-search, observability, blast-radius, patch-verification, + and diagnostic CLI/MCP surfaces. +- Reorganized documentation into a contract-focused 00–26 book with unified integration guidance and explicit edition + tiers. +- MCP schemas now include parameter descriptions, deterministic `next_tool` guidance, token-budget tracking, workspace + hygiene warnings, and documentation-contract linting. + +### Contract changes + +- Cache schema advanced to **2.9** for the rebuildable per-function relationship-fact projection and to **2.10** for + intra-module, class-method, and receiver-aware call resolution. +- Engineering Memory schema advanced to **1.7** for trajectory and Patch Trail evidence. +- Corpus Analytics store schema advanced to **1.2**. +- Corpus Analytics JSON export schema advanced through **1.2** and **1.3**. +- Corpus Analytics representation contract advanced to **3**. +- Corpus Analytics control-plane contract introduced at **1.0**. +- `derived.module_map` and `derived.review_queue` remain report-only projections excluded from the integrity digest; + they add no analysis pass, metrics family, or report schema bump. +- Live Implementation Context relationship facts remain off the canonical report and do not change canonical report identity. -* **Corpus Analytics profiles and selection control (Slice 1.2).** Added - versioned bundled and repository-local profile manifests, finite - profile-scoped sweeps, separate suitability and profile-aware ranking, - immutable profile batch receipts, configurable ordinary sweep grids, manual - clustering parameters, and append-only maintainer selection events. Store - schema is now `1.2`; JSON export schema `1.3` adds control-plane contract - `1.0`, profile context/summary, and active selection without changing - technical-validity semantics. -* **Module map.** A default-on, report-only `derived.module_map` projection - reprojects existing dependency and overloaded-module facts into deterministic - package/module graph views and unwind-candidate triage rows for refactor - scoping. A new `Module map` HTML tab (between Quality and Dependencies) renders - the precomputed graph with a Packages/Modules zoom toggle, candidate/cycle/hub - cues, a truncation notice on sampled graphs, and full-size unwind and - top-overloaded tables. `get_report_section(section="module_map")` returns the - projection directly. No new analysis pass, metrics family, or report schema - bump — `derived` stays excluded from the integrity digest. -* **Guided finding review.** A default-on, report-only `derived.review_queue` - projection orders the existing suggestions into a prioritized, cross-family - actionable queue (severity, priority, family, location, effort) with summary - counts. A new `Review` HTML tab (between Overview and Clones) renders it as a - walkable list of shared finding cards with a per-finding reviewed toggle - (persisted in `localStorage`), a progress bar, and severity/family filters; the - `Overview` tab gains a launchpad banner that links into it. A new shared - `finding_card` component centralizes the card chrome now used by the - Suggestions, Review, and Structural Findings surfaces. No new analysis pass, - metrics family, or report schema bump — `derived` stays excluded from the - integrity digest. - -Changed - -* The default project workspace moved from .cache/codeclone/ to - .codeclone/; legacy locations now produce a migration warning. -* Documentation builds now use Zensical with strict, clean builds. -* pydantic is now a base dependency. -* LCOM4 excludes Protocol methods and Pydantic validation/serialization hooks; - computed_field remains part of cohesion analysis. -* Repository test coverage is enforced at >=99%. - -Fixed - -* Durable memory writes. Engineering Memory now uses - synchronous=FULL, preserving committed drafts across unclean MCP process - exits. Intent and audit stores retain recovery-oriented - synchronous=NORMAL. -* Atomic memory ingestion. persist_batch no longer commits records and - subjects mid-batch; it now defers the commit so a later failure in the same - batch rolls back the whole ingestion instead of leaving half-written records - behind. Standalone store writes keep their previous commit-on-write behavior. -* Observable best-effort failures. The non-fatal audit-event writer and the - best-effort finish-payload memory proposer no longer swallow exceptions with - zero signal; each now increments an observability counter (audit.emit_dropped, - memory.propose_candidate_dropped) on its fallback path, so silent drops stay - countable in the cockpit. Both remain non-fatal and the telemetry never - re-raises. -* Compact implementation-context misses. get_implementation_context no longer - emits the full empty facet scaffolding (structural_context, budget_summary, - dataflow, call_context, uncertainties) when an explicit symbol query resolves - nothing. The subject_not_found response now returns only the unresolved - subject, a slim provenance block, the projection digest, and an actionable - next_steps list, so a miss does not burn agent context. -* Memory lifecycle correctness. Draft records are no longer marked stale - before human promotion. Trajectory rebuilds now deduplicate superseded - projections, repoint evidence, remove stale workflow rows, and preserve - bounded claim-validation citations. -* Workspace hygiene and intent attribution. Finish blocks only on missing - evidence or foreign dirty overlap. Out-of-scope dirt is advisory, - continue_own_wip supports resuming owned work, queued foreign intents no - longer create false overlaps, and recoverable intents do not grant foreign - attribution. -* Patch verification correctness. Identical before/after runs are rejected - for structural and governance profiles. Negative health deltas now surface a - regression advisory, and Claim Guard warns when review text overstates patch - quality. -* Semantic retrieval correctness and cost. Hybrid search now preserves - lexical and vector relevance through RRF instead of allowing metadata ranking - to suppress strong matches. Per-source vector retrieval prevents dense lanes - from crowding out other sources. Embedding providers load lazily, failures - preserve documented fallback behavior, and redundant projection jobs are - coalesced or deferred. -* Architecture and import boundaries. Blast-radius graph logic moved into - codeclone/analysis/blast_radius.py, removing the CLI-to-MCP dependency + +### Changed + +- Default project workspace moved from `.cache/codeclone/` to `.codeclone/`; legacy paths emit a migration warning. +- Documentation builds now use Zensical with strict clean builds. +- `pydantic` is now a base dependency. +- LCOM4 excludes Protocol methods and Pydantic validation/serialization hooks; `computed_field` remains included. +- Repository coverage is enforced at **>=99%**. + +### Fixed + +- Engineering Memory writes are durable and batch ingestion is atomic. +- Best-effort audit and memory-proposal failures are now observable instead of silently swallowed. +- Implementation-context misses return a compact actionable payload instead of empty scaffolding. +- Memory, trajectory, and Patch Trail lifecycle handling now avoids premature staleness, duplicate projections, stale + workflow rows, and broken evidence links. +- Workspace hygiene, intent attribution, continuation of owned work, queue handling, and recoverable-intent behavior + were corrected. +- Patch verification now rejects identical before/after runs where required, surfaces health regressions, and warns on + overstated review claims. +- Semantic retrieval now preserves lexical/vector relevance, avoids source crowding, loads embeddings lazily, and + coalesces redundant projection work. +- Blast-radius graph logic moved into `codeclone/analysis/blast_radius.py`, removing the CLI-to-MCP dependency violation. -* Regression accuracy. respect_pyproject=false no longer reports - golden-fixture clone groups as false new regressions. Documentation URLs, - plugin references, and contract tests were updated after the documentation - reorganization. +- `respect_pyproject=false` no longer reports golden-fixture clone groups as false new regressions. +- Documentation URLs, integration references, and contract tests were aligned with the reorganized site. ## [2.0.2] - 2026-05-19 From b75c15e4b1d7460a11db2862750bdb644c43582f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:17:32 +0500 Subject: [PATCH 032/113] feat(html): refine design tokens with hairline borders and softer elevation --- codeclone/report/html/assets/css.py | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 456583f4..2d2d6273 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -28,9 +28,9 @@ --bg-overlay:oklch(29% 0.033 275); --bg-subtle:oklch(34% 0.038 275); - /* Border — same hue, higher chroma for legibility */ - --border:oklch(32% 0.035 275); - --border-strong:oklch(44% 0.045 275); + /* Border — quiet hairlines; -strong only for hover/emphasis */ + --border:oklch(28% 0.018 275); + --border-strong:oklch(40% 0.028 275); /* Text — muted greys keep a trace of indigo so they feel alive */ --text-primary:oklch(95% 0.010 275); @@ -55,17 +55,17 @@ --info:oklch(72% 0.13 238); --info-muted:color-mix(in oklch,oklch(72% 0.13 238) 18%,transparent); - /* elevation */ - --shadow-sm:0 1px 2px rgba(0,0,0,.25); - --shadow-md:0 2px 8px rgba(0,0,0,.3); - --shadow-lg:0 4px 16px rgba(0,0,0,.35); - --shadow-xl:0 8px 32px rgba(0,0,0,.4); + /* elevation — soft, diffuse, layered */ + --shadow-sm:0 1px 2px rgba(0,0,0,.18); + --shadow-md:0 4px 14px -3px rgba(0,0,0,.34); + --shadow-lg:0 10px 30px -8px rgba(0,0,0,.44); + --shadow-xl:0 20px 50px -14px rgba(0,0,0,.55); /* radii */ - --radius-sm:4px; - --radius-md:6px; - --radius-lg:8px; - --radius-xl:12px; + --radius-sm:5px; + --radius-md:8px; + --radius-lg:11px; + --radius-xl:16px; /* badge design code — one scale for every read-only label badge */ --badge-font:var(--font-sans); @@ -105,7 +105,7 @@ :root:not([data-theme]){ --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); - --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); + --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); --accent-soft:oklch(94% 0.045 275); @@ -113,15 +113,15 @@ --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); - --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); - --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + --shadow-sm:0 1px 2px rgba(17,20,38,.05);--shadow-md:0 4px 14px -3px rgba(17,20,38,.08); + --shadow-lg:0 12px 30px -8px rgba(17,20,38,.12);--shadow-xl:0 22px 50px -14px rgba(17,20,38,.16); color-scheme:light; } } [data-theme="light"]{ --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); - --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); + --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); --accent-soft:oklch(94% 0.045 275); @@ -129,8 +129,8 @@ --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); - --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); - --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + --shadow-sm:0 1px 2px rgba(17,20,38,.05);--shadow-md:0 4px 14px -3px rgba(17,20,38,.08); + --shadow-lg:0 12px 30px -8px rgba(17,20,38,.12);--shadow-xl:0 22px 50px -14px rgba(17,20,38,.16); color-scheme:light; } """ From 7e100f598161872988b3477e860f05e6225b8005 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:20:14 +0500 Subject: [PATCH 033/113] feat(html): smart button states with focus ring and tactile press --- codeclone/report/html/assets/css.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 2d2d6273..f48e1838 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -279,6 +279,18 @@ .btn.ghost:hover{background:var(--bg-raised);border-color:var(--border)} .btn.btn-icon{padding:var(--sp-1);min-width:28px;justify-content:center} .btn svg{width:14px;height:14px} +.btn:hover{box-shadow:var(--shadow-sm)} + +/* Smart controls — one accent focus ring for every button + tactile press */ +button:focus-visible{outline:2px solid var(--accent-primary);outline-offset:2px} +.btn:active,.prov-pill:active,.theme-toggle:active,.badge-btn:active,.badge-tab:active, +.review-launchpad-cta:active,.review-toggle:active,.review-chip:active, +.clone-nav-btn:active{transform:translateY(.5px) scale(.985)} +@media(prefers-reduced-motion:reduce){ + .btn:active,.prov-pill:active,.theme-toggle:active,.badge-btn:active,.badge-tab:active, + .review-launchpad-cta:active,.review-toggle:active,.review-chip:active, + .clone-nav-btn:active{transform:none} +} /* Inputs */ input[type="text"]{padding:var(--sp-1) var(--sp-3);font-size:.85rem;border:1px solid var(--border); From 6df063d91c34c8bafc98c42fae185f9b8ae7641b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:22:27 +0500 Subject: [PATCH 034/113] feat(html): polish form controls with unified hover and focus states --- codeclone/report/html/assets/css.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index f48e1838..ebf8c4b9 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -295,17 +295,21 @@ /* Inputs */ input[type="text"]{padding:var(--sp-1) var(--sp-3);font-size:.85rem;border:1px solid var(--border); border-radius:var(--radius-md);background:var(--bg-body);color:var(--text-primary);outline:none; - transition:border-color var(--dur-fast) var(--ease)} -input[type="text"]:focus{border-color:var(--accent-primary);box-shadow:0 0 0 2px var(--accent-muted)} + transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} +input[type="text"]:hover{border-color:var(--border-strong)} +input[type="text"]:focus{border-color:var(--accent-primary);box-shadow:0 0 0 3px var(--accent-muted)} input[type="text"]::placeholder{color:var(--text-muted)} /* Selects */ .select{padding:var(--sp-1) var(--sp-3);padding-right:var(--sp-6);font-size:.8rem; border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised); - color:var(--text-secondary);cursor:pointer;appearance:none; + color:var(--text-secondary);cursor:pointer;appearance:none;outline:none; + transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease), + color var(--dur-fast) var(--ease); background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%236b6f88' stroke-width='2'%3E%3Cpath d='M3 4.5l3 3 3-3'/%3E%3C/svg%3E"); background-repeat:no-repeat;background-position:right 8px center} -.select:focus{border-color:var(--accent-primary);outline:none} +.select:hover{border-color:var(--border-strong);color:var(--text-primary)} +.select:focus{border-color:var(--accent-primary);box-shadow:0 0 0 3px var(--accent-muted)} /* Checkbox labels */ .inline-check{display:inline-flex;align-items:center;gap:var(--sp-1);font-size:.8rem; From a90fa853e0f06a694d81e33df60598b90d341e0c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:30:31 +0500 Subject: [PATCH 035/113] fix(html): remove pill shapes, crisp radii, clean light background --- codeclone/report/html/assets/css.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index ebf8c4b9..f4e1ad19 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -61,11 +61,14 @@ --shadow-lg:0 10px 30px -8px rgba(0,0,0,.44); --shadow-xl:0 20px 50px -14px rgba(0,0,0,.55); - /* radii */ - --radius-sm:5px; - --radius-md:8px; - --radius-lg:11px; - --radius-xl:16px; + /* radii — crisp, not bubbly */ + --radius-sm:4px; + --radius-md:6px; + --radius-lg:8px; + --radius-xl:12px; + + /* page-background glow (dark only; light overrides to transparent) */ + --bg-glow:color-mix(in oklch,var(--accent-primary) 9%,transparent); /* badge design code — one scale for every read-only label badge */ --badge-font:var(--font-sans); @@ -103,7 +106,7 @@ so the whole theme feels like one family in both modes. */ @media(prefers-color-scheme:light){ :root:not([data-theme]){ - --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; + --bg-body:oklch(99% 0.003 275);--bg-surface:#ffffff;--bg-glow:transparent; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); @@ -119,7 +122,7 @@ } } [data-theme="light"]{ - --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; + --bg-body:oklch(99% 0.003 275);--bg-surface:#ffffff;--bg-glow:transparent; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); @@ -145,8 +148,7 @@ -moz-osx-font-smoothing:grayscale;scroll-behavior:smooth;scrollbar-gutter:stable} body{font-family:var(--font-sans);font-size:14px;line-height:1.6;color:var(--text-primary); background: - radial-gradient(1200px 520px at 50% -10%, - color-mix(in oklch,var(--accent-primary) 10%,transparent),transparent 72%), + radial-gradient(1100px 460px at 50% -12%,var(--bg-glow),transparent 70%), var(--bg-body); background-attachment:fixed;overflow-x:hidden; /* Inter stylistic alternates: @@ -354,7 +356,7 @@ .filters-btn{display:inline-flex;align-items:center;gap:var(--sp-1);white-space:nowrap} .filters-btn-ico{flex:none} .filters-count{display:inline-flex;align-items:center;justify-content:center; - min-width:18px;height:18px;padding:0 5px;border-radius:999px; + min-width:18px;height:18px;padding:0 5px;border-radius:var(--radius-sm); background:var(--accent-primary);color:#fff;font-size:.68rem;font-weight:600; line-height:1} .filters-btn[aria-expanded="true"]{border-color:var(--accent-primary); @@ -1104,7 +1106,7 @@ .review-filters{display:flex;flex-wrap:wrap;align-items:center;gap:6px;margin-bottom:var(--sp-4)} .review-filter-sep{width:1px;align-self:stretch;background:var(--border);margin:2px var(--sp-1)} .review-chip{display:inline-flex;align-items:center;gap:5px;font-size:.72rem; - font-family:var(--font-sans);padding:4px 10px;border-radius:999px;cursor:pointer; + font-family:var(--font-sans);padding:4px 10px;border-radius:var(--radius-sm);cursor:pointer; background:var(--bg-overlay);color:var(--text-secondary);border:1px solid var(--border); transition:border-color var(--dur-fast) var(--ease),color var(--dur-fast) var(--ease)} .review-chip:hover{border-color:var(--border-strong)} @@ -1421,7 +1423,7 @@ color-mix(in srgb,var(--bg-raised) 55%,transparent) 0%, var(--bg-surface) 100%)} .prov-hero-badge{display:inline-flex;align-items:center;gap:7px; - padding:6px 12px 6px 10px;border-radius:999px;font-weight:700;font-size:.78rem; + padding:6px 12px 6px 10px;border-radius:var(--radius-md);font-weight:700;font-size:.78rem; letter-spacing:.005em;white-space:nowrap;flex-shrink:0; border:1px solid var(--border);background:var(--bg-surface)} .prov-hero-icon{flex-shrink:0} From 4b16bd4fc21947745c2ce1f4b0d6921b92da246f Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:32:41 +0500 Subject: [PATCH 036/113] fix(html): align dependency hub-chip count to the shared count scale --- codeclone/report/html/assets/css.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index f4e1ad19..5ce2169a 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -978,7 +978,8 @@ .dep-hub-pill{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-2); border-radius:var(--radius-sm);background:var(--bg-overlay);font-size:.8rem} .dep-hub-name{color:var(--text-primary);font-family:var(--font-mono);font-size:.8rem} -.dep-hub-deg{font-size:.68rem;font-weight:600;color:var(--accent-primary); +.dep-hub-deg{font-family:var(--count-font);font-size:var(--count-size); + font-weight:var(--count-weight);font-variant-numeric:tabular-nums;color:var(--accent-primary); background:var(--accent-muted);padding:2px var(--sp-2);border-radius:var(--radius-sm)} /* Legend */ From fd2198ed38149072af70846c8cf59fc49bc99345 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:34:44 +0500 Subject: [PATCH 037/113] fix(html): restore the faint indigo tint on the light backdrop --- codeclone/report/html/assets/css.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 5ce2169a..16fa2062 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -106,7 +106,7 @@ so the whole theme feels like one family in both modes. */ @media(prefers-color-scheme:light){ :root:not([data-theme]){ - --bg-body:oklch(99% 0.003 275);--bg-surface:#ffffff;--bg-glow:transparent; + --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff;--bg-glow:transparent; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); @@ -122,7 +122,7 @@ } } [data-theme="light"]{ - --bg-body:oklch(99% 0.003 275);--bg-surface:#ffffff;--bg-glow:transparent; + --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff;--bg-glow:transparent; --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); --border:oklch(92% 0.010 275);--border-strong:oklch(85% 0.016 275); --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); From 11ca3e55ffac8f2e503bb6200eb23a4ce21cafc3 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:37:43 +0500 Subject: [PATCH 038/113] feat(html): unify Report Provenance onto the shared badge and count tokens --- codeclone/report/html/assets/css.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index 16fa2062..bb35d8eb 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -1244,7 +1244,7 @@ border:1px solid var(--border); box-shadow:0 1px 2px color-mix(in srgb,var(--text-primary) 3%,transparent)} .prov-section:last-child{margin-bottom:0} -.prov-section-title{font-size:.66rem;font-weight:700;text-transform:uppercase;letter-spacing:.09em; +.prov-section-title{font-size:.66rem;font-weight:600;text-transform:uppercase;letter-spacing:.06em; color:var(--text-secondary);margin:0 calc(-1*var(--sp-4)) var(--sp-2); padding:0 var(--sp-4) var(--sp-2);border:none; border-bottom:1px solid color-mix(in srgb,var(--border) 60%,transparent); @@ -1262,11 +1262,12 @@ /* Provenance summary badges */ .prov-summary{display:flex;flex-wrap:wrap;align-items:center;gap:6px; padding:var(--sp-2) var(--sp-4);border-top:1px solid var(--border)} -.prov-badge{display:inline-flex;align-items:center;gap:4px;font-size:.68rem; - padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-raised); +.prov-badge{display:inline-flex;align-items:center;gap:4px;font-size:var(--badge-size); + padding:2px var(--sp-2);border-radius:var(--badge-radius);background:var(--bg-raised); white-space:nowrap;line-height:1.3;border:1px solid color-mix(in srgb,var(--border) 55%,transparent); - font-family:var(--font-mono);letter-spacing:.005em} -.prov-badge-val{font-weight:600;font-variant-numeric:tabular-nums;color:var(--text-primary)} + font-family:var(--badge-font);letter-spacing:var(--badge-tracking)} +.prov-badge-val{font-family:var(--count-font);font-weight:var(--count-weight); + font-variant-numeric:tabular-nums;color:var(--text-primary)} .prov-badge-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} .prov-badge--inline{padding:2px 8px} .prov-badge--inline .prov-badge-val{font-weight:500} From 9c242e93083ee791bede7d06598804fb50f56660 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:42:30 +0500 Subject: [PATCH 039/113] fix(html): color the info severity consistently in launchpad and review chips --- codeclone/report/html/assets/css.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index bb35d8eb..a9af5845 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -1087,6 +1087,8 @@ background:color-mix(in oklch,var(--danger) 14%,transparent)} .launchpad-sev--warning{color:var(--warning); background:color-mix(in oklch,var(--warning) 14%,transparent)} +.launchpad-sev--info{color:var(--info); + background:color-mix(in oklch,var(--info) 14%,transparent)} .review-launchpad-cta{display:inline-flex;align-items:center;gap:7px;flex-shrink:0; font-size:.82rem;font-weight:600;font-family:var(--font-sans);cursor:pointer; padding:9px 16px;border-radius:var(--radius-md);border:0; @@ -1117,6 +1119,8 @@ background:color-mix(in oklch,var(--danger) 16%,transparent)} .review-chip--warning.is-active{border-color:var(--warning);color:var(--warning); background:color-mix(in oklch,var(--warning) 16%,transparent)} +.review-chip--info.is-active{border-color:var(--info);color:var(--info); + background:color-mix(in oklch,var(--info) 16%,transparent)} .review-chip-count{font-family:var(--count-font);font-size:var(--count-size); font-weight:var(--count-weight);font-variant-numeric:tabular-nums;opacity:.85} .review-queue{display:flex;flex-direction:column;gap:9px} From 84bc51d954f7177be3d44419ef7320f6a8876218 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 18:55:12 +0500 Subject: [PATCH 040/113] fix(html): meet WCAG AA contrast for severity badges in light theme --- codeclone/report/html/assets/css.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/codeclone/report/html/assets/css.py b/codeclone/report/html/assets/css.py index a9af5845..bb7a8dbf 100644 --- a/codeclone/report/html/assets/css.py +++ b/codeclone/report/html/assets/css.py @@ -44,7 +44,8 @@ --accent-soft:oklch(30% 0.12 275); /* Semantic — brand-adjacent, hue-rotated so they read as siblings - of the indigo instead of raw Tailwind defaults */ + of the indigo instead of raw Tailwind defaults. Light-mode lightness is + tuned so severity badge text clears WCAG AA (>=4.5:1) on its muted bg. */ --success:oklch(74% 0.15 162); --success-muted:color-mix(in oklch,oklch(74% 0.15 162) 18%,transparent); --warning:oklch(80% 0.15 82); @@ -112,10 +113,10 @@ --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); --accent-soft:oklch(94% 0.045 275); - --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); - --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); - --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); - --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); + --success:oklch(47% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); + --warning:oklch(51.5% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); + --error:oklch(50.5% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); + --danger:oklch(50.5% 0.22 20);--info:oklch(48.5% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); --shadow-sm:0 1px 2px rgba(17,20,38,.05);--shadow-md:0 4px 14px -3px rgba(17,20,38,.08); --shadow-lg:0 12px 30px -8px rgba(17,20,38,.12);--shadow-xl:0 22px 50px -14px rgba(17,20,38,.16); color-scheme:light; @@ -128,10 +129,10 @@ --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); --accent-soft:oklch(94% 0.045 275); - --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); - --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); - --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); - --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); + --success:oklch(47% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); + --warning:oklch(51.5% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); + --error:oklch(50.5% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); + --danger:oklch(50.5% 0.22 20);--info:oklch(48.5% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); --shadow-sm:0 1px 2px rgba(17,20,38,.05);--shadow-md:0 4px 14px -3px rgba(17,20,38,.08); --shadow-lg:0 12px 30px -8px rgba(17,20,38,.12);--shadow-xl:0 22px 50px -14px rgba(17,20,38,.16); color-scheme:light; From 5961bb2ac787a5aefb1e7840c898f27ad1abab14 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 21 Jun 2026 19:32:34 +0500 Subject: [PATCH 041/113] refactor(html): route the Module Map zoom toggle through the shared split-tabs widget --- codeclone/report/html/sections/_module_map.py | 36 +++++++++---------- codeclone/report/html/widgets/tabs.py | 11 ++++-- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/codeclone/report/html/sections/_module_map.py b/codeclone/report/html/sections/_module_map.py index d03692cf..6cde35b4 100644 --- a/codeclone/report/html/sections/_module_map.py +++ b/codeclone/report/html/sections/_module_map.py @@ -29,6 +29,7 @@ ) from ..widgets.glossary import glossary_tip from ..widgets.tables import render_rows_table +from ..widgets.tabs import render_split_tabs if TYPE_CHECKING: from .._context import ReportContext @@ -190,26 +191,25 @@ def _mm_zoom_toggle( graph_packages: Mapping[str, object], graph_modules: Mapping[str, object], ) -> str: - packages_svg = _render_module_map_svg(graph_packages) - modules_svg = _render_module_map_svg(graph_modules) package_count = len(_as_sequence(graph_packages.get("nodes"))) module_count = len(_as_sequence(graph_modules.get("nodes"))) - packages_active = "active" if default_zoom == "packages" else "" - modules_active = "" if default_zoom == "packages" else "active" - return ( - '" - f'
{packages_svg}
' - f'
{modules_svg}
' + return render_split_tabs( + group_id="module-map-zoom", + active_id=default_zoom, + tabs=[ + ( + "packages", + "Packages", + package_count, + _render_module_map_svg(graph_packages), + ), + ( + "modules", + "Modules", + module_count, + _render_module_map_svg(graph_modules), + ), + ], ) diff --git a/codeclone/report/html/widgets/tabs.py b/codeclone/report/html/widgets/tabs.py index 5b708e3a..4710187b 100644 --- a/codeclone/report/html/widgets/tabs.py +++ b/codeclone/report/html/widgets/tabs.py @@ -18,20 +18,25 @@ def render_split_tabs( group_id: str, tabs: Sequence[tuple[str, str, int, str]], emit_clone_counters: bool = False, + active_id: str | None = None, ) -> str: """Render sub-tab navigation + panels. - Each tab tuple: ``(tab_id, label, count, panel_html)``. + Each tab tuple: ``(tab_id, label, count, panel_html)``. ``active_id`` selects + which tab starts active; when omitted the first tab is active. """ if not tabs: return "" + def _is_active(idx: int, tab_id: str) -> bool: + return tab_id == active_id if active_id is not None else idx == 0 + nav: list[str] = [ '