From a802ab6f80de7cad71b35c02d9911366b5f38c11 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 26 Jun 2026 10:06:43 +0800 Subject: [PATCH] Add icon_classify: classify a widget from its pixel shape Set-of-Marks/element proposers return boxes but not what each box is; form_fields.checkbox_state reads a box already known to be a checkbox. box_features extracts {aspect, fill, edge_density, circularity}; classify_widget is the pure heuristic classifier (round->radio, wide-rounded->toggle, square-sparse->checkbox, wide-hollow->text_field, wide-filled->button, else icon); classify_icon composes them. cv2 lazy. --- WHATS_NEW.md | 6 + .../doc/new_features/v219_features_doc.rst | 46 ++++++++ .../Zh/doc/new_features/v219_features_doc.rst | 38 +++++++ je_auto_control/__init__.py | 5 + .../gui/script_builder/command_schema.py | 16 +++ .../utils/executor/action_executor.py | 16 +++ .../utils/icon_classify/__init__.py | 6 + .../utils/icon_classify/icon_classify.py | 107 ++++++++++++++++++ .../utils/mcp_server/tools/_factories.py | 22 ++++ .../utils/mcp_server/tools/_handlers.py | 10 ++ .../headless/test_icon_classify_batch.py | 107 ++++++++++++++++++ 11 files changed, 379 insertions(+) create mode 100644 docs/source/Eng/doc/new_features/v219_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v219_features_doc.rst create mode 100644 je_auto_control/utils/icon_classify/__init__.py create mode 100644 je_auto_control/utils/icon_classify/icon_classify.py create mode 100644 test/unit_test/headless/test_icon_classify_batch.py diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 3ebd77c4..83c0dbe3 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Classify a Widget from Its Pixel Shape + +Tell a checkbox from a radio button from a text field — from pixels, no model. Full reference: [`docs/source/Eng/doc/new_features/v219_features_doc.rst`](docs/source/Eng/doc/new_features/v219_features_doc.rst). 
+ +- **`classify_widget` / `box_features` / `classify_icon`** (`AC_classify_widget`, `AC_classify_icon`): Set-of-Marks and element proposers return *boxes* but not *what each box is*; `form_fields.checkbox_state` reads a box already known to be a checkbox — the gap is the typing step before it. `box_features` extracts `{aspect, fill, edge_density, circularity}` for a box; `classify_widget` is the pure heuristic classifier (round→radio, wide-rounded→toggle, square-sparse→checkbox, wide-hollow→text_field, wide-filled→button, else icon); `classify_icon` composes them. The classifier is pure and fully testable; cv2/numpy imported lazily so the module stays importable. Sixth feature of the ROUND-15 perception lane. No `PySide6`. + ### Localize a Change to the Elements That Changed Turn a raw screen diff into "element 3 changed" by scoring a list of element boxes. Full reference: [`docs/source/Eng/doc/new_features/v218_features_doc.rst`](docs/source/Eng/doc/new_features/v218_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v219_features_doc.rst b/docs/source/Eng/doc/new_features/v219_features_doc.rst new file mode 100644 index 00000000..8e595dde --- /dev/null +++ b/docs/source/Eng/doc/new_features/v219_features_doc.rst @@ -0,0 +1,46 @@ +Classify a Widget from Its Pixel Shape +====================================== + +Set-of-Marks and element proposers hand back *boxes*, but not *what each box is*. +``form_fields.checkbox_state`` already reads a box known to be a checkbox; the +gap is the typing step before it — is this box a checkbox, a radio button, a push +button, a text field or a toggle? ``icon_classify`` answers that from cheap +geometric features (no model). + +* :func:`box_features` — extract ``{aspect, fill, edge_density, circularity}`` + for a box region (the objective measurements). +* :func:`classify_widget` — pure: map a feature dict to a widget type by + documented heuristics. 
+* :func:`classify_icon` — compose the two: a box to ``{type, features}``. + +``classify_widget`` is pure and fully testable; ``box_features`` imports cv2 / +numpy lazily (the module stays importable without them) and reuses +:func:`visual_match._to_gray`. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import classify_icon, classify_widget + + # From a screenshot + a box: + classify_icon("dialog.png", [120, 80, 16, 16]) + # {'type': 'checkbox', 'features': {'aspect': 1.0, 'fill': 0.12, ...}} + + # From features you already have: + classify_widget({"aspect": 1.0, "circularity": 0.9, "fill": 0.4}) # 'radio' + +The heuristics: a round box (aspect ≈ 1, high circularity) is a ``radio``; a wide +rounded box is a ``toggle``; a near-square sparse box is a ``checkbox``; a wide +hollow box is a ``text_field``; a wide filled box is a ``button``; anything else +is an ``icon``. Tune by reading ``features`` and applying your own rules where +the defaults misfire — the measurements are the durable part. + +Executor commands +----------------- + +``AC_classify_widget`` (``features`` JSON object → ``{type}``, pure) and +``AC_classify_icon`` (``source`` image + ``box`` ``[x, y, w, h]`` → +``{type, features}``). Both are also exposed as matching read-only ``ac_*`` MCP tools and as +Script Builder commands under **Image**. 
diff --git a/docs/source/Zh/doc/new_features/v219_features_doc.rst b/docs/source/Zh/doc/new_features/v219_features_doc.rst new file mode 100644 index 00000000..00cfaf05 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v219_features_doc.rst @@ -0,0 +1,38 @@ +從像素形狀分類控制項 +==================== + +Set-of-Marks 與元素提案器回傳*方框*,卻不告訴你*每個方框是什麼*。``form_fields.checkbox_state`` +已能讀取一個已知是核取方塊的方框;缺少的是它之前的分類步驟——這個方框是核取方塊、單選鈕、按鈕、 +文字欄位還是切換開關?``icon_classify`` 從低成本的幾何特徵(無需模型)回答此問題。 + +* :func:`box_features` ——擷取方框區域的 ``{aspect, fill, edge_density, circularity}``(客觀量測)。 +* :func:`classify_widget` ——純函式:以記載的啟發式規則把特徵字典映射為控制項型別。 +* :func:`classify_icon` ——組合兩者:把一個方框轉為 ``{type, features}``。 + +``classify_widget`` 為純函式且可完整測試;``box_features`` 延遲匯入 cv2 / numpy(模組無需它們即可匯入), +並重用 :func:`visual_match._to_gray`。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import classify_icon, classify_widget + + # 從截圖 + 方框: + classify_icon("dialog.png", [120, 80, 16, 16]) + # {'type': 'checkbox', 'features': {'aspect': 1.0, 'fill': 0.12, ...}} + + # 從你已有的特徵: + classify_widget({"aspect": 1.0, "circularity": 0.9, "fill": 0.4}) # 'radio' + +啟發式規則:圓形方框(aspect ≈ 1、高 circularity)為 ``radio``;寬且圓潤為 ``toggle``; +近正方且稀疏為 ``checkbox``;寬且空心為 ``text_field``;寬且填滿為 ``button``;其餘為 ``icon``。 +在預設誤判處,可讀取 ``features`` 套用你自己的規則微調——量測值才是耐用的部分。 + +執行器指令 +---------- + +``AC_classify_widget``(``features`` JSON 物件 → ``{type}``,純函式)與 +``AC_classify_icon``(``source`` 影像 + ``box`` ``[x, y, w, h]`` → ``{type, features}``)。 +皆以對應的唯讀 ``ac_*`` MCP 工具及 Script Builder 指令(位於 **Image** 分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 8686170d..54b3f178 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -145,6 +145,10 @@ from je_auto_control.utils.theme_normalize import match_theme, normalize_theme # Attribute a screen change to the specific element boxes that changed from je_auto_control.utils.change_localize import localize_changes, 
rank_changes +# Classify what kind of widget a box is from its pixel shape +from je_auto_control.utils.icon_classify import ( + box_features, classify_icon, classify_widget, +) # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1774,6 +1778,7 @@ def start_autocontrol_gui(*args, **kwargs): "grade_contrast", "dominant_pair", "region_contrast", "normalize_theme", "match_theme", "localize_changes", "rank_changes", + "classify_widget", "box_features", "classify_icon", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index ac0cf971..f642c163 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -4632,6 +4632,22 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Rank which element boxes changed between two frames.", )) + specs.append(CommandSpec( + "AC_classify_widget", "Image", "Classify Widget (features)", + fields=( + FieldSpec("features", FieldType.STRING, + placeholder="JSON {aspect, circularity, fill}"), + ), + description="Map geometric features to a widget type.", + )) + specs.append(CommandSpec( + "AC_classify_icon", "Image", "Classify Icon (box)", + fields=( + FieldSpec("source", FieldType.STRING, placeholder="image path"), + FieldSpec("box", FieldType.STRING, placeholder="[x, y, w, h]"), + ), + description="Classify the widget in an image box from its pixels.", + )) specs.append(CommandSpec( "AC_normalize_ext", "Shell", "Normalize Extension", fields=( diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index fc271047..62168052 100644 
--- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2932,6 +2932,20 @@ def _localize_changes(reference: Any, boxes: Any, current: Any = None, return {"changes": changes} +def _classify_widget(features: Any) -> Dict[str, Any]: + """Adapter: map geometric features to a widget type (pure).""" + from je_auto_control.utils.icon_classify import classify_widget + import json + data = json.loads(features) if isinstance(features, str) else dict(features) + return {"type": classify_widget(data)} + + +def _classify_icon(source: Any, box: Any) -> Dict[str, Any]: + """Adapter: classify the widget in a box from its pixels (device).""" + from je_auto_control.utils.icon_classify import classify_icon + return classify_icon(str(source), _coerce_list(box)) + + def _normalize_ext(target: str) -> Dict[str, Any]: """Adapter: the lowercased extension of a path / bare ext (pure).""" from je_auto_control.utils.file_assoc import normalize_ext @@ -6973,6 +6987,8 @@ def __init__(self): "AC_match_theme": _match_theme, "AC_rank_changes": _rank_changes, "AC_localize_changes": _localize_changes, + "AC_classify_widget": _classify_widget, + "AC_classify_icon": _classify_icon, "AC_normalize_ext": _normalize_ext, "AC_file_association": _file_association, "AC_get_control_text": _get_control_text, diff --git a/je_auto_control/utils/icon_classify/__init__.py b/je_auto_control/utils/icon_classify/__init__.py new file mode 100644 index 00000000..f7c35564 --- /dev/null +++ b/je_auto_control/utils/icon_classify/__init__.py @@ -0,0 +1,6 @@ +"""Classify what kind of widget a box is from its pixel shape.""" +from je_auto_control.utils.icon_classify.icon_classify import ( + WIDGET_TYPES, box_features, classify_icon, classify_widget, +) + +__all__ = ["classify_widget", "box_features", "classify_icon", "WIDGET_TYPES"] diff --git a/je_auto_control/utils/icon_classify/icon_classify.py b/je_auto_control/utils/icon_classify/icon_classify.py new file mode 
100644 index 00000000..edd11c23 --- /dev/null +++ b/je_auto_control/utils/icon_classify/icon_classify.py @@ -0,0 +1,107 @@ +"""Classify what kind of widget a box is from its pixel shape. + +Set-of-Marks and element proposers hand back *boxes*, but not *what each box is*. +``form_fields.checkbox_state`` already reads a box known to be a checkbox; the +gap is the typing step before it — is this box a checkbox, a radio button, a +push button, a text field or a toggle? ``icon_classify`` answers that from cheap +geometric features (no model): + +* :func:`box_features` — extract ``{aspect, fill, edge_density, circularity}`` + for a box region (the objective measurements). +* :func:`classify_widget` — pure: map a feature dict to a widget type by + documented heuristics. +* :func:`classify_icon` — compose the two: a box to ``{type, features}``. + +``classify_widget`` is pure and fully testable; ``box_features`` imports cv2 / +numpy lazily (the module stays importable without them) and reuses +:func:`visual_match._to_gray`. Imports no ``PySide6``. +""" +from typing import Any, Dict, Sequence + +# The widget types this classifier can return. +WIDGET_TYPES = ("radio", "toggle", "checkbox", "text_field", "button", "icon") + + +def _is_round(aspect: float, circ: float) -> bool: + """Near-square and circular (a radio button / round dot).""" + return 0.7 <= aspect <= 1.4 and circ >= 0.7 + + +def _is_pill(aspect: float, circ: float) -> bool: + """Wide and rounded (a toggle switch).""" + return 1.8 <= aspect <= 3.5 and circ >= 0.55 + + +def classify_widget(features: Dict[str, float]) -> str: + """Map geometric ``features`` to a widget type by heuristics (pure). + + Uses ``aspect`` (w/h), ``circularity`` (1 = circle), and ``fill`` (ink + fraction). Round → ``radio``; wide & rounded → ``toggle``; near-square & + sparse → ``checkbox``; wide & hollow → ``text_field``; wide & filled → + ``button``; otherwise ``icon``. 
+ """ + aspect = float(features.get("aspect", 1.0)) + circ = float(features.get("circularity", 0.0)) + fill = float(features.get("fill", 0.0)) + if _is_round(aspect, circ): + return "radio" + if _is_pill(aspect, circ): + return "toggle" + if 0.7 <= aspect <= 1.4 and fill <= 0.6: + return "checkbox" + if aspect >= 2.5 and fill <= 0.2: + return "text_field" + if aspect >= 1.5 and fill >= 0.2: + return "button" + return "icon" + + +def _circularity(binary: Any) -> float: + """Circularity (``4*pi*A / P^2``, 1 = circle) of the largest blob.""" + import math + + import cv2 + contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) + if not contours: + return 0.0 + largest = max(contours, key=cv2.contourArea) + area = float(cv2.contourArea(largest)) + perimeter = float(cv2.arcLength(largest, True)) + if perimeter <= 0.0: + return 0.0 + return min(1.0, 4.0 * math.pi * area / (perimeter * perimeter)) + + +def box_features(source: Any, box: Sequence[int]) -> Dict[str, float]: + """Extract ``{aspect, fill, edge_density, circularity}`` for a box (cv2). + + ``aspect`` is width/height, ``fill`` the ink fraction (Otsu foreground), + ``edge_density`` the Canny-edge fraction, ``circularity`` the largest blob's + roundness. An empty box yields all zeros. 
+ """ + import cv2 + from je_auto_control.utils.visual_match.visual_match import _to_gray + gray = _to_gray(source) + x, y, w, h = (int(box[0]), int(box[1]), int(box[2]), int(box[3])) + patch = gray[max(0, y):y + h, max(0, x):x + w] + if patch.size == 0: + return {"aspect": 0.0, "fill": 0.0, "edge_density": 0.0, + "circularity": 0.0} + _, binary = cv2.threshold(patch, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + fill = float((binary > 0).sum()) / patch.size + edges = cv2.Canny(patch, 50, 150) + edge_density = float((edges > 0).sum()) / patch.size + return { + "aspect": round(w / h, 3) if h else 0.0, + "fill": round(fill, 3), + "edge_density": round(edge_density, 3), + "circularity": round(_circularity(binary), 3), + } + + +def classify_icon(source: Any, box: Sequence[int]) -> Dict[str, Any]: + """Classify the widget in a box from its pixels: ``{type, features}``.""" + features = box_features(source, box) + return {"type": classify_widget(features), "features": features} diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 41e8400d..3b11e843 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -4137,6 +4137,28 @@ def img_histogram_tools() -> List[MCPTool]: handler=h.localize_changes, annotations=READ_ONLY, ), + MCPTool( + name="ac_classify_widget", + description=("Map geometric 'features' {aspect, circularity, fill} " + "to a widget type (radio/toggle/checkbox/text_field/" + "button/icon). Pure. Returns {type}."), + input_schema=schema({"features": {"type": "object"}}, + required=["features"]), + handler=h.classify_widget, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_classify_icon", + description=("Classify the widget in a 'box' [x,y,w,h] of a " + "'source' image from its pixel shape. 
Returns {type, " + "features}."), + input_schema=schema({"source": {"type": "string"}, + "box": {"type": "array", + "items": {"type": "integer"}}}, + required=["source", "box"]), + handler=h.classify_icon, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index a91d0644..66875681 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -787,6 +787,16 @@ def localize_changes(reference, boxes, current=None, threshold=0.1, return _localize_changes(reference, boxes, current, threshold, region) +def classify_widget(features): + from je_auto_control.utils.executor.action_executor import _classify_widget + return _classify_widget(features) + + +def classify_icon(source, box): + from je_auto_control.utils.executor.action_executor import _classify_icon + return _classify_icon(source, box) + + def normalize_ext(target): from je_auto_control.utils.executor.action_executor import _normalize_ext return _normalize_ext(target) diff --git a/test/unit_test/headless/test_icon_classify_batch.py b/test/unit_test/headless/test_icon_classify_batch.py new file mode 100644 index 00000000..63f65444 --- /dev/null +++ b/test/unit_test/headless/test_icon_classify_batch.py @@ -0,0 +1,107 @@ +"""Headless tests for icon_classify (pure classifier + cv2 features).""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.icon_classify import ( + box_features, classify_icon, classify_widget, +) + + +# --- pure classify_widget ------------------------------------------------- + +def test_classify_radio_round(): + assert classify_widget( + {"aspect": 1.0, "circularity": 0.92, "fill": 0.4}) == "radio" + + +def test_classify_toggle_wide_rounded(): + assert classify_widget( + {"aspect": 2.4, "circularity": 0.6, "fill": 0.5}) == "toggle" + + +def test_classify_checkbox_square_sparse(): + assert classify_widget( + 
{"aspect": 1.0, "circularity": 0.2, "fill": 0.1}) == "checkbox" + + +def test_classify_text_field_wide_hollow(): + assert classify_widget( + {"aspect": 4.0, "circularity": 0.1, "fill": 0.05}) == "text_field" + + +def test_classify_button_wide_filled(): + assert classify_widget( + {"aspect": 2.0, "circularity": 0.2, "fill": 0.5}) == "button" + + +def test_classify_icon_fallback(): + assert classify_widget( + {"aspect": 1.1, "circularity": 0.3, "fill": 0.9}) == "icon" + + +def test_classify_widget_defaults_dont_crash(): + assert classify_widget({}) in ("checkbox", "icon") + + +# --- cv2 box_features / classify_icon (per-function importorskip) ---------- + +def test_box_features_circle_rounder_than_square(): + np = pytest.importorskip("numpy") + cv2 = pytest.importorskip("cv2") + canvas = np.full((40, 40), 255, dtype="uint8") + cv2.circle(canvas, (20, 20), 14, 0, -1) + circle = box_features(canvas, [3, 3, 34, 34]) + square_canvas = np.full((40, 40), 255, dtype="uint8") + cv2.rectangle(square_canvas, (6, 6), (34, 34), 0, -1) + square = box_features(square_canvas, [3, 3, 34, 34]) + assert circle["circularity"] > square["circularity"] + assert circle["circularity"] > 0.8 + + +def test_classify_icon_detects_radio_from_pixels(): + np = pytest.importorskip("numpy") + cv2 = pytest.importorskip("cv2") + canvas = np.full((40, 40), 255, dtype="uint8") + cv2.circle(canvas, (20, 20), 13, 0, -1) # filled round dot + result = classify_icon(canvas, [4, 4, 32, 32]) + assert result["type"] == "radio" + assert set(result["features"]) == {"aspect", "fill", "edge_density", + "circularity"} + + +def test_box_features_empty_box(): + pytest.importorskip("numpy") + pytest.importorskip("cv2") + import numpy as np + canvas = np.zeros((10, 10), dtype="uint8") + feats = box_features(canvas, [0, 0, 0, 0]) + assert feats == {"aspect": 0.0, "fill": 0.0, "edge_density": 0.0, + "circularity": 0.0} + + +# --- wiring (cv2-free) ---------------------------------------------------- + +def 
test_executor_pure_classify_path(): + from je_auto_control.utils.executor.action_executor import ( + _classify_widget, + ) + out = _classify_widget('{"aspect": 1.0, "circularity": 0.9, "fill": 0.3}') + assert out["type"] == "radio" + + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_classify_widget", "AC_classify_icon"} <= known + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry, + ) + names = {t.name for t in build_default_tool_registry()} + assert {"ac_classify_widget", "ac_classify_icon"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_classify_widget", "AC_classify_icon"} <= specs + + +def test_facade_exports(): + for name in ("classify_widget", "box_features", "classify_icon"): + assert hasattr(ac, name) and name in ac.__all__