diff --git a/WHATS_NEW.md b/WHATS_NEW.md index d0a40e5a..531ded96 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Adaptive Timeout from Observed Durations + +Stop guessing wait timeouts — learn them from how long the step actually takes. Full reference: [`docs/source/Eng/doc/new_features/v211_features_doc.rst`](docs/source/Eng/doc/new_features/v211_features_doc.rst). + +- **`recommend_timeout` / `timeout_stats`** (`AC_adaptive_timeout`, `AC_timeout_stats`): hard-coded waits are a perennial flakiness source — too short races a slow machine, too long makes every failure pay the full timeout. This learns the timeout from observed step durations: a high percentile (the slow-but-real case) scaled by a safety `factor`, clamped to a sane `[min_s, max_s]` band. `recommend_timeout` returns the single number to feed a `wait_for_*` / actionability `GateConfig`; `timeout_stats` also exposes the percentiles and `floored`/`capped` flags for tuning. Both are pure and reuse `stats.percentile`; with no samples they fall back to `default_s` (or `min_s` when `default_s` is not given). Third feature of the ROUND-15 input-fidelity lane. No `PySide6`. + ### Verify a Field After Typing Read the field back and confirm the value actually landed — don't type and hope. Full reference: [`docs/source/Eng/doc/new_features/v210_features_doc.rst`](docs/source/Eng/doc/new_features/v210_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v211_features_doc.rst b/docs/source/Eng/doc/new_features/v211_features_doc.rst new file mode 100644 index 00000000..4da02166 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v211_features_doc.rst @@ -0,0 +1,46 @@ +Adaptive Timeout from Observed Durations +======================================== + +Hard-coded waits are a perennial source of flakiness: too short and a slow +machine races the UI; too long and every failure pays the full timeout. The +durable fix is to *learn* the timeout from how long a step has actually taken. 
+``adaptive_timeout`` turns a sample of observed durations into a robust timeout +— a high percentile (the slow-but-real case) scaled by a safety ``factor``, +then clamped to a sane ``[min_s, max_s]`` band. + +* :func:`recommend_timeout` — the single number to feed a wait or ``GateConfig``. +* :func:`timeout_stats` — the same with the percentiles and clamp flags exposed + for logging / tuning. + +Both are pure and reuse :func:`stats.percentile`; with no samples they fall back +to ``default_s`` (or ``min_s``). Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import recommend_timeout, timeout_stats + + # The dialog has historically taken these seconds to appear: + seen = [0.8, 1.1, 0.9, 3.2, 1.0, 1.3] + + recommend_timeout(seen) # ~ p95 * 1.5, clamped to [1, 60] + recommend_timeout(seen, percentile_q=99.0, factor=2.0, max_s=30.0) + + timeout_stats(seen) + # {'n': 6, 'p50': 1.05, 'p_high': 2.7..., 'percentile_q': 95.0, + # 'recommended': 4.08..., 'floored': False, 'capped': False} + +Use the recommendation as the ``timeout_s`` for the next ``wait_for_*`` / +actionability gate, recomputing it as the duration sample grows. With no samples +yet, pass ``default_s`` for the cold-start value. + +Executor commands +----------------- + +``AC_adaptive_timeout`` (``durations`` + ``percentile_q`` / ``factor`` / +``min_s`` / ``max_s`` → ``{timeout_s}``) and ``AC_timeout_stats`` (same inputs → +``{n, p50, p_high, percentile_q, recommended, floored, capped}``). ``durations`` +accepts a JSON list. Both are also available as the matching read-only ``ac_*`` +MCP tools and Script Builder commands under **Flow**. 
diff --git a/docs/source/Zh/doc/new_features/v211_features_doc.rst b/docs/source/Zh/doc/new_features/v211_features_doc.rst new file mode 100644 index 00000000..aad939d3 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v211_features_doc.rst @@ -0,0 +1,40 @@ +依觀測時長自適應逾時 +==================== + +寫死的等待是長年的不穩定來源:太短則慢機器與 UI 競速;太長則每次失敗都得付滿整個逾時。可長久的修法是 +從某步驟*實際*花了多久來*學習*逾時。``adaptive_timeout`` 把一組觀測時長轉為穩健的逾時——取高百分位 +(慢但真實的情況)乘上安全 ``factor``,再夾到合理的 ``[min_s, max_s]`` 區間。 + +* :func:`recommend_timeout` ——餵給等待或 ``GateConfig`` 的單一數值。 +* :func:`timeout_stats` ——同上,但額外暴露百分位與夾值旗標以利記錄 / 調校。 + +兩者皆為純函式並重用 :func:`stats.percentile`;沒有樣本時退回 ``default_s``(或 ``min_s``)。 +不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import recommend_timeout, timeout_stats + + # 對話框歷來出現所花的秒數: + seen = [0.8, 1.1, 0.9, 3.2, 1.0, 1.3] + + recommend_timeout(seen) # 約 p95 * 1.5,夾到 [1, 60] + recommend_timeout(seen, percentile_q=99.0, factor=2.0, max_s=30.0) + + timeout_stats(seen) + # {'n': 6, 'p50': 1.05, 'p_high': 2.7..., 'percentile_q': 95.0, + # 'recommended': 4.08..., 'floored': False, 'capped': False} + +把建議值當作下一個 ``wait_for_*`` / actionability 閘的 ``timeout_s``,並隨樣本增長重新計算。 +尚無樣本時,以 ``default_s`` 作為冷啟動值。 + +執行器指令 +---------- + +``AC_adaptive_timeout``(``durations`` 加上 ``percentile_q`` / ``factor`` / +``min_s`` / ``max_s`` → ``{timeout_s}``)與 ``AC_timeout_stats``(同樣輸入 → +``{n, p50, p_high, percentile_q, recommended, floored, capped}``)。``durations`` +接受 JSON 清單。皆以對應的唯讀 ``ac_*`` MCP 工具及 Script Builder 指令(位於 **Flow** 分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index e25642fd..8e202f2f 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -123,6 +123,10 @@ from je_auto_control.utils.verify_field import ( compare_field_value, fill_and_verify, verify_field_value, ) +# Derive a wait timeout from observed step durations +from je_auto_control.utils.adaptive_timeout import ( + recommend_timeout, timeout_stats, 
+) # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1744,6 +1748,7 @@ def start_autocontrol_gui(*args, **kwargs): "decode_conversion_mode", "RetryBudget", "run_with_budget", "backoff_delay", "jittered_delay", "compare_field_value", "verify_field_value", "fill_and_verify", + "recommend_timeout", "timeout_stats", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 17221686..e1d51a07 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -4458,6 +4458,32 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Read a control's value back and confirm it equals expected.", )) + specs.append(CommandSpec( + "AC_adaptive_timeout", "Flow", "Adaptive Timeout", + fields=( + FieldSpec("durations", FieldType.STRING, + placeholder="JSON list of durations (seconds)"), + FieldSpec("percentile_q", FieldType.FLOAT, optional=True, + default=95.0), + FieldSpec("factor", FieldType.FLOAT, optional=True, default=1.5), + FieldSpec("min_s", FieldType.FLOAT, optional=True, default=1.0), + FieldSpec("max_s", FieldType.FLOAT, optional=True, default=60.0), + ), + description="Recommend a wait timeout from observed step durations.", + )) + specs.append(CommandSpec( + "AC_timeout_stats", "Flow", "Timeout Stats", + fields=( + FieldSpec("durations", FieldType.STRING, + placeholder="JSON list of durations (seconds)"), + FieldSpec("percentile_q", FieldType.FLOAT, optional=True, + default=95.0), + FieldSpec("factor", FieldType.FLOAT, optional=True, default=1.5), + FieldSpec("min_s", FieldType.FLOAT, optional=True, default=1.0), + 
FieldSpec("max_s", FieldType.FLOAT, optional=True, default=60.0), + ), + description="Timeout recommendation plus percentiles and clamp flags.", + )) specs.append(CommandSpec( "AC_normalize_ext", "Shell", "Normalize Extension", fields=( diff --git a/je_auto_control/utils/adaptive_timeout/__init__.py b/je_auto_control/utils/adaptive_timeout/__init__.py new file mode 100644 index 00000000..f4a4cdc7 --- /dev/null +++ b/je_auto_control/utils/adaptive_timeout/__init__.py @@ -0,0 +1,6 @@ +"""Derive a wait timeout from observed step durations instead of guessing.""" +from je_auto_control.utils.adaptive_timeout.adaptive_timeout import ( + recommend_timeout, timeout_stats, +) + +__all__ = ["recommend_timeout", "timeout_stats"] diff --git a/je_auto_control/utils/adaptive_timeout/adaptive_timeout.py b/je_auto_control/utils/adaptive_timeout/adaptive_timeout.py new file mode 100644 index 00000000..5e8a7de9 --- /dev/null +++ b/je_auto_control/utils/adaptive_timeout/adaptive_timeout.py @@ -0,0 +1,78 @@ +"""Derive a wait timeout from observed step durations instead of guessing. + +Hard-coded waits are a perennial source of flakiness: too short and a slow +machine races the UI; too long and every failure pays the full timeout. The +durable fix is to *learn* the timeout from how long the step has actually taken. +``adaptive_timeout`` turns a sample of observed durations into a robust timeout: +a high percentile (the slow-but-real case) scaled by a safety ``factor``, then +clamped to a sane ``[min_s, max_s]`` band. + +* :func:`recommend_timeout` — the single number to feed a wait / ``GateConfig``. +* :func:`timeout_stats` — the same with the percentiles and clamp flags exposed + for logging / tuning. + +Both are pure and reuse :func:`stats.percentile`; with no samples they fall back +to ``default_s`` (or ``min_s``). Imports no ``PySide6``. 
+""" +from typing import Any, Dict, List, Optional, Sequence + +from je_auto_control.utils.stats.stats import percentile + + +def _clamp(value: float, min_s: float, max_s: Optional[float]) -> float: + """Clamp ``value`` to ``[min_s, max_s]`` (``max_s`` None = no upper cap).""" + bounded = max(float(min_s), float(value)) + if max_s is not None: + bounded = min(float(max_s), bounded) + return bounded + + +def _fallback(default_s: Optional[float], min_s: float) -> float: + """The timeout to use when there are no duration samples.""" + return float(default_s) if default_s is not None else float(min_s) + + +def recommend_timeout(durations: Sequence[float], *, percentile_q: float = 95.0, + factor: float = 1.5, min_s: float = 1.0, + max_s: Optional[float] = 60.0, + default_s: Optional[float] = None) -> float: + """Recommend a wait timeout (seconds) from observed ``durations``. + + Takes the ``percentile_q``-th percentile of the samples, scales it by + ``factor``, and clamps to ``[min_s, max_s]``. With no samples returns + ``default_s`` (or ``min_s``). + """ + samples = [float(d) for d in durations if d is not None] + if not samples: + return _fallback(default_s, min_s) + scaled = percentile(samples, float(percentile_q)) * float(factor) + return _clamp(scaled, min_s, max_s) + + +def timeout_stats(durations: Sequence[float], *, percentile_q: float = 95.0, + factor: float = 1.5, min_s: float = 1.0, + max_s: Optional[float] = 60.0, + default_s: Optional[float] = None) -> Dict[str, Any]: + """Recommend a timeout and expose the percentiles and clamp decisions. + + Returns ``{n, p50, p_high, percentile_q, recommended, floored, capped}``. 
+ """ + samples: List[float] = [float(d) for d in durations if d is not None] + recommended = recommend_timeout( + samples, percentile_q=percentile_q, factor=factor, min_s=min_s, + max_s=max_s, default_s=default_s) + if not samples: + return {"n": 0, "p50": None, "p_high": None, + "percentile_q": float(percentile_q), + "recommended": recommended, "floored": False, "capped": False} + p_high = percentile(samples, float(percentile_q)) + scaled = p_high * float(factor) + return { + "n": len(samples), + "p50": percentile(samples, 50.0), + "p_high": p_high, + "percentile_q": float(percentile_q), + "recommended": recommended, + "floored": scaled < float(min_s), + "capped": max_s is not None and scaled > float(max_s), + } diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index b44416b6..4183e25a 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2783,6 +2783,28 @@ def _verify_field_value(expected: Any, name: Optional[str] = None, mode=str(mode)) +def _adaptive_timeout(durations: Any, percentile_q: Any = 95.0, + factor: Any = 1.5, min_s: Any = 1.0, + max_s: Any = 60.0) -> Dict[str, Any]: + """Adapter: recommend a wait timeout from observed durations (pure).""" + from je_auto_control.utils.adaptive_timeout import recommend_timeout + samples = [float(d) for d in _coerce_list(durations)] if durations else [] + timeout = recommend_timeout(samples, percentile_q=float(percentile_q), + factor=float(factor), min_s=float(min_s), + max_s=float(max_s)) + return {"timeout_s": float(timeout)} + + +def _timeout_stats(durations: Any, percentile_q: Any = 95.0, factor: Any = 1.5, + min_s: Any = 1.0, max_s: Any = 60.0) -> Dict[str, Any]: + """Adapter: timeout recommendation plus percentiles / clamp flags (pure).""" + from je_auto_control.utils.adaptive_timeout import timeout_stats + samples = [float(d) for d in _coerce_list(durations)] if durations else [] 
+ return timeout_stats(samples, percentile_q=float(percentile_q), + factor=float(factor), min_s=float(min_s), + max_s=float(max_s)) + + def _normalize_ext(target: str) -> Dict[str, Any]: """Adapter: the lowercased extension of a path / bare ext (pure).""" from je_auto_control.utils.file_assoc import normalize_ext @@ -6809,6 +6831,8 @@ def __init__(self): "AC_plan_retry_delays": _plan_retry_delays, "AC_compare_field_value": _compare_field_value, "AC_verify_field_value": _verify_field_value, + "AC_adaptive_timeout": _adaptive_timeout, + "AC_timeout_stats": _timeout_stats, "AC_normalize_ext": _normalize_ext, "AC_file_association": _file_association, "AC_get_control_text": _get_control_text, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 7995e9fd..74f7c236 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -1826,6 +1826,37 @@ def smart_wait_tools() -> List[MCPTool]: handler=h.verify_field_value, annotations=READ_ONLY, ), + MCPTool( + name="ac_adaptive_timeout", + description=("Recommend a wait timeout (seconds) from observed step " + "'durations': the 'percentile_q'-th percentile scaled " + "by 'factor', clamped to [min_s, max_s]. Returns " + "{timeout_s}."), + input_schema=schema({"durations": {"type": "array", + "items": {"type": "number"}}, + "percentile_q": {"type": "number"}, + "factor": {"type": "number"}, + "min_s": {"type": "number"}, + "max_s": {"type": "number"}}, + required=["durations"]), + handler=h.adaptive_timeout, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_timeout_stats", + description=("Recommend a timeout and expose the percentiles and " + "clamp decisions. 
Returns {n, p50, p_high, " + "percentile_q, recommended, floored, capped}."), + input_schema=schema({"durations": {"type": "array", + "items": {"type": "number"}}, + "percentile_q": {"type": "number"}, + "factor": {"type": "number"}, + "min_s": {"type": "number"}, + "max_s": {"type": "number"}}, + required=["durations"]), + handler=h.timeout_stats, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 42ce390a..96138a70 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -698,6 +698,20 @@ def verify_field_value(expected, name=None, role=None, app_name=None, mode) +def adaptive_timeout(durations, percentile_q=95.0, factor=1.5, min_s=1.0, + max_s=60.0): + from je_auto_control.utils.executor.action_executor import ( + _adaptive_timeout, + ) + return _adaptive_timeout(durations, percentile_q, factor, min_s, max_s) + + +def timeout_stats(durations, percentile_q=95.0, factor=1.5, min_s=1.0, + max_s=60.0): + from je_auto_control.utils.executor.action_executor import _timeout_stats + return _timeout_stats(durations, percentile_q, factor, min_s, max_s) + + def normalize_ext(target): from je_auto_control.utils.executor.action_executor import _normalize_ext return _normalize_ext(target) diff --git a/test/unit_test/headless/test_adaptive_timeout_batch.py b/test/unit_test/headless/test_adaptive_timeout_batch.py new file mode 100644 index 00000000..91949bb4 --- /dev/null +++ b/test/unit_test/headless/test_adaptive_timeout_batch.py @@ -0,0 +1,104 @@ +"""Headless tests for adaptive_timeout (pure timeout recommendation).""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.adaptive_timeout import ( + recommend_timeout, timeout_stats, +) + + +# --- recommend_timeout ---------------------------------------------------- + +def test_recommend_scales_percentile_by_factor(): + # p95 of 
1..10 ~ 9.55; * 1.0 factor, within [0.1, 100] -> ~9.55 + durations = list(range(1, 11)) + value = recommend_timeout(durations, percentile_q=95.0, factor=1.0, + min_s=0.1, max_s=100.0) + assert value == pytest.approx(9.55, abs=0.1) + + +def test_recommend_applies_factor(): + value = recommend_timeout([2.0, 2.0, 2.0], percentile_q=95.0, factor=2.0, + min_s=0.1, max_s=100.0) + assert value == pytest.approx(4.0) + + +def test_recommend_floors_to_min(): + value = recommend_timeout([0.01, 0.02], percentile_q=95.0, factor=1.0, + min_s=1.0, max_s=100.0) + assert value == pytest.approx(1.0) + + +def test_recommend_caps_to_max(): + value = recommend_timeout([100.0, 200.0], percentile_q=95.0, factor=2.0, + min_s=1.0, max_s=10.0) + assert value == pytest.approx(10.0) + + +def test_recommend_empty_uses_default_then_min(): + assert recommend_timeout([], default_s=7.0) == pytest.approx(7.0) + assert recommend_timeout([], min_s=3.0) == pytest.approx(3.0) + + +def test_recommend_ignores_none_samples(): + value = recommend_timeout([None, 2.0, None, 2.0], percentile_q=50.0, + factor=1.0, min_s=0.1, max_s=100.0) + assert value == pytest.approx(2.0) + + +# --- timeout_stats -------------------------------------------------------- + +def test_timeout_stats_exposes_percentiles_and_flags(): + stats = timeout_stats([1.0, 2.0, 3.0, 4.0], percentile_q=95.0, factor=1.0, + min_s=0.1, max_s=100.0) + assert stats["n"] == 4 + assert stats["p50"] == pytest.approx(2.5) + assert stats["floored"] is False + assert stats["capped"] is False + assert stats["recommended"] == pytest.approx(stats["p_high"]) + + +def test_timeout_stats_flags_capped(): + stats = timeout_stats([50.0, 60.0], percentile_q=95.0, factor=2.0, + min_s=1.0, max_s=10.0) + assert stats["capped"] is True + assert stats["recommended"] == pytest.approx(10.0) + + +def test_timeout_stats_empty(): + stats = timeout_stats([], default_s=5.0) + assert stats["n"] == 0 + assert stats["p50"] is None + assert stats["recommended"] == 
pytest.approx(5.0) + + +# --- wiring --------------------------------------------------------------- + +def test_executor_paths(): + from je_auto_control.utils.executor.action_executor import ( + _adaptive_timeout, _timeout_stats, + ) + out = _adaptive_timeout([2.0, 2.0, 2.0], 95.0, 2.0, 0.1, 100.0) + assert out["timeout_s"] == pytest.approx(4.0) + # accepts a JSON-list string (Script Builder text field) + out2 = _adaptive_timeout("[2.0, 2.0]", 50.0, 1.0, 0.1, 100.0) + assert out2["timeout_s"] == pytest.approx(2.0) + assert _timeout_stats([1.0, 2.0], 95.0, 1.0, 0.1, 100.0)["n"] == 2 + + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_adaptive_timeout", "AC_timeout_stats"} <= known + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry, + ) + names = {t.name for t in build_default_tool_registry()} + assert {"ac_adaptive_timeout", "ac_timeout_stats"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_adaptive_timeout", "AC_timeout_stats"} <= specs + + +def test_facade_exports(): + for name in ("recommend_timeout", "timeout_stats"): + assert hasattr(ac, name) and name in ac.__all__