5 روز پیش · 5e8844e2d7
--- a/backend/app/automation/nodes/research.py
+++ b/backend/app/automation/nodes/research.py
@@ -129,8 +129,11 @@ class AiWebResearchRunner:
 
				                     "result_count": self.result_count,
			
 
				                     "detail_max_pages": self.detail_max_pages,
			
 
				                     "click_attempts": self.params.get("click_attempts", 2),
			
 
				+                    "maximize_browser": self.params.get("maximize_browser", True),
			
 
				                     "page_load_wait_seconds": self.params.get("page_load_wait_seconds", 8),
			
 
				                     "action_wait_seconds": self.params.get("action_wait_seconds", 1),
			
 
				+                    "wait_jitter_min_seconds": self.params.get("wait_jitter_min_seconds", 0),
			
 
				+                    "wait_jitter_max_seconds": self.params.get("wait_jitter_max_seconds", 0),
			
 
				                     "close_browser": True,
			
 
				                     "include_debug_analyses": False,
			
 
				                 },
			
@@ -371,6 +374,12 @@ register_node(
 
				             "max_search_pages": field_def("number", "每轮搜索页屏", 2, minimum=1, maximum=10),
			
 
				             "result_count": field_def("number", "每轮研究结果数", 2, minimum=1, maximum=5),
			
 
				             "detail_max_pages": field_def("number", "每个详情页屏", 2, minimum=1, maximum=10),
			
 
				+            "click_attempts": field_def("number", "标题点击重试", 2, minimum=1, maximum=5),
			
 
				+            "maximize_browser": field_def("boolean", "打开后最大化浏览器", True),
			
 
				+            "page_load_wait_seconds": field_def("number", "页面加载等待秒数", 8, minimum=0, maximum=120),
			
 
				+            "action_wait_seconds": field_def("number", "操作等待秒数", 1, minimum=0, maximum=30),
			
 
				+            "wait_jitter_min_seconds": field_def("number", "等待抖动最小秒数", 0, minimum=0, maximum=30),
			
 
				+            "wait_jitter_max_seconds": field_def("number", "等待抖动最大秒数", 0, minimum=0, maximum=30),
			
 
				         },
			
 
				         "inputs": {
			
 
				             "objective": field_def("string", "研究目标"),
			
--- a/backend/app/automation/nodes/web_search.py
+++ b/backend/app/automation/nodes/web_search.py
@@ -1,12 +1,16 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				+import base64
			
 
				 import json
			
 
				+import random
			
 
				 import time
			
 
				+from io import BytesIO
			
 
				 from pathlib import Path
			
 
				 from typing import Any
			
 
				 from urllib.parse import quote_plus
			
 
				 
			
 
				 from fastapi import HTTPException
			
 
				+from PIL import Image
			
 
				 
			
 
				 from ... import ai_service, settings_service, windows_automation
			
 
				 from ..context import WorkflowContext
			
@@ -38,6 +42,9 @@ def _percent(value: Any) -> float | None:
 
				         return None
			
 
				     if 0 <= number <= 1:
			
 
				         number *= 100
			
 
				+    elif number > 100:
			
 
				+        # 部分小模型会丢失小数点，把 67.6 输出为 676；此时按千分比还原为百分比。
			
 
				+        number = number / 10
			
 
				     return max(0.0, min(100.0, number))
			
 
				 
			
 
				 
			
@@ -51,7 +58,10 @@ def _screen_point(x_percent: Any, y_percent: Any, width: Any, height: Any) -> tu
 
				         return None, None
			
 
				     if x is None or y is None or screen_width <= 0 or screen_height <= 0:
			
 
				         return None, None
			
 
				-    return round(screen_width * x / 100), round(screen_height * y / 100)
			
 
				+    # 模型可能返回 100%，直接换算会得到屏幕外坐标并触发 PyAutoGUI 角点保护。
			
 
				+    safe_x = max(1, min(screen_width - 2, round(screen_width * x / 100)))
			
 
				+    safe_y = max(1, min(screen_height - 2, round(screen_height * y / 100)))
			
 
				+    return safe_x, safe_y
			
 
				 
			
 
				 
			
 
				 def normalize_search_result(item: Any, scroll_page: int, width: Any, height: Any) -> dict[str, Any] | None:
			
@@ -83,6 +93,20 @@ def result_identity(item: dict[str, Any]) -> str:
 
				     return str(item.get("url") or item.get("title") or "").strip().lower()
			
 
				 
			
 
				 
			
 
				+def screenshot_difference(left: dict[str, Any], right: dict[str, Any]) -> float:
			
 
				+    """用低分辨率灰度图估算两张截图差异，返回 0 到 1 的平均像素差。"""
			
 
				+    try:
			
 
				+        left_image = Image.open(BytesIO(base64.b64decode(str(left["image_base64"])))).convert("L").resize((96, 54))
			
 
				+        right_image = Image.open(BytesIO(base64.b64decode(str(right["image_base64"])))).convert("L").resize((96, 54))
			
 
				+    except Exception:
			
 
				+        return 1.0
			
 
				+    left_pixels = list(left_image.getdata())
			
 
				+    right_pixels = list(right_image.getdata())
			
 
				+    if not left_pixels or len(left_pixels) != len(right_pixels):
			
 
				+        return 1.0
			
 
				+    return sum(abs(a - b) for a, b in zip(left_pixels, right_pixels)) / (255 * len(left_pixels))
			
 
				+
			
 
				+
			
 
				 class WebSearchRunner:
			
 
				     """使用真实浏览器、屏幕截图和多模态模型完成网页搜索研究。"""
			
 
				 
			
@@ -100,8 +124,20 @@ class WebSearchRunner:
 
				         self.result_count = _integer(params.get("result_count"), 3, 1, 10)
			
 
				         self.detail_max_pages = _integer(params.get("detail_max_pages"), 4, 1, 20)
			
 
				         self.click_attempts = _integer(params.get("click_attempts"), 2, 1, 5)
			
 
				+        self.maximize_browser = bool(params.get("maximize_browser", True))
			
 
				+        self.wait_jitter_min = _number(params.get("wait_jitter_min_seconds"), 0, 0, 30)
			
 
				+        self.wait_jitter_max = _number(params.get("wait_jitter_max_seconds"), 0, 0, 30)
			
 
				+        if self.wait_jitter_max < self.wait_jitter_min:
			
 
				+            self.wait_jitter_min, self.wait_jitter_max = self.wait_jitter_max, self.wait_jitter_min
			
 
				+        self.focus_change_threshold = _number(params.get("focus_change_threshold"), 0.12, 0, 1)
			
 
				+        self.scroll_change_threshold = _number(params.get("scroll_change_threshold"), 0.01, 0, 1)
			
 
				         self.analyses: list[dict[str, Any]] = []
			
 
				 
			
 
				+    def _sleep(self, seconds: float) -> None:
			
 
				+        """在固定等待上增加可配置随机抖动，默认不抖动。"""
			
 
				+        jitter = random.uniform(self.wait_jitter_min, self.wait_jitter_max)
			
 
				+        time.sleep(max(0.0, seconds) + jitter)
			
 
				+
			
 
				     def run(self) -> dict[str, Any]:
			
 
				         browser = str(self.params.get("browser") or "edge")
			
 
				         engine = str(self.params.get("search_engine") or "google").lower()
			
@@ -109,7 +145,10 @@ class WebSearchRunner:
 
				         search_url = template.format(query=quote_plus(self.query))
			
 
				         opened = windows_automation.open_url(search_url, browser=browser, new_window=True)
			
 
				         self.context.remember_pid(opened.get("pid"))
			
 
				-        time.sleep(self.page_wait)
			
 
				+        if self.maximize_browser:
			
 
				+            self._sleep(self.action_wait)
			
 
				+            opened["maximize"] = windows_automation.maximize_active_window()
			
 
				+        self._sleep(self.page_wait)
			
 
				 
			
 
				         try:
			
 
				             results = self._collect_results(engine)
			
@@ -138,7 +177,7 @@ class WebSearchRunner:
 
				             if bool(self.params.get("close_browser", True)):
			
 
				                 try:
			
 
				                     windows_automation.keyboard_action("hotkey", keys=["alt", "f4"])
			
 
				-                    time.sleep(self.action_wait)
			
 
				+                    self._sleep(self.action_wait)
			
 
				                 except Exception:
			
 
				                     # 清理浏览器失败不应覆盖已经得到的搜索结果或原始异常。
			
 
				                     pass
			
@@ -162,19 +201,35 @@ class WebSearchRunner:
 
				             raise HTTPException(status_code=502, detail="网页视觉模型返回值必须是 JSON 对象")
			
 
				         return parsed
			
 
				 
			
 
				-    def _text_json(self, prompt: str) -> dict[str, Any]:
			
 
				+    def _text_json(self, prompt: str, stage: str) -> dict[str, Any]:
			
 
				         result = ai_service.chat(
			
 
				             int(self.context.provider_id),
			
 
				             int(self.context.model_id),
			
 
				             prompt,
			
 
				             self.context.temperature,
			
 
				         )
			
 
				+        content = str(result.get("content") or "")
			
 
				+        extracted = ai_service.extract_json_text(content)
			
 
				         try:
			
 
				-            parsed = json.loads(ai_service.extract_json_text(result["content"]))
			
 
				+            parsed = json.loads(extracted)
			
 
				         except (json.JSONDecodeError, ValueError, TypeError) as exc:
			
 
				-            raise HTTPException(status_code=502, detail=f"网页搜索模型未返回有效 JSON: {exc}") from exc
			
 
				+            # 失败时保留阶段和原始片段，方便从异步任务详情直接定位是哪次模型输出坏了。
			
 
				+            raw_excerpt = extracted[:1500]
			
 
				+            raise HTTPException(
			
 
				+                status_code=502,
			
 
				+                detail={
			
 
				+                    "message": f"网页搜索模型未返回有效 JSON: {exc}",
			
 
				+                    "stage": stage,
			
 
				+                    "raw_excerpt": raw_excerpt,
			
 
				+                    "raw_length": len(extracted),
			
 
				+                    "content_excerpt": content[:1500],
			
 
				+                },
			
 
				+            ) from exc
			
 
				         if not isinstance(parsed, dict):
			
 
				-            raise HTTPException(status_code=502, detail="网页搜索模型返回值必须是 JSON 对象")
			
 
				+            raise HTTPException(
			
 
				+                status_code=502,
			
 
				+                detail={"message": "网页搜索模型返回值必须是 JSON 对象", "stage": stage},
			
 
				+            )
			
 
				         return parsed
			
 
				 
			
 
				     def _collect_results(self, engine: str) -> list[dict[str, Any]]:
			
@@ -220,7 +275,7 @@ class WebSearchRunner:
 
				             if bool(analysis.get("is_bottom")):
			
 
				                 break
			
 
				             windows_automation.keyboard_action("press", key="pagedown")
			
 
				-            time.sleep(self.action_wait)
			
 
				+            self._sleep(self.action_wait)
			
 
				         return results
			
 
				 
			
 
				     def _rank_results(self, results: list[dict[str, Any]]) -> list[dict[str, Any]]:
			
@@ -244,7 +299,7 @@ class WebSearchRunner:
 
				 
			
 
				 搜索结果：
			
 
				 {json.dumps(indexed, ensure_ascii=False, indent=2)}"""
			
 
				-        ranking = self._text_json(prompt)
			
 
				+        ranking = self._text_json(prompt, "rank_results")
			
 
				         self.analyses.append({"type": "ranking", **ranking})
			
 
				         ranked: list[dict[str, Any]] = []
			
 
				         used: set[int] = set()
			
@@ -274,6 +329,7 @@ class WebSearchRunner:
 
				                 self._restore_search_page_if_needed(classification)
			
 
				                 continue
			
 
				             visited_url = self._current_url()
			
 
				+            self._focus_page_content(f"detail_before_extract:{result.get('title') or ''}")
			
 
				             chunks = self._extract_detail(result)
			
 
				             cleaned = self._clean_detail(result, visited_url, chunks)
			
 
				             details.append({
			
@@ -285,15 +341,15 @@ class WebSearchRunner:
 
				                 "cleaned": cleaned,
			
 
				             })
			
 
				             windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
			
 
				-            time.sleep(self.page_wait)
			
 
				+            self._sleep(self.page_wait)
			
 
				         return details
			
 
				 
			
 
				     def _go_to_scroll_page(self, scroll_page: int) -> None:
			
 
				         windows_automation.keyboard_action("press", key="home")
			
 
				-        time.sleep(self.action_wait)
			
 
				+        self._sleep(self.action_wait)
			
 
				         for _ in range(max(0, scroll_page)):
			
 
				             windows_automation.keyboard_action("press", key="pagedown")
			
 
				-            time.sleep(self.action_wait)
			
 
				+            self._sleep(self.action_wait)
			
 
				 
			
 
				     def _open_result(self, result: dict[str, Any]) -> dict[str, Any]:
			
 
				         title = str(result.get("title") or "")
			
@@ -331,8 +387,18 @@ class WebSearchRunner:
 
				                     "notes": "模型未返回可用点击坐标",
			
 
				                 }
			
 
				                 continue
			
 
				-            windows_automation.mouse_action("click", x=int(x), y=int(y))
			
 
				-            time.sleep(self.page_wait)
			
 
				+            try:
			
 
				+                windows_automation.mouse_action("click", x=int(x), y=int(y))
			
 
				+            except HTTPException as exc:
			
 
				+                if isinstance(exc.detail, dict):
			
 
				+                    exc.detail["target_result"] = {
			
 
				+                        "title": title,
			
 
				+                        "scroll_page": scroll_page,
			
 
				+                        "x": int(x),
			
 
				+                        "y": int(y),
			
 
				+                    }
			
 
				+                raise
			
 
				+            self._sleep(self.page_wait)
			
 
				             screenshot = self._capture()
			
 
				             prompt = f"""请判断点击搜索结果后当前浏览器页面的类型。
			
 
				 预期标题：{title}
			
@@ -358,7 +424,7 @@ class WebSearchRunner:
 
				         if classification.get("is_search_results_page"):
			
 
				             return
			
 
				         windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
			
 
				-        time.sleep(self.page_wait)
			
 
				+        self._sleep(self.page_wait)
			
 
				 
			
 
				     def _current_url(self) -> str:
			
 
				         try:
			
@@ -366,14 +432,76 @@ class WebSearchRunner:
 
				         except ImportError as exc:
			
 
				             raise HTTPException(status_code=500, detail="pyperclip is not installed") from exc
			
 
				         windows_automation.keyboard_action("hotkey", keys=["alt", "d"])
			
 
				-        time.sleep(self.action_wait)
			
 
				+        self._sleep(self.action_wait)
			
 
				         windows_automation.keyboard_action("hotkey", keys=["ctrl", "c"])
			
 
				-        time.sleep(self.action_wait)
			
 
				+        self._sleep(self.action_wait)
			
 
				         url = str(pyperclip.paste() or "").strip()
			
 
				         windows_automation.keyboard_action("press", key="escape")
			
 
				-        time.sleep(self.action_wait)
			
 
				+        self._sleep(self.action_wait)
			
 
				         return url
			
 
				 
			
 
    def _focus_page_content(self, reason: str) -> dict[str, Any]:
        """Click inside the active window's content area to restore page focus.

        A screenshot is captured before and after the click. If the two differ
        by more than ``self.focus_change_threshold`` the click is treated as a
        mis-click and ``Alt+Left`` is sent to roll it back.

        Args:
            reason: Free-form label stored in the analysis record.

        Returns:
            The record appended to ``self.analyses`` (click position, measured
            difference, window bounds and whether a rollback happened). When
            the active window bounds cannot be read, a short
            ``{"focused": False, "error": ...}`` mapping is returned instead.
        """

        def _inner_offset(size: int, ratio: float, lead_margin: int, trail_margin: int) -> int:
            # Aim at ``ratio`` of the window while staying clear of both margins.
            offset = max(lead_margin, min(size - trail_margin, round(size * ratio)))
            # A window smaller than the margins would otherwise push the click
            # past its far edge; keep the point on the window's last pixel.
            return min(offset, size - 1)

        before = self._capture()
        try:
            bounds = windows_automation.active_window_bounds()
        except HTTPException as exc:
            self.analyses.append({"type": "focus_page_content", "reason": reason, "focused": False, "error": exc.detail})
            return {"focused": False, "error": exc.detail}

        width = max(1, int(bounds.get("width") or 1))
        height = max(1, int(bounds.get("height") or 1))
        left = int(bounds.get("left") or 0)
        top = int(bounds.get("top") or 0)
        # Stay away from the top toolbar, the bottom edge and the right-hand
        # scrollbar to reduce the chance of hitting a link or browser control.
        x = left + _inner_offset(width, 0.55, 80, 120)
        y = top + _inner_offset(height, 0.48, 140, 160)
        windows_automation.mouse_action("click", x=x, y=y)
        self._sleep(self.action_wait)
        after = self._capture()
        diff = screenshot_difference(before, after)
        focused = diff <= self.focus_change_threshold
        if not focused:
            # NOTE(review): a large difference could presumably also come from
            # animated page content, in which case this navigates back
            # unnecessarily; confirm the threshold against real pages.
            windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
            self._sleep(self.page_wait)
        result = {
            "type": "focus_page_content",
            "reason": reason,
            "focused": focused,
            "x": x,
            "y": y,
            "screenshot_difference": diff,
            "window": bounds,
            "rolled_back": not focused,
        }
        self.analyses.append(result)
        return result
			
 
				+
			
 
    def _scroll_detail_page(self, before: dict[str, Any], title: str, detail_page: int) -> None:
        """Advance a detail page by one screen, preferring PageDown over the wheel.

        The page content is focused first, then PageDown is pressed. If the
        screenshot taken afterwards differs from ``before`` by less than
        ``self.scroll_change_threshold``, the mouse wheel is used as a
        fallback. The outcome is appended to ``self.analyses``.

        Args:
            before: Screenshot captured before scrolling, used as the baseline.
            title: Result title, recorded in the analysis entry.
            detail_page: Index of the current detail screen, recorded as well.
        """
        self._focus_page_content(f"detail_scroll:{title}:{detail_page}")

        windows_automation.keyboard_action("press", key="pagedown")
        self._sleep(self.action_wait)
        pagedown_change = screenshot_difference(before, self._capture())

        wheel_change: float | None = None
        needs_wheel = pagedown_change < self.scroll_change_threshold
        if needs_wheel:
            # PageDown barely changed the picture; try the mouse wheel instead.
            windows_automation.mouse_action("scroll", amount=-6)
            self._sleep(self.action_wait)
            wheel_change = screenshot_difference(before, self._capture())

        record: dict[str, Any] = {
            "type": "detail_scroll",
            "title": title,
            "detail_page": detail_page,
            "pagedown_difference": pagedown_change,
            "used_wheel_fallback": needs_wheel,
            "wheel_difference": wheel_change,
        }
        self.analyses.append(record)
			
 
				+
			
 
				     def _extract_detail(self, result: dict[str, Any]) -> list[dict[str, Any]]:
			
 
				         chunks: list[dict[str, Any]] = []
			
 
				         title = str(result.get("title") or "")
			
@@ -397,8 +525,7 @@ class WebSearchRunner:
 
				             self.analyses.append({"type": "detail_extraction", "title": title, **extraction})
			
 
				             if extraction.get("is_bottom") or extraction.get("page_state") in {"blocked", "captcha"}:
			
 
				                 break
			
 
				-            windows_automation.keyboard_action("press", key="pagedown")
			
 
				-            time.sleep(self.action_wait)
			
 
				+            self._scroll_detail_page(screenshot, title, detail_page)
			
 
				         return chunks
			
 
				 
			
 
				     def _clean_detail(self, result: dict[str, Any], visited_url: str, chunks: list[dict[str, Any]]) -> dict[str, Any]:
			
@@ -408,7 +535,7 @@ class WebSearchRunner:
 
				 提取片段：{json.dumps(chunks, ensure_ascii=False)}
			
 
				 严格只输出 JSON：
			
 
				 {{"clean_title": string, "clean_text": string, "key_points": [string], "notes": string}}"""
			
 
				-        cleaned = self._text_json(prompt)
			
 
				+        cleaned = self._text_json(prompt, "clean_detail")
			
 
				         self.analyses.append({"type": "clean_detail", "title": result.get("title"), **cleaned})
			
 
				         return cleaned
			
 
				 
			
@@ -421,7 +548,7 @@ class WebSearchRunner:
 
				 详情：{json.dumps(details, ensure_ascii=False)}
			
 
				 严格只输出 JSON：
			
 
				 {{"summary": string, "key_points": [string], "conclusion": string, "notes": string}}"""
			
 
				-        summary = self._text_json(prompt)
			
 
				+        summary = self._text_json(prompt, "summarize")
			
 
				         self.analyses.append({"type": "final_summary", **summary})
			
 
				         return summary
			
 
				 
			
@@ -464,8 +591,11 @@ register_node(
 
				             "result_count": field_def("number", "研究结果数", 3, minimum=1, maximum=10),
			
 
				             "detail_max_pages": field_def("number", "每页最多滚动", 4, minimum=1, maximum=20),
			
 
				             "click_attempts": field_def("number", "标题点击重试", 2, minimum=1, maximum=5),
			
 
				+            "maximize_browser": field_def("boolean", "打开后最大化浏览器", True),
			
 
				             "page_load_wait_seconds": field_def("number", "页面加载等待秒数", 8, minimum=0, maximum=120),
			
 
				             "action_wait_seconds": field_def("number", "操作等待秒数", 1, minimum=0, maximum=30),
			
 
				+            "wait_jitter_min_seconds": field_def("number", "等待抖动最小秒数", 0, minimum=0, maximum=30),
			
 
				+            "wait_jitter_max_seconds": field_def("number", "等待抖动最大秒数", 0, minimum=0, maximum=30),
			
 
				             "close_browser": field_def("boolean", "完成后关闭浏览器", True),
			
 
				             "include_debug_analyses": field_def("boolean", "返回调试分析", False),
			
 
				         },
			
--- a/backend/app/windows_automation.py
+++ b/backend/app/windows_automation.py
@@ -68,6 +68,27 @@ def load_pyautogui():
 
				     return pyautogui
			
 
				 
			
 
				 
			
 
				+def raise_if_pyautogui_failsafe(pyautogui: Any, exc: Exception, action: str, extra: dict[str, Any] | None = None) -> None:
			
 
				+    """把 PyAutoGUI 安全保护异常转换为带坐标上下文的接口错误。"""
			
 
				+    if exc.__class__.__name__ != "FailSafeException":
			
 
				+        raise exc
			
 
				+    position: dict[str, Any] = {"x": None, "y": None}
			
 
				+    try:
			
 
				+        current = pyautogui.position()
			
 
				+        position = {"x": current.x, "y": current.y}
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    raise HTTPException(
			
 
				+        status_code=500,
			
 
				+        detail={
			
 
				+            "message": str(exc),
			
 
				+            "action": action,
			
 
				+            "mouse_position": position,
			
 
				+            "extra": extra or {},
			
 
				+        },
			
 
				+    ) from exc
			
 
				+
			
 
				+
			
 
				 def normalize_key_name(key: str) -> str:
			
 
				     """把浏览器或用户输入的按键名转换为 pyautogui 兼容名称。"""
			
 
				     normalized = str(key).strip().lower()
			
@@ -230,7 +251,11 @@ def safe_proc_name(proc: psutil.Process) -> str | None:
 
				 def take_screenshot(save_path: str | None = None, include_base64: bool = True) -> dict[str, Any]:
			
 
				     """截取当前屏幕；可保存为 PNG 文件，也可返回 base64 供接口直接预览。"""
			
 
				     pyautogui = load_pyautogui()
			
 
				-    image = pyautogui.screenshot()
			
 
				+    try:
			
 
				+        image = pyautogui.screenshot()
			
 
				+    except Exception as exc:
			
 
				+        raise_if_pyautogui_failsafe(pyautogui, exc, "screenshot")
			
 
				+        raise
			
 
				     width, height = image.size
			
 
				 
			
 
				     result: dict[str, Any] = {"action": "screenshot", "width": width, "height": height}
			
@@ -250,6 +275,35 @@ def take_screenshot(save_path: str | None = None, include_base64: bool = True) -
 
				     return result
			
 
				 
			
 
				 
			
 
				+def active_window_bounds() -> dict[str, Any]:
			
 
				+    """返回当前活动窗口的位置和尺寸，用于把点击限制在浏览器窗口内部。"""
			
 
				+    pyautogui = load_pyautogui()
			
 
				+    window = pyautogui.getActiveWindow()
			
 
				+    if window is None:
			
 
				+        raise HTTPException(status_code=500, detail="No active window found")
			
 
				+    return {
			
 
				+        "left": int(window.left),
			
 
				+        "top": int(window.top),
			
 
				+        "width": int(window.width),
			
 
				+        "height": int(window.height),
			
 
				+        "title": str(getattr(window, "title", "") or ""),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def maximize_active_window() -> dict[str, Any]:
			
 
				+    """最大化当前活动窗口；失败时退回 Windows 快捷键。"""
			
 
				+    pyautogui = load_pyautogui()
			
 
				+    window = pyautogui.getActiveWindow()
			
 
				+    if window is not None:
			
 
				+        try:
			
 
				+            window.maximize()
			
 
				+            return {"action": "maximize_window", "method": "window.maximize", "title": str(window.title or "")}
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+    pyautogui.hotkey("win", "up")
			
 
				+    return {"action": "maximize_window", "method": "hotkey", "keys": ["win", "up"]}
			
 
				+
			
 
				+
			
 
				 def mouse_action(
			
 
				     action: MouseAction,
			
 
				     x: int | None = None,
			
@@ -264,22 +318,31 @@ def mouse_action(
 
				     if action in {"move_to", "drag_to"} and (x is None or y is None):
			
 
				         raise HTTPException(status_code=400, detail="x and y are required for this mouse action")
			
 
				 
			
 
				-    if action == "move_to":
			
 
				-        pyautogui.moveTo(x, y, duration=duration)
			
 
				-    elif action == "move_rel":
			
 
				-        pyautogui.moveRel(x or 0, y or 0, duration=duration)
			
 
				-    elif action == "click":
			
 
				-        pyautogui.click(x=x, y=y, clicks=clicks, button=button)
			
 
				-    elif action == "double_click":
			
 
				-        pyautogui.doubleClick(x=x, y=y, button=button)
			
 
				-    elif action == "right_click":
			
 
				-        pyautogui.rightClick(x=x, y=y)
			
 
				-    elif action == "drag_to":
			
 
				-        pyautogui.dragTo(x, y, duration=duration, button=button)
			
 
				-    elif action == "scroll":
			
 
				-        pyautogui.scroll(amount)
			
 
				-    else:
			
 
				-        raise HTTPException(status_code=400, detail="Unsupported mouse action")
			
 
				+    try:
			
 
				+        if action == "move_to":
			
 
				+            pyautogui.moveTo(x, y, duration=duration)
			
 
				+        elif action == "move_rel":
			
 
				+            pyautogui.moveRel(x or 0, y or 0, duration=duration)
			
 
				+        elif action == "click":
			
 
				+            pyautogui.click(x=x, y=y, clicks=clicks, button=button)
			
 
				+        elif action == "double_click":
			
 
				+            pyautogui.doubleClick(x=x, y=y, button=button)
			
 
				+        elif action == "right_click":
			
 
				+            pyautogui.rightClick(x=x, y=y)
			
 
				+        elif action == "drag_to":
			
 
				+            pyautogui.dragTo(x, y, duration=duration, button=button)
			
 
				+        elif action == "scroll":
			
 
				+            pyautogui.scroll(amount)
			
 
				+        else:
			
 
				+            raise HTTPException(status_code=400, detail="Unsupported mouse action")
			
 
				+    except Exception as exc:
			
 
				+        raise_if_pyautogui_failsafe(
			
 
				+            pyautogui,
			
 
				+            exc,
			
 
				+            f"mouse_{action}",
			
 
				+            {"x": x, "y": y, "button": button, "clicks": clicks, "amount": amount},
			
 
				+        )
			
 
				+        raise
			
 
				 
			
 
				     position = pyautogui.position()
			
 
				     return {"action": f"mouse_{action}", "x": position.x, "y": position.y}
			
@@ -296,26 +359,35 @@ def keyboard_action(
 
				     pyautogui = load_pyautogui()
			
 
				     normalized_key = normalize_key_name(key) if key else None
			
 
				     normalized_keys = normalize_key_list(keys)
			
 
				-    if action == "press":
			
 
				-        if not normalized_key:
			
 
				-            raise HTTPException(status_code=400, detail="key is required")
			
 
				-        pyautogui.press(normalized_key, interval=interval)
			
 
				-    elif action == "hotkey":
			
 
				-        if not normalized_keys:
			
 
				-            raise HTTPException(status_code=400, detail="keys are required")
			
 
				-        pyautogui.hotkey(*normalized_keys, interval=interval)
			
 
				-    elif action == "write":
			
 
				-        if text is None:
			
 
				-            raise HTTPException(status_code=400, detail="text is required")
			
 
				-        pyautogui.write(text, interval=interval)
			
 
				-    elif action == "key_down":
			
 
				-        if not normalized_key:
			
 
				-            raise HTTPException(status_code=400, detail="key is required")
			
 
				-        pyautogui.keyDown(normalized_key)
			
 
				-    elif action == "key_up":
			
 
				-        if not normalized_key:
			
 
				-            raise HTTPException(status_code=400, detail="key is required")
			
 
				-        pyautogui.keyUp(normalized_key)
			
 
				-    else:
			
 
				-        raise HTTPException(status_code=400, detail="Unsupported keyboard action")
			
 
				+    try:
			
 
				+        if action == "press":
			
 
				+            if not normalized_key:
			
 
				+                raise HTTPException(status_code=400, detail="key is required")
			
 
				+            pyautogui.press(normalized_key, interval=interval)
			
 
				+        elif action == "hotkey":
			
 
				+            if not normalized_keys:
			
 
				+                raise HTTPException(status_code=400, detail="keys are required")
			
 
				+            pyautogui.hotkey(*normalized_keys, interval=interval)
			
 
				+        elif action == "write":
			
 
				+            if text is None:
			
 
				+                raise HTTPException(status_code=400, detail="text is required")
			
 
				+            pyautogui.write(text, interval=interval)
			
 
				+        elif action == "key_down":
			
 
				+            if not normalized_key:
			
 
				+                raise HTTPException(status_code=400, detail="key is required")
			
 
				+            pyautogui.keyDown(normalized_key)
			
 
				+        elif action == "key_up":
			
 
				+            if not normalized_key:
			
 
				+                raise HTTPException(status_code=400, detail="key is required")
			
 
				+            pyautogui.keyUp(normalized_key)
			
 
				+        else:
			
 
				+            raise HTTPException(status_code=400, detail="Unsupported keyboard action")
			
 
				+    except Exception as exc:
			
 
				+        raise_if_pyautogui_failsafe(
			
 
				+            pyautogui,
			
 
				+            exc,
			
 
				+            f"keyboard_{action}",
			
 
				+            {"key": normalized_key, "keys": normalized_keys},
			
 
				+        )
			
 
				+        raise
			
 
				     return {"action": f"keyboard_{action}", "key": normalized_key, "keys": normalized_keys}
			
--- a/workflows/ai-web-research.workflow.json
+++ b/workflows/ai-web-research.workflow.json
@@ -58,7 +58,13 @@
 
				         "browser": "edge",
			
 
				         "max_search_pages": 2,
			
 
				         "result_count": 2,
			
 
				-        "detail_max_pages": 2
			
 
				+        "detail_max_pages": 2,
			
 
				+        "click_attempts": 2,
			
 
				+        "maximize_browser": true,
			
 
				+        "page_load_wait_seconds": 8,
			
 
				+        "action_wait_seconds": 1,
			
 
				+        "wait_jitter_min_seconds": 0,
			
 
				+        "wait_jitter_max_seconds": 0
			
 
				       },
			
 
				       "inputs": {
			
 
				         "objective": {"source": "variable", "name": "objective"},