|
|
@@ -1,12 +1,16 @@
|
|
|
from __future__ import annotations
|
|
|
|
|
|
+import base64
|
|
|
import json
|
|
|
+import random
|
|
|
import time
|
|
|
+from io import BytesIO
|
|
|
from pathlib import Path
|
|
|
from typing import Any
|
|
|
from urllib.parse import quote_plus
|
|
|
|
|
|
from fastapi import HTTPException
|
|
|
+from PIL import Image
|
|
|
|
|
|
from ... import ai_service, settings_service, windows_automation
|
|
|
from ..context import WorkflowContext
|
|
|
@@ -38,6 +42,9 @@ def _percent(value: Any) -> float | None:
|
|
|
return None
|
|
|
if 0 <= number <= 1:
|
|
|
number *= 100
|
|
|
+ elif number > 100:
|
|
|
+ # 部分小模型会丢失小数点,把 67.6 输出为 676;此时按千分比还原为百分比。
|
|
|
+ number = number / 10
|
|
|
return max(0.0, min(100.0, number))
|
|
|
|
|
|
|
|
|
@@ -51,7 +58,10 @@ def _screen_point(x_percent: Any, y_percent: Any, width: Any, height: Any) -> tu
|
|
|
return None, None
|
|
|
if x is None or y is None or screen_width <= 0 or screen_height <= 0:
|
|
|
return None, None
|
|
|
- return round(screen_width * x / 100), round(screen_height * y / 100)
|
|
|
+ # 模型可能返回 100%,直接换算会得到屏幕外坐标并触发 PyAutoGUI 角点保护。
|
|
|
+ safe_x = max(1, min(screen_width - 2, round(screen_width * x / 100)))
|
|
|
+ safe_y = max(1, min(screen_height - 2, round(screen_height * y / 100)))
|
|
|
+ return safe_x, safe_y
|
|
|
|
|
|
|
|
|
def normalize_search_result(item: Any, scroll_page: int, width: Any, height: Any) -> dict[str, Any] | None:
|
|
|
@@ -83,6 +93,20 @@ def result_identity(item: dict[str, Any]) -> str:
|
|
|
return str(item.get("url") or item.get("title") or "").strip().lower()
|
|
|
|
|
|
|
|
|
def _gray_thumbnail_bytes(screenshot: dict[str, Any]) -> bytes:
    """Decode a screenshot payload into the raw bytes of a 96x54 grayscale thumbnail.

    The payload is read from ``screenshot["image_base64"]``. In mode ``"L"``
    every pixel is a single byte, so the returned ``bytes`` object holds one
    0-255 intensity value per pixel.
    """
    encoded = str(screenshot["image_base64"])
    with Image.open(BytesIO(base64.b64decode(encoded))) as image:
        return image.convert("L").resize((96, 54)).tobytes()


def screenshot_difference(left: dict[str, Any], right: dict[str, Any]) -> float:
    """Estimate how different two screenshots are using low-resolution grayscale thumbnails.

    Args:
        left: Screenshot mapping that carries the image under ``"image_base64"``.
        right: Screenshot mapping with the same layout as ``left``.

    Returns:
        The mean absolute per-pixel difference scaled to the range 0..1.
        ``1.0`` is returned when either image cannot be decoded or the two
        thumbnails do not contain the same number of pixels.
    """
    try:
        left_pixels = _gray_thumbnail_bytes(left)
        right_pixels = _gray_thumbnail_bytes(right)
    except Exception:
        # Best-effort comparison: any failure to read an image is reported as
        # "completely different" rather than propagated to the caller.
        return 1.0
    if not left_pixels or len(left_pixels) != len(right_pixels):
        return 1.0
    # Iterating over bytes yields ints, so no intermediate pixel lists are needed.
    total = sum(abs(a - b) for a, b in zip(left_pixels, right_pixels))
    return total / (255 * len(left_pixels))
|
|
|
+
|
|
|
+
|
|
|
class WebSearchRunner:
|
|
|
"""使用真实浏览器、屏幕截图和多模态模型完成网页搜索研究。"""
|
|
|
|
|
|
@@ -100,8 +124,20 @@ class WebSearchRunner:
|
|
|
self.result_count = _integer(params.get("result_count"), 3, 1, 10)
|
|
|
self.detail_max_pages = _integer(params.get("detail_max_pages"), 4, 1, 20)
|
|
|
self.click_attempts = _integer(params.get("click_attempts"), 2, 1, 5)
|
|
|
+ self.maximize_browser = bool(params.get("maximize_browser", True))
|
|
|
+ self.wait_jitter_min = _number(params.get("wait_jitter_min_seconds"), 0, 0, 30)
|
|
|
+ self.wait_jitter_max = _number(params.get("wait_jitter_max_seconds"), 0, 0, 30)
|
|
|
+ if self.wait_jitter_max < self.wait_jitter_min:
|
|
|
+ self.wait_jitter_min, self.wait_jitter_max = self.wait_jitter_max, self.wait_jitter_min
|
|
|
+ self.focus_change_threshold = _number(params.get("focus_change_threshold"), 0.12, 0, 1)
|
|
|
+ self.scroll_change_threshold = _number(params.get("scroll_change_threshold"), 0.01, 0, 1)
|
|
|
self.analyses: list[dict[str, Any]] = []
|
|
|
|
|
|
+ def _sleep(self, seconds: float) -> None:
|
|
|
+ """在固定等待上增加可配置随机抖动,默认不抖动。"""
|
|
|
+ jitter = random.uniform(self.wait_jitter_min, self.wait_jitter_max)
|
|
|
+ time.sleep(max(0.0, seconds) + jitter)
|
|
|
+
|
|
|
def run(self) -> dict[str, Any]:
|
|
|
browser = str(self.params.get("browser") or "edge")
|
|
|
engine = str(self.params.get("search_engine") or "google").lower()
|
|
|
@@ -109,7 +145,10 @@ class WebSearchRunner:
|
|
|
search_url = template.format(query=quote_plus(self.query))
|
|
|
opened = windows_automation.open_url(search_url, browser=browser, new_window=True)
|
|
|
self.context.remember_pid(opened.get("pid"))
|
|
|
- time.sleep(self.page_wait)
|
|
|
+ if self.maximize_browser:
|
|
|
+ self._sleep(self.action_wait)
|
|
|
+ opened["maximize"] = windows_automation.maximize_active_window()
|
|
|
+ self._sleep(self.page_wait)
|
|
|
|
|
|
try:
|
|
|
results = self._collect_results(engine)
|
|
|
@@ -138,7 +177,7 @@ class WebSearchRunner:
|
|
|
if bool(self.params.get("close_browser", True)):
|
|
|
try:
|
|
|
windows_automation.keyboard_action("hotkey", keys=["alt", "f4"])
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._sleep(self.action_wait)
|
|
|
except Exception:
|
|
|
# 清理浏览器失败不应覆盖已经得到的搜索结果或原始异常。
|
|
|
pass
|
|
|
@@ -162,19 +201,35 @@ class WebSearchRunner:
|
|
|
raise HTTPException(status_code=502, detail="网页视觉模型返回值必须是 JSON 对象")
|
|
|
return parsed
|
|
|
|
|
|
def _text_json(self, prompt: str, stage: str) -> dict[str, Any]:
    """Send ``prompt`` to the text model and return its reply parsed as a JSON object.

    Args:
        prompt: Prompt text passed to ``ai_service.chat``.
        stage: Label of the calling workflow step; it is echoed in error
            details so a failed task can be traced to the model call that
            produced the bad output.

    Returns:
        The decoded JSON object.

    Raises:
        HTTPException: Status 502 when the reply cannot be extracted or
            decoded as JSON, or when the decoded value is not a JSON object.
    """
    result = ai_service.chat(
        int(self.context.provider_id),
        int(self.context.model_id),
        prompt,
        self.context.temperature,
    )
    content = str(result.get("content") or "")
    extracted: Any = ""
    try:
        # Extraction stays inside the try block so that a failure there is
        # reported as the same structured 502 as a JSON decoding failure.
        extracted = ai_service.extract_json_text(content)
        parsed = json.loads(extracted)
    except (json.JSONDecodeError, ValueError, TypeError) as exc:
        # Keep the stage and raw excerpts so the failing model output can be
        # located directly from the asynchronous task details.
        # The extractor may have returned a non-string; normalise before slicing.
        raw_text = extracted if isinstance(extracted, str) else str(extracted or "")
        raise HTTPException(
            status_code=502,
            detail={
                "message": f"网页搜索模型未返回有效 JSON: {exc}",
                "stage": stage,
                "raw_excerpt": raw_text[:1500],
                "raw_length": len(raw_text),
                "content_excerpt": content[:1500],
            },
        ) from exc
    if not isinstance(parsed, dict):
        raise HTTPException(
            status_code=502,
            detail={"message": "网页搜索模型返回值必须是 JSON 对象", "stage": stage},
        )
    return parsed
|
|
|
|
|
|
def _collect_results(self, engine: str) -> list[dict[str, Any]]:
|
|
|
@@ -220,7 +275,7 @@ class WebSearchRunner:
|
|
|
if bool(analysis.get("is_bottom")):
|
|
|
break
|
|
|
windows_automation.keyboard_action("press", key="pagedown")
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._sleep(self.action_wait)
|
|
|
return results
|
|
|
|
|
|
def _rank_results(self, results: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
|
@@ -244,7 +299,7 @@ class WebSearchRunner:
|
|
|
|
|
|
搜索结果:
|
|
|
{json.dumps(indexed, ensure_ascii=False, indent=2)}"""
|
|
|
- ranking = self._text_json(prompt)
|
|
|
+ ranking = self._text_json(prompt, "rank_results")
|
|
|
self.analyses.append({"type": "ranking", **ranking})
|
|
|
ranked: list[dict[str, Any]] = []
|
|
|
used: set[int] = set()
|
|
|
@@ -274,6 +329,7 @@ class WebSearchRunner:
|
|
|
self._restore_search_page_if_needed(classification)
|
|
|
continue
|
|
|
visited_url = self._current_url()
|
|
|
+ self._focus_page_content(f"detail_before_extract:{result.get('title') or ''}")
|
|
|
chunks = self._extract_detail(result)
|
|
|
cleaned = self._clean_detail(result, visited_url, chunks)
|
|
|
details.append({
|
|
|
@@ -285,15 +341,15 @@ class WebSearchRunner:
|
|
|
"cleaned": cleaned,
|
|
|
})
|
|
|
windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
|
|
|
- time.sleep(self.page_wait)
|
|
|
+ self._sleep(self.page_wait)
|
|
|
return details
|
|
|
|
|
|
def _go_to_scroll_page(self, scroll_page: int) -> None:
    """Jump to the top of the page, then press PageDown ``scroll_page`` times.

    Each key press is followed by a wait of ``self.action_wait``. A negative
    ``scroll_page`` results in no PageDown presses.
    """
    key_sequence = ["home"] + ["pagedown"] * max(0, scroll_page)
    for key_name in key_sequence:
        windows_automation.keyboard_action("press", key=key_name)
        self._sleep(self.action_wait)
|
|
|
|
|
|
def _open_result(self, result: dict[str, Any]) -> dict[str, Any]:
|
|
|
title = str(result.get("title") or "")
|
|
|
@@ -331,8 +387,18 @@ class WebSearchRunner:
|
|
|
"notes": "模型未返回可用点击坐标",
|
|
|
}
|
|
|
continue
|
|
|
- windows_automation.mouse_action("click", x=int(x), y=int(y))
|
|
|
- time.sleep(self.page_wait)
|
|
|
+ try:
|
|
|
+ windows_automation.mouse_action("click", x=int(x), y=int(y))
|
|
|
+ except HTTPException as exc:
|
|
|
+ if isinstance(exc.detail, dict):
|
|
|
+ exc.detail["target_result"] = {
|
|
|
+ "title": title,
|
|
|
+ "scroll_page": scroll_page,
|
|
|
+ "x": int(x),
|
|
|
+ "y": int(y),
|
|
|
+ }
|
|
|
+ raise
|
|
|
+ self._sleep(self.page_wait)
|
|
|
screenshot = self._capture()
|
|
|
prompt = f"""请判断点击搜索结果后当前浏览器页面的类型。
|
|
|
预期标题:{title}
|
|
|
@@ -358,7 +424,7 @@ class WebSearchRunner:
|
|
|
if classification.get("is_search_results_page"):
|
|
|
return
|
|
|
windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
|
|
|
- time.sleep(self.page_wait)
|
|
|
+ self._sleep(self.page_wait)
|
|
|
|
|
|
def _current_url(self) -> str:
|
|
|
try:
|
|
|
@@ -366,14 +432,76 @@ class WebSearchRunner:
|
|
|
except ImportError as exc:
|
|
|
raise HTTPException(status_code=500, detail="pyperclip is not installed") from exc
|
|
|
windows_automation.keyboard_action("hotkey", keys=["alt", "d"])
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._sleep(self.action_wait)
|
|
|
windows_automation.keyboard_action("hotkey", keys=["ctrl", "c"])
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._sleep(self.action_wait)
|
|
|
url = str(pyperclip.paste() or "").strip()
|
|
|
windows_automation.keyboard_action("press", key="escape")
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._sleep(self.action_wait)
|
|
|
return url
|
|
|
|
|
|
def _focus_page_content(self, reason: str) -> dict[str, Any]:
    """Click inside the active window's content area to restore page focus.

    A screenshot is captured before and after the click. When the two differ
    by more than ``self.focus_change_threshold`` the click is treated as
    having changed the page, and Alt+Left is sent to go back.

    Args:
        reason: Free-form label recorded with the outcome.

    Returns:
        A record of the attempt, which is also appended to ``self.analyses``.
        If the window bounds cannot be read, the returned dict contains only
        ``focused`` (False) and ``error``.
    """
    baseline = self._capture()
    try:
        bounds = windows_automation.active_window_bounds()
    except HTTPException as exc:
        failure = {"focused": False, "error": exc.detail}
        self.analyses.append({"type": "focus_page_content", "reason": reason, **failure})
        return failure

    win_width = max(1, int(bounds.get("width") or 1))
    win_height = max(1, int(bounds.get("height") or 1))
    origin_x = int(bounds.get("left") or 0)
    origin_y = int(bounds.get("top") or 0)

    # Stay away from the top toolbar, the bottom edge and the right-hand
    # scrollbar to lower the chance of hitting a link or a browser control.
    offset_x = min(win_width - 120, round(win_width * 0.55))
    offset_x = max(80, offset_x)
    offset_y = min(win_height - 160, round(win_height * 0.48))
    offset_y = max(140, offset_y)
    x = origin_x + offset_x
    y = origin_y + offset_y

    windows_automation.mouse_action("click", x=x, y=y)
    self._sleep(self.action_wait)
    diff = screenshot_difference(baseline, self._capture())
    focused = diff <= self.focus_change_threshold
    if not focused:
        # The screen changed too much: assume the click navigated and go back.
        windows_automation.keyboard_action("hotkey", keys=["alt", "left"])
        self._sleep(self.page_wait)

    outcome = {
        "type": "focus_page_content",
        "reason": reason,
        "focused": focused,
        "x": x,
        "y": y,
        "screenshot_difference": diff,
        "window": bounds,
        "rolled_back": not focused,
    }
    self.analyses.append(outcome)
    return outcome
|
|
|
+
|
|
|
def _scroll_detail_page(self, before: dict[str, Any], title: str, detail_page: int) -> None:
    """Advance a detail page with PageDown, falling back to the mouse wheel.

    After focusing the page content and pressing PageDown, a new screenshot
    is compared with ``before``. If the difference is below
    ``self.scroll_change_threshold`` the mouse wheel is scrolled instead and
    the comparison is repeated. The measurements are appended to
    ``self.analyses``.

    Args:
        before: Screenshot taken before scrolling, used as the comparison baseline.
        title: Title recorded with the measurements.
        detail_page: Page index recorded with the measurements.
    """
    self._focus_page_content(f"detail_scroll:{title}:{detail_page}")
    windows_automation.keyboard_action("press", key="pagedown")
    self._sleep(self.action_wait)
    key_diff = screenshot_difference(before, self._capture())

    record: dict[str, Any] = {
        "type": "detail_scroll",
        "title": title,
        "detail_page": detail_page,
        "pagedown_difference": key_diff,
        "used_wheel_fallback": key_diff < self.scroll_change_threshold,
        "wheel_difference": None,
    }
    if record["used_wheel_fallback"]:
        # PageDown barely changed the screen; try the mouse wheel instead.
        windows_automation.mouse_action("scroll", amount=-6)
        self._sleep(self.action_wait)
        record["wheel_difference"] = screenshot_difference(before, self._capture())
    self.analyses.append(record)
|
|
|
+
|
|
|
def _extract_detail(self, result: dict[str, Any]) -> list[dict[str, Any]]:
|
|
|
chunks: list[dict[str, Any]] = []
|
|
|
title = str(result.get("title") or "")
|
|
|
@@ -397,8 +525,7 @@ class WebSearchRunner:
|
|
|
self.analyses.append({"type": "detail_extraction", "title": title, **extraction})
|
|
|
if extraction.get("is_bottom") or extraction.get("page_state") in {"blocked", "captcha"}:
|
|
|
break
|
|
|
- windows_automation.keyboard_action("press", key="pagedown")
|
|
|
- time.sleep(self.action_wait)
|
|
|
+ self._scroll_detail_page(screenshot, title, detail_page)
|
|
|
return chunks
|
|
|
|
|
|
def _clean_detail(self, result: dict[str, Any], visited_url: str, chunks: list[dict[str, Any]]) -> dict[str, Any]:
|
|
|
@@ -408,7 +535,7 @@ class WebSearchRunner:
|
|
|
提取片段:{json.dumps(chunks, ensure_ascii=False)}
|
|
|
严格只输出 JSON:
|
|
|
{{"clean_title": string, "clean_text": string, "key_points": [string], "notes": string}}"""
|
|
|
- cleaned = self._text_json(prompt)
|
|
|
+ cleaned = self._text_json(prompt, "clean_detail")
|
|
|
self.analyses.append({"type": "clean_detail", "title": result.get("title"), **cleaned})
|
|
|
return cleaned
|
|
|
|
|
|
@@ -421,7 +548,7 @@ class WebSearchRunner:
|
|
|
详情:{json.dumps(details, ensure_ascii=False)}
|
|
|
严格只输出 JSON:
|
|
|
{{"summary": string, "key_points": [string], "conclusion": string, "notes": string}}"""
|
|
|
- summary = self._text_json(prompt)
|
|
|
+ summary = self._text_json(prompt, "summarize")
|
|
|
self.analyses.append({"type": "final_summary", **summary})
|
|
|
return summary
|
|
|
|
|
|
@@ -464,8 +591,11 @@ register_node(
|
|
|
"result_count": field_def("number", "研究结果数", 3, minimum=1, maximum=10),
|
|
|
"detail_max_pages": field_def("number", "每页最多滚动", 4, minimum=1, maximum=20),
|
|
|
"click_attempts": field_def("number", "标题点击重试", 2, minimum=1, maximum=5),
|
|
|
+ "maximize_browser": field_def("boolean", "打开后最大化浏览器", True),
|
|
|
"page_load_wait_seconds": field_def("number", "页面加载等待秒数", 8, minimum=0, maximum=120),
|
|
|
"action_wait_seconds": field_def("number", "操作等待秒数", 1, minimum=0, maximum=30),
|
|
|
+ "wait_jitter_min_seconds": field_def("number", "等待抖动最小秒数", 0, minimum=0, maximum=30),
|
|
|
+ "wait_jitter_max_seconds": field_def("number", "等待抖动最大秒数", 0, minimum=0, maximum=30),
|
|
|
"close_browser": field_def("boolean", "完成后关闭浏览器", True),
|
|
|
"include_debug_analyses": field_def("boolean", "返回调试分析", False),
|
|
|
},
|