test_web_search.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. from __future__ import annotations
  2. import unittest
  3. import json
  4. from pathlib import Path
  5. from unittest.mock import patch
  6. from app.automation.context import WorkflowContext
  7. from app.automation.nodes.web_search import WebSearchRunner, normalize_search_result, result_identity
  8. from app.automation.nodes.research import AiWebResearchRunner, compact_evidence, validate_json_data, validate_research_result
  9. from app.automation_service import web_search_workflow_template
  10. from app.automation_service import workflow_return_data
  11. from app.schemas import AutomationWorkflowSaveRequest
  12. class WebSearchHelpersTest(unittest.TestCase):
  13. def test_normalize_search_result_converts_percent_to_screen_point(self) -> None:
  14. result = normalize_search_result(
  15. {
  16. "title": "示例结果",
  17. "url": "https://example.com",
  18. "snippet": "摘要",
  19. "title_center_x_percent": 25,
  20. "title_center_y_percent": 40,
  21. },
  22. scroll_page=2,
  23. width=1920,
  24. height=1080,
  25. )
  26. self.assertIsNotNone(result)
  27. self.assertEqual(result["title_center_x"], 480)
  28. self.assertEqual(result["title_center_y"], 432)
  29. self.assertEqual(result["scroll_page"], 2)
  30. def test_result_identity_prefers_url_and_falls_back_to_title(self) -> None:
  31. self.assertEqual(result_identity({"url": "HTTPS://EXAMPLE.COM", "title": "标题"}), "https://example.com")
  32. self.assertEqual(result_identity({"url": "", "title": " 标题 "}), "标题")
  33. def test_ranking_ignores_invalid_and_duplicate_indexes(self) -> None:
  34. runner = WebSearchRunner(
  35. WorkflowContext(workflow_id=1, provider_id=1, model_id=1),
  36. {"query": "测试", "result_count": 2},
  37. )
  38. runner._text_json = lambda prompt: {
  39. "ranked_results": [
  40. {"original_index": 1, "relevance_score": 9},
  41. {"original_index": 1, "relevance_score": 8},
  42. {"original_index": 99, "relevance_score": 7},
  43. {"original_index": 0, "relevance_score": 6},
  44. ]
  45. }
  46. ranked = runner._rank_results([{"title": "A"}, {"title": "B"}])
  47. self.assertEqual([item["title"] for item in ranked], ["B", "A"])
  48. def test_failed_title_location_keeps_search_page_state(self) -> None:
  49. runner = WebSearchRunner(
  50. WorkflowContext(workflow_id=1, provider_id=1, model_id=1),
  51. {"query": "测试", "click_attempts": 1},
  52. )
  53. runner._go_to_scroll_page = lambda scroll_page: None
  54. runner._capture = lambda: {"width": 1920, "height": 1080, "image_base64": "", "mime_type": "image/png"}
  55. runner._vision_json = lambda prompt, screenshot: {"found": False, "notes": "未找到标题"}
  56. result = runner._open_result({"title": "不存在的标题", "scroll_page": 0})
  57. self.assertFalse(result["opened_detail_page"])
  58. self.assertTrue(result["is_search_results_page"])
  59. class WebSearchWorkflowTemplateTest(unittest.TestCase):
  60. def test_template_matches_workflow_schema(self) -> None:
  61. workflow = AutomationWorkflowSaveRequest.model_validate(web_search_workflow_template())
  62. self.assertEqual(workflow.schema_version, "workflow/v1")
  63. self.assertEqual(workflow.workflow_key, "ai-web-research")
  64. self.assertEqual(workflow.variables["objective"]["default"], "")
  65. self.assertEqual([node.type for node in workflow.nodes], ["flow.start", "research.ai_web_research", "flow.end"])
  66. def test_checked_in_workflow_matches_template(self) -> None:
  67. path = Path(__file__).resolve().parents[2] / "workflows" / "ai-web-research.workflow.json"
  68. checked_in = json.loads(path.read_text(encoding="utf-8"))
  69. self.assertEqual(checked_in, web_search_workflow_template())
  70. class AiResearchHelpersTest(unittest.TestCase):
  71. def test_json_schema_validation_reports_missing_required_field(self) -> None:
  72. schema = {
  73. "type": "object",
  74. "required": ["summary"],
  75. "properties": {"summary": {"type": "string"}},
  76. }
  77. result = validate_json_data({}, schema)
  78. self.assertFalse(result["schema_valid"])
  79. self.assertTrue(result["errors"])
  80. def test_compact_evidence_keeps_source_and_cleaned_content(self) -> None:
  81. evidence = compact_evidence(
  82. {
  83. "researched_details": [
  84. {
  85. "visited_url": "https://example.com",
  86. "opened_detail_page": True,
  87. "result": {"title": "原始标题"},
  88. "cleaned": {"clean_title": "清理标题", "clean_text": "正文", "key_points": ["要点"]},
  89. }
  90. ]
  91. }
  92. )
  93. self.assertEqual(evidence[0]["title"], "清理标题")
  94. self.assertEqual(evidence[0]["url"], "https://example.com")
  95. def test_research_validation_enforces_minimum_sources(self) -> None:
  96. result = validate_research_result(
  97. {"summary": "完成"},
  98. {
  99. "type": "object",
  100. "required": ["summary"],
  101. "properties": {"summary": {"type": "string"}},
  102. },
  103. {"min_sources": 2},
  104. [{"title": "A", "url": "https://example.com"}],
  105. )
  106. self.assertTrue(result["schema_valid"])
  107. self.assertFalse(result["constraints_valid"])
  108. self.assertFalse(result["valid"])
  109. def test_workflow_return_data_uses_configured_node(self) -> None:
  110. workflow = {"settings": {"return": {"node_id": "research"}}}
  111. result = {"outputs": {"research": {"data": {"answer": 1}}}}
  112. self.assertEqual(workflow_return_data(workflow, result), {"data": {"answer": 1}})
  113. def test_ai_research_retries_until_assessment_is_valid(self) -> None:
  114. runner = AiWebResearchRunner(
  115. WorkflowContext(workflow_id=1, provider_id=1, model_id=1),
  116. {
  117. "objective": "测试目标",
  118. "output_schema": {
  119. "type": "object",
  120. "required": ["answer"],
  121. "properties": {"answer": {"type": "string"}},
  122. },
  123. "constraints": {"min_sources": 1},
  124. "max_attempts": 2,
  125. },
  126. )
  127. runner._create_plan = lambda: {"queries": ["第一轮", "第二轮"]}
  128. assessments = iter(
  129. [
  130. {
  131. "goal_achieved": False,
  132. "candidate_data": {},
  133. "missing_information": ["答案"],
  134. "next_queries": ["第二轮"],
  135. },
  136. {
  137. "goal_achieved": True,
  138. "candidate_data": {"answer": "完成"},
  139. "missing_information": [],
  140. "next_queries": [],
  141. },
  142. ]
  143. )
  144. runner._assess_progress = lambda plan, queries, evidence: next(assessments)
  145. fake_output = {
  146. "result_count": 1,
  147. "researched_count": 1,
  148. "researched_details": [
  149. {
  150. "visited_url": "https://example.com",
  151. "opened_detail_page": True,
  152. "result": {"title": "来源"},
  153. "cleaned": {"clean_text": "证据"},
  154. }
  155. ],
  156. }
  157. with patch("app.automation.nodes.research.WebSearchRunner") as search_runner:
  158. search_runner.return_value.run.return_value = fake_output
  159. result = runner.run()
  160. self.assertTrue(result["goal_achieved"])
  161. self.assertEqual(result["attempts_used"], 2)
  162. self.assertEqual(result["data"], {"answer": "完成"})
  163. if __name__ == "__main__":
  164. unittest.main()