sensors.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. from __future__ import annotations
  2. import json
  3. import os
  4. import shutil
  5. import subprocess
  6. from datetime import datetime
  7. from typing import Any
  8. import psutil
  9. def now_iso() -> str:
  10. return datetime.now().astimezone().isoformat(timespec="seconds")
  11. def run_command(args: list[str], timeout: int = 8) -> tuple[int, str, str]:
  12. creationflags = subprocess.CREATE_NO_WINDOW if os.name == "nt" else 0
  13. try:
  14. process = subprocess.Popen(
  15. args,
  16. stdout=subprocess.PIPE,
  17. stderr=subprocess.PIPE,
  18. text=True,
  19. encoding="utf-8",
  20. errors="replace",
  21. creationflags=creationflags,
  22. )
  23. stdout, stderr = process.communicate(timeout=timeout)
  24. return process.returncode or 0, stdout.strip(), stderr.strip()
  25. except subprocess.TimeoutExpired:
  26. terminate_process_tree(process.pid)
  27. stdout, stderr = process.communicate()
  28. message = f"command timed out after {timeout} seconds and process tree was terminated"
  29. return 1, (stdout or "").strip(), ((stderr or "").strip() + "\n" + message).strip()
  30. except OSError as exc:
  31. return 1, "", str(exc)
  32. def terminate_process_tree(pid: int) -> None:
  33. if os.name == "nt":
  34. subprocess.run(
  35. ["taskkill", "/PID", str(pid), "/T", "/F"],
  36. stdout=subprocess.DEVNULL,
  37. stderr=subprocess.DEVNULL,
  38. creationflags=subprocess.CREATE_NO_WINDOW,
  39. check=False,
  40. )
  41. return
  42. try:
  43. psutil.Process(pid).kill()
  44. except psutil.Error:
  45. pass
  46. def powershell_path() -> str | None:
  47. candidates = [
  48. r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
  49. shutil.which("powershell"),
  50. shutil.which("pwsh"),
  51. ]
  52. for candidate in candidates:
  53. if candidate and os.path.exists(candidate):
  54. return candidate
  55. return None
  56. def run_powershell(script: str, timeout: int = 8) -> Any:
  57. ps = powershell_path()
  58. if not ps:
  59. return None
  60. code, stdout, _ = run_command(
  61. [
  62. ps,
  63. "-NoProfile",
  64. "-NonInteractive",
  65. "-ExecutionPolicy",
  66. "Bypass",
  67. "-Command",
  68. script,
  69. ],
  70. timeout=timeout,
  71. )
  72. if code != 0 or not stdout:
  73. return None
  74. try:
  75. return json.loads(stdout)
  76. except json.JSONDecodeError:
  77. return None
  78. def collect_cpu() -> dict[str, Any]:
  79. freq = psutil.cpu_freq()
  80. return {
  81. "load_percent": psutil.cpu_percent(interval=0.2),
  82. "per_core_percent": psutil.cpu_percent(interval=None, percpu=True),
  83. "physical_cores": psutil.cpu_count(logical=False),
  84. "logical_cores": psutil.cpu_count(logical=True),
  85. "frequency_mhz": round(freq.current, 2) if freq else None,
  86. }
  87. def collect_memory() -> dict[str, Any]:
  88. vm = psutil.virtual_memory()
  89. swap = psutil.swap_memory()
  90. return {
  91. "total": vm.total,
  92. "available": vm.available,
  93. "used": vm.used,
  94. "percent": vm.percent,
  95. "swap_total": swap.total,
  96. "swap_used": swap.used,
  97. "swap_percent": swap.percent,
  98. }
  99. def collect_psutil_temperatures() -> list[dict[str, Any]]:
  100. sensors: list[dict[str, Any]] = []
  101. if not hasattr(psutil, "sensors_temperatures"):
  102. return sensors
  103. try:
  104. groups = psutil.sensors_temperatures(fahrenheit=False) or {}
  105. except (AttributeError, OSError):
  106. return sensors
  107. for group, entries in groups.items():
  108. for entry in entries:
  109. sensors.append(
  110. {
  111. "source": "psutil",
  112. "hardware_type": "temperature",
  113. "name": entry.label or group,
  114. "value": entry.current,
  115. "unit": "C",
  116. "high": entry.high,
  117. "critical": entry.critical,
  118. }
  119. )
  120. return sensors
  121. def collect_hardware_monitor_sensors() -> list[dict[str, Any]]:
  122. script = r"""
  123. $namespaces = @('root\LibreHardwareMonitor', 'root\OpenHardwareMonitor')
  124. $items = @()
  125. foreach ($ns in $namespaces) {
  126. try {
  127. $items += Get-CimInstance -Namespace $ns -ClassName Sensor -ErrorAction Stop |
  128. Where-Object { $_.SensorType -in @('Temperature','Load','Fan','Voltage','Power','Clock') } |
  129. Select-Object @{Name='source';Expression={$ns}}, Identifier, Name, SensorType, Value, Min, Max
  130. } catch {}
  131. }
  132. $items | ConvertTo-Json -Depth 4
  133. """
  134. raw = run_powershell(script)
  135. if not raw:
  136. return []
  137. rows = raw if isinstance(raw, list) else [raw]
  138. sensors: list[dict[str, Any]] = []
  139. unit_map = {
  140. "Temperature": "C",
  141. "Load": "%",
  142. "Fan": "RPM",
  143. "Voltage": "V",
  144. "Power": "W",
  145. "Clock": "MHz",
  146. }
  147. for row in rows:
  148. sensor_type = row.get("SensorType")
  149. sensors.append(
  150. {
  151. "source": row.get("source"),
  152. "hardware_type": sensor_type,
  153. "name": row.get("Name"),
  154. "identifier": row.get("Identifier"),
  155. "value": row.get("Value"),
  156. "unit": unit_map.get(sensor_type),
  157. "min": row.get("Min"),
  158. "max": row.get("Max"),
  159. }
  160. )
  161. return sensors
  162. def collect_acpi_temperatures() -> list[dict[str, Any]]:
  163. script = r"""
  164. Get-CimInstance -Namespace root/wmi -ClassName MSAcpi_ThermalZoneTemperature -ErrorAction SilentlyContinue |
  165. Select-Object InstanceName, CurrentTemperature |
  166. ConvertTo-Json -Depth 3
  167. """
  168. raw = run_powershell(script)
  169. if not raw:
  170. return []
  171. rows = raw if isinstance(raw, list) else [raw]
  172. sensors: list[dict[str, Any]] = []
  173. for row in rows:
  174. current = row.get("CurrentTemperature")
  175. celsius = round((current / 10) - 273.15, 1) if isinstance(current, (int, float)) else None
  176. sensors.append(
  177. {
  178. "source": "MSAcpi_ThermalZoneTemperature",
  179. "hardware_type": "Temperature",
  180. "name": row.get("InstanceName"),
  181. "value": celsius,
  182. "unit": "C",
  183. }
  184. )
  185. return sensors
  186. def collect_nvidia_gpus() -> list[dict[str, Any]]:
  187. nvidia_smi = shutil.which("nvidia-smi")
  188. if not nvidia_smi:
  189. return []
  190. query = "name,utilization.gpu,utilization.memory,temperature.gpu,memory.total,memory.used,power.draw"
  191. code, stdout, stderr = run_command(
  192. [
  193. nvidia_smi,
  194. f"--query-gpu={query}",
  195. "--format=csv,noheader,nounits",
  196. ],
  197. timeout=8,
  198. )
  199. if code != 0:
  200. return [{"source": "nvidia-smi", "error": stderr or stdout}]
  201. gpus: list[dict[str, Any]] = []
  202. for index, line in enumerate(stdout.splitlines()):
  203. parts = [part.strip() for part in line.split(",")]
  204. if len(parts) < 7:
  205. continue
  206. gpus.append(
  207. {
  208. "source": "nvidia-smi",
  209. "index": index,
  210. "name": parts[0],
  211. "load_percent": to_float(parts[1]),
  212. "memory_load_percent": to_float(parts[2]),
  213. "temperature_c": to_float(parts[3]),
  214. "memory_total_mb": to_float(parts[4]),
  215. "memory_used_mb": to_float(parts[5]),
  216. "power_w": to_float(parts[6]),
  217. }
  218. )
  219. return gpus
  220. def collect_windows_gpu_counters() -> list[dict[str, Any]]:
  221. script = r"""
  222. $counters = Get-Counter '\GPU Engine(*)\Utilization Percentage' -ErrorAction SilentlyContinue
  223. $rows = @()
  224. if ($counters) {
  225. $rows = $counters.CounterSamples |
  226. Where-Object { $_.CookedValue -gt 0 } |
  227. Select-Object InstanceName, CookedValue
  228. }
  229. $rows | ConvertTo-Json -Depth 3
  230. """
  231. raw = run_powershell(script, timeout=10)
  232. if not raw:
  233. return []
  234. rows = raw if isinstance(raw, list) else [raw]
  235. total = sum(float(row.get("CookedValue") or 0) for row in rows)
  236. return [
  237. {
  238. "source": "Windows Performance Counter",
  239. "name": "GPU Engine Utilization",
  240. "load_percent": round(total, 2),
  241. "engines": rows[:20],
  242. }
  243. ]
  244. def to_float(value: Any) -> float | None:
  245. try:
  246. return float(str(value).replace("W", "").strip())
  247. except (TypeError, ValueError):
  248. return None
  249. def collect_sensors() -> dict[str, Any]:
  250. hardware_sensors = collect_hardware_monitor_sensors()
  251. temperatures = collect_psutil_temperatures() + collect_acpi_temperatures()
  252. gpu = collect_nvidia_gpus()
  253. if not gpu:
  254. gpu = collect_windows_gpu_counters()
  255. return {
  256. "collected_at": now_iso(),
  257. "cpu": collect_cpu(),
  258. "memory": collect_memory(),
  259. "gpu": gpu,
  260. "temperatures": temperatures,
  261. "hardware_sensors": hardware_sensors,
  262. "notes": [
  263. "CPU 和内存负载来自 psutil。",
  264. "显卡优先使用 nvidia-smi;否则尝试 Windows GPU 性能计数器。",
  265. "温度优先读取 LibreHardwareMonitor/OpenHardwareMonitor WMI,其次 psutil 和 ACPI 热区;Windows 原生接口可能无法提供 CPU/GPU 真实温度。",
  266. ],
  267. }