feat: 初始化Web服务监控系统

This commit is contained in:
2026-04-10 16:30:21 +08:00
commit a7cc60596e
3 changed files with 247 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
# 日志文件
*.log
# 临时文件
temp_email.html
# 状态文件
status.json
# Python
__pycache__/
*.py[cod]
*$py.class

74
README.md Normal file
View File

@@ -0,0 +1,74 @@
# Web服务监控系统
自动监控所有Web服务状态,每20分钟检查一次,如有服务停止则发送邮件通知。
## 功能特点
- 定时检查多个Web服务端口
- 自动检测服务是否存活
- 发现停止服务时发送HTML格式邮件通知
- 邮件包含启动命令,方便快速恢复
## 监控的服务
| 服务名称 | 端口 | 健康检查路径 |
|---------|------|-------------|
| PDF翻译助手 V2 | 19000 | /api/health |
| LLM Index RAG | 19001 | /api/stats |
| 碎片信息记录 | 19009 | /api/notes |
| ParamHub Python | 19010 | /api/stats |
| 产品参数爬取 API | 19011 | /api/products |
| 产品参数爬取 后台 | 19012 | / |
| LLM Proxy API | 19007 | /health |
| LLM Proxy 后台 | 19008 | /api/stats |
## 使用方法
### 手动检查
```bash
python3 monitor.py
```
### 配置定时任务
```bash
# 添加到crontab
crontab -e
# 每20分钟检查一次
# 注意:monitor.py 自身已将日志写入同目录的 monitor.log,并同时输出到标准输出;
# 将标准输出重定向到同一文件会使每条日志出现两次,如需避免可改为重定向到其他文件。
*/20 * * * * /usr/bin/python3 /home/xian/.openclaw/workspace-coder/works/service-monitor/monitor.py >> /home/xian/.openclaw/workspace-coder/works/service-monitor/monitor.log 2>&1
```
## 配置说明
修改 `monitor.py` 中的配置:
```python
# 通知邮箱
NOTIFY_EMAIL = "your-email@example.com"
# 服务列表
SERVICES = [
{"name": "服务名称", "port": 端口号, "path": "/health", "start_cmd": "启动命令"},
# ...
]
```
## 依赖
- Python 3
- urllib (内置)
- email-sender 技能 (邮件发送)
## 文件说明
| 文件 | 说明 |
|------|------|
| monitor.py | 主监控脚本 |
| status.json | 上次检查状态 |
| monitor.log | 运行日志 |
## 版本历史
- v1.0.0 (2026-04-10) - 初始版本,支持8个服务监控

160
monitor.py Normal file
View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
服务监控脚本
每20分钟检查所有Web服务状态如有停止则发送邮件通知
"""
import html
import http.client
import json
import subprocess
import urllib.error
import urllib.request
from datetime import datetime
from pathlib import Path
# Configuration
#
# Each entry describes one local web service: its display name, the TCP port
# it listens on, the health-check path, and the shell command that is quoted
# in alert e-mails so the service can be restarted quickly.
SERVICES = [
    {
        "name": "PDF翻译助手 V2",
        "port": 19000,
        "path": "/api/health",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/pdf-translate-web-v2 && nohup python3 app.py > /tmp/pdf-v2.log 2>&1 &",
    },
    {
        "name": "LLM Index RAG",
        "port": 19001,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/llm-index-rag && nohup python3 app.py > /tmp/llm-index.log 2>&1 &",
    },
    {
        "name": "碎片信息记录",
        "port": 19009,
        "path": "/api/notes",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/snippet-notes && nohup python3 app.py > /tmp/snippet.log 2>&1 &",
    },
    {
        "name": "ParamHub Python",
        "port": 19010,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/param-hub-python && nohup python3 app.py > /tmp/paramhub.log 2>&1 &",
    },
    {
        "name": "产品参数爬取 API",
        "port": 19011,
        "path": "/api/products",
        "start_cmd": "cd ~/.openclaw/common/projects/product-crawler && nohup python3 app.py > /tmp/crawler-api.log 2>&1 &",
    },
    {
        "name": "产品参数爬取 后台",
        "port": 19012,
        "path": "/",
        "start_cmd": "cd ~/.openclaw/common/projects/product-crawler/admin && nohup python3 app.py > /tmp/crawler-admin.log 2>&1 &",
    },
    {
        "name": "LLM Proxy API",
        "port": 19007,
        "path": "/health",
        "start_cmd": "cd ~/.openclaw/common/projects/llm-proxy && nohup python3 app.py > /tmp/llm-proxy.log 2>&1 &",
    },
    {
        "name": "LLM Proxy 后台",
        "port": 19008,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/common/projects/llm-proxy/admin && nohup python3 app.py > /tmp/llm-proxy-admin.log 2>&1 &",
    },
]

# Recipient of the alert e-mails.
NOTIFY_EMAIL = "zuitoushang@tphai.com"

# The log and the last-check status file both live next to this script.
_SCRIPT_DIR = Path(__file__).parent
LOG_FILE = _SCRIPT_DIR / "monitor.log"
STATUS_FILE = _SCRIPT_DIR / "status.json"
def check_service(port: int, path: str = "/", timeout: int = 5) -> bool:
    """Return True when the local service answers the health check with HTTP 200.

    Args:
        port: TCP port on localhost the service listens on.
        path: URL path of the health-check endpoint.
        timeout: Seconds to wait for the connection and for the response.

    Returns:
        True only for a 200 response. False for any other status, a refused
        or dropped connection, a timeout, or a malformed HTTP reply.
    """
    url = f"http://localhost:{port}{path}"
    request = urllib.request.Request(url, method='GET')
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return response.status == 200
    except (OSError, http.client.HTTPException):
        # URLError/HTTPError, timeouts and refused connections are all OSError
        # subclasses. Failures raised while the response is being read (peer
        # closed the connection, read timeout, garbage status line) are not
        # wrapped in URLError by urllib, so the OSError / HTTPException bases
        # are caught: a half-dead service must be reported as stopped instead
        # of crashing the whole monitoring run.
        return False
def get_status() -> dict:
    """Load the result of the previous check from STATUS_FILE.

    Returns:
        The parsed JSON object, or the default
        ``{"last_check": None, "stopped_services": []}`` when the file does
        not exist, does not contain valid JSON, or holds something other
        than a JSON object.

    Raises:
        OSError: If the file exists but cannot be read (e.g. permissions).
    """
    default = {"last_check": None, "stopped_services": []}
    try:
        # Open directly instead of testing exists() first: the file may
        # disappear between the check and the open.
        with open(STATUS_FILE, 'r', encoding='utf-8') as f:
            status = json.load(f)
    except FileNotFoundError:
        return default
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; a corrupt or partially written file is treated as
        # "no previous state" rather than aborting the run.
        return default
    return status if isinstance(status, dict) else default
def save_status(status: dict):
    """Persist the result of this check to STATUS_FILE as indented UTF-8 JSON.

    The JSON text is produced in memory and written to a sibling temporary
    file that is then renamed over STATUS_FILE, so neither a serialization
    error nor an interrupted write leaves a truncated status file behind.

    Args:
        status: JSON-serializable mapping describing the check result.

    Raises:
        TypeError: If *status* contains values that json cannot serialize.
        OSError: If the temporary file cannot be written or renamed.
    """
    # Serialize first: a failure here must not touch the existing file.
    payload = json.dumps(status, ensure_ascii=False, indent=2)
    target = Path(STATUS_FILE)
    tmp_path = target.with_name(target.name + ".tmp")
    tmp_path.write_text(payload, encoding='utf-8')
    tmp_path.replace(target)
def log(message: str):
    """Append a timestamped line to LOG_FILE and echo the same line to stdout.

    Args:
        message: Text to record; it is prefixed with ``[YYYY-MM-DD HH:MM:SS]``.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    line = f"[{stamp}] {message}"
    with open(LOG_FILE, 'a', encoding='utf-8') as handle:
        handle.write(line + "\n")
    # The console copy is stripped of surrounding whitespace.
    print(line.strip())
def _build_email_body(stopped: list) -> str:
    """Render the HTML alert body listing every stopped service and its start command."""
    detected_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Escape every interpolated value once. Start commands contain '&' and '>'
    # (e.g. "2>&1 &"), which are markup characters and must not be pasted
    # into HTML verbatim.
    rows = [
        (
            html.escape(str(svc['name'])),
            html.escape(str(svc['port'])),
            html.escape(str(svc['start_cmd'])),
        )
        for svc in stopped
    ]

    lines = [
        "<h2>🚨 服务停止通知</h2>",
        f"<p>检测时间: {detected_at}</p>",
        f"<p>以下 <strong>{len(stopped)}</strong> 个服务已停止运行:</p>",
        "<table border='1' cellpadding='8' cellspacing='0' style='border-collapse: collapse;'>",
        "<tr style='background: #f0f0f0;'><th>服务名称</th><th>端口</th><th>启动命令</th></tr>",
    ]
    for name, port, start_cmd in rows:
        lines.append("<tr>")
        lines.append(f"<td><strong>{name}</strong></td>")
        lines.append(f"<td>{port}</td>")
        lines.append(f"<td><code>{start_cmd}</code></td>")
        lines.append("</tr>")
    lines.append("</table>")
    lines.append("<hr>")
    lines.append("<h3>📝 启动方式</h3>")
    lines.append("<pre>")
    # Copy-paste friendly section: one "# name" line followed by the command.
    for name, _port, start_cmd in rows:
        lines.append(f"# {name}")
        lines.append(start_cmd)
        lines.append("")
    lines.append("</pre>")
    return "\n".join(lines)


def send_notification(stopped: list):
    """Send an HTML alert e-mail listing the stopped services.

    The mail is delivered by running the external ``send_email.py`` script
    with the rendered body passed on its command line. The outcome is
    written to the log; a delivery failure never raises.

    Args:
        stopped: Service dicts (``name``, ``port``, ``start_cmd``) that failed
            the health check. Nothing is sent when the list is empty.
    """
    if not stopped:
        return

    script_dir = Path(__file__).parent
    # The sender script is resolved relative to this file, two directories up.
    email_script = script_dir.parent.parent / "skills/email-sender/scripts/send_email.py"

    subject = f"【服务监控警报】{len(stopped)}个服务已停止"
    body = _build_email_body(stopped)

    # Keep a copy of the last alert next to the script. The sender receives
    # the body through --body, so failing to write this copy must not block
    # the alert itself.
    try:
        (script_dir / "temp_email.html").write_text(body, encoding='utf-8')
    except OSError as e:
        log(f"临时邮件文件写入失败: {e}")

    cmd = [
        "python3", str(email_script),
        "send",
        "--to", NOTIFY_EMAIL,
        "--subject", subject,
        "--body", body,
        "--html",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except Exception as e:
        # Best-effort boundary: a missing interpreter, a timeout or any other
        # launch problem is logged and the monitoring run continues.
        log(f"邮件发送异常: {e}")
        return

    if result.returncode == 0:
        log(f"邮件发送成功: {NOTIFY_EMAIL}")
    else:
        log(f"邮件发送失败: {result.stderr}")
def main():
    """Check every configured service, record the outcome, and alert on failures.

    Each service in SERVICES is probed in order and its state is logged. The
    names of the stopped services are saved together with the check time, and
    an e-mail notification is sent when at least one service is down.

    Returns:
        The number of services found stopped.
    """
    log("=" * 50)
    log("开始检查服务状态...")

    down = []
    for service in SERVICES:
        alive = check_service(service['port'], service['path'])
        if alive:
            label = "✅ 运行中"
        else:
            label = "❌ 已停止"
        log(f" {service['name']} (端口 {service['port']}): {label}")
        if alive:
            continue
        down.append(service)

    # Record the outcome before notifying.
    down_names = [service['name'] for service in down]
    save_status({
        "last_check": datetime.now().isoformat(),
        "stopped_services": down_names,
    })

    if not down:
        log("所有服务运行正常 ✓")
    else:
        log(f"发现 {len(down)} 个服务停止,发送邮件通知...")
        send_notification(down)

    log("检查完成")
    return len(down)
# Run one check pass when executed as a script; the value returned by main()
# is not used as the process exit status.
if __name__ == "__main__":
    main()