feat: 初始化Web服务监控系统
This commit is contained in:
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# 日志文件
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# 临时文件
|
||||||
|
temp_email.html
|
||||||
|
|
||||||
|
# 状态文件
|
||||||
|
status.json
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
74
README.md
Normal file
74
README.md
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# Web服务监控系统
|
||||||
|
|
||||||
|
自动监控所有Web服务状态,每20分钟检查一次,如有服务停止则发送邮件通知。
|
||||||
|
|
||||||
|
## 功能特点
|
||||||
|
|
||||||
|
- 定时检查多个Web服务端口
|
||||||
|
- 自动检测服务是否存活
|
||||||
|
- 发现停止服务时发送HTML格式邮件通知
|
||||||
|
- 邮件包含启动命令,方便快速恢复
|
||||||
|
|
||||||
|
## 监控的服务
|
||||||
|
|
||||||
|
| 服务名称 | 端口 | 健康检查路径 |
|
||||||
|
|---------|------|-------------|
|
||||||
|
| PDF翻译助手 V2 | 19000 | /api/health |
|
||||||
|
| LLM Index RAG | 19001 | /api/stats |
|
||||||
|
| 碎片信息记录 | 19009 | /api/notes |
|
||||||
|
| ParamHub Python | 19010 | /api/stats |
|
||||||
|
| 产品参数爬取 API | 19011 | /api/products |
|
||||||
|
| 产品参数爬取 后台 | 19012 | / |
|
||||||
|
| LLM Proxy API | 19007 | /health |
|
||||||
|
| LLM Proxy 后台 | 19008 | /api/stats |
|
||||||
|
|
||||||
|
## 使用方法
|
||||||
|
|
||||||
|
### 手动检查
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 monitor.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### 配置定时任务
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 添加到crontab
|
||||||
|
crontab -e
|
||||||
|
|
||||||
|
# 每20分钟检查一次(注意:monitor.py 自身已将日志写入 monitor.log,下面再把标准输出重定向到同一文件会使每条日志出现两次)
|
||||||
|
*/20 * * * * /usr/bin/python3 /home/xian/.openclaw/workspace-coder/works/service-monitor/monitor.py >> /home/xian/.openclaw/workspace-coder/works/service-monitor/monitor.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
## 配置说明
|
||||||
|
|
||||||
|
修改 `monitor.py` 中的配置:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 通知邮箱
|
||||||
|
NOTIFY_EMAIL = "your-email@example.com"
|
||||||
|
|
||||||
|
# 服务列表
|
||||||
|
SERVICES = [
|
||||||
|
{"name": "服务名称", "port": 端口号, "path": "/health", "start_cmd": "启动命令"},
|
||||||
|
# ...
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 依赖
|
||||||
|
|
||||||
|
- Python 3
|
||||||
|
- urllib (内置)
|
||||||
|
- email-sender 技能 (邮件发送)
|
||||||
|
|
||||||
|
## 文件说明
|
||||||
|
|
||||||
|
| 文件 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| monitor.py | 主监控脚本 |
|
||||||
|
| status.json | 上次检查状态 |
|
||||||
|
| monitor.log | 运行日志 |
|
||||||
|
|
||||||
|
## 版本历史
|
||||||
|
|
||||||
|
- v1.0.0 (2026-04-10) - 初始版本,支持8个服务监控
|
||||||
160
monitor.py
Normal file
160
monitor.py
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
服务监控脚本
|
||||||
|
每20分钟检查所有Web服务状态,如有停止则发送邮件通知
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Configuration
# Each entry describes one monitored service: display name, localhost port,
# the path requested for the health probe, and the shell command shown in
# the alert e-mail for restarting it.
SERVICES = [
    {
        "name": "PDF翻译助手 V2",
        "port": 19000,
        "path": "/api/health",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/pdf-translate-web-v2 && nohup python3 app.py > /tmp/pdf-v2.log 2>&1 &",
    },
    {
        "name": "LLM Index RAG",
        "port": 19001,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/llm-index-rag && nohup python3 app.py > /tmp/llm-index.log 2>&1 &",
    },
    {
        "name": "碎片信息记录",
        "port": 19009,
        "path": "/api/notes",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/snippet-notes && nohup python3 app.py > /tmp/snippet.log 2>&1 &",
    },
    {
        "name": "ParamHub Python",
        "port": 19010,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/workspace-coder/works/param-hub-python && nohup python3 app.py > /tmp/paramhub.log 2>&1 &",
    },
    {
        "name": "产品参数爬取 API",
        "port": 19011,
        "path": "/api/products",
        "start_cmd": "cd ~/.openclaw/common/projects/product-crawler && nohup python3 app.py > /tmp/crawler-api.log 2>&1 &",
    },
    {
        "name": "产品参数爬取 后台",
        "port": 19012,
        "path": "/",
        "start_cmd": "cd ~/.openclaw/common/projects/product-crawler/admin && nohup python3 app.py > /tmp/crawler-admin.log 2>&1 &",
    },
    {
        "name": "LLM Proxy API",
        "port": 19007,
        "path": "/health",
        "start_cmd": "cd ~/.openclaw/common/projects/llm-proxy && nohup python3 app.py > /tmp/llm-proxy.log 2>&1 &",
    },
    {
        "name": "LLM Proxy 后台",
        "port": 19008,
        "path": "/api/stats",
        "start_cmd": "cd ~/.openclaw/common/projects/llm-proxy/admin && nohup python3 app.py > /tmp/llm-proxy-admin.log 2>&1 &",
    },
]

# Recipient of alert e-mails.
NOTIFY_EMAIL = "zuitoushang@tphai.com"
# Log and status files live next to this script.
LOG_FILE = Path(__file__).with_name("monitor.log")
STATUS_FILE = Path(__file__).with_name("status.json")
|
||||||
|
|
||||||
|
|
||||||
|
def check_service(port: int, path: str = "/", timeout: int = 5) -> bool:
    """Report whether a local service answers an HTTP GET with status 200.

    Args:
        port: TCP port on localhost that the service listens on.
        path: URL path to request; defaults to "/".
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        True only when the response status is exactly 200. Connection
        failures, timeouts, HTTP error statuses and malformed or dropped
        responses are reported as False instead of being raised.
    """
    # Function-scope import so this block does not rely on a new
    # file-level import.
    import http.client

    url = f"http://localhost:{port}{path}"
    req = urllib.request.Request(url, method="GET")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.status == 200
    except (OSError, http.client.HTTPException):
        # URLError, HTTPError, TimeoutError and ConnectionRefusedError are
        # all OSError subclasses. Errors raised while reading the response
        # (connection reset, peer closing without a reply, bad status line)
        # are not wrapped in URLError by urllib, so they are caught here
        # too; otherwise one broken service would abort the whole check.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_status() -> dict:
    """Load the status recorded by the previous run.

    Returns:
        The dict parsed from STATUS_FILE. When the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object, the
        default ``{"last_check": None, "stopped_services": []}`` is
        returned instead.
    """
    default = {"last_check": None, "stopped_services": []}
    try:
        with open(STATUS_FILE, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError from a corrupt file.
        return default
    # Honour the declared return type even if the file holds other JSON.
    return loaded if isinstance(loaded, dict) else default
|
||||||
|
|
||||||
|
|
||||||
|
def save_status(status: dict) -> None:
    """Persist *status* to STATUS_FILE as indented UTF-8 JSON.

    The JSON is written to a sibling ``<name>.tmp`` file first and then
    renamed over STATUS_FILE, so an interrupted write does not leave a
    truncated status file in place of the previous one.

    Args:
        status: JSON-serialisable mapping to store.

    Raises:
        OSError: If the temporary file cannot be written or renamed.
        TypeError: If *status* contains values ``json.dump`` cannot encode.
    """
    tmp_path = STATUS_FILE.with_name(STATUS_FILE.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(status, f, ensure_ascii=False, indent=2)
    # Path.replace overwrites the destination in a single rename step.
    tmp_path.replace(STATUS_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
def log(message: str) -> None:
    """Append a timestamped line to LOG_FILE and echo it to stdout.

    A failure to write the log file is reported on stdout and otherwise
    ignored, so an unwritable log does not abort the monitoring run.

    Note: the line is always printed as well, so redirecting stdout into
    the same file as LOG_FILE records every entry twice.

    Args:
        message: Text to record; the timestamp prefix is added here.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"[{timestamp}] {message}\n"
    try:
        with open(LOG_FILE, 'a', encoding='utf-8') as f:
            f.write(log_entry)
    except OSError as exc:
        # Logging is best-effort; the service checks must still run.
        print(f"[{timestamp}] 日志文件写入失败: {exc}")
    print(log_entry.strip())
|
||||||
|
|
||||||
|
|
||||||
|
def _build_email_body(stopped: list) -> str:
    """Render the HTML alert body listing every stopped service."""
    # Function-scope import so this block does not rely on a new
    # file-level import.
    import html

    body_lines = [
        "<h2>🚨 服务停止通知</h2>",
        f"<p>检测时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>",
        f"<p>以下 <strong>{len(stopped)}</strong> 个服务已停止运行:</p>",
        "<table border='1' cellpadding='8' cellspacing='0' style='border-collapse: collapse;'>",
        "<tr style='background: #f0f0f0;'><th>服务名称</th><th>端口</th><th>启动命令</th></tr>",
    ]

    # Start commands contain shell metacharacters such as ">" and "&";
    # escape every interpolated value so the markup stays well-formed and
    # the command is displayed verbatim.
    escaped = [
        (
            html.escape(str(svc['name'])),
            html.escape(str(svc['port'])),
            html.escape(str(svc['start_cmd'])),
        )
        for svc in stopped
    ]

    for name, port, start_cmd in escaped:
        body_lines.append("<tr>")
        body_lines.append(f"<td><strong>{name}</strong></td>")
        body_lines.append(f"<td>{port}</td>")
        body_lines.append(f"<td><code>{start_cmd}</code></td>")
        body_lines.append("</tr>")

    body_lines.append("</table>")
    body_lines.append("<hr>")
    body_lines.append("<h3>📝 启动方式</h3>")
    body_lines.append("<pre>")
    for name, _port, start_cmd in escaped:
        body_lines.append(f"# {name}")
        body_lines.append(start_cmd)
        body_lines.append("")
    body_lines.append("</pre>")

    return "\n".join(body_lines)


def send_notification(stopped: list) -> None:
    """Send an HTML alert e-mail listing the stopped services.

    Does nothing when *stopped* is empty. Otherwise builds the HTML body,
    stores a copy in ``temp_email.html`` next to this script, and runs the
    email-sender script as a subprocess with the recipient, subject and
    body passed as command-line arguments. The outcome is logged; errors
    are never raised to the caller.

    Args:
        stopped: Service dicts with at least the keys ``name``, ``port``
            and ``start_cmd``.
    """
    if not stopped:
        return

    # The e-mail sending skill is located three directories above this file.
    email_script = Path(__file__).parent.parent.parent / "skills/email-sender/scripts/send_email.py"

    subject = f"【服务监控警报】{len(stopped)}个服务已停止"
    body = _build_email_body(stopped)

    # Keep an on-disk copy of the body. The send command below does not read
    # this file, so a write failure must not prevent the alert going out.
    temp_html = Path(__file__).parent / "temp_email.html"
    try:
        temp_html.write_text(body, encoding='utf-8')
    except OSError as e:
        log(f"临时HTML文件写入失败: {e}")

    cmd = [
        "python3", str(email_script),
        "send",
        "--to", NOTIFY_EMAIL,
        "--subject", subject,
        "--body", body,
        "--html",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except Exception as e:
        # Deliberately broad: notification is best-effort and any failure
        # (missing interpreter, timeout, undecodable output) is only logged.
        log(f"邮件发送异常: {e}")
        return

    if result.returncode == 0:
        log(f"邮件发送成功: {NOTIFY_EMAIL}")
    else:
        log(f"邮件发送失败: {result.stderr}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Probe every configured service once, record the result and alert.

    Each entry in SERVICES is checked with check_service and its state is
    logged. The names of the stopped services are then saved through
    save_status, and send_notification is called when at least one service
    is down.

    Returns:
        The number of services found stopped.
    """
    log("=" * 50)
    log("开始检查服务状态...")

    down = []
    for entry in SERVICES:
        alive = check_service(entry['port'], entry['path'])
        label = "❌ 已停止" if not alive else "✅ 运行中"
        log(f" {entry['name']} (端口 {entry['port']}): {label}")
        if alive:
            continue
        down.append(entry)

    # Record this run's outcome before notifying.
    # NOTE(review): this function never reads the previous status back, so
    # an alert is sent on every run while a service stays down - confirm
    # that is intended.
    snapshot = {
        "last_check": datetime.now().isoformat(),
        "stopped_services": [entry['name'] for entry in down],
    }
    save_status(snapshot)

    down_count = len(down)
    if down_count == 0:
        log("所有服务运行正常 ✓")
    else:
        log(f"发现 {down_count} 个服务停止,发送邮件通知...")
        send_notification(down)

    log("检查完成")
    return down_count


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user