feat: 三方案改进检测准确度 - YOLO优先、参数调整、连续性判断

2026-04-16 22:35:51 +08:00
parent d6693f9fd0
commit 7875cca69e
6 changed files with 272 additions and 105 deletions
--- a/config.py
+++ b/config.py
@@ -29,9 +29,14 @@ DEFAULT_CONFIG = {
    "display_limit": 20,    # 显示最近多少条
    # 检测算法开关
-    "use_haar_cascade": True,       # Haar Cascade 人体检测
+    "use_yolo": True,              # YOLO 检测（最准确）
-    "use_mediapipe_face": True,      # MediaPipe 人脸检测
+    "use_haar_cascade": False,     # Haar Cascade 人体检测（备用）
-    "use_face_recognition": True,    # face_recognition 人脸识别
+    "use_mediapipe_face": True,    # MediaPipe 人脸检测
    "use_face_recognition": True,  # face_recognition 人脸识别
    # 连续性判断配置
    "confirm_frames": 3,           # 连续几帧确认
    "min_detection_confidence": 0.3,  # 检测置信度阈值
    # AI大模型分析开关
    "use_vision_api": False,         # 是否使用大模型分析（默认关闭）
--- a/local_analyzer.py
+++ b/local_analyzer.py
@@ -134,55 +134,70 @@ class LocalAnalyzer:
            # 方法1：MediaPipe 人脸检测 + 人员识别（优先）
            if HAS_PERSON_MANAGER and use_mediapipe:
-                print(f"[LocalAnalyzer] Using MediaPipe face detection...")
+                print(f"[LocalAnalyzer] Using PersonManager for detection...")
                person_result = person_manager.analyze_image(image_path, save_new_person=True)
                metrics['person_count'] = person_result['total_count']
                metrics['new_persons'] = person_result['new_count']
                metrics['known_persons'] = person_result['known_count']
                metrics['detection_source'] = person_result.get('detection_source', 'unknown')
                prev_person_count = self.prev_human_count
-                person_count_change = person_result['total_count'] - prev_person_count
+                current_count = person_result['current_count']
-                metrics['person_count_change'] = person_count_change
+                person_count_change = current_count - prev_person_count
-                # 记录人员事件
+                # 只有确认的变化才记录
-                for person in person_result['persons']:
+                if person_result['confirmed_change']:
-                    if person['is_new']:
+                    metrics['person_count_change'] = person_count_change
                    # 记录人员事件
                    for person in person_result['persons']:
                        if person['is_new']:
                            events.append({
                                'event_type': '人物活动',
                                'description': f'新人出现: {person["name"]}，当前共 {current_count} 人',
                                'confidence': '高',
                                'source': 'local'
                            })
                            self.human_count += 1
                            self.person_change_count += 1
                        else:
                            events.append({
                                'event_type': '人物活动',
                                'description': f'已知人员: {person["name"]} [{person.get("source", "detected")}]',
                                'confidence': '高',
                                'source': 'local'
                            })
                    # 检测人员进出
                    if person_count_change > 0:
                        events.append({
-                            'event_type': '人物活动',
+                            'event_type': '人员进出',
-                            'description': f'新人出现: {person["name"]}，当前共 {person_result["total_count"]} 人',
+                            'description': f'检测到 {person_count_change} 人进入，当前共 {current_count} 人 [{person_result.get("detection_source", "")}]',
                            'confidence': '高',
                            'source': 'local'
                        })
                        self.human_count += 1
                        self.person_change_count += 1
-                    else:
+                    elif person_count_change < 0:
                        events.append({
-                            'event_type': '人物活动',
+                            'event_type': '人员进出',
-                            'description': f'已知人员: {person["name"]}',
+                            'description': f'检测到 {abs(person_count_change)} 人离开，当前剩 {current_count} 人',
                            'confidence': '高',
                            'source': 'local'
                        })
-                
+                        self.person_change_count += 1
-                # 检测人员进出
+                    
-                if person_count_change > 0:
+                    self.prev_human_count = current_count
-                    events.append({
+                else:
-                        'event_type': '人员进出',
+                    # 没有确认的变化，只记录当前状态
-                        'description': f'检测到 {person_count_change} 人进入，当前共 {person_result["total_count"]} 人',
+                    metrics['person_count_change'] = 0
-                        'confidence': '高',
+                    if current_count > 0:
-                        'source': 'local'
+                        events.append({
-                    })
+                            'event_type': '人物活动',
-                    self.person_change_count += 1
+                            'description': f'检测到 {current_count} 人（状态稳定）',
-                elif person_count_change < 0:
+                            'confidence': '低',
-                    events.append({
+                            'source': 'local'
-                        'event_type': '人员进出',
+                        })
                        'description': f'检测到 {abs(person_count_change)} 人离开，当前剩 {person_result["total_count"]} 人',
                        'confidence': '高',
                        'source': 'local'
                    })
                    self.person_change_count += 1
                self.prev_human_count = person_result['total_count']
            # 方法2：Haar Cascade 人体检测（备用或并行）
            if use_haar and self.human_cascade is not None:
--- a/person_manager.py
+++ b/person_manager.py
@@ -42,12 +42,20 @@ class PersonManager:
        # 加载人员库
        self.persons = self._load_persons_db()
-        # 检测器状态
+        # 初始化检测器状态
        self.face_detector = None
        self.mp_face_detection = None
        self.cv_face_detector = None
        self.has_mediapipe = HAS_MEDIAPIPE
        # 从配置读取参数
        try:
            from config import config_mgr
            self.config['mediapipe_min_confidence'] = config_mgr.get('min_detection_confidence', 0.3)
            self.config['confirm_frames'] = config_mgr.get('confirm_frames', 3)
        except:
            pass
        # 初始化检测器
        self._init_detectors()
@@ -56,8 +64,23 @@ class PersonManager:
            'face_match_threshold': 0.6,    # 人脸匹配阈值
            'unknown_person_id': 'unknown', # 未知人员ID
            'max_persons': 100,             # 最大人员数量
            # 方案1: 参数调整
            'mediapipe_min_confidence': 0.3,  # 降低阈值，更容易检测
            'mediapipe_model_selection': 1,   # 1: 远距离模型
            'haar_scale_factor': 1.05,        # Haar更细粒度
            'haar_min_neighbors': 2,          # 降低邻居要求
            # 方案2: 连续性判断
            'confirm_frames': 3,             # 连续几帧确认
            'leave_frames': 2,               # 连续几帧消失才算离开
        }
        # 方案2: 追踪状态（连续判断）
        self.tracked_persons = {}  # {person_id: {'frames': count, 'confirmed': bool}}
        self.prev_persons = []     # 前一帧检测到的人
        self.confirmation_buffer = {}  # 确认缓冲区
        # 统计
        self.total_detections = 0
        self.known_persons_detected = 0
@@ -80,17 +103,16 @@ class PersonManager:
    def _init_detectors(self):
        """初始化检测器"""
-        # MediaPipe 人脸检测
+        # MediaPipe 人脸检测（方案1: 参数调整）
        if self.has_mediapipe:
            try:
                # 使用更安全的导入方式
                mp_face_detection = mp.solutions.face_detection
                self.face_detector = mp_face_detection.FaceDetection(
-                    model_selection=0,  # 0: 短距离，1: 远距离
+                    model_selection=self.config['mediapipe_model_selection'],  # 远距离模型
-                    min_detection_confidence=0.5
+                    min_detection_confidence=self.config['mediapipe_min_confidence']  # 降低阈值
                )
                self.mp_face_detection = mp_face_detection
-                print("[PersonManager] MediaPipe face detector initialized")
+                print(f"[PersonManager] MediaPipe initialized (model={self.config['mediapipe_model_selection']}, conf={self.config['mediapipe_min_confidence']})")
            except Exception as e:
                print(f"[PersonManager] MediaPipe init failed: {e}")
                self.face_detector = None
@@ -101,13 +123,25 @@ class PersonManager:
            model_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            if Path(model_path).exists():
                self.cv_face_detector = cv2.CascadeClassifier(model_path)
-                print("[PersonManager] OpenCV face detector initialized (backup)")
+                print("[PersonManager] OpenCV Haar Cascade initialized (backup)")
        except Exception as e:
            self.cv_face_detector = None
-            print(f"[PersonManager] OpenCV face detector init failed: {e}")
+            print(f"[PersonManager] OpenCV detector init failed: {e}")
        # 方案3: YOLO 检测（更准确）
        self.yolo_detector = None
        try:
            from ultralytics import YOLO
            # 使用轻量级 nano 模型
            self.yolo_detector = YOLO('yolov8n.pt')  # nano 模型，快速
            print("[PersonManager] YOLOv8nano initialized (most accurate)")
        except ImportError:
            print("[PersonManager] YOLO not installed. Install with: pip install ultralytics")
        except Exception as e:
            print(f"[PersonManager] YOLO init failed: {e}")
    def detect_faces(self, image):
-        """检测人脸
+        """检测人脸（优先使用 YOLO，其次 MediaPipe，最后 Haar）
        Args:
            image: 图片（numpy array 或路径）
@@ -123,7 +157,31 @@ class PersonManager:
        faces = []
-        # MediaPipe 检测
+        # 方案3: YOLO 检测（优先，最准确）
        if self.yolo_detector is not None:
            try:
                results = self.yolo_detector(image, classes=[0], verbose=False)  # class 0 = person
                for r in results:
                    for box in r.boxes:
                        x1, y1, x2, y2 = box.xyxy[0].tolist()
                        conf = box.conf[0].item()
                        # 转换为 [x, y, w, h] 格式
                        faces.append({
                            'bbox': [int(x1), int(y1), int(x2-x1), int(y2-y1)],
                            'confidence': conf,
                            'source': 'yolo'
                        })
                if faces:
                    print(f"[PersonManager] YOLO detected {len(faces)} persons")
                    return faces  # YOLO 检测成功，直接返回
            except Exception as e:
                print(f"[PersonManager] YOLO detection failed: {e}")
        # 方案1+2: MediaPipe 检测
        if self.has_mediapipe and self.face_detector is not None:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = self.face_detector.process(rgb_image)
@@ -143,14 +201,17 @@ class PersonManager:
                        'confidence': detection.score[0],
                        'source': 'mediapipe'
                    })
                if faces:
                    return faces
-        # OpenCV 检测（备用）
+        # 备用: OpenCV Haar 检测
-        elif self.cv_face_detector is not None:
+        if self.cv_face_detector is not None:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            detections = self.cv_face_detector.detectMultiScale(
                gray,
-                scaleFactor=1.1,
+                scaleFactor=self.config['haar_scale_factor'],
-                minNeighbors=5,
+                minNeighbors=self.config['haar_min_neighbors'],
                minSize=(30, 30)
            )
@@ -351,7 +412,7 @@ class PersonManager:
            self._save_persons_db()
    def analyze_image(self, image_path, save_new_person=True):
-        """分析图片中的人员
+        """分析图片中的人员（带连续性判断）
        Args:
            image_path: 图片路径
@@ -363,6 +424,7 @@ class PersonManager:
                'persons': list,         # 识别的人员
                'new_count': int,        # 新人员数量
                'known_count': int,      # 已知人员数量
                'confirmed_change': bool, # 是否有确认的人员变化
            }
        """
        image = cv2.imread(image_path)
@@ -373,61 +435,117 @@ class PersonManager:
        # 检测人脸
        faces = self.detect_faces(image)
        current_count = len(faces)
-        persons = []
+        # 方案2: 连续性判断
-        new_count = 0
+        confirmed_change = False
-        known_count = 0
+        confirmed_persons = []
-        for face in faces:
+        # 检查人数变化
-            bbox = face['bbox']
+        prev_count = len(self.prev_persons)
        if current_count != prev_count:
            # 人数变化，记录到缓冲区
            key = f"count_{current_count}"
            if key not in self.confirmation_buffer:
                self.confirmation_buffer[key] = {'count': 0, 'persons': []}
-            # 提取特征
+            self.confirmation_buffer[key]['count'] += 1
            encoding = self.extract_face_encoding(image, bbox)
-            # 匹配
+            # 临时识别人员
-            match_result = self.match_face(encoding)
+            temp_persons = []
-            
+            for face in faces:
-            if match_result['is_new']:
+                bbox = face['bbox']
-                # 新人员
+                encoding = self.extract_face_encoding(image, bbox)
-                new_count += 1
+                match_result = self.match_face(encoding)
-                if save_new_person and len(self.persons) < self.config['max_persons']:
+                person_info = {
-                    new_person = self.add_new_person(image, bbox)
+                    'person_id': match_result['person_id'] if not match_result['is_new'] else 'unknown',
                    if new_person:
                        persons.append({
                            'person_id': new_person['person_id'],
                            'name': new_person['name'],
                            'bbox': bbox,
                            'is_new': True,
                            'confidence': face['confidence']
                        })
                else:
                    persons.append({
                        'person_id': 'unknown',
                        'name': 'Unknown (new)',
                        'bbox': bbox,
                        'is_new': True,
                        'confidence': face['confidence']
                    })
            else:
                # 已知人员
                known_count += 1
                self.update_person_visit(match_result['person_id'])
                persons.append({
                    'person_id': match_result['person_id'],
                    'name': match_result['name'],
                    'bbox': bbox,
-                    'is_new': False,
+                    'is_new': match_result['is_new'],
-                    'confidence': match_result['confidence']
+                    'confidence': face['confidence'],
-                })
+                    'source': face['source']
                }
                temp_persons.append(person_info)
            self.confirmation_buffer[key]['persons'] = temp_persons
            # 达到确认帧数
            if self.confirmation_buffer[key]['count'] >= self.config['confirm_frames']:
                confirmed_change = True
                confirmed_persons = temp_persons
                print(f"[PersonManager] Confirmed: {prev_count} -> {current_count} persons (after {self.config['confirm_frames']} frames)")
                # 清空其他缓冲区
                self.confirmation_buffer = {}
                # 更新前一帧状态
                self.prev_persons = temp_persons
        else:
            # 人数不变，清空变化缓冲区，维持当前状态
            if current_count > 0:
                # 识别当前人员
                temp_persons = []
                for face in faces:
                    bbox = face['bbox']
                    encoding = self.extract_face_encoding(image, bbox)
                    match_result = self.match_face(encoding)
                    person_info = {
                        'person_id': match_result['person_id'] if not match_result['is_new'] else 'unknown',
                        'name': match_result['name'],
                        'bbox': bbox,
                        'is_new': match_result['is_new'],
                        'confidence': face['confidence'],
                        'source': face['source']
                    }
                    temp_persons.append(person_info)
                confirmed_persons = temp_persons
                self.prev_persons = temp_persons
            # 清空变化缓冲区
            keys_to_remove = [k for k in self.confirmation_buffer.keys() if not k.endswith(f"_{current_count}")]
            for k in keys_to_remove:
                del self.confirmation_buffer[k]
        # 统计新人和已知人员
        new_count = 0
        known_count = 0
        persons_to_save = []
        for person in confirmed_persons:
            if person['is_new']:
                new_count += 1
                # 只有确认后才保存新人
                if confirmed_change and save_new_person and len(self.persons) < self.config['max_persons']:
                    # 找到对应的 face bbox
                    for face in faces:
                        if face['bbox'] == person['bbox']:
                            new_person = self.add_new_person(image, face['bbox'])
                            if new_person:
                                person['person_id'] = new_person['person_id']
                                person['name'] = new_person['name']
                                persons_to_save.append(person)
                            break
            else:
                known_count += 1
                self.update_person_visit(person['person_id'])
                persons_to_save.append(person)
        return {
            'faces': faces,
-            'persons': persons,
+            'persons': persons_to_save,
            'new_count': new_count,
            'known_count': known_count,
-            'total_count': len(persons)
+            'total_count': len(persons_to_save),
            'confirmed_change': confirmed_change,
            'current_count': current_count,
            'prev_count': prev_count,
            'detection_source': faces[0]['source'] if faces else 'none'
        }
    def get_persons_list(self):
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,6 @@ uvicorn>=0.23.0
 requests>=2.31.0
 numpy>=1.20.0
-# Optional: More accurate face detection and recognition
+# Optional: More accurate detection
-# mediapipe>=0.10.0
+ultralytics>=8.0.0
-# face-recognition>=1.7.0 (requires dlib, may need manual install on Windows)
+mediapipe>=0.10.0
--- a/web/static/app.js
+++ b/web/static/app.js
@@ -507,10 +507,15 @@ function loadSettingsForm() {
            document.getElementById('setting-refresh-interval').value = config.refresh_interval || 5;
            // Detection algorithm settings
-            document.getElementById('setting-use-haar').checked = config.use_haar_cascade !== false;
+            document.getElementById('setting-use-yolo').checked = config.use_yolo !== false;
            document.getElementById('setting-use-mediapipe').checked = config.use_mediapipe_face !== false;
            document.getElementById('setting-use-haar').checked = config.use_haar_cascade === true;
            document.getElementById('setting-use-face-rec').checked = config.use_face_recognition !== false;
            // Confirmation settings
            document.getElementById('setting-confirm-frames').value = config.confirm_frames || 3;
            document.getElementById('setting-min-confidence').value = config.min_detection_confidence || 0.3;
            // Vision API settings
            document.getElementById('setting-use-vision-api').checked = config.use_vision_api === true;
            document.getElementById('setting-vision-trigger').value = config.vision_api_trigger || 'person_change';
@@ -533,10 +538,15 @@ function saveSettings() {
        refresh_interval: parseInt(document.getElementById('setting-refresh-interval').value),
        // Detection algorithms
        use_yolo: document.getElementById('setting-use-yolo').checked,
        use_haar_cascade: document.getElementById('setting-use-haar').checked,
        use_mediapipe_face: document.getElementById('setting-use-mediapipe').checked,
        use_face_recognition: document.getElementById('setting-use-face-rec').checked,
        // Confirmation settings
        confirm_frames: parseInt(document.getElementById('setting-confirm-frames').value),
        min_detection_confidence: parseFloat(document.getElementById('setting-min-confidence').value),
        // Vision API
        use_vision_api: document.getElementById('setting-use-vision-api').checked,
        vision_api_trigger: document.getElementById('setting-vision-trigger').value,
--- a/web/static/index.html
+++ b/web/static/index.html
@@ -138,26 +138,45 @@
                </div>
                <div class="settings-section">
-                    <h4>检测算法设置</h4>
+                    <h4>Detection Algorithms</h4>
                    <div class="setting-item">
-                        <label>Haar Cascade 人体检测:</label>
+                        <label>YOLO (Most Accurate):</label>
-                        <input type="checkbox" id="setting-use-haar" checked>
+                        <input type="checkbox" id="setting-use-yolo" checked>
-                        <span class="setting-desc">传统人体检测（备用）</span>
+                        <span class="setting-desc">YOLOv8 nano - Best accuracy</span>
                    </div>
                    <div class="setting-item">
-                        <label>MediaPipe 人脸检测:</label>
+                        <label>MediaPipe Face:</label>
                        <input type="checkbox" id="setting-use-mediapipe" checked>
-                        <span class="setting-desc">高精度人脸检测</span>
+                        <span class="setting-desc">High precision face detection</span>
                    </div>
                    <div class="setting-item">
                        <label>Haar Cascade Body:</label>
                        <input type="checkbox" id="setting-use-haar">
                        <span class="setting-desc">Traditional body detection (backup)</span>
                    </div>
                    <div class="setting-item">
                        <label>Face Recognition:</label>
                        <input type="checkbox" id="setting-use-face-rec" checked>
-                        <span class="setting-desc">人脸识别（识别同一人）</span>
+                        <span class="setting-desc">Identify same person</span>
                    </div>
                </div>
                <div class="settings-section">
-                    <h4>AI大模型分析</h4>
+                    <h4>Confirmation Settings</h4>
                    <div class="setting-item">
                        <label>Confirm Frames:</label>
                        <input type="number" id="setting-confirm-frames" value="3" min="1" max="10">
                        <span class="setting-desc">Frames to confirm detection</span>
                    </div>
                    <div class="setting-item">
                        <label>Min Confidence:</label>
                        <input type="number" id="setting-min-confidence" value="0.3" min="0.1" max="1" step="0.1">
                        <span class="setting-desc">Detection confidence threshold</span>
                    </div>
                </div>
                <div class="settings-section">
                    <h4>AI Analysis</h4>
                    <div class="setting-item">
                        <label>启用大模型分析:</label>
                        <input type="checkbox" id="setting-use-vision-api">