feat: 三方案改进检测准确度 - YOLO优先、参数调整、连续性判断

2026-04-16 22:35:51 +08:00
parent d6693f9fd0
commit 7875cca69e
6 changed files with 272 additions and 105 deletions
--- a/person_manager.py
+++ b/person_manager.py
@@ -42,12 +42,20 @@ class PersonManager:
        # 加载人员库
        self.persons = self._load_persons_db()
        
-        # 检测器状态
+        # 初始化检测器状态
        self.face_detector = None
        self.mp_face_detection = None
        self.cv_face_detector = None
        self.has_mediapipe = HAS_MEDIAPIPE
        
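+        # Override detector parameters from config_mgr. NOTE(review): self.config appears to be assigned later in __init__ - confirm these writes do not raise and get swallowed by the bare except below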
+        try:
+            from config import config_mgr
+            self.config['mediapipe_min_confidence'] = config_mgr.get('min_detection_confidence', 0.3)
+            self.config['confirm_frames'] = config_mgr.get('confirm_frames', 3)
+        except:
+            pass
+        
        # 初始化检测器
        self._init_detectors()
        
@@ -56,8 +64,23 @@ class PersonManager:
            'face_match_threshold': 0.6,    # 人脸匹配阈值
            'unknown_person_id': 'unknown', # 未知人员ID
            'max_persons': 100,             # 最大人员数量
+            
+            # 方案1: 参数调整
+            'mediapipe_min_confidence': 0.3,  # 降低阈值，更容易检测
+            'mediapipe_model_selection': 1,   # 1: 远距离模型
+            'haar_scale_factor': 1.05,        # Haar更细粒度
+            'haar_min_neighbors': 2,          # 降低邻居要求
+            
+            # 方案2: 连续性判断
+            'confirm_frames': 3,             # 连续几帧确认
+            'leave_frames': 2,               # 连续几帧消失才算离开
        }
        
+        # 方案2: 追踪状态（连续判断）
+        self.tracked_persons = {}  # {person_id: {'frames': count, 'confirmed': bool}}
+        self.prev_persons = []     # 前一帧检测到的人
+        self.confirmation_buffer = {}  # 确认缓冲区
+        
        # 统计
        self.total_detections = 0
        self.known_persons_detected = 0
@@ -80,17 +103,16 @@ class PersonManager:
    
    def _init_detectors(self):
        """初始化检测器"""
-        # MediaPipe 人脸检测
+        # MediaPipe 人脸检测（方案1: 参数调整）
        if self.has_mediapipe:
            try:
-                # 使用更安全的导入方式
                mp_face_detection = mp.solutions.face_detection
                self.face_detector = mp_face_detection.FaceDetection(
-                    model_selection=0,  # 0: 短距离，1: 远距离
-                    min_detection_confidence=0.5
+                    model_selection=self.config['mediapipe_model_selection'],  # 远距离模型
+                    min_detection_confidence=self.config['mediapipe_min_confidence']  # 降低阈值
                )
                self.mp_face_detection = mp_face_detection
-                print("[PersonManager] MediaPipe face detector initialized")
+                print(f"[PersonManager] MediaPipe initialized (model={self.config['mediapipe_model_selection']}, conf={self.config['mediapipe_min_confidence']})")
            except Exception as e:
                print(f"[PersonManager] MediaPipe init failed: {e}")
                self.face_detector = None
@@ -101,13 +123,25 @@ class PersonManager:
            model_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            if Path(model_path).exists():
                self.cv_face_detector = cv2.CascadeClassifier(model_path)
-                print("[PersonManager] OpenCV face detector initialized (backup)")
+                print("[PersonManager] OpenCV Haar Cascade initialized (backup)")
        except Exception as e:
            self.cv_face_detector = None
-            print(f"[PersonManager] OpenCV face detector init failed: {e}")
+            print(f"[PersonManager] OpenCV detector init failed: {e}")
+        
+        # 方案3: YOLO 检测（更准确）
+        self.yolo_detector = None
+        try:
+            from ultralytics import YOLO
+            # 使用轻量级 nano 模型
+            self.yolo_detector = YOLO('yolov8n.pt')  # nano 模型，快速
+            print("[PersonManager] YOLOv8nano initialized (most accurate)")
+        except ImportError:
+            print("[PersonManager] YOLO not installed. Install with: pip install ultralytics")
+        except Exception as e:
+            print(f"[PersonManager] YOLO init failed: {e}")
    
    def detect_faces(self, image):
-        """检测人脸
+        """检测人脸（优先使用 YOLO，其次 MediaPipe，最后 Haar）
        
        Args:
            image: 图片（numpy array 或路径）
@@ -123,7 +157,31 @@ class PersonManager:
        
        faces = []
        
-        # MediaPipe 检测
+        # 方案3: YOLO 检测（优先，最准确）
+        if self.yolo_detector is not None:
+            try:
+                results = self.yolo_detector(image, classes=[0], verbose=False)  # class 0 = person
+                
+                for r in results:
+                    for box in r.boxes:
+                        x1, y1, x2, y2 = box.xyxy[0].tolist()
+                        conf = box.conf[0].item()
+                        
+                        # 转换为 [x, y, w, h] 格式
+                        faces.append({
+                            'bbox': [int(x1), int(y1), int(x2-x1), int(y2-y1)],
+                            'confidence': conf,
+                            'source': 'yolo'
+                        })
+                
+                if faces:
+                    print(f"[PersonManager] YOLO detected {len(faces)} persons")
+                    return faces  # YOLO 检测成功，直接返回
+                    
+            except Exception as e:
+                print(f"[PersonManager] YOLO detection failed: {e}")
+        
+        # Scheme 1: MediaPipe detection (fallback when YOLO is unavailable, raises, or finds nothing)
        if self.has_mediapipe and self.face_detector is not None:
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = self.face_detector.process(rgb_image)
@@ -143,14 +201,17 @@ class PersonManager:
                        'confidence': detection.score[0],
                        'source': 'mediapipe'
                    })
+                
+                if faces:
+                    return faces
        
-        # OpenCV 检测（备用）
-        elif self.cv_face_detector is not None:
+        # 备用: OpenCV Haar 检测
+        if self.cv_face_detector is not None:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            detections = self.cv_face_detector.detectMultiScale(
                gray,
-                scaleFactor=1.1,
-                minNeighbors=5,
+                scaleFactor=self.config['haar_scale_factor'],
+                minNeighbors=self.config['haar_min_neighbors'],
                minSize=(30, 30)
            )
            
@@ -351,7 +412,7 @@ class PersonManager:
            self._save_persons_db()
    
    def analyze_image(self, image_path, save_new_person=True):
-        """分析图片中的人员
+        """分析图片中的人员（带连续性判断）
        
        Args:
            image_path: 图片路径
@@ -363,6 +424,7 @@ class PersonManager:
                'persons': list,         # 识别的人员
                'new_count': int,        # 新人员数量
                'known_count': int,      # 已知人员数量
+                'confirmed_change': bool, # 是否有确认的人员变化
            }
        """
        image = cv2.imread(image_path)
@@ -373,61 +435,117 @@ class PersonManager:
        
        # 检测人脸
        faces = self.detect_faces(image)
+        current_count = len(faces)
        
-        persons = []
-        new_count = 0
-        known_count = 0
+        # 方案2: 连续性判断
+        confirmed_change = False
+        confirmed_persons = []
        
-        for face in faces:
-            bbox = face['bbox']
+        # 检查人数变化
+        prev_count = len(self.prev_persons)
+        
+        if current_count != prev_count:
+            # 人数变化，记录到缓冲区
+            key = f"count_{current_count}"
+            if key not in self.confirmation_buffer:
+                self.confirmation_buffer[key] = {'count': 0, 'persons': []}
            
-            # 提取特征
-            encoding = self.extract_face_encoding(image, bbox)
+            self.confirmation_buffer[key]['count'] += 1
            
-            # 匹配
-            match_result = self.match_face(encoding)
-            
-            if match_result['is_new']:
-                # 新人员
-                new_count += 1
+            # 临时识别人员
+            temp_persons = []
+            for face in faces:
+                bbox = face['bbox']
+                encoding = self.extract_face_encoding(image, bbox)
+                match_result = self.match_face(encoding)
                
-                if save_new_person and len(self.persons) < self.config['max_persons']:
-                    new_person = self.add_new_person(image, bbox)
-                    if new_person:
-                        persons.append({
-                            'person_id': new_person['person_id'],
-                            'name': new_person['name'],
-                            'bbox': bbox,
-                            'is_new': True,
-                            'confidence': face['confidence']
-                        })
-                else:
-                    persons.append({
-                        'person_id': 'unknown',
-                        'name': 'Unknown (new)',
-                        'bbox': bbox,
-                        'is_new': True,
-                        'confidence': face['confidence']
-                    })
-            else:
-                # 已知人员
-                known_count += 1
-                self.update_person_visit(match_result['person_id'])
-                
-                persons.append({
-                    'person_id': match_result['person_id'],
+                person_info = {
+                    'person_id': match_result['person_id'] if not match_result['is_new'] else 'unknown',
                    'name': match_result['name'],
                    'bbox': bbox,
-                    'is_new': False,
-                    'confidence': match_result['confidence']
-                })
+                    'is_new': match_result['is_new'],
+                    'confidence': face['confidence'],
+                    'source': face['source']
+                }
+                temp_persons.append(person_info)
+            
+            self.confirmation_buffer[key]['persons'] = temp_persons
+            
+            # 达到确认帧数
+            if self.confirmation_buffer[key]['count'] >= self.config['confirm_frames']:
+                confirmed_change = True
+                confirmed_persons = temp_persons
+                
+                print(f"[PersonManager] Confirmed: {prev_count} -> {current_count} persons (after {self.config['confirm_frames']} frames)")
+                
+                # Change confirmed: reset the entire confirmation buffer, including the entry just confirmed
+                self.confirmation_buffer = {}
+                
+                # 更新前一帧状态
+                self.prev_persons = temp_persons
+        
+        else:
+            # 人数不变，清空变化缓冲区，维持当前状态
+            if current_count > 0:
+                # 识别当前人员
+                temp_persons = []
+                for face in faces:
+                    bbox = face['bbox']
+                    encoding = self.extract_face_encoding(image, bbox)
+                    match_result = self.match_face(encoding)
+                    
+                    person_info = {
+                        'person_id': match_result['person_id'] if not match_result['is_new'] else 'unknown',
+                        'name': match_result['name'],
+                        'bbox': bbox,
+                        'is_new': match_result['is_new'],
+                        'confidence': face['confidence'],
+                        'source': face['source']
+                    }
+                    temp_persons.append(person_info)
+                
+                confirmed_persons = temp_persons
+                self.prev_persons = temp_persons
+            
+            # 清空变化缓冲区
+            keys_to_remove = [k for k in self.confirmation_buffer.keys() if not k.endswith(f"_{current_count}")]
+            for k in keys_to_remove:
+                del self.confirmation_buffer[k]
+        
+        # 统计新人和已知人员
+        new_count = 0
+        known_count = 0
+        persons_to_save = []
+        
+        for person in confirmed_persons:
+            if person['is_new']:
+                new_count += 1
+                # 只有确认后才保存新人
+                if confirmed_change and save_new_person and len(self.persons) < self.config['max_persons']:
+                    # 找到对应的 face bbox
+                    for face in faces:
+                        if face['bbox'] == person['bbox']:
+                            new_person = self.add_new_person(image, face['bbox'])
+                            if new_person:
+                                person['person_id'] = new_person['person_id']
+                                person['name'] = new_person['name']
+                                persons_to_save.append(person)
+                            break
+            else:
+                known_count += 1
+                self.update_person_visit(person['person_id'])
+                persons_to_save.append(person)
        
        return {
            'faces': faces,
-            'persons': persons,
+            'persons': persons_to_save,
            'new_count': new_count,
            'known_count': known_count,
-            'total_count': len(persons)
+            'total_count': len(persons_to_save),
+            'confirmed_change': confirmed_change,
+            'current_count': current_count,
+            'prev_count': prev_count,
+            'detection_source': faces[0]['source'] if faces else 'none'
        }
    
    def get_persons_list(self):