关于采摘机器人小车,博主选用的是英特尔公司的 Intel RealSense D435 摄像头

安装SDK与ROS Wrapper

由于Intel官方有GitHub地址, 可以根据官方文档下载, 先安装SDK后安装ROS Wrapper

Intel® RealSense™

创建ros功能包

创建c++类型的功能包

1
2
cd ros2_ws/src
ros2 pkg create --build-type ament_cmake orange_finding

创建好后会看到如下四个文件,每个文件的功能在实践(二)中讲过了

2

基础的功能包写好后, 还需要自己构建三个文件, 以下逐一讲解

3

新建config

这是功能包的外部配置文件, 目前暂时存储一组相机在世界坐标系中的位置和姿态参数, 后续会写成一个接口动态传输这些数据

新建launch

这是最关键的启动文件入口位置, 在src中写的脚本都要在launch中执行, 新建一个orange_detector_launch.py

1
2
3
4
5
6
7
8
9
10
11
12
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    """Build the launch description for the orange detector.

    Starts the `orange_detector_node.py` executable from the
    `orange_finding` package with its output echoed to the screen.
    """
    return LaunchDescription([
        Node(
            package='orange_finding',
            executable='orange_detector_node.py',
            name='orange_detector_node',
            output='screen'
        )
    ])

新建models

用来存放训练好的yolov11模型, 后续应考虑轻量化问题
4

编写相机识别橘子脚本

经过不断调试, 最终给出一个较为不错的脚本, 可以做到

  • 实时检测图像中的橘子
  • 计算橘子的3D位置(相机坐标系和世界坐标系)
  • 显示深度信息和置信度
  • 支持相机坐标系到世界坐标系的坐标转换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
from ultralytics import YOLO
import pyrealsense2 as rs
import json
from ament_index_python.packages import get_package_share_directory
import time

# Configuration parameters
RESOLUTION = (1280, 720)  # color stream resolution (width, height) in pixels
FPS = 15  # target frame rate
CONF_THRESH = 0.5  # YOLO detection confidence threshold

# Camera position relative to the world frame (values read from the Teach UI)
# X: 339.925, Y: 34.500, Z: 478.035
CAMERA_POSITION = np.array([339.925, 34.500, 478.035]) # camera position in the world frame (default)

class OrangeDetectorNode(Node):
    """ROS2 node that detects oranges in a RealSense color/depth stream.

    Subscribes to the color image, the depth image aligned to color, and the
    color camera info. Each color frame is run through a YOLO model; for every
    detection above CONF_THRESH the node reads the depth at the box center,
    deprojects it to a 3D point in the camera frame, transforms that point to
    the world frame via a configurable extrinsic matrix, and draws the results
    in an OpenCV window ('q'/Esc quits, 'd' toggles the depth view).
    """

    def __init__(self):
        super().__init__('orange_detector_node')
        self.bridge = CvBridge()
        # Separate topics feed color, depth and intrinsics; detection only
        # runs once both depth and intrinsics have arrived at least once.
        self.color_sub = self.create_subscription(Image, '/camera/camera/color/image_raw', self.color_callback, 10)
        self.depth_sub = self.create_subscription(Image, '/camera/camera/aligned_depth_to_color/image_raw', self.depth_callback, 10)
        self.info_sub = self.create_subscription(CameraInfo, '/camera/camera/color/camera_info', self.info_callback, 10)
        share_dir = get_package_share_directory('orange_finding')
        model_path = share_dir + '/models/best.pt'
        self.model = YOLO(model_path)
        self.depth_image = None       # latest depth frame (16UC1, millimeters)
        self.intrinsics = None        # pyrealsense2 intrinsics, built from CameraInfo
        self.show_depth = True        # whether the colorized depth window is shown
        self.frame_count = 0
        self.start_time = time.time()
        self.camera_intrinsics_saved = False

        # Initialize the camera extrinsic matrix (camera frame -> world frame).
        self.setup_camera_extrinsics()

        cv2.namedWindow('Orange Detection', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('Orange Detection', RESOLUTION[0] // 2, RESOLUTION[1] // 2)
        self.get_logger().info('橘子检测节点已启动。按 q 退出,按 d 切换深度视图。')

    def setup_camera_extrinsics(self):
        """Load (or default) the camera extrinsics and build the 4x4 matrix.

        Per formula 3.1: [Xw, Yw, Zw] = R^T * ([Xc, Yc, Zc] - T),
        where T is the camera position in the world frame. The values are read
        from config/camera_extrinsics.json when present; otherwise an identity
        rotation and the hard-coded CAMERA_POSITION are used.
        """
        try:
            share_dir = get_package_share_directory('orange_finding')
            config_path = share_dir + '/config/camera_extrinsics.json'
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Camera position in the world frame.
            camera_pos = config['camera_position']
            self.translation_vector = np.array([camera_pos['x'], camera_pos['y'], camera_pos['z']])

            # 3x3 rotation matrix.
            self.rotation_matrix = np.array(config['rotation_matrix'])

            self.get_logger().info(f'已从配置文件加载相机外参')
        except Exception as e:
            # Best-effort: fall back to defaults if the config is absent/bad.
            self.get_logger().warn(f'从配置文件加载相机外参失败: {e}')
            self.get_logger().info('使用默认相机外参')

            # Default: camera axes assumed aligned with the world axes.
            self.rotation_matrix = np.eye(3)
            self.translation_vector = CAMERA_POSITION

        # Build the 4x4 extrinsic matrix Lw = [R T]
        #                                     [0ᵀ 1]
        self.extrinsic_matrix = np.eye(4)
        self.extrinsic_matrix[:3, :3] = self.rotation_matrix
        self.extrinsic_matrix[:3, 3] = self.translation_vector

        self.get_logger().info(f'相机外参已初始化:')
        self.get_logger().info(f'相机位置: {self.translation_vector}')
        self.get_logger().info(f'旋转矩阵:\n{self.rotation_matrix}')
        self.get_logger().info(f'平移向量: {self.translation_vector}')

    def camera_to_world_coordinates(self, camera_point):
        """Transform a point from the camera frame to the world frame.

        Args:
            camera_point: point [x, y, z] in the camera frame.
        Returns:
            np.ndarray [x, y, z] in the world frame, or None if the input
            does not have exactly three components.
        """
        if len(camera_point) != 3:
            return None

        # Homogeneous form, then apply Lw: [Xw, Yw, Zw, 1] = Lw * [Xc, Yc, Zc, 1]
        camera_homogeneous = np.array([camera_point[0], camera_point[1], camera_point[2], 1])
        world_homogeneous = self.extrinsic_matrix @ camera_homogeneous
        return world_homogeneous[:3]

    def world_to_camera_coordinates(self, world_point):
        """Transform a point from the world frame to the camera frame.

        Args:
            world_point: point [x, y, z] in the world frame.
        Returns:
            np.ndarray [x, y, z] in the camera frame, or None if the input
            does not have exactly three components.
        """
        if len(world_point) != 3:
            return None

        # Homogeneous form, then apply the inverse extrinsic matrix.
        world_homogeneous = np.array([world_point[0], world_point[1], world_point[2], 1])
        extrinsic_inv = np.linalg.inv(self.extrinsic_matrix)
        camera_homogeneous = extrinsic_inv @ world_homogeneous
        return camera_homogeneous[:3]

    def info_callback(self, msg):
        """Build rs.intrinsics from the first CameraInfo and save them once."""
        if self.intrinsics is None:
            self.intrinsics = rs.intrinsics()
            self.intrinsics.width = msg.width
            self.intrinsics.height = msg.height
            self.intrinsics.ppx = msg.k[2]
            self.intrinsics.ppy = msg.k[5]
            self.intrinsics.fx = msg.k[0]
            self.intrinsics.fy = msg.k[4]
            self.intrinsics.model = rs.distortion.inverse_brown_conrady
            # msg.d is an array.array; convert so rs accepts it and json can
            # serialize it below (array.array is not JSON-serializable).
            self.intrinsics.coeffs = list(msg.d)
        if not self.camera_intrinsics_saved:
            camera_params = {'fx': self.intrinsics.fx, 'fy': self.intrinsics.fy, 'ppx': self.intrinsics.ppx, 'ppy': self.intrinsics.ppy, 'width': self.intrinsics.width, 'height': self.intrinsics.height, 'coeffs': list(self.intrinsics.coeffs)}
            with open('camera_intrinsics.json', 'w') as f:
                json.dump(camera_params, f)
            self.get_logger().info('相机内参已保存到 camera_intrinsics.json')
            self.camera_intrinsics_saved = True

    def depth_callback(self, msg):
        """Cache the latest aligned depth frame (16-bit, millimeters)."""
        self.depth_image = self.bridge.imgmsg_to_cv2(msg, '16UC1')

    def color_callback(self, msg):
        """Run detection on a color frame and render the annotated views."""
        # Need both a depth frame and intrinsics before we can deproject.
        if self.depth_image is None or self.intrinsics is None:
            return
        color_image = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(self.depth_image, alpha=0.03), cv2.COLORMAP_JET)
        results = self.model(color_image, conf=CONF_THRESH, verbose=False)
        oranges = []
        depth_h, depth_w = self.depth_image.shape[:2]
        for result in results:
            boxes = result.boxes
            for box in boxes:
                if box.conf.item() < CONF_THRESH:
                    continue
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
                center_x = int((x1 + x2) / 2)
                center_y = int((y1 + y2) / 2)
                # Clamp to the depth image bounds: boxes at the frame edge can
                # put the center one pixel outside the depth array.
                cy = min(max(center_y, 0), depth_h - 1)
                cx = min(max(center_x, 0), depth_w - 1)
                depth = self.depth_image[cy, cx] / 1000.0  # mm -> meters
                camera_point_3d = (0, 0, 0)
                world_point_3d = (0, 0, 0)
                if depth > 0:
                    # 3D point in the camera frame from pixel + depth.
                    camera_point_3d = rs.rs2_deproject_pixel_to_point(self.intrinsics, [center_x, center_y], depth)
                    # Camera frame -> world frame.
                    world_point_3d = self.camera_to_world_coordinates(camera_point_3d)
                class_id = int(box.cls.item())
                class_name = result.names[class_id]
                confidence = box.conf.item()
                oranges.append({
                    'bbox': (x1, y1, x2 - x1, y2 - y1),
                    'center': (center_x, center_y),
                    'depth': depth,
                    'camera_position': camera_point_3d,
                    'world_position': world_point_3d,
                    'class_name': class_name,
                    'confidence': confidence
                })
        processed_image = color_image.copy()
        for orange in oranges:
            x, y, w, h = orange['bbox']
            center_x, center_y = orange['center']
            depth = orange['depth']
            camera_pos_x, camera_pos_y, camera_pos_z = orange['camera_position']
            world_pos_x, world_pos_y, world_pos_z = orange['world_position']
            class_name = orange['class_name']
            confidence = orange['confidence']

            # Draw the bounding box, its center, and cross-hair lines.
            cv2.rectangle(processed_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.circle(processed_image, (center_x, center_y), 5, (0, 0, 255), -1)
            cv2.line(processed_image, (center_x, y), (center_x, y + h), (255, 0, 0), 1)
            cv2.line(processed_image, (x, center_y), (x + w, center_y), (255, 0, 0), 1)

            # Class label and confidence.
            class_text = f'{class_name}: {confidence:.2f}'
            cv2.putText(processed_image, class_text, (x + 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            if depth > 0:
                # Depth readout.
                depth_text = f'Depth: {depth:.2f}m'
                cv2.putText(processed_image, depth_text, (x + 10, y + h + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                # 3D position in the camera frame.
                camera_coord_text = f'Camera: ({camera_pos_x:.2f}, {camera_pos_y:.2f}, {camera_pos_z:.2f})'
                cv2.putText(processed_image, camera_coord_text, (x + 10, y + h + 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

                # 3D position in the world frame.
                world_coord_text = f'World: ({world_pos_x:.2f}, {world_pos_y:.2f}, {world_pos_z:.2f})'
                cv2.putText(processed_image, world_coord_text, (x + 10, y + h + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
            else:
                cv2.putText(processed_image, 'Depth: N/A', (x + 10, y + h + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        self.frame_count += 1
        elapsed_time = time.time() - self.start_time
        # Guard the first frame: elapsed time can be (near) zero.
        fps = self.frame_count / elapsed_time if elapsed_time > 0 else 0.0
        y_offset = 30
        line_height = 30
        cv2.putText(processed_image, f'FPS: {fps:.1f}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        # cv2.putText cannot render CJK glyphs (they show as '?'), so the
        # orange-count label uses ASCII text.
        cv2.putText(processed_image, f'Oranges: {len(oranges)}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Res: {RESOLUTION[0]}x{RESOLUTION[1]}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Model: YOLOv11', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Confidence: {CONF_THRESH}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Camera Pos: ({self.translation_vector[0]:.1f}, {self.translation_vector[1]:.1f}, {self.translation_vector[2]:.1f})', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, 'Coord System: Camera -> World', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        cv2.imshow('Orange Detection', processed_image)
        if self.show_depth:
            cv2.imshow('Depth View', depth_colormap)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            cv2.destroyAllWindows()
            rclpy.shutdown()
        elif key == ord('d'):
            self.show_depth = not self.show_depth
            if not self.show_depth:
                cv2.destroyWindow('Depth View')
            self.get_logger().info('深度视图: {}'.format('开启' if self.show_depth else '关闭'))

def main(args=None):
    """Entry point: spin the orange detector node until shutdown or Ctrl-C."""
    rclpy.init(args=args)
    node = OrangeDetectorNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        cv2.destroyAllWindows()
        node.destroy_node()
        # The node's 'q' key handler may already have shut rclpy down;
        # calling shutdown twice would raise.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    main()

运行项目

先对之前的代码全部编译一次(我已经在系统配置文件中添加了source ~/ros2_ws/install/setup.bash)

1
colcon build

首先启动RealSense相机节点:

1
ros2 launch realsense2_camera rs_launch.py

再启动橘子检测节点

1
ros2 launch orange_finding orange_detector_launch.py

5

效果如下

6