关于采摘机器人小车,博主选用的是英特尔公司的 Intel RealSense D435 摄像头

安装SDK与ROS Wrapper

由于Intel官方有GitHub地址, 可以根据官方文档下载, 先安装SDK后安装ROS Wrapper

Intel® RealSense™

创建ros功能包

创建c++类型的功能包

1
2
cd ros2_ws/src
ros2 pkg create --build-type ament_cmake orange_finding

创建好后会看到如下四个文件,每个文件的功能在实践(二)中讲过了

2

基础的功能包写好后, 还需要自己构建三个文件, 以下逐一讲解

3

新建config

这是功能包的外部配置文件, 目前暂时存储一组相机在世界坐标系中的位置和姿态参数, 后续会写成一个接口动态传输这些数据

新建launch

这是最关键的启动文件入口位置, 在src中写的脚本都要在launch中执行, 新建一个orange_detector_launch.py

1
2
3
4
5
6
7
8
9
10
11
12
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    """Build the launch description for the orange detector.

    Starts the `orange_detector_node.py` executable from the
    `orange_finding` package with its output echoed to the screen.
    """
    return LaunchDescription([
        Node(
            package='orange_finding',
            executable='orange_detector_node.py',
            name='orange_detector_node',
            output='screen'
        )
    ])

新建models

用来存放训练好的yolov11模型, 后续应考虑轻量化问题
4

编写相机识别橘子脚本

经过不断调试, 最终给出一个较为不错的脚本, 可以做到

  • 实时检测图像中的橘子
  • 计算橘子的3D位置(相机坐标系和世界坐标系)
  • 显示深度信息和置信度
  • 支持相机坐标系到世界坐标系的坐标转换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
from ultralytics import YOLO
import pyrealsense2 as rs
import json
from ament_index_python.packages import get_package_share_directory
import time

# Configuration parameters
RESOLUTION = (1280, 720)  # color stream resolution (width, height) in pixels
FPS = 15  # target frame rate
CONF_THRESH = 0.5  # YOLO detection confidence threshold

# Camera position relative to the world frame (values read from the Teach UI)
# X: 339.925, Y: 34.500, Z: 478.035
CAMERA_POSITION = np.array([339.925, 34.500, 478.035]) # camera position in the world frame (default)

class OrangeDetectorNode(Node):
    """ROS2 node that detects oranges in a RealSense color/depth stream.

    Subscribes to the color image, the depth image aligned to color, and the
    color camera info. Each color frame is run through a YOLO model; for every
    detection above CONF_THRESH the node reads the depth at the box center,
    deprojects it to a 3D point in the camera frame, transforms that point to
    the world frame via a configurable extrinsic matrix, and draws the results
    in an OpenCV window ('q'/Esc quits, 'd' toggles the depth view).
    """

    def __init__(self):
        super().__init__('orange_detector_node')
        self.bridge = CvBridge()
        # Separate topics feed color, depth and intrinsics; detection only
        # runs once both depth and intrinsics have arrived at least once.
        self.color_sub = self.create_subscription(Image, '/camera/camera/color/image_raw', self.color_callback, 10)
        self.depth_sub = self.create_subscription(Image, '/camera/camera/aligned_depth_to_color/image_raw', self.depth_callback, 10)
        self.info_sub = self.create_subscription(CameraInfo, '/camera/camera/color/camera_info', self.info_callback, 10)
        share_dir = get_package_share_directory('orange_finding')
        model_path = share_dir + '/models/best.pt'
        self.model = YOLO(model_path)
        self.depth_image = None       # latest depth frame (16UC1, millimeters)
        self.intrinsics = None        # pyrealsense2 intrinsics, built from CameraInfo
        self.show_depth = True        # whether the colorized depth window is shown
        self.frame_count = 0
        self.start_time = time.time()
        self.camera_intrinsics_saved = False

        # Initialize the camera extrinsic matrix (camera frame -> world frame).
        self.setup_camera_extrinsics()

        cv2.namedWindow('Orange Detection', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('Orange Detection', RESOLUTION[0] // 2, RESOLUTION[1] // 2)
        self.get_logger().info('橘子检测节点已启动。按 q 退出,按 d 切换深度视图。')

    def setup_camera_extrinsics(self):
        """Load (or default) the camera extrinsics and build the 4x4 matrix.

        Per formula 3.1: [Xw, Yw, Zw] = R^T * ([Xc, Yc, Zc] - T),
        where T is the camera position in the world frame. The values are read
        from config/camera_extrinsics.json when present; otherwise an identity
        rotation and the hard-coded CAMERA_POSITION are used.
        """
        try:
            share_dir = get_package_share_directory('orange_finding')
            config_path = share_dir + '/config/camera_extrinsics.json'
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Camera position in the world frame.
            camera_pos = config['camera_position']
            self.translation_vector = np.array([camera_pos['x'], camera_pos['y'], camera_pos['z']])

            # 3x3 rotation matrix.
            self.rotation_matrix = np.array(config['rotation_matrix'])

            self.get_logger().info(f'已从配置文件加载相机外参')
        except Exception as e:
            # Best-effort: fall back to defaults if the config is absent/bad.
            self.get_logger().warn(f'从配置文件加载相机外参失败: {e}')
            self.get_logger().info('使用默认相机外参')

            # Default: camera axes assumed aligned with the world axes.
            self.rotation_matrix = np.eye(3)
            self.translation_vector = CAMERA_POSITION

        # Build the 4x4 extrinsic matrix Lw = [R T]
        #                                     [0ᵀ 1]
        self.extrinsic_matrix = np.eye(4)
        self.extrinsic_matrix[:3, :3] = self.rotation_matrix
        self.extrinsic_matrix[:3, 3] = self.translation_vector

        self.get_logger().info(f'相机外参已初始化:')
        self.get_logger().info(f'相机位置: {self.translation_vector}')
        self.get_logger().info(f'旋转矩阵:\n{self.rotation_matrix}')
        self.get_logger().info(f'平移向量: {self.translation_vector}')

    def camera_to_world_coordinates(self, camera_point):
        """Transform a point from the camera frame to the world frame.

        Args:
            camera_point: point [x, y, z] in the camera frame.
        Returns:
            np.ndarray [x, y, z] in the world frame, or None if the input
            does not have exactly three components.
        """
        if len(camera_point) != 3:
            return None

        # Homogeneous form, then apply Lw: [Xw, Yw, Zw, 1] = Lw * [Xc, Yc, Zc, 1]
        camera_homogeneous = np.array([camera_point[0], camera_point[1], camera_point[2], 1])
        world_homogeneous = self.extrinsic_matrix @ camera_homogeneous
        return world_homogeneous[:3]

    def world_to_camera_coordinates(self, world_point):
        """Transform a point from the world frame to the camera frame.

        Args:
            world_point: point [x, y, z] in the world frame.
        Returns:
            np.ndarray [x, y, z] in the camera frame, or None if the input
            does not have exactly three components.
        """
        if len(world_point) != 3:
            return None

        # Homogeneous form, then apply the inverse extrinsic matrix.
        world_homogeneous = np.array([world_point[0], world_point[1], world_point[2], 1])
        extrinsic_inv = np.linalg.inv(self.extrinsic_matrix)
        camera_homogeneous = extrinsic_inv @ world_homogeneous
        return camera_homogeneous[:3]

    def info_callback(self, msg):
        """Build rs.intrinsics from the first CameraInfo and save them once."""
        if self.intrinsics is None:
            self.intrinsics = rs.intrinsics()
            self.intrinsics.width = msg.width
            self.intrinsics.height = msg.height
            self.intrinsics.ppx = msg.k[2]
            self.intrinsics.ppy = msg.k[5]
            self.intrinsics.fx = msg.k[0]
            self.intrinsics.fy = msg.k[4]
            self.intrinsics.model = rs.distortion.inverse_brown_conrady
            # msg.d is an array.array; convert so rs accepts it and json can
            # serialize it below (array.array is not JSON-serializable).
            self.intrinsics.coeffs = list(msg.d)
        if not self.camera_intrinsics_saved:
            camera_params = {'fx': self.intrinsics.fx, 'fy': self.intrinsics.fy, 'ppx': self.intrinsics.ppx, 'ppy': self.intrinsics.ppy, 'width': self.intrinsics.width, 'height': self.intrinsics.height, 'coeffs': list(self.intrinsics.coeffs)}
            with open('camera_intrinsics.json', 'w') as f:
                json.dump(camera_params, f)
            self.get_logger().info('相机内参已保存到 camera_intrinsics.json')
            self.camera_intrinsics_saved = True

    def depth_callback(self, msg):
        """Cache the latest aligned depth frame (16-bit, millimeters)."""
        self.depth_image = self.bridge.imgmsg_to_cv2(msg, '16UC1')

    def color_callback(self, msg):
        """Run detection on a color frame and render the annotated views."""
        # Need both a depth frame and intrinsics before we can deproject.
        if self.depth_image is None or self.intrinsics is None:
            return
        color_image = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(self.depth_image, alpha=0.03), cv2.COLORMAP_JET)
        results = self.model(color_image, conf=CONF_THRESH, verbose=False)
        oranges = []
        depth_h, depth_w = self.depth_image.shape[:2]
        for result in results:
            boxes = result.boxes
            for box in boxes:
                if box.conf.item() < CONF_THRESH:
                    continue
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
                center_x = int((x1 + x2) / 2)
                center_y = int((y1 + y2) / 2)
                # Clamp to the depth image bounds: boxes at the frame edge can
                # put the center one pixel outside the depth array.
                cy = min(max(center_y, 0), depth_h - 1)
                cx = min(max(center_x, 0), depth_w - 1)
                depth = self.depth_image[cy, cx] / 1000.0  # mm -> meters
                camera_point_3d = (0, 0, 0)
                world_point_3d = (0, 0, 0)
                if depth > 0:
                    # 3D point in the camera frame from pixel + depth.
                    camera_point_3d = rs.rs2_deproject_pixel_to_point(self.intrinsics, [center_x, center_y], depth)
                    # Camera frame -> world frame.
                    world_point_3d = self.camera_to_world_coordinates(camera_point_3d)
                class_id = int(box.cls.item())
                class_name = result.names[class_id]
                confidence = box.conf.item()
                oranges.append({
                    'bbox': (x1, y1, x2 - x1, y2 - y1),
                    'center': (center_x, center_y),
                    'depth': depth,
                    'camera_position': camera_point_3d,
                    'world_position': world_point_3d,
                    'class_name': class_name,
                    'confidence': confidence
                })
        processed_image = color_image.copy()
        for orange in oranges:
            x, y, w, h = orange['bbox']
            center_x, center_y = orange['center']
            depth = orange['depth']
            camera_pos_x, camera_pos_y, camera_pos_z = orange['camera_position']
            world_pos_x, world_pos_y, world_pos_z = orange['world_position']
            class_name = orange['class_name']
            confidence = orange['confidence']

            # Draw the bounding box, its center, and cross-hair lines.
            cv2.rectangle(processed_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.circle(processed_image, (center_x, center_y), 5, (0, 0, 255), -1)
            cv2.line(processed_image, (center_x, y), (center_x, y + h), (255, 0, 0), 1)
            cv2.line(processed_image, (x, center_y), (x + w, center_y), (255, 0, 0), 1)

            # Class label and confidence.
            class_text = f'{class_name}: {confidence:.2f}'
            cv2.putText(processed_image, class_text, (x + 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            if depth > 0:
                # Depth readout.
                depth_text = f'Depth: {depth:.2f}m'
                cv2.putText(processed_image, depth_text, (x + 10, y + h + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                # 3D position in the camera frame.
                camera_coord_text = f'Camera: ({camera_pos_x:.2f}, {camera_pos_y:.2f}, {camera_pos_z:.2f})'
                cv2.putText(processed_image, camera_coord_text, (x + 10, y + h + 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

                # 3D position in the world frame.
                world_coord_text = f'World: ({world_pos_x:.2f}, {world_pos_y:.2f}, {world_pos_z:.2f})'
                cv2.putText(processed_image, world_coord_text, (x + 10, y + h + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
            else:
                cv2.putText(processed_image, 'Depth: N/A', (x + 10, y + h + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        self.frame_count += 1
        elapsed_time = time.time() - self.start_time
        # Guard the first frame: elapsed time can be (near) zero.
        fps = self.frame_count / elapsed_time if elapsed_time > 0 else 0.0
        y_offset = 30
        line_height = 30
        cv2.putText(processed_image, f'FPS: {fps:.1f}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        # cv2.putText cannot render CJK glyphs (they show as '?'), so the
        # orange-count label uses ASCII text.
        cv2.putText(processed_image, f'Oranges: {len(oranges)}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Res: {RESOLUTION[0]}x{RESOLUTION[1]}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Model: YOLOv11', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Confidence: {CONF_THRESH}', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, f'Camera Pos: ({self.translation_vector[0]:.1f}, {self.translation_vector[1]:.1f}, {self.translation_vector[2]:.1f})', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        y_offset += line_height
        cv2.putText(processed_image, 'Coord System: Camera -> World', (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        cv2.imshow('Orange Detection', processed_image)
        if self.show_depth:
            cv2.imshow('Depth View', depth_colormap)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            cv2.destroyAllWindows()
            rclpy.shutdown()
        elif key == ord('d'):
            self.show_depth = not self.show_depth
            if not self.show_depth:
                cv2.destroyWindow('Depth View')
            self.get_logger().info('深度视图: {}'.format('开启' if self.show_depth else '关闭'))

def main(args=None):
    """Entry point: spin the orange detector node until shutdown or Ctrl-C."""
    rclpy.init(args=args)
    node = OrangeDetectorNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        cv2.destroyAllWindows()
        node.destroy_node()
        # The node's 'q' key handler may already have shut rclpy down;
        # calling shutdown twice would raise.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    main()

运行项目

先对之前的代码全部编译一次(我已经在系统配置文件中添加了source ~/ros2_ws/install/setup.bash)

1
colcon build

首先启动RealSense相机节点:

1
ros2 launch realsense2_camera rs_launch.py

再启动橘子检测节点

1
ros2 launch orange_finding orange_detector_launch.py

5

效果如下

6