diff --git a/.gitignore b/.gitignore
index 684f0ff..0a68db6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,15 @@ ## temporal vim files
 *.sw*
+## temporary files for rerun
+blaze_face_short_range.tflite
+face_landmarker.task
+.pixi
+noetic_ws
+pixi.lock
+indoor_forward_3_snapdragon_with_gt.bag
+
+
 ## files for html-slides
 _site/
 **/*.jekyll-cache
diff --git a/rerun/examples/cpp-example-ros-bridge/README.md b/rerun/examples/cpp-example-ros-bridge/README.md
new file mode 100644
index 0000000..af173bf
--- /dev/null
+++ b/rerun/examples/cpp-example-ros-bridge/README.md
@@ -0,0 +1,36 @@
+# cpp-example-ros-bridge
+
+
+## Installation
+```
+curl -fsSL https://pixi.sh/install.sh | bash
+echo 'eval "$(pixi completion --shell bash)"' >> ~/.bashrc
+# Then restart the shell or source your shell config file.
+# To uninstall pixi later: rm ~/.pixi/bin/pixi
+
+# Copy pixi.toml and rerun_bridge/ out of the clone into this directory, then delete the clone.
+git clone git@github.com:rerun-io/cpp-example-ros-bridge.git
+cd cpp-example-ros-bridge
+mv pixi.toml ..
+mv rerun_bridge/ ..
+cd ..
+rm -rf cpp-example-ros-bridge/
+
+$HOME/.pixi/bin/pixi run example
+```
+
+## Example logs
+```
+$HOME/.pixi/bin/./pixi run example
+✨ Pixi task (default): mkdir -p noetic_ws/src && ln -sfn $(pwd)/rerun_bridge noetic_ws/src/rerun_bridge
+⠉ updating packages in 'default'
+ ⠁ downloading [00:01:18] [━───────────────────] 21/542 ogre (+49) ⠁ linking [00:01:18] [━───────────────────] 21/542
+```
+
+
+## References
+https://github.com/rerun-io/cpp-example-ros-bridge?tab=readme-ov-file
+https://github.com/prefix-dev/pixi/
+
+
+
diff --git a/rerun/examples/cpp-example-ros-bridge/pixi.toml b/rerun/examples/cpp-example-ros-bridge/pixi.toml
new file mode 100644
index 0000000..cc4c53c
--- /dev/null
+++ b/rerun/examples/cpp-example-ros-bridge/pixi.toml
@@ -0,0 +1,25 @@
+[project]
+name = "rerun_cpp_example_ros"
+authors = ["rerun.io "]
+channels = ["robostack-staging", "conda-forge"]
+description = "Use the Rerun C++ SDK together with ROS"
+homepage = "https://rerun.io"
+license = "Apache-2.0"
+platforms = ["linux-64"]
+repository = "https://github.com/rerun-io/cpp-example-ros"
+version = "0.1.0"
+
+[tasks]
+ws = { cmd = "mkdir -p noetic_ws/src && ln -sfn $(pwd)/rerun_bridge noetic_ws/src/rerun_bridge", cwd = "." }
+build = { cmd = "catkin_make --cmake-args -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON", depends_on = ["ws"], cwd = "noetic_ws" }
+example_data = { cmd = "curl -L -C - -O http://rpg.ifi.uzh.ch/datasets/uzh-fpv-newer-versions/v3/indoor_forward_3_snapdragon_with_gt.bag", depends_on = ["ws"], cwd = "noetic_ws/src/rerun_bridge" }
+example = { cmd = "bash -c 'source ./devel/setup.bash && roslaunch rerun_bridge example.launch'", depends_on = ["build", "example_data", "ws"], cwd = "noetic_ws" }
+
+[dependencies]
+compilers = ">=1.7.0,<1.8"
+opencv = ">=4.9.0,<4.10"
+ros-noetic-catkin = ">=0.8.10,<0.9"
+ros-noetic-desktop = ">=1.5.0,<1.6"
+ros-noetic-rosbag = ">=1.16.0,<1.17"
+yaml-cpp = ">=0.8.0,<0.9"
+rerun-sdk = ">=0.13.0,<0.14"
diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/CMakeLists.txt b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/CMakeLists.txt
new file mode 100644
index 0000000..f1747a2
--- /dev/null
+++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 3.0.2)
+project(rerun_bridge)
+
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+
+find_package(catkin REQUIRED COMPONENTS roscpp sensor_msgs nav_msgs cv_bridge)
+find_package(OpenCV REQUIRED)
+find_package(yaml-cpp REQUIRED)
+
+include(FetchContent)
+FetchContent_Declare(rerun_sdk URL https://github.com/rerun-io/rerun/releases/download/0.13.0/rerun_cpp_sdk.zip)
+FetchContent_MakeAvailable(rerun_sdk)
+
+include_directories(
+  include
+  ${YAML_CPP_INCLUDE_DIRS}
+  ${catkin_INCLUDE_DIRS}
+)
+
+catkin_package(
+  INCLUDE_DIRS include
+  LIBRARIES ${PROJECT_NAME}
+  CATKIN_DEPENDS roscpp sensor_msgs nav_msgs cv_bridge
+  DEPENDS opencv yaml-cpp
+)
+
+add_library(${PROJECT_NAME} src/rerun_bridge/rerun_ros_interface.cpp)
+add_executable(visualizer src/rerun_bridge/visualizer_node.cpp)
+
+add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+add_dependencies(visualizer ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+
+target_link_libraries(${PROJECT_NAME} ${catkin_LIBRARIES} ${OpenCV_LIBS} ${YAML_CPP_LIBRARIES} rerun_sdk)
+target_link_libraries(visualizer ${PROJECT_NAME} ${catkin_LIBRARIES} ${YAML_CPP_LIBRARIES} rerun_sdk)
+
+install(TARGETS visualizer DESTINATION bin)
diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/include/rerun_bridge/rerun_ros_interface.hpp b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/include/rerun_bridge/rerun_ros_interface.hpp
new file mode 100644
index 0000000..92a9833
--- /dev/null
+++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/include/rerun_bridge/rerun_ros_interface.hpp
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <geometry_msgs/PoseStamped.h>
+#include <nav_msgs/Odometry.h>
+#include <sensor_msgs/Image.h>
+#include <sensor_msgs/Imu.h>
+
+#include <rerun.hpp>
+
+void log_imu(
+    const rerun::RecordingStream& rec, const std::string& entity_path,
+    const sensor_msgs::Imu::ConstPtr& msg
+);
+
+void log_image(
+    const rerun::RecordingStream& rec, const std::string& entity_path,
+    const sensor_msgs::Image::ConstPtr& msg
+);
+
+void log_pose_stamped(
+    const rerun::RecordingStream& rec, const std::string& entity_path,
+    const geometry_msgs::PoseStamped::ConstPtr& msg
+);
+
+void log_odometry(
+    const rerun::RecordingStream& rec, const std::string& entity_path,
+    const nav_msgs::Odometry::ConstPtr& msg
+);
diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example.launch b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example.launch
new file mode 100644
index 0000000..52458f5
--- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example.launch @@ -0,0 +1,11 @@ + + + + + + + + $(find rerun_bridge)/launch/example_params.yaml + + + diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example_params.yaml b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example_params.yaml new file mode 100644 index 0000000..655920c --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/launch/example_params.yaml @@ -0,0 +1,30 @@ +topic_to_entity_path: + /snappy_cam/stereo_l: /groundtruth/pose/snappy_cam/stereo_l + /snappy_cam/stereo_r: /groundtruth/pose/snappy_cam/stereo_r +extra_transform3ds: + - entity_path: /groundtruth/pose/snappy_cam/stereo_l + transform: [ + -0.028228787368606456, -0.999601488301944, 1.2175294828553618e-05, 0.02172388268966517, + 0.014401251861751119, -0.00041887083271471837, -0.9998962088597202, -6.605455433829172e-05, + 0.999497743623523, -0.028225682131089447, 0.014407337010089172, -0.00048817563004522853, + 0.0, 0.0, 0.0, 1.0 + ] + from_parent: true + - entity_path: /groundtruth/pose/snappy_cam/stereo_r + transform: [ + -0.011823057800830705, -0.9998701444077991, -0.010950325390841398, -0.057904961033265645, + 0.011552991631909482, 0.01081376681432078, -0.9998747875767439, 0.00043766687615362694, + 0.9998633625093938, -0.011948086424720228, 0.011423639621249038, -0.00039944945687402214, + 0.0, 0.0, 0.0, 1.0 + ] + from_parent: true +extra_pinholes: + - entity_path: /groundtruth/pose/snappy_cam/stereo_l + height: 480 + width: 640 + image_from_camera: [278.66723066149086, 0, 319.75221200593535, 0, 278.48991409740296, 241.96858910358173, 0, 0, 1] + - entity_path: /groundtruth/pose/snappy_cam/stereo_r + height: 480 + width: 640 + image_from_camera: [277.61640629770613, 0, 314.8944703346039, 0, 277.63749695723294, 236.04310050462587, 0, 0, 1] + diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/package.xml b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/package.xml new file mode 100644 index 0000000..a623263 --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/package.xml @@ -0,0 +1,15 @@ + + + rerun_bridge + 0.1.0 + The rerun_bridge package + rerun.io + Apache-2.0 + roscpp + sensor_msgs + nav_msgs + cv_bridge + geometry_msgs + yaml-cpp + catkin + diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/collection_adapters.hpp b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/collection_adapters.hpp new file mode 100644 index 0000000..fc59375 --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/collection_adapters.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +// Adapters so we can borrow an OpenCV image easily into Rerun images without copying: +template <> +struct rerun::CollectionAdapter { + /// Borrow for non-temporary. + Collection operator()(const cv::Mat& img) { + assert( + "OpenCV matrix was expected have bit depth CV_U8" && CV_MAT_DEPTH(img.type()) == CV_8U + ); + + return Collection::borrow(img.data, img.total() * img.channels()); + } + + // Do a full copy for temporaries (otherwise the data might be deleted when the temporary is destroyed). 
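+    // Why there are two overloads: the lvalue overload above borrows img.data without copying,
+    // so the cv::Mat must remain valid while it is being logged. The rvalue overload below is
+    // for temporaries (e.g. passing cv_bridge::toCvCopy(msg, "rgb8")->image directly): it copies
+    // the pixels into a std::vector and hands ownership to the Collection instead.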
+ Collection operator()(cv::Mat&& img) { + assert( + "OpenCV matrix was expected have bit depth CV_U8" && CV_MAT_DEPTH(img.type()) == CV_8U + ); + + std::vector img_vec(img.total() * img.channels()); + img_vec.assign(img.data, img.data + img.total() * img.channels()); + return Collection::take_ownership(std::move(img_vec)); + } +}; + +inline rerun::Collection tensor_shape(const cv::Mat& img) { + return {img.rows, img.cols, img.channels()}; +}; diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/rerun_ros_interface.cpp b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/rerun_ros_interface.cpp new file mode 100644 index 0000000..d9370a8 --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/rerun_ros_interface.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "collection_adapters.hpp" + +void log_imu( + const rerun::RecordingStream& rec, const std::string& entity_path, + const sensor_msgs::Imu::ConstPtr& msg +) { + rec.set_time_seconds("timestamp", msg->header.stamp.toSec()); + + rec.log(entity_path + "/x", rerun::Scalar(msg->linear_acceleration.x)); + rec.log(entity_path + "/y", rerun::Scalar(msg->linear_acceleration.y)); + rec.log(entity_path + "/z", rerun::Scalar(msg->linear_acceleration.z)); +} + +void log_image( + const rerun::RecordingStream& rec, const std::string& entity_path, + const sensor_msgs::Image::ConstPtr& msg +) { + rec.set_time_seconds("timestamp", msg->header.stamp.toSec()); + + cv::Mat img = cv_bridge::toCvCopy(msg, "rgb8")->image; + rec.log(entity_path, rerun::Image(tensor_shape(img), rerun::TensorBuffer::u8(img))); +} + +void log_pose_stamped( + const rerun::RecordingStream& rec, const std::string& entity_path, + const geometry_msgs::PoseStamped::ConstPtr& msg +) { + rec.set_time_seconds("timestamp", msg->header.stamp.toSec()); + + rec.log( + entity_path, + rerun::Transform3D( + rerun::Vector3D(msg->pose.position.x, msg->pose.position.y, msg->pose.position.z), + rerun::Quaternion::from_wxyz( + msg->pose.orientation.w, + msg->pose.orientation.x, + msg->pose.orientation.y, + msg->pose.orientation.z + ) + ) + ); + + // this is a somewhat hacky way to get a trajectory visualization in Rerun + // this should be be easier in the future, see https://github.com/rerun-io/rerun/issues/723 + std::string trajectory_entity_path = "/trajectories/" + entity_path; + rec.log( + trajectory_entity_path, + rerun::Points3D( + {{static_cast(msg->pose.position.x), + static_cast(msg->pose.position.y), + static_cast(msg->pose.position.z)}} + ) + ); +} + +void log_odometry( + const rerun::RecordingStream& rec, const std::string& entity_path, + const nav_msgs::Odometry::ConstPtr& msg +) { + rec.set_time_seconds("timestamp", msg->header.stamp.toSec()); + + rec.log( + entity_path, + rerun::Transform3D( + rerun::Vector3D( + msg->pose.pose.position.x, + msg->pose.pose.position.y, + msg->pose.pose.position.z + ), + rerun::Quaternion::from_wxyz( + msg->pose.pose.orientation.w, + msg->pose.pose.orientation.x, + msg->pose.pose.orientation.y, + msg->pose.pose.orientation.z + ) + ) + ); +} diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.cpp b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.cpp new file mode 100644 index 0000000..2259f0c --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.cpp @@ -0,0 +1,161 @@ +#include "visualizer_node.hpp" 
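+
+// What this node does: it spawns a Rerun viewer, reads a YAML config that maps ROS topic names
+// to Rerun entity paths and may provide extra static transforms and pinhole intrinsics (logged
+// as timeless data), and then, while spinning, polls the ROS master for topics and automatically
+// subscribes to every sensor_msgs/Image, sensor_msgs/Imu, geometry_msgs/PoseStamped and
+// nav_msgs/Odometry topic, forwarding each message to the matching log_* helper declared in
+// rerun_bridge/rerun_ros_interface.hpp.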
+ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +RerunLoggerNode::RerunLoggerNode() { + _rec.spawn().exit_on_failure(); + + // Read additional config from yaml file + // NOTE We're not using the ROS parameter server for this, because roscpp doesn't support + // reading nested data structures. + std::string yaml_path; + if (_nh.getParam("yaml_path", yaml_path)) { + ROS_INFO("Read yaml config at %s", yaml_path.c_str()); + } + _read_yaml_config(yaml_path); +} + +std::string RerunLoggerNode::_topic_to_entity_path(const std::string& topic) const { + if (_topic_to_entity_path_map.find(topic) != _topic_to_entity_path_map.end()) { + return _topic_to_entity_path_map.at(topic); + } else { + return topic; + } +} + +void RerunLoggerNode::_read_yaml_config(std::string yaml_path) { + const YAML::Node config = YAML::LoadFile(yaml_path); + + // see https://www.rerun.io/docs/howto/ros2-nav-turtlebot#tf-to-rrtransform3d + if (config["topic_to_entity_path"]) { + _topic_to_entity_path_map = + config["topic_to_entity_path"].as>(); + + for (auto const& [key, val] : _topic_to_entity_path_map) { + ROS_INFO("Mapping topic %s to entity path %s", key.c_str(), val.c_str()); + } + } + if (config["extra_transform3ds"]) { + for (const auto& extra_transform3d : config["extra_transform3ds"]) { + const std::array translation = { + extra_transform3d["transform"][3].as(), + extra_transform3d["transform"][7].as(), + extra_transform3d["transform"][11].as() + }; + // Rerun uses column-major order for Mat3x3 + const std::array mat3x3 = { + extra_transform3d["transform"][0].as(), + extra_transform3d["transform"][4].as(), + extra_transform3d["transform"][8].as(), + extra_transform3d["transform"][1].as(), + extra_transform3d["transform"][5].as(), + extra_transform3d["transform"][9].as(), + extra_transform3d["transform"][2].as(), + extra_transform3d["transform"][6].as(), + extra_transform3d["transform"][10].as() + }; + _rec.log_timeless( + extra_transform3d["entity_path"].as(), + rerun::Transform3D( + rerun::Vec3D(translation), + rerun::Mat3x3(mat3x3), + extra_transform3d["from_parent"].as() + ) + ); + } + } + if (config["extra_pinholes"]) { + for (const auto& extra_pinhole : config["extra_pinholes"]) { + // Rerun uses column-major order for Mat3x3 + const std::array image_from_camera = { + extra_pinhole["image_from_camera"][0].as(), + extra_pinhole["image_from_camera"][3].as(), + extra_pinhole["image_from_camera"][6].as(), + extra_pinhole["image_from_camera"][1].as(), + extra_pinhole["image_from_camera"][4].as(), + extra_pinhole["image_from_camera"][7].as(), + extra_pinhole["image_from_camera"][2].as(), + extra_pinhole["image_from_camera"][5].as(), + extra_pinhole["image_from_camera"][8].as(), + }; + _rec.log_timeless( + extra_pinhole["entity_path"].as(), + rerun::Pinhole(image_from_camera) + .with_resolution( + extra_pinhole["width"].as(), + extra_pinhole["height"].as() + ) + ); + } + } +} + +void RerunLoggerNode::spin() { + while (ros::ok()) { + ros::spinOnce(); + + // NOTE We are currently checking in each iteration if there are new topics. + // This is not efficient, but it's the easiest way to support new topics being added at runtime. + // If you have a lot of topics, you might want to optimize this. 
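+        // A possible optimization (sketch, not part of the original example): only poll the
+        // master every N-th iteration instead of on every spin, e.g.
+        //   if (++loop_count % 100 != 0) { _loop_rate.sleep(); continue; }
+        // where `loop_count` would be a counter local to this function (hypothetical name).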
+ ros::master::V_TopicInfo topic_infos; + ros::master::getTopics(topic_infos); + for (const auto& topic_info : topic_infos) { + if (_topic_to_subscriber.find(topic_info.name) == _topic_to_subscriber.end()) { + auto entity_path = _topic_to_entity_path(topic_info.name); + if (topic_info.datatype == "sensor_msgs/Image") { + _topic_to_subscriber[topic_info.name] = _nh.subscribe( + topic_info.name, + 100, + [&, entity_path](const sensor_msgs::Image::ConstPtr& msg) { + log_image(_rec, entity_path, msg); + } + ); + } else if (topic_info.datatype == "sensor_msgs/Imu") { + _topic_to_subscriber[topic_info.name] = _nh.subscribe( + topic_info.name, + 1000, + [&, entity_path](const sensor_msgs::Imu::ConstPtr& msg) { + log_imu(_rec, entity_path, msg); + } + ); + } else if (topic_info.datatype == "geometry_msgs/PoseStamped") { + _topic_to_subscriber[topic_info.name] = _nh.subscribe( + topic_info.name, + 1000, + [&, entity_path](const geometry_msgs::PoseStamped::ConstPtr& msg) { + log_pose_stamped(_rec, entity_path, msg); + } + ); + } else if (topic_info.datatype == "nav_msgs/Odometry") { + _topic_to_subscriber[topic_info.name] = _nh.subscribe( + topic_info.name, + 1000, + [&, entity_path](const nav_msgs::Odometry::ConstPtr& msg) { + log_odometry(_rec, entity_path, msg); + } + ); + } + } + } + + _loop_rate.sleep(); + } +} + +int main(int argc, char** argv) { + ros::init(argc, argv, "rerun_logger_node"); + RerunLoggerNode node; + node.spin(); + return 0; +} diff --git a/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.hpp b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.hpp new file mode 100644 index 0000000..bcc4f50 --- /dev/null +++ b/rerun/examples/cpp-example-ros-bridge/rerun_bridge/src/rerun_bridge/visualizer_node.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include +#include + + +class RerunLoggerNode { + public: + RerunLoggerNode(); + void spin(); + + private: + std::map _topic_to_entity_path_map; + std::map _topic_to_subscriber; + + void _read_yaml_config(std::string yaml_path); + + std::string _topic_to_entity_path(const std::string& topic) const; + + const rerun::RecordingStream _rec{"rerun_logger_node"}; + ros::NodeHandle _nh{"~"}; + ros::Rate _loop_rate{1000}; +}; diff --git a/rerun/examples/face_tracking/README.md b/rerun/examples/face_tracking/README.md new file mode 100644 index 0000000..f3a45a7 --- /dev/null +++ b/rerun/examples/face_tracking/README.md @@ -0,0 +1,18 @@ +# face_tracking + +## Download scripts +``` +wget https://raw.githubusercontent.com/rerun-io/rerun/main/examples/python/face_tracking/requirements.txt +wget https://raw.githubusercontent.com/rerun-io/rerun/main/examples/python/face_tracking/main.py +``` + + +## Run scripts +mamba activate rrVE +pip install -r requirements.txt +python main.py + + +## Reference +https://github.com/rerun-io/rerun/tree/main/examples/python/face_tracking + diff --git a/rerun/examples/face_tracking/main.py b/rerun/examples/face_tracking/main.py new file mode 100644 index 0000000..62241da --- /dev/null +++ b/rerun/examples/face_tracking/main.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python3 +"""Use the MediaPipe Face detection and Face landmark detection solutions to track human faces in images and videos.""" +from __future__ import annotations + +import argparse +import itertools +import logging +import math +import os +from pathlib import Path +from typing import Final, Iterable + +import cv2 +import mediapipe as mp +import numpy as np +import numpy.typing as npt 
+import requests +import rerun as rr # pip install rerun-sdk +import tqdm +from mediapipe.tasks.python import vision + +EXAMPLE_DIR: Final = Path(os.path.dirname(__file__)) +DATASET_DIR: Final = EXAMPLE_DIR / "dataset" +MODEL_DIR: Final = EXAMPLE_DIR / "model" + +SAMPLE_IMAGE_PATH = (DATASET_DIR / "image.jpg").resolve() +# from https://pixabay.com/photos/brother-sister-girl-family-boy-977170/ +SAMPLE_IMAGE_URL = "https://i.imgur.com/Vu2Nqwb.jpg" + +# uncomment blendshapes of interest +BLENDSHAPES_CATEGORIES = { + # "_neutral", + # "browDownLeft", + # "browDownRight", + # "browInnerUp", + # "browOuterUpLeft", + # "browOuterUpRight", + # "cheekPuff", + # "cheekSquintLeft", + # "cheekSquintRight", + "eyeBlinkLeft", + "eyeBlinkRight", + # "eyeLookDownLeft", + # "eyeLookDownRight", + # "eyeLookInLeft", + # "eyeLookInRight", + # "eyeLookOutLeft", + # "eyeLookOutRight", + # "eyeLookUpLeft", + # "eyeLookUpRight", + "eyeSquintLeft", + "eyeSquintRight", + "eyeWideLeft", + "eyeWideRight", + # "jawForward", + # "jawLeft", + "jawOpen", + # "jawRight", + # "mouthClose", + # "mouthDimpleLeft", + # "mouthDimpleRight", + # "mouthFrownLeft", + # "mouthFrownRight", + # "mouthFunnel", + # "mouthLeft", + # "mouthLowerDownLeft", + # "mouthLowerDownRight", + # "mouthPressLeft", + # "mouthPressRight", + # "mouthPucker", + # "mouthRight", + # "mouthRollLower", + # "mouthRollUpper", + # "mouthShrugLower", + # "mouthShrugUpper", + "mouthSmileLeft", + "mouthSmileRight", + # "mouthStretchLeft", + # "mouthStretchRight", + # "mouthUpperUpLeft", + # "mouthUpperUpRight", + # "noseSneerLeft", + # "noseSneerRight", +} + + +class FaceDetectorLogger: + """ + Logger for the MediaPipe Face Detection solution. + + https://developers.google.com/mediapipe/solutions/vision/face_detector + """ + + MODEL_PATH: Final = (MODEL_DIR / "blaze_face_short_range.tflite").resolve() + MODEL_URL: Final = ( + "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/latest/" + "blaze_face_short_range.tflite" + ) + + def __init__(self, video_mode: bool = False): + self._video_mode = video_mode + + # download model if necessary + if not self.MODEL_PATH.exists(): + download_file(self.MODEL_URL, self.MODEL_PATH) + + self._base_options = mp.tasks.BaseOptions( + model_asset_path=str(self.MODEL_PATH), + ) + self._options = vision.FaceDetectorOptions( + base_options=self._base_options, + running_mode=mp.tasks.vision.RunningMode.VIDEO if self._video_mode else mp.tasks.vision.RunningMode.IMAGE, + ) + self._detector = vision.FaceDetector.create_from_options(self._options) + + # With this annotation, the viewer will connect the keypoints with some lines to improve visibility. 
+ rr.log( + "video/detector", + rr.ClassDescription( + info=rr.AnnotationInfo(id=0), keypoint_connections=[(0, 1), (1, 2), (2, 0), (2, 3), (0, 4), (1, 5)] + ), + timeless=True, + ) + + def detect_and_log(self, image: npt.NDArray[np.uint8], frame_time_nano: int) -> None: + height, width, _ = image.shape + image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image) + + detection_result = ( + self._detector.detect_for_video(image, int(frame_time_nano / 1e6)) + if self._video_mode + else self._detector.detect(image) + ) + rr.log("video/detector/faces", rr.Clear(recursive=True)) + for i, detection in enumerate(detection_result.detections): + # log bounding box + bbox = detection.bounding_box + index, score = detection.categories[0].index, detection.categories[0].score + + # log bounding box + rr.log( + f"video/detector/faces/{i}/bbox", + rr.Boxes2D( + array=[bbox.origin_x, bbox.origin_y, bbox.width, bbox.height], array_format=rr.Box2DFormat.XYWH + ), + rr.AnyValues(index=index, score=score), + ) + + # MediaPipe's keypoints are normalized to [0, 1], so we need to scale them to get pixel coordinates. + pts = [ + (math.floor(keypoint.x * width), math.floor(keypoint.y * height)) for keypoint in detection.keypoints + ] + rr.log(f"video/detector/faces/{i}/keypoints", rr.Points2D(pts, radii=3, keypoint_ids=list(range(6)))) + + +class FaceLandmarkerLogger: + """ + Logger for the MediaPipe Face Landmark Detection solution. + + https://developers.google.com/mediapipe/solutions/vision/face_landmarker + """ + + MODEL_PATH: Final = (MODEL_DIR / "face_landmarker.task").resolve() + MODEL_URL: Final = ( + "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/" + "face_landmarker.task" + ) + + def __init__(self, video_mode: bool = False, num_faces: int = 1): + self._video_mode = video_mode + + # download model if necessary + if not self.MODEL_PATH.exists(): + download_file(self.MODEL_URL, self.MODEL_PATH) + + self._base_options = mp.tasks.BaseOptions( + model_asset_path=str(self.MODEL_PATH), + ) + self._options = vision.FaceLandmarkerOptions( + base_options=self._base_options, + output_face_blendshapes=True, + num_faces=num_faces, + running_mode=mp.tasks.vision.RunningMode.VIDEO if self._video_mode else mp.tasks.vision.RunningMode.IMAGE, + ) + self._detector = vision.FaceLandmarker.create_from_options(self._options) + + # Extract classes from MediaPipe face mesh solution. The goal of this code is: + # 1) Log an annotation context with one class ID per facial feature. For each class ID, the class description + # contains the connections between corresponding keypoints (taken from the MediaPipe face mesh solution) + # 2) A class ID array matching the class IDs in the annotation context to keypoint indices (to be passed as + # the `class_ids` argument to `rr.log`). + + classes = [ + mp.solutions.face_mesh.FACEMESH_LIPS, + mp.solutions.face_mesh.FACEMESH_LEFT_EYE, + mp.solutions.face_mesh.FACEMESH_LEFT_IRIS, + mp.solutions.face_mesh.FACEMESH_LEFT_EYEBROW, + mp.solutions.face_mesh.FACEMESH_RIGHT_EYE, + mp.solutions.face_mesh.FACEMESH_RIGHT_EYEBROW, + mp.solutions.face_mesh.FACEMESH_RIGHT_IRIS, + mp.solutions.face_mesh.FACEMESH_FACE_OVAL, + mp.solutions.face_mesh.FACEMESH_NOSE, + ] + + self._class_ids = [0] * mp.solutions.face_mesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES + class_descriptions = [] + for i, klass in enumerate(classes): + # MediaPipe only provides connections for class, not actual class per keypoint. So we have to extract the + # classes from the connections. 
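+            # E.g. a connection set like {(a, b), (b, c)} contributes the keypoint ids {a, b, c},
+            # and each of those ids gets assigned class id `i` below.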
+ ids = set() + for connection in klass: + ids.add(connection[0]) + ids.add(connection[1]) + + for id_ in ids: + self._class_ids[id_] = i + + class_descriptions.append( + rr.ClassDescription( + info=rr.AnnotationInfo(id=i), + keypoint_connections=klass, + ) + ) + + rr.log("video/landmarker", rr.AnnotationContext(class_descriptions), timeless=True) + rr.log("reconstruction", rr.AnnotationContext(class_descriptions), timeless=True) + + # properly align the 3D face in the viewer + rr.log("reconstruction", rr.ViewCoordinates.RDF, timeless=True) + + def detect_and_log(self, image: npt.NDArray[np.uint8], frame_time_nano: int) -> None: + height, width, _ = image.shape + image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image) + + detection_result = ( + self._detector.detect_for_video(image, int(frame_time_nano / 1e6)) + if self._video_mode + else self._detector.detect(image) + ) + + def is_empty(i): # type: ignore[no-untyped-def] + try: + next(i) + return False + except StopIteration: + return True + + if is_empty(zip(detection_result.face_landmarks, detection_result.face_blendshapes)): + rr.log("video/landmarker/faces", rr.Clear(recursive=True)) + rr.log("reconstruction/faces", rr.Clear(recursive=True)) + rr.log("blendshapes", rr.Clear(recursive=True)) + + for i, (landmark, blendshapes) in enumerate( + zip(detection_result.face_landmarks, detection_result.face_blendshapes) + ): + if len(landmark) == 0 or len(blendshapes) == 0: + rr.log(f"video/landmarker/faces/{i}/landmarks", rr.Clear(recursive=True)) + rr.log(f"reconstruction/faces/{i}", rr.Clear(recursive=True)) + rr.log(f"blendshapes/{i}", rr.Clear(recursive=True)) + continue + + # MediaPipe's keypoints are normalized to [0, 1], so we need to scale them to get pixel coordinates. + pts = [(math.floor(lm.x * width), math.floor(lm.y * height)) for lm in landmark] + keypoint_ids = list(range(len(landmark))) + rr.log( + f"video/landmarker/faces/{i}/landmarks", + rr.Points2D(pts, radii=3, keypoint_ids=keypoint_ids, class_ids=self._class_ids), + ) + + rr.log( + f"reconstruction/faces/{i}", + rr.Points3D( + [(lm.x, lm.y, lm.z) for lm in landmark], + keypoint_ids=keypoint_ids, + class_ids=self._class_ids, + ), + ) + + for blendshape in blendshapes: + if blendshape.category_name in BLENDSHAPES_CATEGORIES: + rr.log(f"blendshapes/{i}/{blendshape.category_name}", rr.Scalar(blendshape.score)) + + +# ======================================================================================== +# Main & CLI code + + +def download_file(url: str, path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + logging.info("Downloading %s to %s", url, path) + response = requests.get(url, stream=True) + with tqdm.tqdm.wrapattr( + open(path, "wb"), + "write", + miniters=1, + total=int(response.headers.get("content-length", 0)), + desc=f"Downloading {path.name}", + ) as f: + for chunk in response.iter_content(chunk_size=4096): + f.write(chunk) + + +def resize_image(image: npt.NDArray[np.uint8], max_dim: int | None) -> npt.NDArray[np.uint8]: + """Resize an image if it is larger than max_dim.""" + if max_dim is None: + return image + height, width, _ = image.shape + scale = max_dim / max(height, width) + if scale < 1: + image = cv2.resize(image, (0, 0), fx=scale, fy=scale) + return image + + +def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count: int | None, num_faces: int) -> None: + """ + Run the face detector on a video stream. + + Parameters + ---------- + vid: + The video stream to run the detector on. 
Use 0 for the default camera or a path to a video file. + max_dim: + The maximum dimension of the image. If the image is larger, it will be scaled down. + max_frame_count: + The maximum number of frames to process. If None, process all frames. + num_faces: + The number of faces to track. If set to 1, temporal smoothing will be applied. + """ + + cap = cv2.VideoCapture(vid) + fps = cap.get(cv2.CAP_PROP_FPS) + + detector = FaceDetectorLogger(video_mode=True) + landmarker = FaceLandmarkerLogger(video_mode=True, num_faces=num_faces) + + print("Capturing video stream. Press ctrl-c to stop.") + try: + it: Iterable[int] = itertools.count() if max_frame_count is None else range(max_frame_count) + + for frame_idx in tqdm.tqdm(it, desc="Processing frames"): + # Capture frame-by-frame + ret, frame = cap.read() + if not ret: + break + + # OpenCV sometimes returns a blank frame, so we skip it + if np.all(frame == 0): + continue + + frame = resize_image(frame, max_dim) + + # get frame time + frame_time_nano = int(cap.get(cv2.CAP_PROP_POS_MSEC) * 1e6) + if frame_time_nano == 0: + # On some platforms it always returns zero, so we compute from the frame counter and fps + frame_time_nano = int(frame_idx * 1000 / fps * 1e6) + + # convert to rgb + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + # log data + rr.set_time_sequence("frame_nr", frame_idx) + rr.set_time_nanos("frame_time", frame_time_nano) + detector.detect_and_log(frame, frame_time_nano) + landmarker.detect_and_log(frame, frame_time_nano) + rr.log("video/image", rr.Image(frame)) + + except KeyboardInterrupt: + pass + + # When everything done, release the capture + cap.release() + cv2.destroyAllWindows() + + +def run_from_sample_image(path: Path, max_dim: int | None, num_faces: int) -> None: + """Run the face detector on a single image.""" + image = cv2.imread(str(path)) + image = resize_image(image, max_dim) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + logger = FaceDetectorLogger(video_mode=False) + landmarker = FaceLandmarkerLogger(video_mode=False, num_faces=num_faces) + logger.detect_and_log(image, 0) + landmarker.detect_and_log(image, 0) + rr.log("video/image", rr.Image(image)) + + +def main() -> None: + logging.getLogger().addHandler(logging.StreamHandler()) + logging.getLogger().setLevel("INFO") + + parser = argparse.ArgumentParser(description="Uses the MediaPipe Face Detection to track a human pose in video.") + parser.add_argument( + "--demo-image", + action="store_true", + help="Run on a demo image automatically downloaded", + ) + parser.add_argument( + "--image", + type=Path, + help="Run on the provided image", + ) + parser.add_argument("--video", type=Path, help="Run on the provided video file.") + parser.add_argument( + "--camera", type=int, default=0, help="Run from the camera stream (parameter is the camera ID, usually 0)" + ) + parser.add_argument( + "--max-frame", + type=int, + help="Stop after processing this many frames. If not specified, will run until interrupted.", + ) + parser.add_argument( + "--max-dim", + type=int, + help="Resize the image such as its maximum dimension is not larger than this value.", + ) + parser.add_argument( + "--num-faces", + type=int, + default=1, + help=( + "Max number of faces detected by the landmark model " + "(temporal smoothing is applied only for a value of 1)." 
+ ), + ) + + rr.script_add_args(parser) + + args, unknown = parser.parse_known_args() + for arg in unknown: + logging.warning(f"unknown arg: {arg}") + rr.script_setup(args, "rerun_example_mp_face_detection") + + if args.demo_image: + if not SAMPLE_IMAGE_PATH.exists(): + download_file(SAMPLE_IMAGE_URL, SAMPLE_IMAGE_PATH) + + run_from_sample_image(SAMPLE_IMAGE_PATH, args.max_dim, args.num_faces) + elif args.image is not None: + run_from_sample_image(args.image, args.max_dim, args.num_faces) + elif args.video is not None: + run_from_video_capture(str(args.video), args.max_dim, args.max_frame, args.num_faces) + else: + run_from_video_capture(args.camera, args.max_dim, args.max_frame, args.num_faces) + + rr.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/rerun/examples/face_tracking/requirements.txt b/rerun/examples/face_tracking/requirements.txt new file mode 100644 index 0000000..daec16f --- /dev/null +++ b/rerun/examples/face_tracking/requirements.txt @@ -0,0 +1,6 @@ +mediapipe>=0.10.1 +numpy +opencv-python<4.6 # Avoid opencv-4.6 since it rotates images incorrectly (https://github.com/opencv/opencv/issues/22088) +requests +rerun-sdk +tqdm diff --git a/rerun/examples/live_camera_edge_detection/README.md b/rerun/examples/live_camera_edge_detection/README.md new file mode 100644 index 0000000..5359aa7 --- /dev/null +++ b/rerun/examples/live_camera_edge_detection/README.md @@ -0,0 +1,19 @@ +# live_camera_edge_detection + +## Download scripts +``` +wget https://raw.githubusercontent.com/rerun-io/rerun/main/examples/python/live_camera_edge_detection/requirements.txt +wget https://raw.githubusercontent.com/rerun-io/rerun/main/examples/python/live_camera_edge_detection/main.py + +``` + + +## Run scripts +mamba activate rrVE +pip install -r requirements.txt +python main.py + + +## Reference +https://github.com/rerun-io/rerun/tree/main/examples/python/live_camera_edge_detection + diff --git a/rerun/examples/live_camera_edge_detection/main.py b/rerun/examples/live_camera_edge_detection/main.py new file mode 100644 index 0000000..33bab09 --- /dev/null +++ b/rerun/examples/live_camera_edge_detection/main.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +""" +Very simple example of capturing from a live camera. + +Runs the opencv canny edge detector on the image stream. +""" +from __future__ import annotations + +import argparse + +import cv2 +import rerun as rr # pip install rerun-sdk + + +def run_canny(num_frames: int | None) -> None: + # Create a new video capture + cap = cv2.VideoCapture(0) + + frame_nr = 0 + + while cap.isOpened(): + if num_frames and frame_nr >= num_frames: + break + + # Read the frame + ret, img = cap.read() + if not ret: + if frame_nr == 0: + print("Failed to capture any frame. No camera connected?") + else: + print("Can't receive frame (stream end?). Exiting…") + break + + # Get the current frame time. On some platforms it always returns zero. 
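+        # (If it is zero we simply don't set "frame_time" here; an alternative, used in the
+        # face_tracking example, is to derive the time from the frame counter and
+        # cap.get(cv2.CAP_PROP_FPS).)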
+ frame_time_ms = cap.get(cv2.CAP_PROP_POS_MSEC) + if frame_time_ms != 0: + rr.set_time_nanos("frame_time", int(frame_time_ms * 1_000_000)) + + rr.set_time_sequence("frame_nr", frame_nr) + frame_nr += 1 + + # Log the original image + rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + rr.log("image/rgb", rr.Image(rgb)) + + # Convert to grayscale + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + rr.log("image/gray", rr.Image(gray)) + + # Run the canny edge detector + canny = cv2.Canny(gray, 50, 200) + rr.log("image/canny", rr.Image(canny)) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Streams a local system camera and runs the canny edge detector.") + parser.add_argument( + "--device", type=int, default=0, help="Which camera device to use. (Passed to `cv2.VideoCapture()`)" + ) + parser.add_argument("--num-frames", type=int, default=None, help="The number of frames to log") + + rr.script_add_args(parser) + args = parser.parse_args() + + rr.script_setup(args, "rerun_example_live_camera_edge_detection") + + run_canny(args.num_frames) + + rr.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/rerun/examples/live_camera_edge_detection/requirements.txt b/rerun/examples/live_camera_edge_detection/requirements.txt new file mode 100644 index 0000000..51d3a8c --- /dev/null +++ b/rerun/examples/live_camera_edge_detection/requirements.txt @@ -0,0 +1,2 @@ +opencv-python +rerun-sdk
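
Both Python examples above follow the same rerun-sdk logging pattern: pick a timeline value for the current frame, then log images and scalars to entity paths that nest in the viewer. The sketch below is a minimal, hypothetical illustration of that pattern and is not part of the diff; the application id, entity paths, and dummy pixel data are made up, and it only assumes `rerun-sdk` and `numpy` are installed.

```python
"""Minimal sketch of the logging pattern shared by the examples above (illustrative only)."""
import numpy as np
import rerun as rr  # pip install rerun-sdk

# Start a recording and spawn the viewer (rr.script_setup() does this for the examples above).
rr.init("rerun_example_minimal_pattern", spawn=True)

for frame_nr in range(10):
    # Everything logged after these calls is associated with this frame number / timestamp.
    rr.set_time_sequence("frame_nr", frame_nr)
    rr.set_time_nanos("frame_time", frame_nr * 33_333_333)  # pretend ~30 fps

    # Images go to an entity path; child paths like "image/rgb" nest under "image" in the viewer.
    rgb = np.zeros((480, 640, 3), dtype=np.uint8)
    rgb[:, : (frame_nr + 1) * 64] = (0, 180, 255)  # dummy content that changes every frame
    rr.log("image/rgb", rr.Image(rgb))

    # Scalars logged over time show up as plots (like the blendshape scores in face_tracking).
    rr.log("metrics/filled_fraction", rr.Scalar((frame_nr + 1) * 64 / 640))
```

Entity paths are what tie the pieces together: the examples reuse shared prefixes (`video/...`, `reconstruction/...`, `image/...`) so related streams group in the viewer.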