[ORT][TRT] support FaceFusion (#445)

DefTruth · Nov 30, 2024 · c70e1ff · c70e1ff
1 parent 4252d27
commit c70e1ff
Show file tree

Hide file tree

Showing 55 changed files with 3,313 additions and 34 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,13 +19,18 @@ if (CMAKE_COMPILER_IS_GNUCXX)
     set(CMAKE_CXX_FLAGS "-std=c++17 -Wno-deprecated ${CMAKE_CXX_FLAGS} ")
     message(STATUS "[Linux GCC Compiler Options]+:-std=c++17 -Wno-deprecated")
 endif ()
+# Default CUDA compiler. CACHE without FORCE keeps a user-supplied -DCMAKE_CUDA_COMPILER=... intact.
+set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc" CACHE FILEPATH "CUDA compiler (nvcc)")
+set(CMAKE_CUDA_ARCHITECTURES 89 CACHE STRING "CUDA architectures") # 89 = compute capability 8.9 (Ada, RTX 40xx); RTX 20xx would be 75
 
+# Enable CUDA. NOTE(review): unconditional, so nvcc is needed even when ENABLE_TENSORRT is OFF - confirm intended
+enable_language(CUDA)
 set(LITE_AI_ROOT_DIR ${CMAKE_SOURCE_DIR})
 
-option(ENABLE_TEST "build test examples." OFF)
+option(ENABLE_TEST "build test examples." ON) # NOTE(review): default flipped OFF -> ON by this change - confirm intended
 option(ENABLE_DEBUG_STRING "enable DEBUG string or not" ON)
 option(ENABLE_ONNXRUNTIME "enable ONNXRuntime engine" ON)
-option(ENABLE_TENSORRT "enable TensorRT engine" OFF)
+option(ENABLE_TENSORRT "enable TensorRT engine" ON) # NOTE(review): default flipped OFF -> ON by this change - confirm intended
 option(ENABLE_MNN "enable MNN engine" OFF)
 option(ENABLE_NCNN "enable NCNN engine" OFF) 
 option(ENABLE_TNN "enable TNN engine" OFF)

diff --git a/cmake/opencv.cmake b/cmake/opencv.cmake
@@ -19,9 +19,9 @@ link_directories(${OpenCV_DIR}/lib)
 
 if(NOT WIN32)
     if(ENABLE_OPENCV_VIDEOIO OR ENABLE_TEST)
-        set(OpenCV_LIBS opencv_core opencv_imgproc opencv_imgcodecs opencv_video opencv_videoio)
+        set(OpenCV_LIBS opencv_core opencv_imgproc opencv_imgcodecs opencv_video opencv_videoio opencv_calib3d)
     else()
-        set(OpenCV_LIBS opencv_core opencv_imgproc opencv_imgcodecs) # no videoio, video module
+        set(OpenCV_LIBS opencv_core opencv_imgproc opencv_imgcodecs opencv_calib3d) # no videoio, video module
     endif()
 else()
     set(OpenCV_LIBS opencv_world490)

diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
@@ -41,6 +41,8 @@ link_directories(${TensorRT_DIR}/lib)
 
 # 1. glob sources files
 file(GLOB TENSORRT_CORE_SRCS ${CMAKE_SOURCE_DIR}/lite/trt/core/*.cpp)
+file(GLOB TENSORRT_CUDA_KERNEL_SRCS_CPP ${CMAKE_SOURCE_DIR}/lite/trt/kernel/*.cpp)
+file(GLOB TENSORRT_CUDA_KERNEL_SRCS_CU ${CMAKE_SOURCE_DIR}/lite/trt/kernel/*.cu)
 file(GLOB TENSORRT_CV_SRCS ${CMAKE_SOURCE_DIR}/lite/trt/cv/*.cpp)
 file(GLOB TENSORRT_NLP_SRCS ${CMAKE_SOURCE_DIR}/lite/trt/nlp/*.cpp)
 file(GLOB TENSORRT_ASR_SRCS ${CMAKE_SOURCE_DIR}/lite/trt/asr/*.cpp)
@@ -52,8 +54,17 @@ file(GLOB TENSORRT_CV_HEAD ${CMAKE_SOURCE_DIR}/lite/trt/cv/*.h)
 file(GLOB TENSORRT_NLP_HEAD ${CMAKE_SOURCE_DIR}/lite/trt/nlp/*.h)
 file(GLOB TENSORRT_ASR_HEAD ${CMAKE_SOURCE_DIR}/lite/trt/asr/*.h)
 file(GLOB TENSORRT_SD_HEAD ${CMAKE_SOURCE_DIR}/lite/trt/sd/*.h)
+file(GLOB TENSORRT_CUDA_KERNEL_HEAD_CPP ${CMAKE_SOURCE_DIR}/lite/trt/kernel/*.h)
+file(GLOB TENSORRT_CUDA_KERNEL_HEAD_CU ${CMAKE_SOURCE_DIR}/lite/trt/kernel/*.cuh)
+
+# Merge every TensorRT source list into TRT_SRCS; the kernel sources (.cpp and .cu) are appended last.
+set(TRT_SRCS ${TENSORRT_CV_SRCS} ${TENSORRT_NLP_SRCS} ${TENSORRT_ASR_SRCS} ${TENSORRT_CORE_SRCS} ${TENSORRT_SD_SRCS}
+        ${TENSORRT_CUDA_KERNEL_SRCS_CPP} ${TENSORRT_CUDA_KERNEL_SRCS_CU})
+# Mark the lite/trt/kernel sources (.cu and .cpp) and the kernel headers (.h, .cuh) as CUDA language files.
+set_source_files_properties(${TENSORRT_CUDA_KERNEL_SRCS_CU} ${TENSORRT_CUDA_KERNEL_SRCS_CPP}
+        ${TENSORRT_CUDA_KERNEL_HEAD_CPP} ${TENSORRT_CUDA_KERNEL_HEAD_CU}
+        PROPERTIES LANGUAGE CUDA)
 
-set(TRT_SRCS ${TENSORRT_CV_SRCS} ${TENSORRT_NLP_SRCS} ${TENSORRT_ASR_SRCS} ${TENSORRT_CORE_SRCS} ${TENSORRT_SD_SRCS})
 # 3. copy
 message("[Lite.AI.Toolkit][I] Installing Lite.AI.ToolKit Headers for TensorRT Backend ...")
 # "INSTALL" can copy all files from the list to the specified path.
@@ -63,4 +74,5 @@ file(INSTALL ${TENSORRT_CV_HEAD} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lit
 file(INSTALL ${TENSORRT_ASR_HEAD} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lite/trt/asr)
 file(INSTALL ${TENSORRT_NLP_HEAD} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lite/trt/nlp)
 file(INSTALL ${TENSORRT_SD_HEAD} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lite/trt/sd)
-
+file(INSTALL ${TENSORRT_CUDA_KERNEL_HEAD_CPP} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lite/trt/kernel)
+file(INSTALL ${TENSORRT_CUDA_KERNEL_HEAD_CU} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/lite/trt/kernel)
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
@@ -52,7 +52,7 @@ function(add_lite_ai_toolkit_shared_library version soversion)
     if (ENABLE_TENSORRT)
         include(cmake/tensorrt.cmake)
         set(LITE_SRCS ${LITE_SRCS} ${TRT_SRCS})
-        set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} cudart nvinfer nvonnxparser 
+        set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} cuda cudart nvinfer nvonnxparser
                                                    nvinfer_plugin ddim_scheduler_cpp)
         link_directories(${CMAKE_SOURCE_DIR}/lite/bin)
     endif ()

diff --git a/examples/lite/CMakeLists.txt b/examples/lite/CMakeLists.txt
@@ -103,5 +103,10 @@ add_lite_executable(lite_face_parsing_bisenet_dyn cv)
 add_lite_executable(lite_yolov8face cv)
 add_lite_executable(lite_lightenhance cv)
 add_lite_executable(lite_realesrgan cv)
+add_lite_executable(lite_face_68landmarks cv)
+add_lite_executable(lite_face_recognizer cv)
+add_lite_executable(lite_face_swap cv)
+add_lite_executable(lite_face_restoration cv)
+add_lite_executable(lite_facefusion_pipeline cv)
 add_lite_executable(lite_yolov8 cv)
 add_lite_executable(lite_sd_pipeline sd)
diff --git a/examples/lite/cv/test_lite_face_68landmarks.cpp b/examples/lite/cv/test_lite_face_68landmarks.cpp
@@ -0,0 +1,112 @@
+//
+// Created by wangzijian on 11/1/24.
+//
+#include "lite/lite.h"
+#include "lite/trt/cv/trt_face_68landmarks_mt.h"
+
+// ONNXRuntime smoke test: runs Face_68Landmarks::detect() once on a single image.
+static void test_default()
+{
+#ifdef ENABLE_ONNXRUNTIME
+    std::string model_file = "/home/lite.ai.toolkit/examples/hub/onnx/cv/2dfan4.onnx";
+    std::string image_file = "/home/lite.ai.toolkit/examples/lite/resources/test_lite_facefusion_pipeline_source.jpg";
+
+    // Hard-coded face bounding box that is handed to detect() together with the image.
+    lite::types::BoundingBoxType<float, float> face_box;
+    face_box.x1 = 487;
+    face_box.y1 = 236;
+    face_box.x2 = 784;
+    face_box.y2 = 624;
+
+    // 1. Test Default Engine ONNXRuntime
+    auto *detector = new lite::cv::faceid::Face_68Landmarks(model_file);
+    cv::Mat input_bgr = cv::imread(image_file);
+    std::vector<cv::Point2f> landmarks;
+    detector->detect(input_bgr, face_box, landmarks);
+
+    std::cout << "face id detect done!" << std::endl;
+    delete detector;
+#endif
+}
+
+
+
+
+// TensorRT smoke test: runs FaceFusionFace68Landmarks::detect() once on a single image.
+static void test_tensorrt()
+{
+#ifdef ENABLE_TENSORRT
+    std::string engine_file = "/home/lite.ai.toolkit/examples/hub/trt/2dfan4_fp16.engine";
+    std::string image_file = "/home/lite.ai.toolkit/1.jpg";
+
+    // Hard-coded face bounding box that is handed to detect() together with the image.
+    lite::types::BoundingBoxType<float, float> face_box;
+    face_box.x1 = 487;
+    face_box.y1 = 236;
+    face_box.x2 = 784;
+    face_box.y2 = 624;
+
+    // 1. Test TensorRT Engine
+    auto *detector = new lite::trt::cv::faceid::FaceFusionFace68Landmarks(engine_file);
+    cv::Mat input_bgr = cv::imread(image_file);
+    std::vector<cv::Point2f> landmarks;
+    detector->detect(input_bgr, face_box, landmarks);
+    std::cout << "face id detect done!" << std::endl;
+    delete detector;
+#endif
+}
+
+
+// Multi-threaded TensorRT test: queues four detect_async() jobs on trt_face_68landmarks_mt,
+// waits for all of them, then shuts the wrapper down.
+static void test_tensorrt_mt()
+{
+#ifdef ENABLE_TENSORRT
+    std::string engine_path = "/home/lite.ai.toolkit/examples/hub/trt/2dfan4_fp16.engine";
+    std::string test_img_path = "/home/lite.ai.toolkit/1.jpg";
+
+    // 1. Test TensorRT Engine (the 4 is presumably the worker-thread count - confirm in the header)
+    trt_face_68landmarks_mt *face68Landmarks = new trt_face_68landmarks_mt(engine_path,4);
+
+    lite::types::BoundingBoxType<float, float> bbox;
+    bbox.x1 = 487;
+    bbox.y1 = 236;
+    bbox.x2 = 784;
+    bbox.y2 = 624;
+
+    // Every job gets its own image and its own output vector. They are all declared in this
+    // scope so that they are still alive when wait_for_completion() returns.
+    cv::Mat img_bgr = cv::imread(test_img_path);
+    std::vector<cv::Point2f> face_landmark_5of68;
+    face68Landmarks->detect_async(img_bgr, bbox, face_landmark_5of68);
+
+    cv::Mat img_bgr2 = cv::imread(test_img_path);
+    std::vector<cv::Point2f> face_landmark_5of682;
+    face68Landmarks->detect_async(img_bgr2, bbox, face_landmark_5of682);
+
+    cv::Mat img_bgr3 = cv::imread(test_img_path);
+    std::vector<cv::Point2f> face_landmark_5of683;
+    face68Landmarks->detect_async(img_bgr3, bbox, face_landmark_5of683);
+
+    cv::Mat img_bgr4 = cv::imread(test_img_path);
+    std::vector<cv::Point2f> face_landmark_5of684;
+    face68Landmarks->detect_async(img_bgr4, bbox, face_landmark_5of684);
+
+    face68Landmarks->wait_for_completion();
+    face68Landmarks->shutdown();
+
+    std::cout<<"face id detect done!"<<std::endl;
+
+    delete face68Landmarks;
+#endif
+}
+
+
+
+int main(__unused int argc, __unused char *argv[])
+{
+    // test_tensorrt();   // single-threaded TensorRT path, currently disabled
+    test_tensorrt_mt();
+    // test_default();    // ONNXRuntime path, currently disabled
+    return 0;
+}
diff --git a/examples/lite/cv/test_lite_face_recognizer.cpp b/examples/lite/cv/test_lite_face_recognizer.cpp
@@ -0,0 +1,39 @@
+//
+// Created by wangzijian on 11/5/24.
+//
+#include "lite/lite.h"
+
+// ONNXRuntime smoke test: calls Face_Recognizer::detect() once with an image, five landmarks and a float vector.
+static void test_default()
+{
+#ifdef ENABLE_ONNXRUNTIME
+    std::string model_file = "../../../examples/hub/onnx/cv/arcface_w600k_r50.onnx";
+    std::string image_file = "../../../examples/lite/resources/test_lite_facefusion_pipeline_source.jpg";
+
+    // Five hard-coded landmark points that are handed to detect() together with the image.
+    std::vector<cv::Point2f> landmarks_5{
+            {568.2485f, 398.9512f},
+            {701.7346f, 399.64795f},
+            {634.2213f, 482.92694f},
+            {583.5656f, 543.10187f},
+            {684.52405f, 543.125f}
+    };
+
+    // 1. Test Default Engine ONNXRuntime
+    auto *recognizer = new lite::cv::faceid::Face_Recognizer(model_file);
+    cv::Mat input_bgr = cv::imread(image_file);
+    std::vector<float> embedding;
+
+    recognizer->detect(input_bgr, landmarks_5, embedding);
+
+    std::cout << "face id detect done!" << std::endl;
+
+    delete recognizer;
+#endif
+}
+
+int main(__unused int argc, __unused char *argv[])
+{
+    test_default();  // ONNXRuntime face-recognizer test; a no-op unless ENABLE_ONNXRUNTIME is defined
+    return 0;
+}
diff --git a/examples/lite/cv/test_lite_face_restoration.cpp b/examples/lite/cv/test_lite_face_restoration.cpp
@@ -0,0 +1,119 @@
+//
+// Created by wangzijian on 11/7/24.
+//
+#include "lite/lite.h"
+
+#include "lite/trt/cv/trt_face_restoration_mt.h"
+
+// ONNXRuntime smoke test: calls GFPGAN::detect() once with an image, five landmarks and an output path.
+static void test_default()
+{
+#ifdef ENABLE_ONNXRUNTIME
+    std::string model_file = "/home/lite.ai.toolkit/examples/hub/onnx/cv/gfpgan_1.4.onnx";
+    std::string input_file = "/home/lite.ai.toolkit/trt_result.jpg";
+    std::string output_file = "/home/lite.ai.toolkit/trt_result_final.jpg";
+
+    // Five hard-coded landmark points that are handed to detect() together with the image.
+    std::vector<cv::Point2f> landmarks_5{
+            {569.092041f, 398.845886f},
+            {701.891724f, 399.156677f},
+            {634.767212f, 482.927216f},
+            {584.270996f, 543.294617f},
+            {684.877991f, 543.067078f}
+    };
+
+    // 1. Test Default Engine ONNXRuntime
+    auto *restorer = new lite::cv::face::restoration::GFPGAN(model_file);
+    cv::Mat input_bgr = cv::imread(input_file);
+    restorer->detect(input_bgr, landmarks_5, output_file);
+
+    std::cout << "face restoration detect done!" << std::endl;
+
+    delete restorer;
+#endif
+}
+
+
+
+
+// Multi-threaded TensorRT test for face restoration (GFPGAN engine).
+// Reads each input image, queues it with detect_async() on trt_face_restoration_mt, waits for
+// every queued job and prints the total and per-image wall-clock time.
+static void test_tensorrt()
+{
+#ifdef ENABLE_TENSORRT
+    std::string engine_path = "/home/lite.ai.toolkit/examples/hub/trt/gfpgan_1.4_fp32.engine";
+
+    // 1. Test Default Engine TensorRT
+    const int num_threads = 4;  // use 4 threads
+    auto face_restoration_trt = std::make_unique<trt_face_restoration_mt>(engine_path, num_threads);
+
+    // 2. Prepare the test data: one input path and one output path per job (four images as an example)
+    std::vector<std::string> test_img_paths = {
+            "/home/lite.ai.toolkit/trt_result.jpg",
+            "/home/lite.ai.toolkit/trt_result_2.jpg",
+            "/home/lite.ai.toolkit/trt_result_3.jpg",
+            "/home/lite.ai.toolkit/trt_result_4.jpg"
+    };
+
+    std::vector<std::string> save_img_paths = {
+            "/home/lite.ai.toolkit/trt_facerestoration_mt_thread1.jpg",
+            "/home/lite.ai.toolkit/trt_facerestoration_mt_thread2.jpg",
+            "/home/lite.ai.toolkit/trt_facerestoration_mt_thread3.jpg",
+            "/home/lite.ai.toolkit/trt_facerestoration_mt_thread4.jpg"
+    };
+
+    // Five hard-coded landmark points, shared by every job
+    std::vector<cv::Point2f> face_landmark_5 = {
+            cv::Point2f(569.092041f, 398.845886f),
+            cv::Point2f(701.891724f, 399.156677f),
+            cv::Point2f(634.767212f, 482.927216f),
+            cv::Point2f(584.270996f, 543.294617f),
+            cv::Point2f(684.877991f, 543.067078f)
+    };
+
+    // 3. Submit the jobs; the timer covers image loading, submission and the wait below
+    auto start_time = std::chrono::high_resolution_clock::now();
+    size_t submitted = 0;  // images that were actually queued (unreadable ones are skipped)
+
+    for (size_t i = 0; i < test_img_paths.size(); ++i)
+    {
+        cv::Mat img_bgr = cv::imread(test_img_paths[i]);
+        if (img_bgr.empty())
+        {
+            std::cerr << "Failed to read image: " << test_img_paths[i] << std::endl;
+            continue;
+        }
+        // submit the task asynchronously
+        // NOTE(review): img_bgr leaves scope at the end of this iteration - detect_async() must keep its own copy; confirm
+        face_restoration_trt->detect_async(img_bgr, face_landmark_5, save_img_paths[i]);
+        ++submitted;
+        std::cout << "Submitted task " << i + 1 << " for processing" << std::endl;
+    }
+
+    // 4. Wait for all tasks to complete
+    std::cout << "Waiting for all tasks to complete..." << std::endl;
+    face_restoration_trt->wait_for_completion();
+
+    // 5. Compute and print the elapsed time
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
+
+    std::cout << "All tasks completed!" << std::endl;
+    std::cout << "Total processing time: " << duration.count() << "ms" << std::endl;
+    if (submitted > 0)
+    {
+        // average over the images that were really queued, not over the length of the path list
+        std::cout << "Average time per image: " << duration.count() / submitted << "ms" << std::endl;
+    }
+
+
+#endif
+}
+
+int main(__unused int argc, __unused char *argv[])
+{
+    // test_default();  // ONNXRuntime path, currently disabled
+    test_tensorrt();
+    return 0;
+}