Skip to content

Commit

Permalink
add syncgap option, use rough syncgap mode by default
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Feb 16, 2022
1 parent 2d1de87 commit a0882a6
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 10 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
![CI](https://github.com/nihui/realcugan-ncnn-vulkan/workflows/CI/badge.svg)
![download](https://img.shields.io/github/downloads/nihui/realcugan-ncnn-vulkan/total.svg)

ncnn implementation of Real-CUGAN converter. Runs fast on Intel / AMD / Nvidia with Vulkan API.
ncnn implementation of Real-CUGAN converter. Runs fast on Intel / AMD / Nvidia / Apple-Silicon with Vulkan API.

realcugan-ncnn-vulkan uses [ncnn project](https://github.com/Tencent/ncnn) as the universal neural network inference framework.

## [Download](https://github.com/nihui/realcugan-ncnn-vulkan/releases)

Download Windows/Linux/MacOS Executable for Intel/AMD/Nvidia GPU
Download Windows/Linux/MacOS Executable for Intel/AMD/Nvidia/Apple-Silicon GPU

**https://github.com/nihui/realcugan-ncnn-vulkan/releases**

Expand Down Expand Up @@ -43,6 +43,7 @@ Usage: realcugan-ncnn-vulkan -i infile -o outfile [options]...
-n noise-level denoise level (-1/0/1/2/3, default=-1)
-s scale upscale ratio (1/2/3/4, default=2)
-t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu
-c syncgap-mode sync gap mode (0/1/2, default=2)
-m model-path realcugan model path (default=models-se)
-g gpu-id gpu device to use (-1=cpu, default=auto) can be 0,1,2 for multi-gpu
-j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu
Expand All @@ -54,6 +55,7 @@ Usage: realcugan-ncnn-vulkan -i infile -o outfile [options]...
- `noise-level` = noise level, larger values mean a stronger denoise effect, -1 = no effect
- `scale` = scale level, 1 = no scaling, 2 = upscale 2x
- `tile-size` = tile size, use smaller value to reduce GPU memory usage, default selects automatically
- `syncgap-mode` = sync gap mode, 0 = no sync, 1 = accurate sync, 2 = rough sync
- `load:proc:save` = thread count for the three stages (image decoding + realcugan upscaling + image encoding), using larger values may increase GPU usage and consume more GPU memory. You can tune this configuration with "4:4:4" for many small-size images, and "2:2:2" for large-size images. The default setting usually works fine for most situations. If you find that your GPU is hungry, try increasing thread count to achieve faster processing.
- `format` = the format of the image to be output; png is better supported, while webp generally yields smaller file sizes. Both are losslessly encoded.

Expand Down
25 changes: 19 additions & 6 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ static void print_usage()
fprintf(stdout, " -n noise-level denoise level (-1/0/1/2/3, default=-1)\n");
fprintf(stdout, " -s scale upscale ratio (1/2/3/4, default=2)\n");
fprintf(stdout, " -t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu\n");
fprintf(stdout, " -c syncgap-mode sync gap mode (0/1/2, default=2)\n");
fprintf(stdout, " -m model-path realcugan model path (default=models-se)\n");
fprintf(stdout, " -g gpu-id gpu device to use (-1=cpu, default=auto) can be 0,1,2 for multi-gpu\n");
fprintf(stdout, " -j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu\n");
Expand Down Expand Up @@ -447,13 +448,14 @@ int main(int argc, char** argv)
std::vector<int> jobs_proc;
int jobs_save = 2;
int verbose = 0;
int syncgap = 2;
int tta_mode = 0;
path_t format = PATHSTR("png");

#if _WIN32
setlocale(LC_ALL, "");
wchar_t opt;
while ((opt = getopt(argc, argv, L"i:o:n:s:t:m:g:j:f:vxh")) != (wchar_t)-1)
while ((opt = getopt(argc, argv, L"i:o:n:s:t:c:m:g:j:f:vxh")) != (wchar_t)-1)
{
switch (opt)
{
Expand All @@ -472,6 +474,9 @@ int main(int argc, char** argv)
case L't':
tilesize = parse_optarg_int_array(optarg);
break;
case L'c':
syncgap = _wtoi(optarg);
break;
case L'm':
model = optarg;
break;
Expand Down Expand Up @@ -499,7 +504,7 @@ int main(int argc, char** argv)
}
#else // _WIN32
int opt;
while ((opt = getopt(argc, argv, "i:o:n:s:t:m:g:j:f:vxh")) != -1)
while ((opt = getopt(argc, argv, "i:o:n:s:t:c:m:g:j:f:vxh")) != -1)
{
switch (opt)
{
Expand All @@ -518,6 +523,9 @@ int main(int argc, char** argv)
case 't':
tilesize = parse_optarg_int_array(optarg);
break;
case 'c':
syncgap = atoi(optarg);
break;
case 'm':
model = optarg;
break;
Expand Down Expand Up @@ -569,6 +577,12 @@ int main(int argc, char** argv)
return -1;
}

if (!(syncgap == 0 || syncgap == 1 || syncgap == 2))
{
fprintf(stderr, "invalid syncgap argument\n");
return -1;
}

for (int i=0; i<(int)tilesize.size(); i++)
{
if (tilesize[i] != 0 && tilesize[i] < 32)
Expand Down Expand Up @@ -708,11 +722,10 @@ int main(int argc, char** argv)
return -1;
}

int syncgap = 0;

if (model.find(PATHSTR("models-se")) != path_t::npos)
if (model.find(PATHSTR("models-nose")) != path_t::npos)
{
syncgap = 1;
// force syncgap off for nose models
syncgap = 0;
}

#if _WIN32
Expand Down
63 changes: 61 additions & 2 deletions src/realcugan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,12 @@ int RealCUGAN::process(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
{
// cpu only
if (syncgap_needed && syncgap)
return process_cpu_se(inimage, outimage);
{
if (syncgap == 1)
return process_cpu_se(inimage, outimage);
if (syncgap == 2)
return process_cpu_se_rough(inimage, outimage);
}
else
return process_cpu(inimage, outimage);
}
Expand All @@ -259,7 +264,12 @@ int RealCUGAN::process(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
}

if (syncgap_needed && syncgap)
return process_se(inimage, outimage);
{
if (syncgap == 1)
return process_se(inimage, outimage);
if (syncgap == 2)
return process_se_rough(inimage, outimage);
}

const unsigned char* pixeldata = (const unsigned char*)inimage.data;
const int w = inimage.w;
Expand Down Expand Up @@ -1141,6 +1151,36 @@ int RealCUGAN::process_se(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
return 0;
}

// Vulkan pipeline for the "rough" sync-gap mode (dispatched from process()
// when syncgap == 2).
//
// Runs three steps over the whole image, sharing one FeatureCache:
//   1. stage0   - no cached inputs, produces the gap0..gap3 features
//   2. sync gap - synchronizes gap0..gap3 in a single pass
//   3. stage2   - consumes gap0..gap3 and writes the result into outimage
//
// inimage  : source image handed unchanged to every step
// outimage : destination image filled by stage2
//
// Always returns 0; the return codes of the individual steps are not inspected.
int RealCUGAN::process_se_rough(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
{
    // Borrow allocators from the device for the duration of this call;
    // both are handed back before returning.
    ncnn::VkAllocator* const blob_allocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* const staging_allocator = vkdev->acquire_staging_allocator();

    // Start from the network options and route all Vulkan allocations
    // (blobs and workspace share the blob allocator) through the borrowed ones.
    ncnn::Option run_opt = net.opt;
    run_opt.blob_vkallocator = blob_allocator;
    run_opt.workspace_vkallocator = blob_allocator;
    run_opt.staging_vkallocator = staging_allocator;

    FeatureCache features;

    // stage0 reads nothing from the cache; every later step works on the
    // same four gap features, so a single name list is shared.
    std::vector<std::string> no_inputs;
    std::vector<std::string> gap_names = {"gap0", "gap1", "gap2", "gap3"};

    process_se_stage0(inimage, no_inputs, gap_names, run_opt, features);
    process_se_sync_gap(inimage, gap_names, run_opt, features);
    process_se_stage2(inimage, gap_names, outimage, run_opt, features);

    // Empty the cache before the allocators go back to the device
    // (presumably its entries were allocated from them - TODO confirm).
    features.clear();

    vkdev->reclaim_blob_allocator(blob_allocator);
    vkdev->reclaim_staging_allocator(staging_allocator);

    return 0;
}

int RealCUGAN::process_cpu_se(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
{
FeatureCache cache;
Expand Down Expand Up @@ -1181,6 +1221,25 @@ int RealCUGAN::process_cpu_se(const ncnn::Mat& inimage, ncnn::Mat& outimage) con
return 0;
}

// CPU pipeline for the "rough" sync-gap mode (dispatched from process()
// when running cpu-only with syncgap == 2).
//
// Runs three steps over the whole image, sharing one FeatureCache:
//   1. stage0   - no cached inputs, produces the gap0..gap3 features
//   2. sync gap - synchronizes gap0..gap3 in a single pass
//   3. stage2   - consumes gap0..gap3 and writes the result into outimage
//
// inimage  : source image handed unchanged to every step
// outimage : destination image filled by stage2
//
// Always returns 0; the return codes of the individual steps are not inspected.
int RealCUGAN::process_cpu_se_rough(const ncnn::Mat& inimage, ncnn::Mat& outimage) const
{
    FeatureCache features;

    // stage0 reads nothing from the cache; every later step works on the
    // same four gap features, so a single name list is shared.
    std::vector<std::string> no_inputs;
    std::vector<std::string> gap_names = {"gap0", "gap1", "gap2", "gap3"};

    process_cpu_se_stage0(inimage, no_inputs, gap_names, features);
    process_cpu_se_sync_gap(inimage, gap_names, features);
    process_cpu_se_stage2(inimage, gap_names, outimage, features);

    // Drop the intermediate features explicitly once the output is written.
    features.clear();

    return 0;
}

int RealCUGAN::process_se_stage0(const ncnn::Mat& inimage, const std::vector<std::string>& names, const std::vector<std::string>& outnames, const ncnn::Option& opt, FeatureCache& cache) const
{
const unsigned char* pixeldata = (const unsigned char*)inimage.data;
Expand Down
4 changes: 4 additions & 0 deletions src/realcugan.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class RealCUGAN

int process_cpu_se(const ncnn::Mat& inimage, ncnn::Mat& outimage) const;

int process_se_rough(const ncnn::Mat& inimage, ncnn::Mat& outimage) const;

int process_cpu_se_rough(const ncnn::Mat& inimage, ncnn::Mat& outimage) const;

protected:
int process_se_stage0(const ncnn::Mat& inimage, const std::vector<std::string>& names, const std::vector<std::string>& outnames, const ncnn::Option& opt, FeatureCache& cache) const;
int process_se_stage2(const ncnn::Mat& inimage, const std::vector<std::string>& names, ncnn::Mat& outimage, const ncnn::Option& opt, FeatureCache& cache) const;
Expand Down

0 comments on commit a0882a6

Please sign in to comment.