Skip to content

Commit

Permalink
Use feature buffers for denoising
Browse files Browse the repository at this point in the history
  • Loading branch information
sergcpp committed May 1, 2023
1 parent b6ff5d5 commit 680cfe1
Show file tree
Hide file tree
Showing 26 changed files with 5,109 additions and 951 deletions.
5 changes: 4 additions & 1 deletion compile_shaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def main():

# Denoise
compile_shader(src_name="filter_variance")
compile_shader(src_name="nlm_filter")
compile_shader(src_name="nlm_filter", spv_name="nlm_filter", defines="-DUSE_BASE_COLOR=0 -DUSE_DEPTH_NORMAL=0")
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_n", defines="-DUSE_BASE_COLOR=0 -DUSE_DEPTH_NORMAL=1")
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_b", defines="-DUSE_BASE_COLOR=1 -DUSE_DEPTH_NORMAL=0")
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_bn", defines="-DUSE_BASE_COLOR=1 -DUSE_DEPTH_NORMAL=1")

# Other
compile_shader(src_name="prepare_indir_args")
Expand Down
2 changes: 2 additions & 0 deletions internal/Core.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
#define force_inline __attribute__((always_inline)) inline
#define assume_aligned(ptr, sz) (__builtin_assume_aligned((const void *)ptr, sz))
#define vectorcall
#define restrict __restrict__
#endif
#ifdef _MSC_VER
#define force_inline __forceinline
#define vectorcall __vectorcall
#define assume_aligned(ptr, sz) (__assume((((const char *)ptr) - ((const char *)0)) % (sz) == 0), (ptr))
#define restrict __restrict

#include <intrin.h>

Expand Down
98 changes: 77 additions & 21 deletions internal/CoreRef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4372,7 +4372,7 @@ void Ray::Ref::ShadePrimary(const pass_settings_t &ps, Span<const hit_data_t> in
int img_w, float mix_factor, color_rgba_t *out_color, color_rgba_t *out_base_color,
color_rgba_t *out_depth_normal) {
auto clamp_direct = simd_fvec4{std::numeric_limits<float>::max()};
if (ps.clamp_direct) {
if (ps.clamp_direct != 0.0f) {
clamp_direct.set<0>(ps.clamp_direct);
clamp_direct.set<1>(ps.clamp_direct);
clamp_direct.set<2>(ps.clamp_direct);
Expand Down Expand Up @@ -4411,7 +4411,7 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
int *out_secondary_rays_count, shadow_ray_t *out_shadow_rays, int *out_shadow_rays_count,
int img_w, color_rgba_t *out_color) {
auto clamp_indirect = simd_fvec4{std::numeric_limits<float>::max()};
if (ps.clamp_indirect) {
if (ps.clamp_indirect != 0.0f) {
clamp_indirect.set<0>(ps.clamp_indirect);
clamp_indirect.set<1>(ps.clamp_indirect);
clamp_indirect.set<2>(ps.clamp_indirect);
Expand All @@ -4435,10 +4435,13 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
}
}

template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
const float damping, const color_rgba_t variance[], const rect_t &output_rect,
const int output_stride, color_rgba_t output[]) {
namespace Ray {
namespace Ref {
template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE, bool FEATURE0, bool FEATURE1>
void JointNLMFilter(const color_rgba_t *restrict input, const rect_t &rect, const int input_stride, const float alpha,
const float damping, const color_rgba_t variance[], const color_rgba_t *restrict feature0,
const float feature0_weight, const color_rgba_t *restrict feature1, const float feature1_weight,
const rect_t &output_rect, const int output_stride, color_rgba_t *restrict output) {
const int WindowRadius = (WINDOW_SIZE - 1) / 2;
const float PatchDistanceNormFactor = NEIGHBORHOOD_SIZE * NEIGHBORHOOD_SIZE;
const int NeighborRadius = (NEIGHBORHOOD_SIZE - 1) / 2;
Expand All @@ -4457,7 +4460,7 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
for (int l = -WindowRadius; l <= WindowRadius; ++l) {
const int jx = ix + l;

simd_fvec4 distance = {};
simd_fvec4 color_distance = {};

for (int q = -NeighborRadius; q <= NeighborRadius; ++q) {
for (int p = -NeighborRadius; p <= NeighborRadius; ++p) {
Expand All @@ -4468,16 +4471,39 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
const simd_fvec4 jvar = {variance[(jy + q) * input_stride + (jx + p)].v, simd_mem_aligned};
const simd_fvec4 min_var = min(ivar, jvar);

distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
(0.0001f + damping * damping * (ivar + jvar));
color_distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
(0.0001f + damping * damping * (ivar + jvar));
}
}

const float patch_distance =
0.25f * PatchDistanceNormFactor *
(distance.get<0>() + distance.get<1>() + distance.get<2>() + distance.get<3>());
const float patch_distance = 0.25f * PatchDistanceNormFactor *
(color_distance.get<0>() + color_distance.get<1>() +
color_distance.get<2>() + color_distance.get<3>());
float weight = std::exp(-std::max(0.0f, patch_distance));

const float weight = std::exp(-std::max(0.0f, patch_distance));
if (FEATURE0 || FEATURE1) {
simd_fvec4 feature_distance = {};
if (FEATURE0) {
const simd_fvec4 ipx = {feature0[iy * input_stride + ix].v, simd_mem_aligned};
const simd_fvec4 jpx = {feature0[jy * input_stride + jx].v, simd_mem_aligned};

feature_distance = feature0_weight * (ipx - jpx) * (ipx - jpx);
}
if (FEATURE1) {
const simd_fvec4 ipx = {feature1[iy * input_stride + ix].v, simd_mem_aligned};
const simd_fvec4 jpx = {feature1[jy * input_stride + jx].v, simd_mem_aligned};

feature_distance = max(feature_distance, feature1_weight * (ipx - jpx) * (ipx - jpx));
}

const float feature_patch_distance =
0.25f * (feature_distance.get<0>() + feature_distance.get<1>() + feature_distance.get<2>() +
feature_distance.get<3>());
const float feature_weight =
std::exp(-std::max(0.0f, std::min(10000.0f, feature_patch_distance)));

weight = std::min(weight, feature_weight);
}

sum_output += simd_fvec4{input[jy * input_stride + jx].v, simd_mem_aligned} * weight;
sum_weight += weight;
Expand All @@ -4493,19 +4519,49 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
}
}
}
} // namespace Ref
} // namespace Ray

template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
void Ray::Ref::JointNLMFilter(const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
const float damping, const color_rgba_t variance[], const color_rgba_t feature1[],
const float feature1_weight, const color_rgba_t feature2[], const float feature2_weight,
const rect_t &output_rect, const int output_stride, color_rgba_t output[]) {
if (feature1 && feature2) {
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, true>(input, rect, input_stride, alpha, damping, variance,
feature1, feature1_weight, feature2, feature2_weight,
output_rect, output_stride, output);
} else if (feature1) {
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, false>(input, rect, input_stride, alpha, damping, variance,
feature1, feature1_weight, nullptr, 0.0f,
output_rect, output_stride, output);
} else if (feature2) {
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, false>(input, rect, input_stride, alpha, damping, variance,
feature2, feature2_weight, nullptr, 0.0f,
output_rect, output_stride, output);
} else {
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, false, false>(input, rect, input_stride, alpha, damping,
variance, nullptr, 0.0f, nullptr, 0.0f,
output_rect, output_stride, output);
}
}

template void Ray::Ref::NLMFilter<21 /* WINDOW_SIZE */, 5 /* NEIGHBORHOOD_SIZE */>(
template void Ray::Ref::JointNLMFilter<21 /* WINDOW_SIZE */, 5 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::NLMFilter<21 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::JointNLMFilter<21 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::NLMFilter<7 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::JointNLMFilter<7 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::NLMFilter<3 /* WINDOW_SIZE */, 1 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
template void Ray::Ref::JointNLMFilter<3 /* WINDOW_SIZE */, 1 /* NEIGHBORHOOD_SIZE */>(
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);

namespace Ray {
extern const int LUT_DIMS = 48;
Expand Down
10 changes: 6 additions & 4 deletions internal/CoreRef.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ force_inline int total_depth(const shadow_ray_t &r) {
}

// Generation of rays
void GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, int h, const float random_seq[],
int iteration, const uint16_t required_samples[], aligned_vector<ray_data_t> &out_rays);
void GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, int h, const float random_seq[], int iteration,
const uint16_t required_samples[], aligned_vector<ray_data_t> &out_rays);
void SampleMeshInTextureSpace(int iteration, int obj_index, int uv_layer, const mesh_t &mesh, const transform_t &tr,
const uint32_t *vtx_indices, const vertex_t *vertices, const rect_t &r, int w, int h,
const float *random_seq, aligned_vector<ray_data_t> &out_rays,
Expand Down Expand Up @@ -373,8 +373,10 @@ void ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t> inters, Sp

// Denoise
template <int WINDOW_SIZE = 7, int NEIGHBORHOOD_SIZE = 3>
void NLMFilter(const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
void JointNLMFilter(const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight,
const color_rgba_t feature1[], float feature1_weight, const rect_t &output_rect, int output_stride,
color_rgba_t output[]);

// Tonemap

Expand Down
31 changes: 29 additions & 2 deletions internal/RendererCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ template <typename SIMDPolicy> struct PassData {
aligned_vector<color_rgba_t, 16> temp_final_buf;
aligned_vector<color_rgba_t, 16> variance_buf;
aligned_vector<color_rgba_t, 16> filtered_variance_buf;
aligned_vector<color_rgba_t, 16> feature_buf1, feature_buf2;

aligned_vector<typename SIMDPolicy::RayHashType> hash_values;
std::vector<int> head_flags;
Expand Down Expand Up @@ -556,6 +557,16 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
p.temp_final_buf.resize(rect_ext.w * rect_ext.h);
p.variance_buf.resize(rect_ext.w * rect_ext.h);
p.filtered_variance_buf.resize(rect_ext.w * rect_ext.h);
if (!base_color_buf_.empty()) {
p.feature_buf1.resize(rect_ext.w * rect_ext.h);
} else {
p.feature_buf1 = {};
}
if (!depth_normals_buf_.empty()) {
p.feature_buf2.resize(rect_ext.w * rect_ext.h);
} else {
p.feature_buf2 = {};
}

#define FETCH_FINAL_BUF(_x, _y) \
Ref::simd_fvec4(raw_final_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)].v, \
Expand Down Expand Up @@ -589,6 +600,11 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
#undef FETCH_VARIANCE
#undef FETCH_FINAL_BUF

// Clamped lookups into the two feature source buffers.
// Each macro clamps _x to [0, w_ - 1] and _y to [0, h_ - 1] and then indexes the
// buffer as (clamped_y * w_ + clamped_x), so coordinates that fall outside that
// range read the nearest edge element instead of going out of bounds.
// The arguments are expanded textually and each appears once in the expansion.
// FETCH_BASE_COLOR reads from base_color_buf_.
#define FETCH_BASE_COLOR(_x, _y) \
base_color_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)]
// FETCH_DEPTH_NORMALS applies the same clamped indexing to depth_normals_buf_.
#define FETCH_DEPTH_NORMALS(_x, _y) \
depth_normals_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)]

for (int y = 4; y < rect_ext.h - 4; ++y) {
for (int x = 4; x < rect_ext.w - 4; ++x) {
const Ref::simd_fvec4 center_val = {p.variance_buf[(y + 0) * rect_ext.w + x].v, Ref::simd_mem_aligned};
Expand All @@ -603,9 +619,19 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage

res = max(res, center_val);
res.store_to(p.filtered_variance_buf[y * rect_ext.w + x].v, Ref::simd_mem_aligned);

if (!base_color_buf_.empty()) {
p.feature_buf1[y * rect_ext.w + x] = FETCH_BASE_COLOR(rect_ext.x + x, rect_ext.y + y);
}
if (!depth_normals_buf_.empty()) {
p.feature_buf2[y * rect_ext.w + x] = FETCH_DEPTH_NORMALS(rect_ext.x + x, rect_ext.y + y);
}
}
}

#undef FETCH_BASE_COLOR
#undef FETCH_DEPTH_NORMALS

Ref::tonemap_params_t tonemap_params;
float variance_threshold;

Expand Down Expand Up @@ -633,9 +659,10 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage

static_assert(EXT_RADIUS >= (NLM_WINDOW_SIZE - 1) / 2 + (NLM_NEIGHBORHOOD_SIZE - 1) / 2, "!");

Ref::NLMFilter<NLM_WINDOW_SIZE, NLM_NEIGHBORHOOD_SIZE>(
Ref::JointNLMFilter<NLM_WINDOW_SIZE, NLM_NEIGHBORHOOD_SIZE>(
p.temp_final_buf.data(), rect_t{EXT_RADIUS, EXT_RADIUS, rect.w, rect.h}, rect_ext.w, 1.0f, 0.45f,
p.filtered_variance_buf.data(), rect, w_, raw_filtered_buf_.data());
p.filtered_variance_buf.data(), !p.feature_buf1.empty() ? p.feature_buf1.data() : nullptr, 64.0f,
!p.feature_buf2.empty() ? p.feature_buf2.data() : nullptr, 32.0f, rect, w_, raw_filtered_buf_.data());

for (int y = rect.y; y < rect.y + rect.h; ++y) {
for (int x = rect.x; x < rect.x + rect.w; ++x) {
Expand Down
Loading

0 comments on commit 680cfe1

Please sign in to comment.