From 9f51f280b14af4f68a252d8f022df81d5d3e6614 Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 1 Dec 2023 19:13:54 +0800 Subject: [PATCH 1/7] Reduce redundant code for AOV --- zenovis/xinxinoptix/DisneyBSDF.h | 15 +++------- zenovis/xinxinoptix/PTKernel.cu | 48 ++++++++++++++++---------------- zenovis/xinxinoptix/TraceStuff.h | 3 +- 3 files changed, 29 insertions(+), 37 deletions(-) diff --git a/zenovis/xinxinoptix/DisneyBSDF.h b/zenovis/xinxinoptix/DisneyBSDF.h index a4e8e92529..e16d65d020 100644 --- a/zenovis/xinxinoptix/DisneyBSDF.h +++ b/zenovis/xinxinoptix/DisneyBSDF.h @@ -600,9 +600,7 @@ namespace DisneyBSDF{ prd->fromDiff = false; if(r3first_hit_type; - prd->first_hit_type = prd->depth==0?DIFFUSE_HIT:first_hit_type; + prd->hit_type = DIFFUSE_HIT; if(wo.z<0 && mat.subsurface>0)//inside, scattering, go out for sure { wi = BRDFBasics::UniformSampleHemisphere(r1, r2); @@ -648,9 +646,7 @@ namespace DisneyBSDF{ } else if(r3first_hit_type; - prd->first_hit_type = prd->depth==0?SPECULAR_HIT:first_hit_type; + prd->hit_type = SPECULAR_HIT; float ax, ay; BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); @@ -704,13 +700,10 @@ namespace DisneyBSDF{ wi = normalize(wi - 1.01f * dot(wi, N2) * N2); } auto isReflection = dot(wi, N2) * dot(wo, N2)>0?1:0; - auto first_hit_type = prd->first_hit_type; - prd->first_hit_type = prd->depth==0? (isReflection==1?SPECULAR_HIT:TRANSMIT_HIT):first_hit_type; + prd->hit_type = (isReflection==1?SPECULAR_HIT:TRANSMIT_HIT); }else if(r3first_hit_type; - prd->first_hit_type = prd->depth==0?SPECULAR_HIT:first_hit_type; + prd->hit_type = SPECULAR_HIT; vec3 wm = BRDFBasics::SampleGTR1(mat.clearcoatRoughness, r1, r2); if (wm.z < 0.0) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index d45f1542c2..52cecb1855 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -173,7 +173,6 @@ extern "C" __global__ void __raygen__rg() prd.maxDistance = 1e16f; prd.medium = DisneyBSDF::PhaseFunctions::vacuum; - prd.origin = ray_origin; prd.direction = ray_direction; prd.samplePdf = 1.0f; @@ -186,8 +185,7 @@ extern "C" __global__ void __raygen__rg() prd.ss_alpha_queue[0] = vec3(-1.0f); prd.minSpecRough = 0.01; prd.samplePdf = 1.0f; - prd.first_hit_type = 0; - prd.hitEnv = false; + prd.hit_type = 0; auto _tmin_ = prd._tmin_; auto _mask_ = prd._mask_; @@ -200,6 +198,17 @@ extern "C" __global__ void __raygen__rg() // Primary Ray traceRadiance(params.handle, ray_origin, ray_direction, _tmin_, prd.maxDistance, &prd, _mask_); + const auto primary_hit_type = prd.hit_type; + + if(primary_hit_type > 0) { + result_d = prd.radiance_d * prd.attenuation2; + result_s = prd.radiance_s * prd.attenuation2; + result_t = prd.radiance_t * prd.attenuation2; + + result_b = { 1, 1, 1 }; + } else { // backgroud + result_b = {}; + } tmp_albedo = prd.tmp_albedo; tmp_normal = prd.tmp_normal; @@ -207,12 +216,10 @@ extern "C" __global__ void __raygen__rg() prd.trace_denoise_albedo = false; prd.trace_denoise_normal = false; + float3* aov[4] = {&result_b, &result_d, &result_s, &result_t}; + for(;;) { - prd.radiance_d = make_float3(0); - prd.radiance_s = make_float3(0); - prd.radiance_t = make_float3(0); - _tmin_ = prd._tmin_; _mask_ = prd._mask_; @@ -229,21 +236,10 @@ extern "C" __global__ void __raygen__rg() float3 clampped = clamp(vec3(temp_radiance), vec3(0), vec3(10)); result += prd.depth>1?clampped:temp_radiance; - if(prd.depth==1 && prd.hitEnv == false) - { - result_d += prd.radiance_d * prd.attenuation2; - result_s += prd.radiance_s * prd.attenuation2; - result_t += prd.radiance_t * prd.attenuation2; - } - if(prd.depth>1 || (prd.depth==1 && prd.hitEnv == true)) { - result_d += - prd.first_hit_type == 1 ? clampped : make_float3(0, 0, 0); - result_s += - prd.first_hit_type == 2 ? clampped : make_float3(0, 0, 0); - result_t += - prd.first_hit_type == 3 ? clampped : make_float3(0, 0, 0); - } + if(prd.depth>1 && primary_hit_type > 0) { + *aov[primary_hit_type] += clampped; + } } prd.radiance = make_float3(0); @@ -268,10 +264,14 @@ extern "C" __global__ void __raygen__rg() if(prd.countEmitted == true) prd.passed = true; + + prd.radiance_d = make_float3(0); + prd.radiance_s = make_float3(0); + prd.radiance_t = make_float3(0); + traceRadiance(params.handle, ray_origin, ray_direction, _tmin_, prd.maxDistance, &prd, _mask_); } - result_b += prd.first_hit_type == 0 ? make_float3(0, 0, 0) - : make_float3(1, 1, 1); + seed = prd.seed; } while( --i ); @@ -378,7 +378,7 @@ extern "C" __global__ void __miss__radiance() } prd->done = true; - prd->hitEnv = true; + prd->hit_type = 0; return; } diff --git a/zenovis/xinxinoptix/TraceStuff.h b/zenovis/xinxinoptix/TraceStuff.h index 22fcef0208..38f87e1922 100644 --- a/zenovis/xinxinoptix/TraceStuff.h +++ b/zenovis/xinxinoptix/TraceStuff.h @@ -62,7 +62,6 @@ struct RadiancePRD unsigned int seed; unsigned int eventseed; unsigned int flags; - bool hitEnv; int countEmitted; int done; float3 shadowAttanuation; @@ -88,7 +87,7 @@ struct RadiancePRD //return pcg_rng(this->seed); } - unsigned char first_hit_type; + unsigned char hit_type; vec3 extinction() { auto idx = clamp(curMatIdx, 0, 7); return sigma_t_queue[idx]; From 112f5c71a9359bcebe0115e9fe4a742963f7119f Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 4 Dec 2023 14:31:27 +0800 Subject: [PATCH 2/7] result_b --- zenovis/xinxinoptix/PTKernel.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 52cecb1855..2eff468fb2 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -206,8 +206,6 @@ extern "C" __global__ void __raygen__rg() result_t = prd.radiance_t * prd.attenuation2; result_b = { 1, 1, 1 }; - } else { // backgroud - result_b = {}; } tmp_albedo = prd.tmp_albedo; @@ -273,6 +271,10 @@ extern "C" __global__ void __raygen__rg() } seed = prd.seed; + + if (primary_hit_type > 0) { + result_b += make_float3(1); + } } while( --i ); From cfc282614a61193100a4784ca43011a4cab85381 Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 4 Dec 2023 17:18:36 +0800 Subject: [PATCH 3/7] Minor update --- zenovis/xinxinoptix/PTKernel.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 2eff468fb2..5f78ebdb0c 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -204,8 +204,6 @@ extern "C" __global__ void __raygen__rg() result_d = prd.radiance_d * prd.attenuation2; result_s = prd.radiance_s * prd.attenuation2; result_t = prd.radiance_t * prd.attenuation2; - - result_b = { 1, 1, 1 }; } tmp_albedo = prd.tmp_albedo; From 047b8689ebdc361aaa99bf908a0adb0854e32407 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 5 Dec 2023 16:46:49 +0800 Subject: [PATCH 4/7] Optix MAX_INSTANCE_ID --- zenovis/xinxinoptix/Light.cu | 6 +++--- zenovis/xinxinoptix/optixPathTracer.cpp | 11 ++++++++--- zenovis/xinxinoptix/optixPathTracer.h | 2 ++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/zenovis/xinxinoptix/Light.cu b/zenovis/xinxinoptix/Light.cu index 7471103a38..bc0af5b820 100644 --- a/zenovis/xinxinoptix/Light.cu +++ b/zenovis/xinxinoptix/Light.cu @@ -24,15 +24,15 @@ static __inline__ __device__ void evalSurface(float4* uniforms) { } static __inline__ __device__ bool checkLightGAS(uint instanceId) { - return ( instanceId >= OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID-2 ); + return ( instanceId >= params.maxInstanceID-2 ); } static __inline__ __device__ bool isPlaneLightGAS(uint instanceId) { - return ( instanceId == OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID-1 ); + return ( instanceId == params.maxInstanceID-1 ); } static __inline__ __device__ bool isTriangleLightGAS(uint instanceId) { - return ( instanceId == OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID-2 ); + return ( instanceId == params.maxInstanceID-2 ); } extern "C" __global__ void __closesthit__radiance() diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index bce4386bce..dea159b255 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -993,6 +993,11 @@ void buildRootIAS() optix_instances.push_back( optix_instance ); } + uint32_t MAX_INSTANCE_ID; + optixDeviceContextGetProperty( state.context, + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID, &MAX_INSTANCE_ID, sizeof(MAX_INSTANCE_ID)); + state.params.maxInstanceID = MAX_INSTANCE_ID; + //process light if (lightsWrapper.lightTrianglesGas != 0) { @@ -1003,7 +1008,7 @@ void buildRootIAS() auto shader_index = OptixUtil::matIDtoShaderIndex[combinedID]; opinstance.flags = OPTIX_INSTANCE_FLAG_NONE; - opinstance.instanceId = OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID-2; + opinstance.instanceId = MAX_INSTANCE_ID-2; opinstance.sbtOffset = shader_index * RAY_TYPE_COUNT; opinstance.visibilityMask = LightMatMask; opinstance.traversableHandle = lightsWrapper.lightTrianglesGas; @@ -1021,7 +1026,7 @@ void buildRootIAS() auto shader_index = OptixUtil::matIDtoShaderIndex[combinedID]; opinstance.flags = OPTIX_INSTANCE_FLAG_NONE; - opinstance.instanceId = OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID-1; + opinstance.instanceId = MAX_INSTANCE_ID-1; opinstance.sbtOffset = shader_index * RAY_TYPE_COUNT; opinstance.visibilityMask = LightMatMask; opinstance.traversableHandle = lightsWrapper.lightPlanesGas; @@ -1039,7 +1044,7 @@ void buildRootIAS() auto shader_index = OptixUtil::matIDtoShaderIndex[combinedID]; opinstance.flags = OPTIX_INSTANCE_FLAG_NONE; - opinstance.instanceId = OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID; + opinstance.instanceId = MAX_INSTANCE_ID; opinstance.sbtOffset = shader_index * RAY_TYPE_COUNT; opinstance.visibilityMask = LightMatMask; opinstance.traversableHandle = lightsWrapper.lightSpheresGas; diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index f9670675b6..e0af3deb41 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -182,6 +182,8 @@ struct Params uint32_t firstSphereLightIdx; uint32_t firstTriangleLightIdx; + uint32_t maxInstanceID; + unsigned long long lightTreeSampler; unsigned long long triangleLightCoordsBuffer; unsigned long long triangleLightNormalBuffer; From 0e59fe647b2e92f39b1fbc73fbaa237c318f05c6 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 5 Dec 2023 21:32:14 +0800 Subject: [PATCH 5/7] aov condition fix --- zenovis/xinxinoptix/PTKernel.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 5f78ebdb0c..8642e78f3d 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -233,7 +233,7 @@ extern "C" __global__ void __raygen__rg() result += prd.depth>1?clampped:temp_radiance; - if(prd.depth>1 && primary_hit_type > 0) { + if(primary_hit_type > 0 && ( prd.depth>1 || (prd.depth==1 && prd.hit_type == 0) )) { *aov[primary_hit_type] += clampped; } } From 36ca6c454e603063c8a69b6c28bbc36a96cb5ce4 Mon Sep 17 00:00:00 2001 From: iaomw Date: Wed, 6 Dec 2023 20:57:54 +0800 Subject: [PATCH 6/7] Reduce geometry overhead --- zenovis/xinxinoptix/DeflMatShader.cu | 273 +++++++++--------------- zenovis/xinxinoptix/optixPathTracer.cpp | 55 +++-- zenovis/xinxinoptix/optixPathTracer.h | 4 +- 3 files changed, 139 insertions(+), 193 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index c435d966df..dbd5bc95b6 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -12,7 +12,6 @@ #include "Light.h" #define _SPHERE_ 0 -#define TRI_PER_MESH 4096 //COMMON_CODE #include "DisneyBRDF.h" @@ -185,7 +184,6 @@ static __inline__ __device__ bool isBadVector(const float3& vector) { extern "C" __global__ void __anyhit__shadow_cutout() { - const OptixTraversableHandle gas = optixGetGASTraversableHandle(); const uint sbtGASIndex = optixGetSbtGASIndex(); const uint primIdx = optixGetPrimitiveIndex(); @@ -237,96 +235,64 @@ extern "C" __global__ void __anyhit__shadow_cutout() unsigned short isLight = 0; #else - size_t inst_idx2 = optixGetInstanceIndex(); - size_t inst_idx = rt_data->meshIdxs[inst_idx2]; - size_t vert_idx_offset = (inst_idx * TRI_PER_MESH + primIdx)*3; - - float m16[16]; - m16[12]=0; m16[13]=0; m16[14]=0; m16[15]=1; - optixGetObjectToWorldTransformMatrix(m16); - mat4& meshMat = *reinterpret_cast(&m16); + size_t inst_idx = optixGetInstanceIndex(); + size_t vert_aux_offset = rt_data->auxOffset[inst_idx]; + size_t vert_idx_offset = (vert_aux_offset + primIdx)*3; float3 _vertices_[3]; optixGetTriangleVertexData( gas, primIdx, sbtGASIndex, 0, _vertices_); - float3& av0 = _vertices_[0]; //make_float3(rt_data->vertices[vert_idx_offset + 0]); - float3& av1 = _vertices_[1]; //make_float3(rt_data->vertices[vert_idx_offset + 1]); - float3& av2 = _vertices_[2]; //make_float3(rt_data->vertices[vert_idx_offset + 2]); - vec4 bv0 = vec4(av0.x, av0.y, av0.z, 1); - vec4 bv1 = vec4(av1.x, av1.y, av1.z, 1); - vec4 bv2 = vec4(av2.x, av2.y, av2.z, 1); - bv0 = meshMat * bv0; - bv1 = meshMat * bv1; - bv2 = meshMat * bv2; - float3& v0 = *(float3*)&bv0; //make_float3(bv0.x, bv0.y, bv0.z); - float3& v1 = *(float3*)&bv1; //make_float3(bv1.x, bv1.y, bv1.z); - float3& v2 = *(float3*)&bv2; //make_float3(bv2.x, bv2.y, bv2.z); - - float3 N_0 = normalize( cross( normalize(v1-v0), normalize(v2-v0) ) ); - - if (isBadVector(N_0)) - { - N_0 = DisneyBSDF::SampleScatterDirection(prd->seed); - N_0 = faceforward( N_0, -ray_dir, N_0 ); - } - //float w = rt_data->vertices[ vert_idx_offset+0 ].w; + const float3& v0 = _vertices_[0]; + const float3& v1 = _vertices_[1]; + const float3& v2 = _vertices_[2]; + + float3 N_Local = normalize( cross( normalize(v1-v0), normalize(v2-v1) ) ); /* MODMA */ float2 barys = optixGetTriangleBarycentrics(); - - mat3 meshMat3x3(meshMat); - float3 an0 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+0 ] )); - vec3 bn0(an0); - bn0 = meshMat3x3 * bn0; - float3 n0 = make_float3(bn0.x, bn0.y, bn0.z); - n0 = dot(n0, N_0)>0.8f?n0:N_0; - - float3 an1 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+1 ] )); - vec3 bn1(an1); - bn1 = meshMat3x3 * bn1; - float3 n1 = make_float3(bn1.x, bn1.y, bn1.z); - n1 = dot(n1, N_0)>0.8f?n1:N_0; - - float3 an2 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+2 ] )); - vec3 bn2(an2); - bn2 = meshMat3x3 * bn2; - float3 n2 = make_float3(bn2.x, bn2.y, bn2.z); - n2 = dot(n2, N_0)>0.8f?n2:N_0; - float3 uv0 = make_float3(rt_data->uv[ vert_idx_offset+0 ] ); - float3 uv1 = make_float3(rt_data->uv[ vert_idx_offset+1 ] ); - float3 uv2 = make_float3(rt_data->uv[ vert_idx_offset+2 ] ); - float3 clr0 = make_float3(rt_data->clr[ vert_idx_offset+0 ] ); - float3 clr1 = make_float3(rt_data->clr[ vert_idx_offset+1 ] ); - float3 clr2 = make_float3(rt_data->clr[ vert_idx_offset+2 ] ); - float3 atan0 = make_float3(rt_data->tan[ vert_idx_offset+0 ] ); - float3 atan1 = make_float3(rt_data->tan[ vert_idx_offset+1 ] ); - float3 atan2 = make_float3(rt_data->tan[ vert_idx_offset+2 ] ); - vec3 btan0(atan0); - vec3 btan1(atan1); - vec3 btan2(atan2); - btan0 = meshMat3x3 * btan0; - btan1 = meshMat3x3 * btan1; - btan2 = meshMat3x3 * btan2; - float3 tan0 = make_float3(btan0.x, btan0.y, btan0.z); - float3 tan1 = make_float3(btan1.x, btan1.y, btan1.z); - float3 tan2 = make_float3(btan2.x, btan2.y, btan2.z); - - N_0 = normalize(interp(barys, n0, n1, n2)); - float3 N = faceforward( N_0, -ray_dir, N_0 ); - attrs.pos = vec3(P.x, P.y, P.z); + float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) ); + n0 = dot(n0, N_Local)>0.8f?n0:N_Local; + float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) ); + n1 = dot(n1, N_Local)>0.8f?n1:N_Local; + float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) ); + n2 = dot(n2, N_Local)>0.8f?n2:N_Local; + + N_Local = normalize(interp(barys, n0, n1, n2)); + float3 N_World = optixTransformNormalFromObjectToWorldSpace(N_Local); + + if (isBadVector(N_World)) + { + N_World = DisneyBSDF::SampleScatterDirection(prd->seed); + } + + float3 N = faceforward( N_World, -ray_dir, N_World ); + + attrs.pos = P; attrs.nrm = N; + + const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] ); + const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] ); + const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] ); + const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] ); + const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] ); + const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] ); + const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] ); + const float3& tan1 = *(float3*)&( rt_data->tan[ vert_idx_offset+1 ] ); + const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] ); + attrs.uv = interp(barys, uv0, uv1, uv2);//todo later - //attrs.clr = rt_data->face_attrib_clr[vert_idx_offset]; attrs.clr = interp(barys, clr0, clr1, clr2); attrs.tang = interp(barys, tan0, tan1, tan2); - attrs.instPos = rt_data->instPos[inst_idx2]; - attrs.instNrm = rt_data->instNrm[inst_idx2]; - attrs.instUv = rt_data->instUv[inst_idx2]; - attrs.instClr = rt_data->instClr[inst_idx2]; - attrs.instTang = rt_data->instTang[inst_idx2]; + attrs.tang = optixTransformVectorFromObjectToWorldSpace(attrs.tang); - unsigned short isLight = rt_data->lightMark[inst_idx * TRI_PER_MESH + primIdx]; + attrs.instPos = rt_data->instPos[inst_idx]; + attrs.instNrm = rt_data->instNrm[inst_idx]; + attrs.instUv = rt_data->instUv[inst_idx]; + attrs.instClr = rt_data->instClr[inst_idx]; + attrs.instTang = rt_data->instTang[inst_idx]; + + unsigned short isLight = 0;//rt_data->lightMark[vert_aux_offset + primIdx]; #endif MatOutput mats = evalMaterial(zenotex, rt_data->uniforms, attrs); @@ -461,11 +427,9 @@ extern "C" __global__ void __closesthit__radiance() } prd->test_distance = false; - const OptixTraversableHandle gas = optixGetGASTraversableHandle(); const uint sbtGASIndex = optixGetSbtGASIndex(); - const uint primIdx = optixGetPrimitiveIndex(); - + const uint primIdx = optixGetPrimitiveIndex(); const float3 ray_orig = optixGetWorldRayOrigin(); const float3 ray_dir = optixGetWorldRayDirection(); @@ -473,7 +437,6 @@ extern "C" __global__ void __closesthit__radiance() HitGroupData* rt_data = (HitGroupData*)optixGetSbtDataPointer(); auto zenotex = rt_data->textures; - MatInput attrs{}; #if (_SPHERE_) @@ -508,87 +471,59 @@ extern "C" __global__ void __closesthit__radiance() #else - size_t inst_idx2 = optixGetInstanceIndex(); - size_t inst_idx = rt_data->meshIdxs[inst_idx2]; - size_t vert_idx_offset = (inst_idx * TRI_PER_MESH + primIdx)*3; + size_t inst_idx = optixGetInstanceIndex(); + size_t vert_aux_offset = rt_data->auxOffset[inst_idx]; + size_t vert_idx_offset = (vert_aux_offset + primIdx)*3; - float m16[16]; - m16[12]=0; m16[13]=0; m16[14]=0; m16[15]=1; - optixGetObjectToWorldTransformMatrix(m16); - mat4& meshMat = *reinterpret_cast(&m16); + unsigned short isLight = 0;//rt_data->lightMark[vert_aux_offset + primIdx]; float3 _vertices_[3]; optixGetTriangleVertexData( gas, primIdx, sbtGASIndex, 0, _vertices_); - float3& av0 = _vertices_[0]; //make_float3(rt_data->vertices[vert_idx_offset + 0]); - float3& av1 = _vertices_[1]; //make_float3(rt_data->vertices[vert_idx_offset + 1]); - float3& av2 = _vertices_[2]; //make_float3(rt_data->vertices[vert_idx_offset + 2]); - vec4 bv0 = vec4(av0.x, av0.y, av0.z, 1); - vec4 bv1 = vec4(av1.x, av1.y, av1.z, 1); - vec4 bv2 = vec4(av2.x, av2.y, av2.z, 1); - bv0 = meshMat * bv0; - bv1 = meshMat * bv1; - bv2 = meshMat * bv2; - float3& v0 = *(float3*)&bv0; //make_float3(bv0.x, bv0.y, bv0.z); - float3& v1 = *(float3*)&bv1; //make_float3(bv1.x, bv1.y, bv1.z); - float3& v2 = *(float3*)&bv2; //make_float3(bv2.x, bv2.y, bv2.z); - - float3 N_0 = normalize( cross( normalize(v1-v0), normalize(v2-v1) ) ); // this value has precision issue for big float - - if (isBadVector(N_0)) - { - N_0 = DisneyBSDF::SampleScatterDirection(prd->seed); - N_0 = faceforward( N_0, -ray_dir, N_0 ); - } - - prd->geometryNormal = N_0; - - unsigned short isLight = rt_data->lightMark[inst_idx * TRI_PER_MESH + primIdx]; - //float w = rt_data->vertices[ vert_idx_offset+0 ].w; + const float3& v0 = _vertices_[0]; + const float3& v1 = _vertices_[1]; + const float3& v2 = _vertices_[2]; /* MODMA */ float2 barys = optixGetTriangleBarycentrics(); - -// float3 n0 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+0 ] )); -// -// float3 n1 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+1 ] )); -// -// float3 n2 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+2 ] )); - - float3 uv0 = make_float3(rt_data->uv[ vert_idx_offset+0 ] ); - float3 uv1 = make_float3(rt_data->uv[ vert_idx_offset+1 ] ); - float3 uv2 = make_float3(rt_data->uv[ vert_idx_offset+2 ] ); - float3 clr0 = make_float3(rt_data->clr[ vert_idx_offset+0 ] ); - float3 clr1 = make_float3(rt_data->clr[ vert_idx_offset+1 ] ); - float3 clr2 = make_float3(rt_data->clr[ vert_idx_offset+2 ] ); - float3 atan0 = make_float3(rt_data->tan[ vert_idx_offset+0 ] ); - float3 atan1 = make_float3(rt_data->tan[ vert_idx_offset+1 ] ); - float3 atan2 = make_float3(rt_data->tan[ vert_idx_offset+2 ] ); - mat3 meshMat3x3(meshMat); - vec3 btan0(atan0); - vec3 btan1(atan1); - vec3 btan2(atan2); - btan0 = meshMat3x3 * btan0; - btan1 = meshMat3x3 * btan1; - btan2 = meshMat3x3 * btan2; - float3 tan0 = make_float3(btan0.x, btan0.y, btan0.z); - float3 tan1 = make_float3(btan1.x, btan1.y, btan1.z); - float3 tan2 = make_float3(btan2.x, btan2.y, btan2.z); - - //N_0 = normalize(interp(barys, n0, n1, n2)); - float3 N = N_0;//faceforward( N_0, -ray_dir, N_0 ); - P = interp(barys, v0, v1, v2); // this value has precision issue for big float - attrs.pos = vec3(P.x, P.y, P.z); + auto P_Local = interp(barys, v0, v1, v2); + P = optixTransformPointFromObjectToWorldSpace(P_Local); // this value has precision issue for big float + + attrs.pos = P; + + float3 N_Local = normalize( cross( normalize(v1-v0), normalize(v2-v1) ) ); // this value has precision issue for big float + float3 N_World = optixTransformNormalFromObjectToWorldSpace(N_Local); + + if (isBadVector(N_World)) + { + N_World = DisneyBSDF::SampleScatterDirection(prd->seed); + N_World = faceforward( N_World, -ray_dir, N_World ); + } + prd->geometryNormal = N_World; + + float3 N = N_World; attrs.nrm = N; + + const float3& uv0 = *(float3*)&( rt_data->uv[ vert_idx_offset+0 ] ); + const float3& uv1 = *(float3*)&( rt_data->uv[ vert_idx_offset+1 ] ); + const float3& uv2 = *(float3*)&( rt_data->uv[ vert_idx_offset+2 ] ); + const float3& clr0 = *(float3*)&( rt_data->clr[ vert_idx_offset+0 ] ); + const float3& clr1 = *(float3*)&( rt_data->clr[ vert_idx_offset+1 ] ); + const float3& clr2 = *(float3*)&( rt_data->clr[ vert_idx_offset+2 ] ); + const float3& tan0 = *(float3*)&( rt_data->tan[ vert_idx_offset+0 ] ); + const float3& tan1 = *(float3*)&( rt_data->tan[ vert_idx_offset+1 ] ); + const float3& tan2 = *(float3*)&( rt_data->tan[ vert_idx_offset+2 ] ); + attrs.uv = interp(barys, uv0, uv1, uv2);//todo later - //attrs.clr = rt_data->face_attrib_clr[vert_idx_offset]; attrs.clr = interp(barys, clr0, clr1, clr2); attrs.tang = normalize(interp(barys, tan0, tan1, tan2)); - attrs.instPos = rt_data->instPos[inst_idx2]; - attrs.instNrm = rt_data->instNrm[inst_idx2]; - attrs.instUv = rt_data->instUv[inst_idx2]; - attrs.instClr = rt_data->instClr[inst_idx2]; - attrs.instTang = rt_data->instTang[inst_idx2]; + attrs.tang = optixTransformVectorFromObjectToWorldSpace(attrs.tang); + + attrs.instPos = rt_data->instPos[inst_idx]; + attrs.instNrm = rt_data->instNrm[inst_idx]; + attrs.instUv = rt_data->instUv[inst_idx]; + attrs.instClr = rt_data->instClr[inst_idx]; + attrs.instTang = rt_data->instTang[inst_idx]; #endif @@ -603,28 +538,22 @@ extern "C" __global__ void __closesthit__radiance() #else - float3 an0 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+0 ] )); - vec3 bn0(an0); - bn0 = meshMat3x3 * bn0; - float3 n0 = make_float3(bn0.x, bn0.y, bn0.z); - n0 = dot(n0, N_0)>(1-mats.smoothness)?n0:N_0; - - float3 an1 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+1 ] )); - vec3 bn1(an1); - bn1 = meshMat3x3 * bn1; - float3 n1 = make_float3(bn1.x, bn1.y, bn1.z); - n1 = dot(n1, N_0)>(1-mats.smoothness)?n1:N_0; - - float3 an2 = normalize(make_float3(rt_data->nrm[ vert_idx_offset+2 ] )); - vec3 bn2(an2); - bn2 = meshMat3x3 * bn2; - float3 n2 = make_float3(bn2.x, bn2.y, bn2.z); - n2 = dot(n2, N_0)>(1-mats.smoothness)?n2:N_0; - N_0 = normalize(interp(barys, n0, n1, n2)); - N = N_0; + float3 n0 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+0 ]) ); + n0 = dot(n0, N_Local)>(1-mats.smoothness)?n0:N_Local; + + float3 n1 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+1 ]) ); + n1 = dot(n1, N_Local)>(1-mats.smoothness)?n1:N_Local; + + float3 n2 = normalize( *(float3*)&(rt_data->nrm[ vert_idx_offset+2 ]) ); + n2 = dot(n2, N_Local)>(1-mats.smoothness)?n2:N_Local; + + N_Local = normalize(interp(barys, n0, n1, n2)); + N_World = optixTransformNormalFromObjectToWorldSpace(N_Local); + + N = N_World; if(mats.doubleSide>0.5f||mats.thin>0.5f){ - N = faceforward( N_0, -ray_dir, N_0 ); + N = faceforward( N_World, -ray_dir, N_World ); prd->geometryNormal = faceforward( prd->geometryNormal, -ray_dir, prd->geometryNormal ); } #endif diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index dea159b255..59b23dfdf4 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -83,7 +83,6 @@ using namespace zeno::ChiefDesignerEXR; #ifndef M_PI #define M_PI 3.14159265358979323846 #endif -#define TRI_PER_MESH 4096 #include struct CppTimer { void tick() { @@ -196,7 +195,8 @@ struct PathTracerState raii d_tan; raii d_lightMark; raii d_mat_indices; - raii d_meshIdxs; + + raii vertexAuxOffsetGlobal; raii d_instPos; raii d_instNrm; @@ -778,17 +778,17 @@ static void buildMeshIAS(PathTracerState& state, int rayTypeCount, std::vector meshIdxs(num_instances); - + std::vector vertexAuxOffsetGlobal(num_instances); + uint32_t vertexAuxOffset = 0u; + std::vector instPos(num_instances); std::vector instNrm(num_instances); std::vector instUv(num_instances); std::vector instClr(num_instances); std::vector instTang(num_instances); size_t sbt_offset = 0; + for( size_t i = 0; i < g_staticAndDynamicMeshNum; ++i ) { auto mesh = m_meshes[i]; @@ -802,8 +802,9 @@ static void buildMeshIAS(PathTracerState& state, int rayTypeCount, std::vectorgas_handle; memcpy( optix_instance.transform, mat3r4c, sizeof( float ) * 12 ); - meshIdxs[i] = i; - + vertexAuxOffsetGlobal[i] = vertexAuxOffset; + vertexAuxOffset += mesh->verts.size(); + instPos[i] = defaultInstPos; instNrm[i] = defaultInstNrm; instUv[i] = defaultInstUv; @@ -828,7 +829,7 @@ static void buildMeshIAS(PathTracerState& state, int rayTypeCount, std::vector(instanceId); optix_instance.visibilityMask = DefaultMatMask; optix_instance.traversableHandle = mesh->gas_handle; - memcpy(optix_instance.transform, instMat4x4, sizeof(float) * 12); + memcpy(optix_instance.transform, instMat3r4c, sizeof(float) * 12); - meshIdxs[instanceId] = meshesOffset; + vertexAuxOffsetGlobal[instanceId] = vertexAuxOffset; instPos[instanceId] = instAttrs.pos[k]; instNrm[instanceId] = instAttrs.nrm[k]; @@ -849,6 +850,7 @@ static void buildMeshIAS(PathTracerState& state, int rayTypeCount, std::vectorverts.size(); ++meshesOffset; } } @@ -864,26 +866,27 @@ static void buildMeshIAS(PathTracerState& state, int rayTypeCount, std::vectorgas_handle; memcpy(optix_instance.transform, mat3r4c, sizeof(float) * 12); - meshIdxs[instanceId] = meshesOffset; - + vertexAuxOffsetGlobal[instanceId] = vertexAuxOffset; + instPos[instanceId] = defaultInstPos; instNrm[instanceId] = defaultInstNrm; instUv[instanceId] = defaultInstUv; instClr[instanceId] = defaultInstClr; instTang[instanceId] = defaultInstTang; - + ++instanceId; ++meshesOffset; + + vertexAuxOffset += mesh->verts.size(); } } } - state.d_meshIdxs.resize(sizeof(meshIdxs[0]) * meshIdxs.size(), 0); - // CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.d_meshIdxs.reset() ), sizeof(meshIdxs[0]) * meshIdxs.size()) ); + state.vertexAuxOffsetGlobal.resize(sizeof(vertexAuxOffsetGlobal[0]) * vertexAuxOffsetGlobal.size(), 0); CUDA_CHECK( cudaMemcpy( - reinterpret_cast( (CUdeviceptr)state.d_meshIdxs ), - meshIdxs.data(), - sizeof(meshIdxs[0]) * meshIdxs.size(), + reinterpret_cast( (CUdeviceptr)state.vertexAuxOffsetGlobal ), + vertexAuxOffsetGlobal.data(), + sizeof(vertexAuxOffsetGlobal[0]) * vertexAuxOffsetGlobal.size(), cudaMemcpyHostToDevice ) ); @@ -1184,7 +1187,7 @@ static void createSBT( PathTracerState& state ) hitgroup_records[sbt_idx].data.clr = reinterpret_cast( (CUdeviceptr)state.d_clr ); hitgroup_records[sbt_idx].data.tan = reinterpret_cast( (CUdeviceptr)state.d_tan ); hitgroup_records[sbt_idx].data.lightMark = reinterpret_cast( (CUdeviceptr)state.d_lightMark ); - hitgroup_records[sbt_idx].data.meshIdxs = reinterpret_cast( (CUdeviceptr)state.d_meshIdxs ); + hitgroup_records[sbt_idx].data.auxOffset = reinterpret_cast( (CUdeviceptr)state.vertexAuxOffsetGlobal ); hitgroup_records[sbt_idx].data.instPos = reinterpret_cast( (CUdeviceptr)state.d_instPos ); hitgroup_records[sbt_idx].data.instNrm = reinterpret_cast( (CUdeviceptr)state.d_instNrm ); @@ -1542,7 +1545,10 @@ void UpdateStaticMesh(std::map const &mtlidlut) { if(!using20xx) { splitMesh(g_vertices, g_mat_indices, g_meshPieces, 0, 0); g_staticMeshNum = g_meshPieces.size(); + size_t vertSize = TRI_PER_MESH * 3 * g_meshPieces.size(); + vertSize = std::min(g_vertices.size(), vertSize); + g_staticVertNum = vertSize; g_vertices.resize(vertSize); g_clr.resize(vertSize); @@ -1560,7 +1566,10 @@ void UpdateDynamicMesh(std::map const &mtlidlut) { if(!using20xx) { splitMesh(g_vertices, g_mat_indices, g_meshPieces, g_staticMeshNum, g_staticVertNum); g_staticAndDynamicMeshNum = g_meshPieces.size(); + size_t vertSize = TRI_PER_MESH * 3 * g_meshPieces.size(); + vertSize = std::min(g_vertices.size(), vertSize); + g_staticAndDynamicVertNum = vertSize; g_vertices.resize(vertSize); g_clr.resize(vertSize); @@ -1598,7 +1607,10 @@ void UpdateStaticInstMesh(const std::map &mtlidlut) splitMesh(vertices, mat_indices, meshPieces, 0, 0); staticMeshNum = meshPieces.size(); + std::size_t vertSize = TRI_PER_MESH * 3 * meshPieces.size(); + vertSize = std::min(vertices.size(), vertSize); + staticVertNum = vertSize; vertices.resize(vertSize); clr.resize(vertSize); @@ -1631,7 +1643,10 @@ void UpdateDynamicInstMesh(std::map const &mtlidlut) auto &meshPieces = instData.meshPieces; splitMesh(vertices, mat_indices, meshPieces, staticMeshNum, staticVertNum); + std::size_t vertSize = TRI_PER_MESH * 3 * meshPieces.size(); + vertSize = std::min(vertices.size(), vertSize); + vertices.resize(vertSize); clr.resize(vertSize); nrm.resize(vertSize); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index e0af3deb41..04b8e69488 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -7,6 +7,8 @@ // #include #include +#define TRI_PER_MESH (1<<29) //2^29 + enum RayType { RAY_TYPE_RADIANCE = 0, @@ -259,7 +261,7 @@ struct HitGroupData float4* clr; float4* tan; unsigned short* lightMark; - int* meshIdxs; + uint32_t* auxOffset; float3* instPos; float3* instNrm; From b67ac2325d3eff7ceabf68978907c6102bd5617d Mon Sep 17 00:00:00 2001 From: iaomw Date: Wed, 6 Dec 2023 20:58:30 +0800 Subject: [PATCH 7/7] Minor changes --- zenovis/xinxinoptix/DeflMatShader.cu | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index dbd5bc95b6..2e5d842f2b 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -194,8 +194,6 @@ extern "C" __global__ void __anyhit__shadow_cutout() HitGroupData* rt_data = (HitGroupData*)optixGetSbtDataPointer(); - const auto zenotex = rt_data->textures; - RadiancePRD* prd = getPRD(); MatInput attrs{}; @@ -295,7 +293,7 @@ extern "C" __global__ void __anyhit__shadow_cutout() unsigned short isLight = 0;//rt_data->lightMark[vert_aux_offset + primIdx]; #endif - MatOutput mats = evalMaterial(zenotex, rt_data->uniforms, attrs); + MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); if(length(attrs.tang)>0) { @@ -436,7 +434,6 @@ extern "C" __global__ void __closesthit__radiance() float3 P = ray_orig + optixGetRayTmax() * ray_dir; HitGroupData* rt_data = (HitGroupData*)optixGetSbtDataPointer(); - auto zenotex = rt_data->textures; MatInput attrs{}; #if (_SPHERE_) @@ -527,7 +524,7 @@ extern "C" __global__ void __closesthit__radiance() #endif - MatOutput mats = evalMaterial(zenotex, rt_data->uniforms, attrs); + MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); #if _SPHERE_ @@ -933,7 +930,7 @@ extern "C" __global__ void __closesthit__radiance() attrs.H = normalize(H); attrs.reflectance = reflectance; attrs.fresnel = DisneyBSDF::DisneyFresnel(mats.basecolor, mats.metallic, mats.ior, mats.specularTint, dot(attrs.H, attrs.V), dot(attrs.H, attrs.L), false); - MatOutput mat2 = evalReflectance(zenotex, rt_data->uniforms, attrs); + MatOutput mat2 = evalReflectance(rt_data->textures, rt_data->uniforms, attrs); reflectance = mat2.reflectance; } @@ -968,7 +965,7 @@ extern "C" __global__ void __closesthit__radiance() attrs.H = normalize(H); attrs.reflectance = lbrdf; attrs.fresnel = DisneyBSDF::DisneyFresnel( mats.basecolor, mats.metallic, mats.ior, mats.specularTint, dot(attrs.H, attrs.V), dot(attrs.H, attrs.L), false); - mat2 = evalReflectance(zenotex, rt_data->uniforms, attrs); + mat2 = evalReflectance(rt_data->textures, rt_data->uniforms, attrs); } return (mats.thin>0.5f? float3(mat2.reflectance):lbrdf);