Skip to content
This repository was archived by the owner on Jun 3, 2024. It is now read-only.

Add Nvidium compatibility #17

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ dependencies {
mappings "net.fabricmc:yarn:${project.yarn_mappings}:v2"
modImplementation "net.fabricmc:fabric-loader:${project.loader_version}"

// Mixin Extras
include(implementation(annotationProcessor("io.github.llamalad7:mixinextras-fabric:${project.mixin_extras_version}")))

// Fabric API. This is technically optional, but you probably want it anyway.
modImplementation "net.fabricmc.fabric-api:fabric-api:${project.fabric_api_version}"

Expand All @@ -60,6 +63,9 @@ dependencies {

// Apache Commons Math
implementation include("org.apache.commons:commons-math3:${project.apache_commons_math_version}")

// Nvidium (Modrinth Maven)
modImplementation "maven.modrinth:nvidium:${project.nvidium_version}"
}

processResources {
Expand Down
83 changes: 83 additions & 0 deletions docs/nvidium/shaders/occlusion/scene.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#define Vertex uvec4

// Per-section record: two ivec4 values (a bit-packed header and the render ranges).
// Original note: this is because in the section rasterizer you get fewer cache misses, thus higher throughput.
// NOTE(review): presumably "this" refers to the compact packing of the struct - confirm.
struct Section {
ivec4 header;
//Bit layout of the header, as documented by the original author:
//Header.x -> 0-3=offsetx 4-7=sizex 8-31=chunk x
//Header.y -> 0-3=offsetz 4-7=sizez 8-31=chunk z
//Header.z -> 0-3=offsety 4-7=sizey 8-15=chunk y
//Header.w -> quad offset

//NOTE(review): meaning of the four renderRanges components is not visible in this file - confirm against the host code
ivec4 renderRanges;
};

// Bit-packed region record; decode it with unpackRegionSize / unpackRegionPosition / unpackRegionCount.
struct Region {
// Fields read from a by the unpack helpers:
//   bits 62-63 = size y, bits 59-61 = size x, bits 56-58 = size z,
//   bits 48-55 = count, bits 24-47 = position x (signed), bits 0-23 = position y (signed)
uint64_t a;
// Fields read from b by the unpack helpers: bits 40-63 = position z (signed)
// NOTE(review): the remaining bits of b are not read anywhere in this file
uint64_t b;
};

// Extracts the packed size fields from the top byte of region.a.
// x comes from bits 59-61, y from bits 62-63 and z from bits 56-58.
ivec3 unpackRegionSize(Region region) {
    uint64_t bits = region.a;
    uint64_t sizeX = (bits >> 59) & 7;
    uint64_t sizeY = bits >> 62;
    uint64_t sizeZ = (bits >> 56) & 7;
    return ivec3(sizeX, sizeY, sizeZ);
}

// Decodes the signed 24-bit x/y/z fields of a region.
// Each field is first moved to the top of a signed word and then shifted back down,
// so the right shift on the signed type performs the sign extension.
ivec3 unpackRegionPosition(Region region) {
    //TODO: optimize
    // x lives in bits 24-47 of a: shift left by 16 to put it at bits 40-63
    int64_t xAtTop = int64_t(region.a << (64 - 24 - 24));
    // y lives in bits 0-23 of a: truncate to 32 bits and put it at bits 8-31
    int yAtTop = int(region.a) << 8;
    // z already occupies bits 40-63 of b
    int64_t zAtTop = int64_t(region.b);

    return ivec3(int(xAtTop >> (64 - 24)), yAtTop >> 8, int(zAtTop >> (64 - 24)));
}

// Returns the 8-bit count stored in bits 48-55 of region.a.
int unpackRegionCount(Region region) {
    uint64_t countBits = (region.a >> 48) & 255;
    return int(countBits);
}

// Returns true when the header is all zero after header.y has been masked.
// header is passed by value, so the caller's copy is not modified.
bool sectionEmpty(ivec4 header) {
    // Unary ~ binds tighter than <<, so the mask is (~0x1FF) << 17 == 0xFC000000,
    // i.e. only bits 26-31 of header.y survive.
    // NOTE(review): if the intent was to ignore just bits 17-25 the mask would be ~(0x1FF << 17) - confirm.
    header.y &= (~0x1FF) << 17;
    return all(equal(header, ivec4(0)));
}


// Per-frame scene state shared by the shaders that import this file.
// The block has no instance name, so every member is visible as a global identifier.
// Member order defines the buffer layout - do not reorder without updating the host side.
layout(std140, binding=0) uniform SceneData {
//Members basically need to go in descending order of alignment
//align(16)
mat4 MVP;
ivec4 chunkPosition;
vec4 subchunkOffset;
vec4 fogColour;

//vec4 subChunkPosition;//The subChunkTranslation is already done inside the MVP
//align(8)
readonly restrict uint16_t *regionIndicies;//Pointer to block of memory at the end of the SceneData struct, also mapped to be a uniform
readonly restrict Region *regionData;
restrict Section *sectionData;
//NOTE: for the following, can make it so that region visibility actually uses section visibility array
restrict uint8_t *regionVisibility;
restrict uint8_t *sectionVisibility;
//Terrain command buffer, the first 4 bytes are actually the count
writeonly restrict uvec2 *terrainCommandBuffer;
writeonly restrict uvec2 *translucencyCommandBuffer;

readonly restrict uint16_t *sortingRegionList;

//TODO:FIXME: only apply non readonly to translucency mesh
restrict Vertex *terrainData;//readonly


//readonly restrict u64vec4 *terrainData;
//uvec4 *terrainData;

uint32_t *statistics_buffer;

//Fog parameters
float fogStart;
float fogEnd;
bool isCylindricalFog;

//align(2)
uint16_t regionCount;//Number of regions in regionIndicies
//align(1)
uint8_t frameId;

//NOTE(review): these two 4-byte members follow a 1-byte member, which departs from the
//descending-alignment ordering used above - confirm the offsets the host code writes to
int u_Far; // Blendium: the view distance
float u_ViewDistanceFactor; // Blendium: the view distance blend factor
};
45 changes: 45 additions & 0 deletions docs/nvidium/shaders/terrain/frag.frag
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#version 460
#extension GL_ARB_shading_language_include : enable
#pragma optionNV(unroll all)
#define UNROLL_LOOP
#extension GL_NV_gpu_shader5 : require
#extension GL_NV_bindless_texture : require
#extension GL_NV_shader_buffer_load : require

//#extension GL_NV_conservative_raster_underestimation : enable

//#extension GL_NV_fragment_shader_barycentric : require

#import <nvidium:occlusion/scene.glsl>

// Final fragment colour.
layout(location = 0) out vec4 colour;
// Per-fragment inputs; field usage below is as read by main() in this shader.
layout(location = 1) in Interpolants {
// xy = texture coordinate, z = bias argument passed to texture(), w = alpha-test cutoff
f16vec4 uv_bias_cutoff;
// Multiplied into the sampled colour
f16vec3 tint;
// Added to the colour after the tint has been applied
f16vec3 addin;
// Blendium: the fragment distance
float16_t v_FragDistance;
};


// Texture sampled for the base colour
layout(binding = 0) uniform sampler2D tex_diffuse;

// Blendium: u_Far (the view distance) and u_ViewDistanceFactor (the view distance blend factor)
// are members of the SceneData uniform block imported from <nvidium:occlusion/scene.glsl>.
// That block is declared without an instance name, so its members already live in the global
// scope; declaring loose uniforms with the same names here would be a redeclaration error.

//layout (depth_greater) out float gl_FragDepth;

// Fragment entry point: sample, alpha-test, apply tint/additive colour, then fade alpha with distance.
void main() {
    // Debug outputs kept for reference:
    //uint uid = gl_PrimitiveID*132471+123571;
    //colour = vec4(float((uid>>0)&7)/7, float((uid>>3)&7)/7, float((uid>>6)&7)/7, 1.0);
    //colour = vec4(1.0,1.0,0,1);
    //colour = vec4(1.0,(uv_bias.z/-8.1f)+0.001f,0,1);

    // Sample the diffuse texture; uv_bias_cutoff.z is the bias argument
    colour = texture(tex_diffuse, uv_bias_cutoff.xy, uv_bias_cutoff.z);

    // Alpha test against the interpolated cutoff
    if (colour.a < uv_bias_cutoff.w) {
        discard;
    }

    // Multiplicative tint first, additive term second
    colour.xyz = colour.xyz * tint + addin;

    // Blendium: blend the alpha of the blocks
    // NOTE(review): smoothstep is undefined when edge0 >= edge1, so this presumably relies on
    // u_ViewDistanceFactor being below 1.0 - confirm.
    float fadeEnd = u_Far * 16.0;
    float fadeStart = u_ViewDistanceFactor * fadeEnd;
    colour.a *= 1.0 - smoothstep(fadeStart, fadeEnd, v_FragDistance);
}
133 changes: 133 additions & 0 deletions docs/nvidium/shaders/terrain/mesh.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#version 460

#extension GL_ARB_shading_language_include : enable
#pragma optionNV(unroll all)
#define UNROLL_LOOP
#extension GL_NV_mesh_shader : require
#extension GL_NV_gpu_shader5 : require
#extension GL_NV_bindless_texture : require

#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_vote : require

#import <nvidium:occlusion/scene.glsl>
#import <nvidium:terrain/fog.glsl>
#import <nvidium:terrain/vertex_format.glsl>


//It seems like for terrain at least, the sweet spot is ~16 quads per mesh invocation (even if the local size is not 32 )
//Each invocation emits one quad: 4 vertices and 2 triangles, hence 16*4=64 vertices and 16*2=32 primitives
layout(local_size_x = 16) in;
layout(triangles, max_vertices=64, max_primitives=32) out;

//Per-vertex outputs, indexed by output vertex id in emitVertex
layout(location=1) out Interpolants {
//UV in xy, mipping bias in z, alpha cutoff in w
f16vec4 uv_bias_cutoff;
//Multiplicative and additive colour terms produced by computeFog
f16vec3 tint;
f16vec3 addin;
// Blendium: the fragment distance
float16_t v_FragDistance;
} OUT[];

//Data handed over from the task shader
taskNV in Task {
//Added to every decoded vertex position
vec3 origin;
//Added to every offset returned by getOffset
uint baseOffset;
//Used in main to compute the number of primitives left for this workgroup
uint quadCount;

//Binary search indices and data: binI* hold the range upper bounds, binV* the matching base values (see getOffset)
uvec4 binIa;
uvec4 binIb;
uvec4 binVa;
uvec4 binVb;
};

//Texture fetched by sampleLight
layout(binding = 1) uniform sampler2D tex_light;

// Fetches the texel at integer coordinate uv from tex_light (mip level 0) and
// returns its rgb with alpha forced to 1.
vec4 sampleLight(uvec2 uv) {
    ivec2 texel = ivec2(uv);
    vec3 light = texelFetch(tex_light, texel, 0).rgb;
    return vec4(light, 1);
}


// Writes the six indices (two triangles) for the quad owned by this invocation.
// Invocation i owns output vertices 4*i .. 4*i+3 and index slots 6*i .. 6*i+5.
void emitQuadIndicies() {
    uint quad = gl_LocalInvocationID.x;
    uint firstIndex = quad * 6;
    uint firstVertex = quad << 2;

    // Triangles (0,1,2) and (2,3,0)
    const uint corners[6] = uint[6](0u, 1u, 2u, 2u, 3u, 0u);
    for (uint i = 0u; i < 6u; ++i) {
        gl_PrimitiveIndicesNV[firstIndex + i] = firstVertex + corners[i];
    }
}

// Decodes one vertex of a quad and writes its position and interpolants.
//   vertexBaseId - index of the quad's first vertex in terrainData
//   innerId      - which of the quad's vertices to emit; callers in this file pass 0..3
void emitVertex(uint vertexBaseId, uint innerId) {
    Vertex V = terrainData[vertexBaseId + innerId];
    // Each invocation owns four consecutive output vertices
    uint outId = (gl_LocalInvocationID.x<<2)+innerId;

    vec3 pos = decodeVertexPosition(V)+origin;
    gl_MeshVerticesNV[outId].gl_Position = MVP*vec4(pos,1.0);

    //TODO: make this shared state between all the vertices?
    float mippingBias = decodeVertexMippingBias(V);
    float alphaCutoff = decodeVertexAlphaCutoff(V);

    OUT[outId].uv_bias_cutoff = f16vec4(vec4(decodeVertexUV(V), mippingBias, alphaCutoff));

    vec4 tint = decodeVertexColour(V);
    tint *= sampleLight(decodeLightUV(V));
    tint *= tint.w;

    // Position used for both the distance and the fog computation
    vec3 fogPos = pos+subchunkOffset.xyz;

    // Blendium: calculate the fragment distance
    // GLSL has no C-style cast operator; conversions are written as constructors.
    // NOTE(review): getFragDistance is presumably provided by the imported fog.glsl - confirm.
    OUT[outId].v_FragDistance = float16_t(getFragDistance(isCylindricalFog, fogPos));

    vec3 tintO;
    vec3 addiO;
    computeFog(isCylindricalFog, fogPos, tint, fogColour, fogStart, fogEnd, tintO, addiO);
    OUT[outId].tint = f16vec3(tintO);
    OUT[outId].addin = f16vec3(addiO);
}


// Maps the global invocation index to a quad offset using the eight ranges from the task shader.
// binIa/binIb hold the upper bound of each range and binVa/binVb the matching base value;
// the result is base + (index within the range) + baseOffset.
// Returns uint(-1) when the invocation index is past the last range.
// Note, all threads in the work group are probably going to take the same path
uint getOffset() {
    uint invocation = gl_GlobalInvocationID.x;

    //TODO: replace this linear scan with a binary search
    if (invocation < binIa.x) return binVa.x + invocation + baseOffset;
    if (invocation < binIa.y) return binVa.y + (invocation - binIa.x) + baseOffset;
    if (invocation < binIa.z) return binVa.z + (invocation - binIa.y) + baseOffset;
    if (invocation < binIa.w) return binVa.w + (invocation - binIa.z) + baseOffset;
    if (invocation < binIb.x) return binVb.x + (invocation - binIa.w) + baseOffset;
    if (invocation < binIb.y) return binVb.y + (invocation - binIb.x) + baseOffset;
    if (invocation < binIb.z) return binVb.z + (invocation - binIb.y) + baseOffset;
    if (invocation < binIb.w) return binVb.w + (invocation - binIb.z) + baseOffset;

    return uint(-1);
}

// Mesh entry point: each invocation emits one quad (4 vertices, 2 triangles).
void main() {
    uint quadId = getOffset();

    // Past the last range: nothing to render for this invocation
    if (quadId == uint(-1)) {
        return;
    }

    emitQuadIndicies();

    uint firstVertex = quadId << 2;
    for (uint corner = 0u; corner < 4u; ++corner) {
        emitVertex(firstVertex, corner);
    }

    // Only the first invocation publishes the primitive count for the workgroup
    if (gl_LocalInvocationID.x == 0) {
        // Quads remaining from this workgroup's first quad onwards (16 quads per workgroup)
        int remainingQuads = int(quadCount) - int(gl_WorkGroupID.x << 4);
        // 2 primitives per quad, capped at the declared max_primitives
        gl_PrimitiveCountNV = min(uint(remainingQuads) << 1, 32);
    }
}
11 changes: 8 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
# Done to increase the memory available to gradle.
# Gradle settings
# Increase memory available to Gradle, enable parallelism, and enable caching
org.gradle.jvmargs=-Xmx1G
org.gradle.parallel=true
org.gradle.caching=true

# Fabric Properties
# Fabric properties
# Check these on https://modmuss50.me/fabric.html
minecraft_version=1.20.1
yarn_mappings=1.20.1+build.10
loader_version=0.14.21

# Mod Properties
# Mod properties
mod_version=1.0.1
maven_group=io.github.steveplays28
archives_base_name=blendium
supported_minecraft_version=1.20.x

# Dependencies
mixin_extras_version=0.2.2
fabric_api_version=0.91.0+1.20.1
modmenu_version=7.2.1
sodium_version=mc1.20.1-0.5.3
Expand All @@ -22,3 +26,4 @@ iris_shaders_version=1.6.11+1.20.1
glsl_transformer_version=2.0.0
jcpp_version=1.4.14
apache_commons_math_version=3.6.1
nvidium_version=0.2.3-beta
Loading