feat: updated engine version to 4.4-rc1

2025-02-23 14:38:14 +01:00 · 2025-02-23 14:38:14 +01:00 · 21ba8e33af
commit 21ba8e33af
parent ee00efde1f
5459 changed files with 1128836 additions and 198305 deletions
--- a/engine/servers/rendering/renderer_rd/shaders/effects/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl
@ -260,14 +260,14 @@ void main() {
 #ifdef MODE_COMPOSITE_BOKEH
 	frag_color.rgb = texture(source_color, uv).rgb;

-	float center_weigth = texture(source_weight, uv).r;
+	float center_weight = texture(source_weight, uv).r;
 	float sample_weight = texture(original_weight, uv).r;

 	float mix_amount;
-	if (sample_weight < center_weigth) {
-		mix_amount = min(1.0, max(0.0, max(abs(center_weigth), abs(sample_weight)) - DEPTH_GAP));
+	if (sample_weight < center_weight) {
+		mix_amount = min(1.0, max(0.0, max(abs(center_weight), abs(sample_weight)) - DEPTH_GAP));
 	} else {
-		mix_amount = min(1.0, max(0.0, abs(center_weigth) - DEPTH_GAP));
+		mix_amount = min(1.0, max(0.0, abs(center_weight) - DEPTH_GAP));
 	}

 	// let alpha blending take care of mixing
--- a/engine/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl
@ -8,7 +8,6 @@ layout(push_constant, std430) uniform Params {
 	float z_far;
 	float z_near;
 	vec2 texel_size;
-	vec4 screen_rect;
 }
 params;

@ -17,8 +16,7 @@ layout(location = 0) out vec2 uv_interp;
 void main() {
 	vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
 	uv_interp = base_arr[gl_VertexIndex];
-	vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy;
-	gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0);
+	gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0);
 }

 #[fragment]
@ -35,7 +33,6 @@ layout(push_constant, std430) uniform Params {
 	float z_far;
 	float z_near;
 	vec2 texel_size;
-	vec4 screen_rect;
 }
 params;

--- a/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl
@ -177,24 +177,27 @@ void main() {

 			float theta;
 			if (Ny < Nx) {
-				if (Ny <= -0.999)
+				if (Ny <= -0.999) {
 					theta = Nx;
-				else
+				} else {
 					theta = Ny;
+				}
 			} else {
-				if (Ny >= 0.999)
+				if (Ny >= 0.999) {
 					theta = -Nx;
-				else
+				} else {
 					theta = -Ny;
+				}
 			}

 			float phi;
-			if (Nz <= -0.999)
+			if (Nz <= -0.999) {
 				phi = -NmaxXY;
-			else if (Nz >= 0.999)
+			} else if (Nz >= 0.999) {
 				phi = NmaxXY;
-			else
+			} else {
 				phi = Nz;
+			}

 			float theta2 = theta * theta;
 			float phi2 = phi * phi;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl
@ -170,24 +170,27 @@ void main() {

 			float theta;
 			if (Ny < Nx) {
-				if (Ny <= -0.999)
+				if (Ny <= -0.999) {
 					theta = Nx;
-				else
+				} else {
 					theta = Ny;
+				}
 			} else {
-				if (Ny >= 0.999)
+				if (Ny >= 0.999) {
 					theta = -Nx;
-				else
+				} else {
 					theta = -Ny;
+				}
 			}

 			float phi;
-			if (Nz <= -0.999)
+			if (Nz <= -0.999) {
 				phi = -NmaxXY;
-			else if (Nz >= 0.999)
+			} else if (Nz >= 0.999) {
 				phi = NmaxXY;
-			else
+			} else {
 				phi = Nz;
+			}

 			float theta2 = theta * theta;
 			float phi2 = phi * phi;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/effects/luminance_reduce_raster_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/luminance_reduce_raster_inc.glsl
@ -1,4 +1,3 @@
-
 layout(push_constant, std430) uniform PushConstant {
 	ivec2 source_size;
 	ivec2 dest_size;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/motion_vectors_store.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/motion_vectors_store.glsl
@ -0,0 +1,32 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "motion_vector_inc.glsl"
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+layout(set = 0, binding = 0) uniform sampler2D depth_buffer;
+layout(rg16f, set = 0, binding = 1) uniform restrict writeonly image2D velocity_buffer;
+
+layout(push_constant, std430) uniform Params {
+	highp mat4 reprojection_matrix;
+	vec2 resolution;
+	uint pad[2];
+}
+params;
+
+void main() {
+	// Out of bounds check.
+	if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) {
+		return;
+	}
+
+	ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+
+	float depth = texelFetch(depth_buffer, pos, 0).x;
+	vec2 uv = (vec2(pos) + 0.5f) / params.resolution;
+	vec2 velocity = derive_motion_vector(uv, depth, params.reprojection_matrix);
+	imageStore(velocity_buffer, pos, vec4(velocity, 0.0f, 0.0f));
+}
--- a/engine/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
@ -237,7 +237,7 @@ void main() {

 		// This is an ad-hoc term to fade out the SSR as roughness increases. Values used
 		// are meant to match the visual appearance of a ReflectionProbe.
-		float roughness_fade = smoothstep(0.4, 0.7, 1.0 - normal_roughness.w);
+		float roughness_fade = smoothstep(0.4, 0.7, 1.0 - roughness);

 		// Schlick term.
 		float metallic = texelFetch(source_metallic, ssC << 1, 0).w;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/sort.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/sort.glsl
@ -70,8 +70,9 @@ void main() {

 	int i;
 	for (i = 0; i < 2 * ITERATIONS; ++i) {
-		if (GI + i * NUM_THREADS < numElementsInThreadGroup)
+		if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
 			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+		}
 	}

 	groupMemoryBarrier();
@ -163,8 +164,9 @@ void main() {

 	// Load shared data
 	for (i = 0; i < 2; ++i) {
-		if (GI + i * NUM_THREADS < tgp.w)
+		if (GI + i * NUM_THREADS < tgp.w) {
 			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+		}
 	}

 	groupMemoryBarrier();
--- a/engine/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
@ -50,11 +50,14 @@ const int num_taps[5] = { 3, 5, 12, 0, 0 };
 //
 #define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar
 //
-#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too
+// WARNING: The MIP generation on the C++ side will be enabled on quality preset 2 regardless of
+// this value, so if changing here, change the C++ side too.
+#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2)
 #define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
 //
-// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
-// testing purposes, it will not yield performance gains (or correct results)
+// WARNING: The edge handling is hard-coded to 'disabled' on quality level 0, and enabled above,
+// on the C++ side; while toggling it here will work for testing purposes, it will not yield
+// performance gains (or correct results).
 #define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
 //
 #define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
--- a/engine/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
@ -49,8 +49,9 @@ const int num_taps[5] = { 3, 5, 12, 0, 0 };
 #define SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2)
 #define SSIL_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
 //
-// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
-// testing purposes, it will not yield performance gains (or correct results)
+// WARNING: The edge handling is hard-coded to 'disabled' on quality level 0, and enabled above,
+// on the C++ side; while toggling it here will work for testing purposes, it will not yield
+// performance gains (or correct results).
 #define SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
 //
 #define SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
--- a/engine/servers/rendering/renderer_rd/shaders/effects/subsurface_scattering.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/subsurface_scattering.glsl
@ -152,10 +152,10 @@ void main() {
 		float depth_scale;

 		if (params.orthogonal) {
-			depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0;
+			depth = -(depth * (params.camera_z_far - params.camera_z_near) - (params.camera_z_far + params.camera_z_near)) / 2.0;
 			depth_scale = params.unit_size; //remember depth is negative by default in OpenGL
 		} else {
-			depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near));
+			depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near + depth * (params.camera_z_far - params.camera_z_near));
 			depth_scale = params.unit_size / depth; //remember depth is negative by default in OpenGL
 		}

--- a/engine/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
@ -250,19 +250,25 @@ vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) {
 	vec3 rmax = (aabb_max - p.xyz);
 	vec3 rmin = (aabb_min - p.xyz);

-	if (r.x > rmax.x + FLT_MIN)
+	if (r.x > rmax.x + FLT_MIN) {
 		r *= (rmax.x / r.x);
-	if (r.y > rmax.y + FLT_MIN)
+	}
+	if (r.y > rmax.y + FLT_MIN) {
 		r *= (rmax.y / r.y);
-	if (r.z > rmax.z + FLT_MIN)
+	}
+	if (r.z > rmax.z + FLT_MIN) {
 		r *= (rmax.z / r.z);
+	}

-	if (r.x < rmin.x - FLT_MIN)
+	if (r.x < rmin.x - FLT_MIN) {
 		r *= (rmin.x / r.x);
-	if (r.y < rmin.y - FLT_MIN)
+	}
+	if (r.y < rmin.y - FLT_MIN) {
 		r *= (rmin.y / r.y);
-	if (r.z < rmin.z - FLT_MIN)
+	}
+	if (r.z < rmin.z - FLT_MIN) {
 		r *= (rmin.z / r.z);
+	}

 	return p + r;
 }
@ -307,6 +313,8 @@ float luminance(vec3 color) {
 	return max(dot(color, lumCoeff), 0.0001f);
 }

+// This is "velocity disocclusion" as described by https://www.elopezr.com/temporal-aa-and-the-quest-for-the-holy-trail/.
+// We use texel space, so our scale and threshold differ.
 float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) {
 	vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy;
 	vec2 velocity_texels = velocity * params.resolution;
@ -336,7 +344,7 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_
 	// Compute blend factor
 	float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16.
 	{
-		// If re-projected UV is out of screen, converge to current color immediatel
+		// If re-projected UV is out of screen, converge to current color immediately.
 		float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0;

 		// Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting).
--- a/engine/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl
@ -207,6 +207,14 @@ vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) {

 #endif // !USE_GLOW_FILTER_BICUBIC

+// Based on Reinhard's extended formula, see equation 4 in https://doi.org/cjbgrt
+vec3 tonemap_reinhard(vec3 color, float white) {
+	float white_squared = white * white;
+	vec3 white_squared_color = white_squared * color;
+	// Equivalent to color * (1 + color / white_squared) / (1 + color)
+	return (white_squared_color + color * color) / (white_squared_color + white_squared);
+}
+
 vec3 tonemap_filmic(vec3 color, float white) {
 	// exposure bias: input scale (color *= bias, white *= bias) to make the brightness consistent with other tonemappers
 	// also useful to scale the input to the range that the tonemapper is designed for (some require very high input values)
@ -256,8 +264,77 @@ vec3 tonemap_aces(vec3 color, float white) {
 	return color_tonemapped / white_tonemapped;
 }

-vec3 tonemap_reinhard(vec3 color, float white) {
-	return (white * color + color) / (color * white + white);
+// Polynomial approximation of EaryChow's AgX sigmoid curve.
+// x must be within the range [0.0, 1.0]
+vec3 agx_contrast_approx(vec3 x) {
+	// Generated with Excel trendline
+	// Input data: Generated using python sigmoid with EaryChow's configuration and 57 steps
+	// Additional padding values were added to give correct intersections at 0.0 and 1.0
+	// 6th order, intercept of 0.0 to remove an operation and ensure intersection at 0.0
+	vec3 x2 = x * x;
+	vec3 x4 = x2 * x2;
+	return 0.021 * x + 4.0111 * x2 - 25.682 * x2 * x + 70.359 * x4 - 74.778 * x4 * x + 27.069 * x4 * x2;
+}
+
+// This is an approximation and simplification of EaryChow's AgX implementation that is used by Blender.
+// This code is based on the script that generates the AgX_Base_sRGB.cube LUT that Blender uses.
+// Source: https://github.com/EaryChow/AgX_LUT_Gen/blob/main/AgXBasesRGB.py
+vec3 tonemap_agx(vec3 color) {
+	// Combined linear sRGB to linear Rec 2020 and Blender AgX inset matrices:
+	const mat3 srgb_to_rec2020_agx_inset_matrix = mat3(
+			0.54490813676363087053, 0.14044005884001287035, 0.088827411851915368603,
+			0.37377945959812267119, 0.75410959864013760045, 0.17887712465043811023,
+			0.081384976686407536266, 0.10543358536857773485, 0.73224999956948382528);
+
+	// Combined inverse AgX outset matrix and linear Rec 2020 to linear sRGB matrices.
+	const mat3 agx_outset_rec2020_to_srgb_matrix = mat3(
+			1.9645509602733325934, -0.29932243390911083839, -0.16436833806080403409,
+			-0.85585845117807513559, 1.3264510741502356555, -0.23822464068860595117,
+			-0.10886710826831608324, -0.027084020983874825605, 1.402665347143271889);
+
+	// LOG2_MIN      = -10.0
+	// LOG2_MAX      =  +6.5
+	// MIDDLE_GRAY   =  0.18
+	const float min_ev = -12.4739311883324; // log2(pow(2, LOG2_MIN) * MIDDLE_GRAY)
+	const float max_ev = 4.02606881166759; // log2(pow(2, LOG2_MAX) * MIDDLE_GRAY)
+
+	// Large negative values in one channel and large positive values in other
+	// channels can result in a color that appears darker and more saturated than
+	// desired after passing it through the inset matrix. For this reason, it is
+	// best to prevent negative input values.
+	// This is done before the Rec. 2020 transform to allow the Rec. 2020
+	// transform to be combined with the AgX inset matrix. This results in a loss
+	// of color information that could be correctly interpreted within the
+	// Rec. 2020 color space as positive RGB values, but it is less common for Godot
+	// to provide this function with negative sRGB values and therefore not worth
+	// the performance cost of an additional matrix multiplication.
+	// A value of 2e-10 intentionally introduces insignificant error to prevent
+	// log2(0.0) after the inset matrix is applied; color will be >= 1e-10 after
+	// the matrix transform.
+	color = max(color, 2e-10);
+
+	// Convert linear sRGB to Rec. 2020 (to match Blender) and apply the AgX inset in one combined matrix multiply.
+	color = srgb_to_rec2020_agx_inset_matrix * color;
+
+	// Log2 space encoding.
+	// Must be clamped because agx_contrast_approx may not work
+	// well with values outside of the range [0.0, 1.0]
+	color = clamp(log2(color), min_ev, max_ev);
+	color = (color - min_ev) / (max_ev - min_ev);
+
+	// Apply sigmoid function approximation.
+	color = agx_contrast_approx(color);
+
+	// Convert back to linear before applying outset matrix.
+	color = pow(color, vec3(2.4));
+
+	// Apply outset to make the result more chroma-laden and then go back to linear sRGB.
+	color = agx_outset_rec2020_to_srgb_matrix * color;
+
+	// Blender's lusRGB.compensate_low_side is too complex for this shader, so
+	// simply return the color, even if it has negative components. These negative
+	// components may be useful for subsequent color adjustments.
+	return color;
 }

 vec3 linear_to_srgb(vec3 color) {
@ -271,8 +348,9 @@ vec3 linear_to_srgb(vec3 color) {
 #define TONEMAPPER_REINHARD 1
 #define TONEMAPPER_FILMIC 2
 #define TONEMAPPER_ACES 3
+#define TONEMAPPER_AGX 4

-vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always outputs clamped [0;1] color
+vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR
 	// Ensure color values passed to tonemappers are positive.
 	// They can be negative in the case of negative lights, which leads to undesired behavior.
 	if (params.tonemapper == TONEMAPPER_LINEAR) {
@ -281,8 +359,10 @@ vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always o
 		return tonemap_reinhard(max(vec3(0.0f), color), white);
 	} else if (params.tonemapper == TONEMAPPER_FILMIC) {
 		return tonemap_filmic(max(vec3(0.0f), color), white);
-	} else { // TONEMAPPER_ACES
+	} else if (params.tonemapper == TONEMAPPER_ACES) {
 		return tonemap_aces(max(vec3(0.0f), color), white);
+	} else { // TONEMAPPER_AGX
+		return tonemap_agx(color);
 	}
 }