feat: updated engine version to 4.4-rc1

2025-02-23 14:38:14 +01:00 · 2025-02-23 14:38:14 +01:00 · 21ba8e33af
commit 21ba8e33af
parent ee00efde1f
5459 changed files with 1128836 additions and 198305 deletions
--- a/engine/servers/rendering/renderer_rd/shaders/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/SCsub
@ -1,18 +1,23 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

 if "RD_GLSL" in env["BUILDERS"]:
-    # find all include files
+    # find just the include files
    gl_include_files = [str(f) for f in Glob("*_inc.glsl")]

-    # find all shader code(all glsl files excluding our include files)
+    # find all shader code (all glsl files excluding our include files)
    glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files]

    # make sure we recompile shaders if include files change
    env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"])

-    # compile shaders
+    # compile include files
+    for glsl_file in gl_include_files:
+        env.GLSL_HEADER(glsl_file)
+
+    # compile RD shaders
    for glsl_file in glsl_files:
        env.RD_GLSL(glsl_file)

--- a/engine/servers/rendering/renderer_rd/shaders/blit.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/blit.glsl
@ -8,6 +8,10 @@ layout(push_constant, std140) uniform Pos {
 	vec4 src_rect;
 	vec4 dst_rect;

+	float rotation_sin;
+	float rotation_cos;
+	vec2 pad;
+
 	vec2 eye_center;
 	float k1;
 	float k2;
@ -15,17 +19,23 @@ layout(push_constant, std140) uniform Pos {
 	float upscale;
 	float aspect_ratio;
 	uint layer;
-	uint pad1;
+	bool convert_to_srgb;
 }
 data;

 layout(location = 0) out vec2 uv;

 void main() {
+	mat4 swapchain_transform = mat4(1.0);
+	swapchain_transform[0][0] = data.rotation_cos;
+	swapchain_transform[0][1] = -data.rotation_sin;
+	swapchain_transform[1][0] = data.rotation_sin;
+	swapchain_transform[1][1] = data.rotation_cos;
+
 	vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
 	uv = data.src_rect.xy + base_arr[gl_VertexIndex] * data.src_rect.zw;
 	vec2 vtx = data.dst_rect.xy + base_arr[gl_VertexIndex] * data.dst_rect.zw;
-	gl_Position = vec4(vtx * 2.0 - 1.0, 0.0, 1.0);
+	gl_Position = swapchain_transform * vec4(vtx * 2.0 - 1.0, 0.0, 1.0);
 }

 #[fragment]
@ -38,6 +48,10 @@ layout(push_constant, std140) uniform Pos {
 	vec4 src_rect;
 	vec4 dst_rect;

+	float rotation_sin;
+	float rotation_cos;
+	vec2 pad;
+
 	vec2 eye_center;
 	float k1;
 	float k2;
--- a/engine/servers/rendering/renderer_rd/shaders/canvas.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/canvas.glsl
@ -24,6 +24,12 @@ layout(location = 11) in vec4 weight_attrib;

 #include "canvas_uniforms_inc.glsl"

+#ifndef USE_ATTRIBUTES
+
+layout(location = 4) out flat uint instance_index_interp;
+
+#endif // !USE_ATTRIBUTES
+
 layout(location = 0) out vec2 uv_interp;
 layout(location = 1) out vec4 color_interp;
 layout(location = 2) out vec2 vertex_interp;
@ -35,11 +41,11 @@ layout(location = 3) out vec2 pixel_size_interp;
 #endif

 #ifdef MATERIAL_UNIFORMS_USED
-layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
-
+/* clang-format off */
+layout(set = 1, binding = 0, std140) uniform MaterialUniforms {
 #MATERIAL_UNIFORMS
-
 } material;
+/* clang-format on */
 #endif

 #GLOBALS
@ -59,6 +65,14 @@ void main() {
 	vec4 custom1 = vec4(0.0);
 #endif

+#ifdef USE_ATTRIBUTES
+	uint instance_index = params.base_instance_index;
+#else
+	uint instance_index = gl_InstanceIndex + params.base_instance_index;
+	instance_index_interp = instance_index;
+#endif // USE_ATTRIBUTES
+	const InstanceData draw_data = instances.data[instance_index];
+
 #ifdef USE_PRIMITIVE

 	//weird bug,
@ -87,7 +101,7 @@ void main() {

 	vec2 vertex = vertex_attrib;
 	vec4 color = color_attrib;
-	if (bool(draw_data.flags & FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR)) {
+	if (bool(canvas_data.flags & CANVAS_FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR)) {
 		color.rgb = srgb_to_linear(color.rgb);
 	}
 	color *= draw_data.modulation;
@ -108,7 +122,7 @@ void main() {
 	vec2 vertex_base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
 	vec2 vertex_base = vertex_base_arr[gl_VertexIndex];

-	vec2 uv = draw_data.src_rect.xy + abs(draw_data.src_rect.zw) * ((draw_data.flags & FLAGS_TRANSPOSE_RECT) != 0 ? vertex_base.yx : vertex_base.xy);
+	vec2 uv = draw_data.src_rect.xy + abs(draw_data.src_rect.zw) * ((draw_data.flags & INSTANCE_FLAGS_TRANSPOSE_RECT) != 0 ? vertex_base.yx : vertex_base.xy);
 	vec4 color = draw_data.modulation;
 	vec2 vertex = draw_data.dst_rect.xy + abs(draw_data.dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data.src_rect.zw, vec2(0.0, 0.0)));
 	uvec4 bones = uvec4(0, 0, 0, 0);
@ -117,13 +131,10 @@ void main() {

 	mat4 model_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0));

-#define FLAGS_INSTANCING_MASK 0x7F
-#define FLAGS_INSTANCING_HAS_COLORS (1 << 7)
-#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << 8)
-
-	uint instancing = draw_data.flags & FLAGS_INSTANCING_MASK;
-
 #ifdef USE_ATTRIBUTES
+
+	uint instancing = params.batch_flags & BATCH_FLAGS_INSTANCING_MASK;
+
 	if (instancing > 1) {
 		// trails

@ -160,42 +171,29 @@ void main() {

 		vertex = new_vertex;
 		color *= pcolor;
-	} else
-#endif // USE_ATTRIBUTES
-	{
-		if (instancing == 1) {
-			uint stride = 2;
-			{
-				if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) {
-					stride += 1;
-				}
-				if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
-					stride += 1;
-				}
-			}
+	} else if (instancing == 1) {
+		uint stride = 2 + bitfieldExtract(params.batch_flags, BATCH_FLAGS_INSTANCING_HAS_COLORS_SHIFT, 1) + bitfieldExtract(params.batch_flags, BATCH_FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT, 1);

-			uint offset = stride * gl_InstanceIndex;
+		uint offset = stride * gl_InstanceIndex;

-			mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
-			offset += 2;
+		mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+		offset += 2;

-			if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) {
-				color *= transforms.data[offset];
-				offset += 1;
-			}
-
-			if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
-				instance_custom = transforms.data[offset];
-			}
-
-			matrix = transpose(matrix);
-			model_matrix = model_matrix * matrix;
+		if (bool(params.batch_flags & BATCH_FLAGS_INSTANCING_HAS_COLORS)) {
+			color *= transforms.data[offset];
+			offset += 1;
 		}
-	}

-#ifdef USE_POINT_SIZE
+		if (bool(params.batch_flags & BATCH_FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
+			instance_custom = transforms.data[offset];
+		}
+
+		matrix = transpose(matrix);
+		model_matrix = model_matrix * matrix;
+	}
+#endif // USE_ATTRIBUTES
+
 	float point_size = 1.0;
-#endif

 #ifdef USE_WORLD_VERTEX_COORDS
 	vertex = (model_matrix * vec4(vertex, 0.0, 1.0)).xy;
@ -214,6 +212,8 @@ void main() {

 	color_interp = color;

+	vertex = (canvas_data.canvas_transform * vec4(vertex, 0.0, 1.0)).xy;
+
 	if (canvas_data.use_pixel_snap) {
 		vertex = floor(vertex + 0.5);
 		// precision issue on some hardware creates artifacts within texture
@ -221,8 +221,6 @@ void main() {
 		uv += 1e-5;
 	}

-	vertex = (canvas_data.canvas_transform * vec4(vertex, 0.0, 1.0)).xy;
-
 	vertex_interp = vertex;
 	uv_interp = uv;

@ -241,6 +239,10 @@ void main() {

 #include "canvas_uniforms_inc.glsl"

+#ifndef USE_ATTRIBUTES
+layout(location = 4) in flat uint instance_index;
+#endif // !USE_ATTRIBUTES
+
 layout(location = 0) in vec2 uv_interp;
 layout(location = 1) in vec4 color_interp;
 layout(location = 2) in vec2 vertex_interp;
@ -254,11 +256,11 @@ layout(location = 3) in vec2 pixel_size_interp;
 layout(location = 0) out vec4 frag_color;

 #ifdef MATERIAL_UNIFORMS_USED
-layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
-
+/* clang-format off */
+layout(set = 1, binding = 0, std140) uniform MaterialUniforms {
 #MATERIAL_UNIFORMS
-
 } material;
+/* clang-format on */
 #endif

 vec2 screen_uv_to_sdf(vec2 p_uv) {
@ -320,6 +322,8 @@ vec4 light_compute(
 #ifdef USE_NINEPATCH

 float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) {
+	const InstanceData draw_data = instances.data[instance_index];
+
 	float tex_size = 1.0 / tex_pixel_size;

 	if (pixel < margin_begin) {
@ -327,9 +331,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo
 	} else if (pixel >= draw_size - margin_end) {
 		return (tex_size - (draw_size - pixel)) * tex_pixel_size;
 	} else {
-		if (!bool(draw_data.flags & FLAGS_NINEPACH_DRAW_CENTER)) {
-			draw_center--;
-		}
+		draw_center -= 1 - int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_DRAW_CENTER_SHIFT, 1));

 		// np_repeat is passed as uniform using NinePatchRect::AxisStretchMode enum.
 		if (np_repeat == 0) { // Stretch.
@ -360,8 +362,6 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo

 #endif

-#ifdef USE_LIGHTING
-
 vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) {
 	float cNdotL = max(0.0, dot(normal, light_vec));

@ -451,8 +451,6 @@ void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) {
 	}
 }

-#endif
-
 float msdf_median(float r, float g, float b, float a) {
 	return min(max(min(r, g), min(max(r, g), b)), a);
 }
@ -462,14 +460,20 @@ void main() {
 	vec2 uv = uv_interp;
 	vec2 vertex = vertex_interp;

+#ifdef USE_ATTRIBUTES
+	const InstanceData draw_data = instances.data[params.base_instance_index];
+#else
+	const InstanceData draw_data = instances.data[instance_index];
+#endif // USE_ATTRIBUTES
+
 #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)

 #ifdef USE_NINEPATCH

 	int draw_center = 2;
 	uv = vec2(
-			map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(draw_data.flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
-			map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(draw_data.flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
+			map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT, 2)), draw_center),
+			map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_V_MODE_SHIFT, 2)), draw_center));

 	if (draw_center == 0) {
 		color.a = 0.0;
@ -478,14 +482,15 @@ void main() {
 	uv = uv * draw_data.src_rect.zw + draw_data.src_rect.xy; //apply region if needed

 #endif
-	if (bool(draw_data.flags & FLAGS_CLIP_RECT_UV)) {
-		uv = clamp(uv, draw_data.src_rect.xy, draw_data.src_rect.xy + abs(draw_data.src_rect.zw));
+	if (bool(draw_data.flags & INSTANCE_FLAGS_CLIP_RECT_UV)) {
+		vec2 half_texpixel = draw_data.color_texture_pixel_size * 0.5;
+		uv = clamp(uv, draw_data.src_rect.xy + half_texpixel, draw_data.src_rect.xy + abs(draw_data.src_rect.zw) - half_texpixel);
 	}

 #endif

 #ifndef USE_PRIMITIVE
-	if (bool(draw_data.flags & FLAGS_USE_MSDF)) {
+	if (bool(draw_data.flags & INSTANCE_FLAGS_USE_MSDF)) {
 		float px_range = draw_data.ninepatch_margins.x;
 		float outline_thickness = draw_data.ninepatch_margins.y;
 		//float reserved1 = draw_data.ninepatch_margins.z;
@ -505,7 +510,7 @@ void main() {
 			float a = clamp(d * px_size + 0.5, 0.0, 1.0);
 			color.a = a * color.a;
 		}
-	} else if (bool(draw_data.flags & FLAGS_USE_LCD)) {
+	} else if (bool(draw_data.flags & INSTANCE_FLAGS_USE_LCD)) {
 		vec4 lcd_sample = texture(sampler2D(color_texture, texture_sampler), uv);
 		if (lcd_sample.a == 1.0) {
 			color.rgb = lcd_sample.rgb * color.a;
@ -519,8 +524,8 @@ void main() {
 		color *= texture(sampler2D(color_texture, texture_sampler), uv);
 	}

-	uint light_count = (draw_data.flags >> FLAGS_LIGHT_COUNT_SHIFT) & 0xF; //max 16 lights
-	bool using_light = light_count > 0 || canvas_data.directional_light_count > 0;
+	uint light_count = draw_data.flags & 15u; //max 15 lights
+	bool using_light = (light_count + canvas_data.directional_light_count) > 0;

 	vec3 normal;

@ -530,17 +535,15 @@ void main() {
 	bool normal_used = false;
 #endif

-	if (normal_used || (using_light && bool(draw_data.flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) {
+	if (normal_used || (using_light && bool(params.batch_flags & BATCH_FLAGS_DEFAULT_NORMAL_MAP_USED))) {
 		normal.xy = texture(sampler2D(normal_texture, texture_sampler), uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0);
-		if (bool(draw_data.flags & FLAGS_TRANSPOSE_RECT)) {
+
+#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)
+		if (bool(draw_data.flags & INSTANCE_FLAGS_TRANSPOSE_RECT)) {
 			normal.xy = normal.yx;
 		}
-		if (bool(draw_data.flags & FLAGS_FLIP_H)) {
-			normal.x = -normal.x;
-		}
-		if (bool(draw_data.flags & FLAGS_FLIP_V)) {
-			normal.y = -normal.y;
-		}
+		normal.xy *= sign(draw_data.src_rect.zw);
+#endif
 		normal.z = sqrt(max(0.0, 1.0 - dot(normal.xy, normal.xy)));
 		normal_used = true;
 	} else {
@ -556,9 +559,9 @@ void main() {
 	bool specular_shininess_used = false;
 #endif

-	if (specular_shininess_used || (using_light && normal_used && bool(draw_data.flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
+	if (specular_shininess_used || (using_light && normal_used && bool(params.batch_flags & BATCH_FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
 		specular_shininess = texture(sampler2D(specular_texture, texture_sampler), uv);
-		specular_shininess *= unpackUnorm4x8(draw_data.specular_shininess);
+		specular_shininess *= unpackUnorm4x8(params.specular_shininess);
 		specular_shininess_used = true;
 	} else {
 		specular_shininess = vec4(1.0);
@ -603,136 +606,135 @@ void main() {
 	color *= canvas_data.canvas_modulation;
 #endif

-#if defined(USE_LIGHTING) && !defined(MODE_UNSHADED)
+#if !defined(MODE_UNSHADED)
+	if (sc_use_lighting()) {
+		// Directional Lights

-	// Directional Lights
+		for (uint i = 0; i < canvas_data.directional_light_count; i++) {
+			uint light_base = i;

-	for (uint i = 0; i < canvas_data.directional_light_count; i++) {
-		uint light_base = i;
-
-		vec2 direction = light_array.data[light_base].position;
-		vec4 light_color = light_array.data[light_base].color;
+			vec2 direction = light_array.data[light_base].position;
+			vec4 light_color = light_array.data[light_base].color;

 #ifdef LIGHT_CODE_USED

-		vec4 shadow_modulate = vec4(1.0);
-		light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
+			vec4 shadow_modulate = vec4(1.0);
+			light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
 #else

-		if (normal_used) {
-			vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height));
-			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
-		} else {
-			light_color.rgb *= base_color.rgb;
-		}
-#endif
-
-		if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) {
-			vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
-
-			vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0);
-
-			light_color = light_shadow_compute(light_base, light_color, shadow_uv
-#ifdef LIGHT_CODE_USED
-					,
-					shadow_modulate.rgb
-#endif
-			);
-		}
-
-		light_blend_compute(light_base, light_color, color.rgb);
-#ifdef MODE_LIGHT_ONLY
-		light_only_alpha += light_color.a;
-#endif
-	}
-
-	// Positional Lights
-
-	for (uint i = 0; i < MAX_LIGHTS_PER_ITEM; i++) {
-		if (i >= light_count) {
-			break;
-		}
-		uint light_base = draw_data.lights[i >> 2];
-		light_base >>= (i & 3) * 8;
-		light_base &= 0xFF;
-
-		vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
-		vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy;
-
-		if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
-			//if outside the light texture, light color is zero
-			continue;
-		}
-
-		vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0);
-		vec4 light_base_color = light_array.data[light_base].color;
-
-#ifdef LIGHT_CODE_USED
-
-		vec4 shadow_modulate = vec4(1.0);
-		vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
-
-		light_color.rgb *= light_base_color.rgb;
-		light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
-#else
-
-		light_color.rgb *= light_base_color.rgb * light_base_color.a;
-
-		if (normal_used) {
-			vec3 light_pos = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
-			vec3 pos = light_vertex;
-			vec3 light_vec = normalize(light_pos - pos);
-
-			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
-		} else {
-			light_color.rgb *= base_color.rgb;
-		}
-#endif
-
-		if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) {
-			vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
-
-			vec2 pos_norm = normalize(shadow_pos);
-			vec2 pos_abs = abs(pos_norm);
-			vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y);
-			vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot?
-			float tex_ofs;
-			float distance;
-			if (pos_rot.y > 0) {
-				if (pos_rot.x > 0) {
-					tex_ofs = pos_box.y * 0.125 + 0.125;
-					distance = shadow_pos.x;
-				} else {
-					tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125);
-					distance = shadow_pos.y;
-				}
+			if (normal_used) {
+				vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height));
+				light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
 			} else {
-				if (pos_rot.x < 0) {
-					tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125);
-					distance = -shadow_pos.x;
-				} else {
-					tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125);
-					distance = -shadow_pos.y;
-				}
+				light_color.rgb *= base_color.rgb;
+			}
+#endif
+
+			if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) {
+				vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
+
+				vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0);
+
+				light_color = light_shadow_compute(light_base, light_color, shadow_uv
+#ifdef LIGHT_CODE_USED
+						,
+						shadow_modulate.rgb
+#endif
+				);
 			}

-			distance *= light_array.data[light_base].shadow_zfar_inv;
-
-			//float distance = length(shadow_pos);
-			vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0);
-
-			light_color = light_shadow_compute(light_base, light_color, shadow_uv
-#ifdef LIGHT_CODE_USED
-					,
-					shadow_modulate.rgb
+			light_blend_compute(light_base, light_color, color.rgb);
+#ifdef MODE_LIGHT_ONLY
+			light_only_alpha += light_color.a;
 #endif
-			);
 		}

-		light_blend_compute(light_base, light_color, color.rgb);
-#ifdef MODE_LIGHT_ONLY
-		light_only_alpha += light_color.a;
+		// Positional Lights
+
+		for (uint i = 0; i < MAX_LIGHTS_PER_ITEM; i++) {
+			if (i >= light_count) {
+				break;
+			}
+			uint light_base = bitfieldExtract(draw_data.lights[i >> 2], (int(i) & 0x3) * 8, 8);
+
+			vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
+			vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy;
+
+			if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
+				//if outside the light texture, light color is zero
+				continue;
+			}
+
+			vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0);
+			vec4 light_base_color = light_array.data[light_base].color;
+
+#ifdef LIGHT_CODE_USED
+
+			vec4 shadow_modulate = vec4(1.0);
+			vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
+
+			light_color.rgb *= light_base_color.rgb;
+			light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
+#else
+
+			light_color.rgb *= light_base_color.rgb * light_base_color.a;
+
+			if (normal_used) {
+				vec3 light_pos = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
+				vec3 pos = light_vertex;
+				vec3 light_vec = normalize(light_pos - pos);
+
+				light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
+			} else {
+				light_color.rgb *= base_color.rgb;
+			}
 #endif
+
+			if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW) && bool(draw_data.flags & (INSTANCE_FLAGS_SHADOW_MASKED << i))) {
+				vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
+
+				vec2 pos_norm = normalize(shadow_pos);
+				vec2 pos_abs = abs(pos_norm);
+				vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y);
+				vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot?
+				float tex_ofs;
+				float distance;
+				if (pos_rot.y > 0) {
+					if (pos_rot.x > 0) {
+						tex_ofs = pos_box.y * 0.125 + 0.125;
+						distance = shadow_pos.x;
+					} else {
+						tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125);
+						distance = shadow_pos.y;
+					}
+				} else {
+					if (pos_rot.x < 0) {
+						tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125);
+						distance = -shadow_pos.x;
+					} else {
+						tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125);
+						distance = -shadow_pos.y;
+					}
+				}
+
+				distance *= light_array.data[light_base].shadow_zfar_inv;
+
+				//float distance = length(shadow_pos);
+				vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0);
+
+				light_color = light_shadow_compute(light_base, light_color, shadow_uv
+#ifdef LIGHT_CODE_USED
+						,
+						shadow_modulate.rgb
+#endif
+				);
+			}
+
+			light_blend_compute(light_base, light_color, color.rgb);
+#ifdef MODE_LIGHT_ONLY
+			light_only_alpha += light_color.a;
+#endif
+		}
 	}
 #endif

--- a/engine/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl
@ -6,26 +6,69 @@

 layout(location = 0) in highp vec3 vertex;

+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants {
+	mat2x4 modelview;
+	vec4 rotation;
+	vec2 direction;
+	float z_far;
+	uint pad;
+	float z_near;
+	uint cull_mode;
+	float pad3;
+	float pad4;
+}
+constants;
+
+layout(set = 0, binding = 0, std430) restrict readonly buffer OccluderTransforms {
+	mat2x4 transforms[];
+}
+occluder_transforms;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat2x4 modelview;
 	vec2 direction;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 constants;

+#endif
+
 #ifdef MODE_SHADOW
 layout(location = 0) out highp float depth;
 #endif

 void main() {
-	highp vec4 vtx = vec4(vertex, 1.0) * mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#ifdef POSITIONAL_SHADOW
+	float c = -(constants.z_far + constants.z_near) / (constants.z_far - constants.z_near);
+	float d = -2.0 * constants.z_far * constants.z_near / (constants.z_far - constants.z_near);
+
+	mat4 projection = mat4(vec4(1.0, 0.0, 0.0, 0.0),
+			vec4(0.0, 1.0, 0.0, 0.0),
+			vec4(0.0, 0.0, c, -1.0),
+			vec4(0.0, 0.0, d, 0.0));
+
+	// Precomputed:
+	// Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0));
+	// projection = projection * Projection(Transform3D().looking_at(cam_targets[i], Vector3(0, 0, -1)).affine_inverse());
+	projection *= mat4(vec4(constants.rotation.x, 0.0, constants.rotation.y, 0.0), vec4(constants.rotation.z, 0.0, constants.rotation.w, 0.0), vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+	mat4 modelview = mat4(occluder_transforms.transforms[constants.pad]) * mat4(constants.modelview);
+#else
+	mat4 projection = constants.projection;
+	mat4 modelview = mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#endif
+
+	highp vec4 vtx = vec4(vertex, 1.0) * modelview;

 #ifdef MODE_SHADOW
 	depth = dot(constants.direction, vtx.xy);
 #endif
-	gl_Position = constants.projection * vtx;
+
+	gl_Position = projection * vtx;
 }

 #[fragment]
@ -34,15 +77,33 @@ void main() {

 #VERSION_DEFINES

+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants {
+	mat2x4 modelview;
+	vec4 rotation;
+	vec2 direction;
+	float z_far;
+	uint pad;
+	float z_near;
+	uint cull_mode;
+	float pad3;
+	float pad4;
+}
+constants;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat2x4 modelview;
 	vec2 direction;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 constants;

+#endif
+
 #ifdef MODE_SHADOW
 layout(location = 0) in highp float depth;
 layout(location = 0) out highp float distance_buf;
@ -50,8 +111,18 @@ layout(location = 0) out highp float distance_buf;
 layout(location = 0) out highp float sdf_buf;
 #endif

+#define POLYGON_CULL_DISABLED 0
+#define POLYGON_CULL_FRONT 1
+#define POLYGON_CULL_BACK 2
+
 void main() {
 #ifdef MODE_SHADOW
+	bool front_facing = gl_FrontFacing;
+	if (constants.cull_mode == POLYGON_CULL_BACK && !front_facing) {
+		discard;
+	} else if (constants.cull_mode == POLYGON_CULL_FRONT && front_facing) {
+		discard;
+	}
 	distance_buf = depth / constants.z_far;
 #else
 	sdf_buf = 1.0;
--- a/engine/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl
@ -1,42 +1,29 @@
-
 #define MAX_LIGHTS_PER_ITEM 16

 #define M_PI 3.14159265359

 #define SDF_MAX_LENGTH 16384.0

-//1 means enabled, 2+ means trails in use
-#define FLAGS_INSTANCING_MASK 0x7F
-#define FLAGS_INSTANCING_HAS_COLORS (1 << 7)
-#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << 8)
+#define INSTANCE_FLAGS_LIGHT_COUNT_SHIFT 0 // 4 bits.

-#define FLAGS_CLIP_RECT_UV (1 << 9)
-#define FLAGS_TRANSPOSE_RECT (1 << 10)
-#define FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR (1 << 11)
-#define FLAGS_NINEPACH_DRAW_CENTER (1 << 12)
+#define INSTANCE_FLAGS_CLIP_RECT_UV (1 << 4)
+#define INSTANCE_FLAGS_TRANSPOSE_RECT (1 << 5)
+#define INSTANCE_FLAGS_USE_MSDF (1 << 6)
+#define INSTANCE_FLAGS_USE_LCD (1 << 7)

-#define FLAGS_NINEPATCH_H_MODE_SHIFT 16
-#define FLAGS_NINEPATCH_V_MODE_SHIFT 18
+#define INSTANCE_FLAGS_NINEPATCH_DRAW_CENTER_SHIFT 8
+#define INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT 9
+#define INSTANCE_FLAGS_NINEPATCH_V_MODE_SHIFT 11

-#define FLAGS_LIGHT_COUNT_SHIFT 20
+#define INSTANCE_FLAGS_SHADOW_MASKED_SHIFT 13 // 16 bits.
+#define INSTANCE_FLAGS_SHADOW_MASKED (1 << INSTANCE_FLAGS_SHADOW_MASKED_SHIFT)

-#define FLAGS_DEFAULT_NORMAL_MAP_USED (1 << 26)
-#define FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 27)
-
-#define FLAGS_USE_MSDF (1 << 28)
-#define FLAGS_USE_LCD (1 << 29)
-
-#define FLAGS_FLIP_H (1 << 30)
-#define FLAGS_FLIP_V (1 << 31)
-
-// Push Constant
-
-layout(push_constant, std430) uniform DrawData {
+struct InstanceData {
 	vec2 world_x;
 	vec2 world_y;
 	vec2 world_ofs;
 	uint flags;
-	uint specular_shininess;
+	uint instance_uniforms_ofs;
 #ifdef USE_PRIMITIVE
 	vec2 points[3];
 	vec2 uvs[3];
@ -50,9 +37,50 @@ layout(push_constant, std430) uniform DrawData {

 #endif
 	vec2 color_texture_pixel_size;
-	uint lights[4];
+	uvec4 lights;
+};
+
+// Instancing mode in the low 7 bits of batch_flags: 1 means enabled, 2+ means trails in use.
+#define BATCH_FLAGS_INSTANCING_MASK 0x7F
+#define BATCH_FLAGS_INSTANCING_HAS_COLORS_SHIFT 7
+#define BATCH_FLAGS_INSTANCING_HAS_COLORS (1 << BATCH_FLAGS_INSTANCING_HAS_COLORS_SHIFT)
+#define BATCH_FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT 8
+#define BATCH_FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << BATCH_FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT)
+
+#define BATCH_FLAGS_DEFAULT_NORMAL_MAP_USED (1 << 9)
+#define BATCH_FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 10)
+
+layout(push_constant, std430) uniform Params {
+	uint base_instance_index; // base index to instance data
+	uint sc_packed_0;
+	uint specular_shininess;
+	uint batch_flags;
+}
+params;
+
+// Specialization constants.
+
+#ifdef UBERSHADER
+
+// Pull the constants from the draw call's push constants.
+uint sc_packed_0() {
+	return params.sc_packed_0;
+}
+
+#else
+
+// Pull the constants from the pipeline's specialization constants.
+layout(constant_id = 0) const uint pso_sc_packed_0 = 0;
+
+uint sc_packed_0() {
+	return pso_sc_packed_0;
+}
+
+#endif
+
+bool sc_use_lighting() {
+	return ((sc_packed_0() >> 0) & 1U) != 0;
 }
-draw_data;

 // In vulkan, sets should always be ordered using the following logic:
 // Lower Sets: Sets that change format and layout less often
@ -62,6 +90,8 @@ draw_data;

 /* SET0: Globals */

+#define CANVAS_FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR (1 << 0)
+
 // The values passed per draw primitives are cached within it

 layout(set = 0, binding = 1, std140) uniform CanvasData {
@ -79,7 +109,7 @@ layout(set = 0, binding = 1, std140) uniform CanvasData {

 	uint directional_light_count;
 	float tex_to_sdf;
-	uint pad1;
+	uint flags;
 	uint pad2;
 }
 canvas_data;
@ -150,3 +180,8 @@ layout(set = 3, binding = 0) uniform texture2D color_texture;
 layout(set = 3, binding = 1) uniform texture2D normal_texture;
 layout(set = 3, binding = 2) uniform texture2D specular_texture;
 layout(set = 3, binding = 3) uniform sampler texture_sampler;
+
+layout(set = 3, binding = 4, std430) restrict readonly buffer DrawData { // Storage buffer, accessed as `instances`.
+	InstanceData data[]; // Unsized array of InstanceData; the block is declared readonly.
+}
+instances;
--- a/engine/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/decal_data_inc.glsl
@ -1,4 +1,3 @@
-
 struct DecalData {
 	highp mat4 xform; //to decal transform
 	highp vec3 inv_extents;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl
@ -260,14 +260,14 @@ void main() {
 #ifdef MODE_COMPOSITE_BOKEH
 	frag_color.rgb = texture(source_color, uv).rgb;

-	float center_weigth = texture(source_weight, uv).r;
+	float center_weight = texture(source_weight, uv).r;
 	float sample_weight = texture(original_weight, uv).r;

 	float mix_amount;
-	if (sample_weight < center_weigth) {
-		mix_amount = min(1.0, max(0.0, max(abs(center_weigth), abs(sample_weight)) - DEPTH_GAP));
+	if (sample_weight < center_weight) {
+		mix_amount = min(1.0, max(0.0, max(abs(center_weight), abs(sample_weight)) - DEPTH_GAP));
 	} else {
-		mix_amount = min(1.0, max(0.0, abs(center_weigth) - DEPTH_GAP));
+		mix_amount = min(1.0, max(0.0, abs(center_weight) - DEPTH_GAP));
 	}

 	// let alpha blending take care of mixing
--- a/engine/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl
@ -8,7 +8,6 @@ layout(push_constant, std430) uniform Params {
 	float z_far;
 	float z_near;
 	vec2 texel_size;
-	vec4 screen_rect;
 }
 params;

@ -17,8 +16,7 @@ layout(location = 0) out vec2 uv_interp;
 void main() {
 	vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
 	uv_interp = base_arr[gl_VertexIndex];
-	vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy;
-	gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0);
+	gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0);
 }

 #[fragment]
@ -35,7 +33,6 @@ layout(push_constant, std430) uniform Params {
 	float z_far;
 	float z_near;
 	vec2 texel_size;
-	vec4 screen_rect;
 }
 params;

--- a/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl
@ -177,24 +177,27 @@ void main() {

 			float theta;
 			if (Ny < Nx) {
-				if (Ny <= -0.999)
+				if (Ny <= -0.999) {
 					theta = Nx;
-				else
+				} else {
 					theta = Ny;
+				}
 			} else {
-				if (Ny >= 0.999)
+				if (Ny >= 0.999) {
 					theta = -Nx;
-				else
+				} else {
 					theta = -Ny;
+				}
 			}

 			float phi;
-			if (Nz <= -0.999)
+			if (Nz <= -0.999) {
 				phi = -NmaxXY;
-			else if (Nz >= 0.999)
+			} else if (Nz >= 0.999) {
 				phi = NmaxXY;
-			else
+			} else {
 				phi = Nz;
+			}

 			float theta2 = theta * theta;
 			float phi2 = phi * phi;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl
@ -170,24 +170,27 @@ void main() {

 			float theta;
 			if (Ny < Nx) {
-				if (Ny <= -0.999)
+				if (Ny <= -0.999) {
 					theta = Nx;
-				else
+				} else {
 					theta = Ny;
+				}
 			} else {
-				if (Ny >= 0.999)
+				if (Ny >= 0.999) {
 					theta = -Nx;
-				else
+				} else {
 					theta = -Ny;
+				}
 			}

 			float phi;
-			if (Nz <= -0.999)
+			if (Nz <= -0.999) {
 				phi = -NmaxXY;
-			else if (Nz >= 0.999)
+			} else if (Nz >= 0.999) {
 				phi = NmaxXY;
-			else
+			} else {
 				phi = Nz;
+			}

 			float theta2 = theta * theta;
 			float phi2 = phi * phi;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/effects/luminance_reduce_raster_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/luminance_reduce_raster_inc.glsl
@ -1,4 +1,3 @@
-
 layout(push_constant, std430) uniform PushConstant {
 	ivec2 source_size;
 	ivec2 dest_size;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/motion_vectors_store.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/motion_vectors_store.glsl
@ -0,0 +1,32 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "motion_vector_inc.glsl"
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+layout(set = 0, binding = 0) uniform sampler2D depth_buffer;
+layout(rg16f, set = 0, binding = 1) uniform restrict writeonly image2D velocity_buffer;
+
+layout(push_constant, std430) uniform Params {
+	highp mat4 reprojection_matrix; // Passed to derive_motion_vector() together with the UV and depth.
+	vec2 resolution; // Used for the bounds check and to turn texel coordinates into UVs.
+	uint pad[2]; // Not read by main(); presumably pads the block to a 16-byte multiple - confirm.
+}
+params;
+
+void main() {
+	// Out of bounds check: invocations at or past params.resolution do nothing.
+	if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) {
+		return;
+	}
+
+	ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+
+	float depth = texelFetch(depth_buffer, pos, 0).x; // Unfiltered fetch from mip level 0.
+	vec2 uv = (vec2(pos) + 0.5f) / params.resolution; // +0.5 addresses the texel center.
+	vec2 velocity = derive_motion_vector(uv, depth, params.reprojection_matrix);
+	imageStore(velocity_buffer, pos, vec4(velocity, 0.0f, 0.0f)); // velocity_buffer is rg16f; the last two components are filler.
+}
--- a/engine/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
@ -237,7 +237,7 @@ void main() {

 		// This is an ad-hoc term to fade out the SSR as roughness increases. Values used
 		// are meant to match the visual appearance of a ReflectionProbe.
-		float roughness_fade = smoothstep(0.4, 0.7, 1.0 - normal_roughness.w);
+		float roughness_fade = smoothstep(0.4, 0.7, 1.0 - roughness);

 		// Schlick term.
 		float metallic = texelFetch(source_metallic, ssC << 1, 0).w;
--- a/engine/servers/rendering/renderer_rd/shaders/effects/sort.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/sort.glsl
@ -70,8 +70,9 @@ void main() {

 	int i;
 	for (i = 0; i < 2 * ITERATIONS; ++i) {
-		if (GI + i * NUM_THREADS < numElementsInThreadGroup)
+		if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
 			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+		}
 	}

 	groupMemoryBarrier();
@ -163,8 +164,9 @@ void main() {

 	// Load shared data
 	for (i = 0; i < 2; ++i) {
-		if (GI + i * NUM_THREADS < tgp.w)
+		if (GI + i * NUM_THREADS < tgp.w) {
 			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
+		}
 	}

 	groupMemoryBarrier();
--- a/engine/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
@ -50,11 +50,14 @@ const int num_taps[5] = { 3, 5, 12, 0, 0 };
 //
 #define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar
 //
-#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too
+// WARNING: The MIP generation on the C++ side will be enabled on quality preset 2 regardless of
+// this value, so if changing here, change the C++ side too.
+#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2)
 #define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
 //
-// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
-// testing purposes, it will not yield performance gains (or correct results)
+// WARNING: The edge handling is hard-coded to 'disabled' on quality level 0, and enabled above,
+// on the C++ side; while toggling it here will work for testing purposes, it will not yield
+// performance gains (or correct results).
 #define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
 //
 #define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
--- a/engine/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
@ -49,8 +49,9 @@ const int num_taps[5] = { 3, 5, 12, 0, 0 };
 #define SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2)
 #define SSIL_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
 //
-// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
-// testing purposes, it will not yield performance gains (or correct results)
+// WARNING: The edge handling is hard-coded to 'disabled' on quality level 0, and enabled above,
+// on the C++ side; while toggling it here will work for testing purposes, it will not yield
+// performance gains (or correct results).
 #define SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
 //
 #define SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
--- a/engine/servers/rendering/renderer_rd/shaders/effects/subsurface_scattering.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/subsurface_scattering.glsl
@ -152,10 +152,10 @@ void main() {
 		float depth_scale;

 		if (params.orthogonal) {
-			depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0;
+			depth = -(depth * (params.camera_z_far - params.camera_z_near) - (params.camera_z_far + params.camera_z_near)) / 2.0;
 			depth_scale = params.unit_size; //remember depth is negative by default in OpenGL
 		} else {
-			depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near));
+			depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near + depth * (params.camera_z_far - params.camera_z_near));
 			depth_scale = params.unit_size / depth; //remember depth is negative by default in OpenGL
 		}

--- a/engine/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
@ -250,19 +250,25 @@ vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) {
 	vec3 rmax = (aabb_max - p.xyz);
 	vec3 rmin = (aabb_min - p.xyz);

-	if (r.x > rmax.x + FLT_MIN)
+	if (r.x > rmax.x + FLT_MIN) {
 		r *= (rmax.x / r.x);
-	if (r.y > rmax.y + FLT_MIN)
+	}
+	if (r.y > rmax.y + FLT_MIN) {
 		r *= (rmax.y / r.y);
-	if (r.z > rmax.z + FLT_MIN)
+	}
+	if (r.z > rmax.z + FLT_MIN) {
 		r *= (rmax.z / r.z);
+	}

-	if (r.x < rmin.x - FLT_MIN)
+	if (r.x < rmin.x - FLT_MIN) {
 		r *= (rmin.x / r.x);
-	if (r.y < rmin.y - FLT_MIN)
+	}
+	if (r.y < rmin.y - FLT_MIN) {
 		r *= (rmin.y / r.y);
-	if (r.z < rmin.z - FLT_MIN)
+	}
+	if (r.z < rmin.z - FLT_MIN) {
 		r *= (rmin.z / r.z);
+	}

 	return p + r;
 }
@ -307,6 +313,8 @@ float luminance(vec3 color) {
 	return max(dot(color, lumCoeff), 0.0001f);
 }

+// This is "velocity disocclusion" as described by https://www.elopezr.com/temporal-aa-and-the-quest-for-the-holy-trail/.
+// We use texel space, so our scale and threshold differ.
 float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) {
 	vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy;
 	vec2 velocity_texels = velocity * params.resolution;
@ -336,7 +344,7 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_
 	// Compute blend factor
 	float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16.
 	{
-		// If re-projected UV is out of screen, converge to current color immediatel
+		// If re-projected UV is out of screen, converge to current color immediately.
 		float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0;

 		// Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting).
--- a/engine/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl
@ -207,6 +207,14 @@ vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) {

 #endif // !USE_GLOW_FILTER_BICUBIC

+// Based on Reinhard's extended formula, see equation 4 in https://doi.org/cjbgrt. A component equal to `white` maps to 1.0.
+vec3 tonemap_reinhard(vec3 color, float white) {
+	float white_squared = white * white;
+	vec3 white_squared_color = white_squared * color;
+	// Equivalent to color * (1 + color / white_squared) / (1 + color), rearranged to use a single division.
+	return (white_squared_color + color * color) / (white_squared_color + white_squared);
+}
+
 vec3 tonemap_filmic(vec3 color, float white) {
 	// exposure bias: input scale (color *= bias, white *= bias) to make the brightness consistent with other tonemappers
 	// also useful to scale the input to the range that the tonemapper is designed for (some require very high input values)
@ -256,8 +264,77 @@ vec3 tonemap_aces(vec3 color, float white) {
 	return color_tonemapped / white_tonemapped;
 }

-vec3 tonemap_reinhard(vec3 color, float white) {
-	return (white * color + color) / (color * white + white);
+// Polynomial approximation of EaryChow's AgX sigmoid curve.
+// x must be within the range [0.0, 1.0].
+vec3 agx_contrast_approx(vec3 x) {
+	// Generated with Excel trendline.
+	// Input data: Generated using python sigmoid with EaryChow's configuration and 57 steps.
+	// Additional padding values were added to give correct intersections at 0.0 and 1.0.
+	// 6th order, intercept of 0.0 to remove an operation and ensure intersection at 0.0.
+	vec3 x2 = x * x; // x^2
+	vec3 x4 = x2 * x2; // x^4
+	return 0.021 * x + 4.0111 * x2 - 25.682 * x2 * x + 70.359 * x4 - 74.778 * x4 * x + 27.069 * x4 * x2; // Terms x^1 through x^6, no constant term.
+}
+
+// This is an approximation and simplification of EaryChow's AgX implementation that is used by Blender.
+// This code is based on the script that generates the AgX_Base_sRGB.cube LUT that Blender uses.
+// Source: https://github.com/EaryChow/AgX_LUT_Gen/blob/main/AgXBasesRGB.py
+vec3 tonemap_agx(vec3 color) {
+	// Combined linear sRGB to linear Rec 2020 and Blender AgX inset matrices:
+	const mat3 srgb_to_rec2020_agx_inset_matrix = mat3(
+			0.54490813676363087053, 0.14044005884001287035, 0.088827411851915368603,
+			0.37377945959812267119, 0.75410959864013760045, 0.17887712465043811023,
+			0.081384976686407536266, 0.10543358536857773485, 0.73224999956948382528);
+
+	// Combined inverse AgX outset matrix and linear Rec 2020 to linear sRGB matrices.
+	const mat3 agx_outset_rec2020_to_srgb_matrix = mat3(
+			1.9645509602733325934, -0.29932243390911083839, -0.16436833806080403409,
+			-0.85585845117807513559, 1.3264510741502356555, -0.23822464068860595117,
+			-0.10886710826831608324, -0.027084020983874825605, 1.402665347143271889);
+
+	// LOG2_MIN      = -10.0
+	// LOG2_MAX      =  +6.5
+	// MIDDLE_GRAY   =  0.18
+	const float min_ev = -12.4739311883324; // log2(pow(2, LOG2_MIN) * MIDDLE_GRAY)
+	const float max_ev = 4.02606881166759; // log2(pow(2, LOG2_MAX) * MIDDLE_GRAY)
+
+	// Large negative values in one channel and large positive values in other
+	// channels can result in a color that appears darker and more saturated than
+	// desired after passing it through the inset matrix. For this reason, it is
+	// best to prevent negative input values.
+	// This is done before the Rec. 2020 transform to allow the Rec. 2020
+	// transform to be combined with the AgX inset matrix. This results in a loss
+	// of color information that could be correctly interpreted within the
+	// Rec. 2020 color space as positive RGB values, but it is less common for Godot
+	// to provide this function with negative sRGB values and therefore not worth
+	// the performance cost of an additional matrix multiplication.
+	// A value of 2e-10 intentionally introduces insignificant error to prevent
+	// log2(0.0) after the inset matrix is applied; color will be >= 1e-10 after
+	// the matrix transform.
+	color = max(color, 2e-10);
+
+	// Convert to Rec. 2020 (to match Blender) and apply the AgX inset with one combined matrix.
+	color = srgb_to_rec2020_agx_inset_matrix * color;
+
+	// Log2 space encoding.
+	// Must be clamped because agx_contrast_approx may not work
+	// well with values outside of the range [0.0, 1.0].
+	color = clamp(log2(color), min_ev, max_ev);
+	color = (color - min_ev) / (max_ev - min_ev); // Remap [min_ev, max_ev] to [0.0, 1.0].
+
+	// Apply sigmoid function approximation.
+	color = agx_contrast_approx(color);
+
+	// Convert back to linear before applying outset matrix.
+	color = pow(color, vec3(2.4));
+
+	// Apply outset to make the result more chroma-laden and then go back to linear sRGB.
+	color = agx_outset_rec2020_to_srgb_matrix * color;
+
+	// Blender's lusRGB.compensate_low_side is too complex for this shader, so
+	// simply return the color, even if it has negative components. These negative
+	// components may be useful for subsequent color adjustments.
+	return color;
 }

 vec3 linear_to_srgb(vec3 color) {
@ -271,8 +348,9 @@ vec3 linear_to_srgb(vec3 color) {
 #define TONEMAPPER_REINHARD 1
 #define TONEMAPPER_FILMIC 2
 #define TONEMAPPER_ACES 3
+#define TONEMAPPER_AGX 4

-vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always outputs clamped [0;1] color
+vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR
 	// Ensure color values passed to tonemappers are positive.
 	// They can be negative in the case of negative lights, which leads to undesired behavior.
 	if (params.tonemapper == TONEMAPPER_LINEAR) {
@ -281,8 +359,10 @@ vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always o
 		return tonemap_reinhard(max(vec3(0.0f), color), white);
 	} else if (params.tonemapper == TONEMAPPER_FILMIC) {
 		return tonemap_filmic(max(vec3(0.0f), color), white);
-	} else { // TONEMAPPER_ACES
+	} else if (params.tonemapper == TONEMAPPER_ACES) {
 		return tonemap_aces(max(vec3(0.0f), color), white);
+	} else { // TONEMAPPER_AGX
+		return tonemap_agx(color);
 	}
 }

--- a/engine/servers/rendering/renderer_rd/shaders/environment/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl
@ -49,9 +49,9 @@ layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps;

 #define HISTORY_BITS 10

-#define SKY_MODE_DISABLED 0
-#define SKY_MODE_COLOR 1
-#define SKY_MODE_SKY 2
+#define SKY_FLAGS_MODE_COLOR 0x01 // Sky light is params.sky_color_or_orientation used as a color.
+#define SKY_FLAGS_MODE_SKY 0x02 // Sky light is sampled from sky_irradiance; sky_color_or_orientation holds the quaternion xyz.
+#define SKY_FLAGS_ORIENTATION_SIGN 0x04 // Set: reconstructed quaternion w is taken as non-negative; clear: w is negated.

 layout(push_constant, std430) uniform Params {
 	vec3 grid_size;
@ -67,12 +67,12 @@ layout(push_constant, std430) uniform Params {
 	ivec2 image_size;

 	ivec3 world_offset;
-	uint sky_mode;
+	uint sky_flags;

 	ivec3 scroll;
 	float sky_energy;

-	vec3 sky_color;
+	vec3 sky_color_or_orientation;
 	float y_mult;

 	bool store_ambient_texture;
@ -265,17 +265,22 @@ void main() {
 				}
 			}

-		} else if (params.sky_mode == SKY_MODE_SKY) {
+		} else if (bool(params.sky_flags & SKY_FLAGS_MODE_SKY)) {
+			// Reconstruct sky orientation as quaternion and rotate ray_dir before sampling.
+			float sky_sign = bool(params.sky_flags & SKY_FLAGS_ORIENTATION_SIGN) ? 1.0 : -1.0;
+			vec4 sky_quat = vec4(params.sky_color_or_orientation, sky_sign * sqrt(1.0 - dot(params.sky_color_or_orientation, params.sky_color_or_orientation)));
+			vec3 sky_dir = cross(sky_quat.xyz, ray_dir);
+			sky_dir = ray_dir + ((sky_dir * sky_quat.w) + cross(sky_quat.xyz, sky_dir)) * 2.0;
 #ifdef USE_CUBEMAP_ARRAY
-			light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates.
+			light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(sky_dir, 0.0), 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates.
 #else
-			light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates.
+			light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), sky_dir, 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates.
 #endif
 			light.rgb *= params.sky_energy;
 			light.a = 0.0;

-		} else if (params.sky_mode == SKY_MODE_COLOR) {
-			light.rgb = params.sky_color;
+		} else if (bool(params.sky_flags & SKY_FLAGS_MODE_COLOR)) {
+			light.rgb = params.sky_color_or_orientation;
 			light.rgb *= params.sky_energy;
 			light.a = 0.0;
 		} else {
--- a/engine/servers/rendering/renderer_rd/shaders/environment/sky.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/sky.glsl
@ -17,8 +17,9 @@ layout(push_constant, std430) uniform Params {
 	vec4 projection; // only applicable if not multiview
 	vec3 position;
 	float time;
-	vec3 pad;
+	vec2 pad;
 	float luminance_multiplier;
+	float brightness_multiplier;
 }
 params;

@ -57,8 +58,9 @@ layout(push_constant, std430) uniform Params {
 	vec4 projection; // only applicable if not multiview
 	vec3 position;
 	float time;
-	vec3 pad;
+	vec2 pad;
 	float luminance_multiplier;
+	float brightness_multiplier;
 }
 params;

@ -106,9 +108,11 @@ layout(set = 0, binding = 3, std140) uniform DirectionalLights {
 directional_lights;

 #ifdef MATERIAL_UNIFORMS_USED
-layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
+/* clang-format off */
+layout(set = 1, binding = 0, std140) uniform MaterialUniforms {
 #MATERIAL_UNIFORMS
 } material;
+/* clang-format on */
 #endif

 layout(set = 2, binding = 0) uniform textureCube radiance;
@ -187,7 +191,7 @@ void main() {
 	vec3 cube_normal;
 #ifdef USE_MULTIVIEW
 	// In multiview our projection matrices will contain positional and rotational offsets that we need to properly unproject.
-	vec4 unproject = vec4(uv_interp.x, -uv_interp.y, 0.0, 1.0); // unproject at the far plane
+	vec4 unproject = vec4(uv_interp.x, uv_interp.y, 0.0, 1.0); // unproject at the far plane
 	vec4 unprojected = sky_scene_data.view_inv_projections[ViewIndex] * unproject;
 	cube_normal = unprojected.xyz / unprojected.w;

@ -196,7 +200,7 @@ void main() {
 #else
 	cube_normal.z = -1.0;
 	cube_normal.x = (cube_normal.z * (-uv_interp.x - params.projection.x)) / params.projection.y;
-	cube_normal.y = -(cube_normal.z * (-uv_interp.y - params.projection.z)) / params.projection.w;
+	cube_normal.y = -(cube_normal.z * (uv_interp.y - params.projection.z)) / params.projection.w;
 #endif
 	cube_normal = mat3(params.orientation) * cube_normal;
 	cube_normal = normalize(cube_normal);
@ -247,17 +251,14 @@ void main() {
 #endif //USE_CUBEMAP_PASS

 	{
-
 #CODE : SKY
-
 	}

 	frag_color.rgb = color;
 	frag_color.a = alpha;

-	// For mobile renderer we're multiplying by 0.5 as we're using a UNORM buffer.
-	// For both mobile and clustered, we also bake in the exposure value for the environment and camera.
-	frag_color.rgb = frag_color.rgb * params.luminance_multiplier;
+	// Apply environment 'brightness' setting separately before fog to ensure consistent luminance.
+	frag_color.rgb = frag_color.rgb * params.brightness_multiplier;

 #if !defined(DISABLE_FOG) && !defined(USE_CUBEMAP_PASS)

@ -278,6 +279,10 @@ void main() {

 #endif // DISABLE_FOG

+	// For mobile renderer we're multiplying by 0.5 as we're using a UNORM buffer.
+	// For both mobile and clustered, we also bake in the exposure value for the environment and camera.
+	frag_color.rgb = frag_color.rgb * params.luminance_multiplier;
+
 	// Blending is disabled for Sky, so alpha doesn't blend.
 	// Alpha is used for subsurface scattering so make sure it doesn't get applied to Sky.
 	if (!AT_CUBEMAP_PASS && !AT_HALF_RES_PASS && !AT_QUARTER_RES_PASS) {
--- a/engine/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl
@ -34,7 +34,7 @@ layout(push_constant, std430) uniform Params {
 }
 params;

-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 layout(set = 1, binding = 1) volatile buffer emissive_only_map_buffer {
 	uint emissive_only_map[];
 };
@ -64,7 +64,7 @@ layout(set = 1, binding = 2, std140) uniform SceneParams {
 }
 scene_params;

-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 layout(set = 1, binding = 3) volatile buffer density_only_map_buffer {
 	uint density_only_map[];
 };
@ -77,9 +77,11 @@ layout(r32ui, set = 1, binding = 4) uniform volatile uimage3D light_only_map;
 #endif

 #ifdef MATERIAL_UNIFORMS_USED
-layout(set = 2, binding = 0, std140) uniform MaterialUniforms{
+/* clang-format off */
+layout(set = 2, binding = 0, std140) uniform MaterialUniforms {
 #MATERIAL_UNIFORMS
 } material;
+/* clang-format on */
 #endif

 #GLOBALS
@ -117,7 +119,7 @@ void main() {
 	if (any(greaterThanEqual(pos, scene_params.fog_volume_size))) {
 		return; //do not compute
 	}
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	uint lpos = pos.z * scene_params.fog_volume_size.x * scene_params.fog_volume_size.y + pos.y * scene_params.fog_volume_size.x + pos.x;
 #endif

@ -222,7 +224,7 @@ void main() {
 		density *= cull_mask;
 		if (abs(density) > 0.001) {
 			int final_density = int(density * DENSITY_SCALE);
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 			atomicAdd(density_only_map[lpos], uint(final_density));
 #else
 			imageAtomicAdd(density_only_map, pos, uint(final_density));
@ -236,7 +238,7 @@ void main() {
 				uvec3 emission_u = uvec3(emission.r * 511.0, emission.g * 511.0, emission.b * 255.0);
 				// R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint
 				uint final_emission = emission_u.r << 21 | emission_u.g << 10 | emission_u.b;
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 				uint prev_emission = atomicAdd(emissive_only_map[lpos], final_emission);
 #else
 				uint prev_emission = imageAtomicAdd(emissive_only_map, pos, final_emission);
@ -252,7 +254,7 @@ void main() {
 				if (any(overflowing)) {
 					uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing);
 					uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b;
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 					atomicOr(emissive_only_map[lpos], force_max);
 #else
 					imageAtomicOr(emissive_only_map, pos, force_max);
@ -267,7 +269,7 @@ void main() {
 				uvec3 scattering_u = uvec3(scattering.r * 2047.0, scattering.g * 2047.0, scattering.b * 1023.0);
 				// R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint
 				uint final_scattering = scattering_u.r << 21 | scattering_u.g << 10 | scattering_u.b;
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 				uint prev_scattering = atomicAdd(light_only_map[lpos], final_scattering);
 #else
 				uint prev_scattering = imageAtomicAdd(light_only_map, pos, final_scattering);
@ -283,7 +285,7 @@ void main() {
 				if (any(overflowing)) {
 					uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing);
 					uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b;
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 					atomicOr(light_only_map[lpos], force_max);
 #else
 					imageAtomicOr(light_only_map, pos, force_max);
--- a/engine/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
@ -190,7 +190,7 @@ params;
 #ifndef MODE_COPY
 layout(set = 0, binding = 15) uniform texture3D prev_density_texture;

-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 layout(set = 0, binding = 16) buffer density_only_map_buffer {
 	uint density_only_map[];
 };
@ -287,7 +287,7 @@ void main() {
 	if (any(greaterThanEqual(pos, params.fog_volume_size))) {
 		return; //do not compute
 	}
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	uint lpos = pos.z * params.fog_volume_size.x * params.fog_volume_size.y + pos.y * params.fog_volume_size.x + pos.x;
 #endif

@ -353,7 +353,7 @@ void main() {
 	vec3 total_light = vec3(0.0);

 	float total_density = params.base_density;
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	uint local_density = density_only_map[lpos];
 #else
 	uint local_density = imageLoad(density_only_map, pos).x;
@ -362,7 +362,7 @@ void main() {
 	total_density += float(int(local_density)) / DENSITY_SCALE;
 	total_density = max(0.0, total_density);

-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	uint scattering_u = light_only_map[lpos];
 #else
 	uint scattering_u = imageLoad(light_only_map, pos).x;
@ -370,7 +370,7 @@ void main() {
 	vec3 scattering = vec3(scattering_u >> 21, (scattering_u << 11) >> 21, scattering_u % 1024) / vec3(2047.0, 2047.0, 1023.0);
 	scattering += params.base_scattering * params.base_density;

-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	uint emission_u = emissive_only_map[lpos];
 #else
 	uint emission_u = imageLoad(emissive_only_map, pos).x;
@ -513,6 +513,7 @@ void main() {
 							shadow_sample.z = 1.0 + abs(shadow_sample.z);
 							vec3 pos = vec3(shadow_sample.xy / shadow_sample.z, shadow_len - omni_lights.data[light_index].shadow_bias);
 							pos.z *= omni_lights.data[light_index].inv_radius;
+							pos.z = 1.0 - pos.z;

 							pos.xy = pos.xy * 0.5 + 0.5;
 							pos.xy = uv_rect.xy + pos.xy * uv_rect.zw;
@ -710,7 +711,7 @@ void main() {
 	final_density = mix(final_density, reprojected_density, reproject_amount);

 	imageStore(density_map, pos, final_density);
-#ifdef MOLTENVK_USED
+#ifdef NO_IMAGE_ATOMICS
 	density_only_map[lpos] = 0;
 	light_only_map[lpos] = 0;
 	emissive_only_map[lpos] = 0;
--- a/engine/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl
@ -128,38 +128,54 @@ void main() {
 	int index = x + y * 4;
 	float limit = 0.0;
 	if (x < 8) {
-		if (index == 0)
+		if (index == 0) {
 			limit = 0.0625;
-		if (index == 1)
+		}
+		if (index == 1) {
 			limit = 0.5625;
-		if (index == 2)
+		}
+		if (index == 2) {
 			limit = 0.1875;
-		if (index == 3)
+		}
+		if (index == 3) {
 			limit = 0.6875;
-		if (index == 4)
+		}
+		if (index == 4) {
 			limit = 0.8125;
-		if (index == 5)
+		}
+		if (index == 5) {
 			limit = 0.3125;
-		if (index == 6)
+		}
+		if (index == 6) {
 			limit = 0.9375;
-		if (index == 7)
+		}
+		if (index == 7) {
 			limit = 0.4375;
-		if (index == 8)
+		}
+		if (index == 8) {
 			limit = 0.25;
-		if (index == 9)
+		}
+		if (index == 9) {
 			limit = 0.75;
-		if (index == 10)
+		}
+		if (index == 10) {
 			limit = 0.125;
-		if (index == 11)
+		}
+		if (index == 11) {
 			limit = 0.625;
-		if (index == 12)
+		}
+		if (index == 12) {
 			limit = 1.0;
-		if (index == 13)
+		}
+		if (index == 13) {
 			limit = 0.5;
-		if (index == 14)
+		}
+		if (index == 14) {
 			limit = 0.875;
-		if (index == 15)
+		}
+		if (index == 15) {
 			limit = 0.375;
+		}
 	}
 	if (frag_color.a < limit) {
 		discard;
--- a/engine/servers/rendering/renderer_rd/shaders/forward_clustered/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_clustered/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
--- a/engine/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl
@ -37,9 +37,121 @@ layout(push_constant, std430) uniform DrawCall {
 	uint uv_offset;
 	uint multimesh_motion_vectors_current_offset;
 	uint multimesh_motion_vectors_previous_offset;
+#ifdef UBERSHADER
+	uint sc_packed_0;
+	uint sc_packed_1;
+	uint sc_packed_2;
+	uint uc_packed_0;
+#endif
 }
 draw_call;

+/* Specialization Constants */
+
+// Source of the packed option words read through sc_packed_0() / sc_packed_1():
+// with UBERSHADER defined they come from the DrawCall push constant, otherwise from
+// pipeline specialization constants. Both branches expose the same two accessors;
+// uc_cull_mode() and the POLYGON_CULL_* defines exist only in the UBERSHADER branch.
+#ifdef UBERSHADER
+
+#define POLYGON_CULL_DISABLED 0
+#define POLYGON_CULL_FRONT 1
+#define POLYGON_CULL_BACK 2
+
+// Pull the constants from the draw call's push constants.
+uint sc_packed_0() {
+	return draw_call.sc_packed_0;
+}
+
+uint sc_packed_1() {
+	return draw_call.sc_packed_1;
+}
+
+// Lowest two bits of draw_call.uc_packed_0.
+// NOTE(review): presumably one of the POLYGON_CULL_* values above — confirm against the code that fills uc_packed_0.
+uint uc_cull_mode() {
+	return (draw_call.uc_packed_0 >> 0) & 3U;
+}
+
+#else
+
+// Pull the constants from the pipeline's specialization constants.
+// Both words default to 0 when the pipeline does not override them.
+layout(constant_id = 0) const uint pso_sc_packed_0 = 0;
+layout(constant_id = 1) const uint pso_sc_packed_1 = 0;
+
+uint sc_packed_0() {
+	return pso_sc_packed_0;
+}
+
+uint sc_packed_1() {
+	return pso_sc_packed_1;
+}
+
+#endif
+
+// Decoders for the packed option words returned by sc_packed_0() / sc_packed_1().
+//
+// sc_packed_0: bit 0 forward GI | bit 1 light projector | bit 2 light soft shadows |
+//   bit 3 directional soft shadows | bit 4 decal mipmaps | bit 5 projector mipmaps |
+//   bit 6 depth fog | bit 7 lightmap bicubic filter | bits 8-13 soft shadow samples |
+//   bits 14-19 penumbra shadow samples | bits 20-25 directional soft shadow samples |
+//   bits 26-31 directional penumbra shadow samples.
+// sc_packed_1: bit 0 multimesh | bit 1 multimesh 2D format | bit 2 multimesh has color |
+//   bit 3 multimesh has custom data.
+
+bool sc_use_forward_gi() {
+	return (sc_packed_0() & (1U << 0)) != 0U;
+}
+
+bool sc_use_light_projector() {
+	return (sc_packed_0() & (1U << 1)) != 0U;
+}
+
+bool sc_use_light_soft_shadows() {
+	return (sc_packed_0() & (1U << 2)) != 0U;
+}
+
+bool sc_use_directional_soft_shadows() {
+	return (sc_packed_0() & (1U << 3)) != 0U;
+}
+
+bool sc_decal_use_mipmaps() {
+	return (sc_packed_0() & (1U << 4)) != 0U;
+}
+
+bool sc_projector_use_mipmaps() {
+	return (sc_packed_0() & (1U << 5)) != 0U;
+}
+
+bool sc_use_depth_fog() {
+	return (sc_packed_0() & (1U << 6)) != 0U;
+}
+
+bool sc_use_lightmap_bicubic_filter() {
+	return (sc_packed_0() & (1U << 7)) != 0U;
+}
+
+// The four sample counts below are 6-bit fields (0..63).
+uint sc_soft_shadow_samples() {
+	return (sc_packed_0() >> 8) & 0x3FU;
+}
+
+uint sc_penumbra_shadow_samples() {
+	return (sc_packed_0() >> 14) & 0x3FU;
+}
+
+uint sc_directional_soft_shadow_samples() {
+	return (sc_packed_0() >> 20) & 0x3FU;
+}
+
+uint sc_directional_penumbra_shadow_samples() {
+	return (sc_packed_0() >> 26) & 0x3FU;
+}
+
+bool sc_multimesh() {
+	return (sc_packed_1() & (1U << 0)) != 0U;
+}
+
+bool sc_multimesh_format_2d() {
+	return (sc_packed_1() & (1U << 1)) != 0U;
+}
+
+bool sc_multimesh_has_color() {
+	return (sc_packed_1() & (1U << 2)) != 0U;
+}
+
+bool sc_multimesh_has_custom_data() {
+	return (sc_packed_1() & (1U << 3)) != 0U;
+}
+
+float sc_luminance_multiplier() {
+	// The clustered renderer has no use for this value; it is fixed at 1.0 so that
+	// code shared with the mobile renderer, which requires the function, still works.
+	return 1.0;
+}
+
 #define SDFGI_MAX_CASCADES 8

 /* Set 0: Base Pass (never changes) */
@ -57,10 +169,6 @@ layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 #define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 9)
 #define INSTANCE_FLAGS_USE_VOXEL_GI (1 << 10)
 #define INSTANCE_FLAGS_PARTICLES (1 << 11)
-#define INSTANCE_FLAGS_MULTIMESH (1 << 12)
-#define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13)
-#define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14)
-#define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15)
 #define INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT 16
 #define INSTANCE_FLAGS_FADE_SHIFT 24
 //3 bits of stride
@ -92,10 +200,16 @@ directional_lights;
 #define LIGHTMAP_FLAG_USE_DIRECTION 1
 #define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2

+#define LIGHTMAP_SHADOWMASK_MODE_NONE 0
+#define LIGHTMAP_SHADOWMASK_MODE_REPLACE 1
+#define LIGHTMAP_SHADOWMASK_MODE_OVERLAY 2
+#define LIGHTMAP_SHADOWMASK_MODE_ONLY 3
+
+// Per-lightmap record.
+// NOTE(review): assuming the std140 `Lightmaps` buffer declared below stores these structs,
+// vec2 + float + uint occupy the same 16 bytes that the old `vec3 pad` + float did, so the
+// struct size is unchanged — confirm against the CPU-side struct.
 struct Lightmap {
 	mat3 normal_xform;
-	vec3 pad;
+	vec2 light_texture_size; // presumably the lightmap texture dimensions — TODO confirm
 	float exposure_normalization;
+	uint flags; // presumably built from the LIGHTMAP_* defines above — TODO confirm encoding
 };

 layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps {
@ -240,7 +354,7 @@ layout(set = 1, binding = 5) uniform texture2D shadow_atlas;

 layout(set = 1, binding = 6) uniform texture2D directional_shadow_atlas;

-layout(set = 1, binding = 7) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
+layout(set = 1, binding = 7) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES * 2];

 layout(set = 1, binding = 8) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES];

--- a/engine/servers/rendering/renderer_rd/shaders/forward_mobile/SCsub
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_mobile/SCsub
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from misc.utility.scons_hints import *

 Import("env")

--- a/engine/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
--- a/engine/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl
@ -20,9 +20,190 @@ layout(push_constant, std430) uniform DrawCall {
 	vec2 uv_offset;
 	uint instance_index;
 	uint pad;
+#ifdef UBERSHADER
+	uint sc_packed_0;
+	uint sc_packed_1;
+	uint sc_packed_2;
+	float sc_packed_3;
+	uint uc_packed_0;
+	uint uc_padding_1;
+	uint uc_padding_2;
+	uint uc_padding_3;
+#endif
 }
 draw_call;

+/* Specialization Constants */
+
+// Source of the packed option words read through sc_packed_0() .. sc_packed_3():
+// with UBERSHADER defined they come from the DrawCall push constant, otherwise from
+// pipeline specialization constants. Both branches expose the same four accessors;
+// uc_cull_mode() and the POLYGON_CULL_* defines exist only in the UBERSHADER branch.
+#ifdef UBERSHADER
+
+#define POLYGON_CULL_DISABLED 0
+#define POLYGON_CULL_FRONT 1
+#define POLYGON_CULL_BACK 2
+
+// Pull the constants from the draw call's push constants.
+uint sc_packed_0() {
+	return draw_call.sc_packed_0;
+}
+
+uint sc_packed_1() {
+	return draw_call.sc_packed_1;
+}
+
+uint sc_packed_2() {
+	return draw_call.sc_packed_2;
+}
+
+// Unlike the other words this one is a float, not a bit field.
+float sc_packed_3() {
+	return draw_call.sc_packed_3;
+}
+
+// Lowest two bits of draw_call.uc_packed_0.
+// NOTE(review): presumably one of the POLYGON_CULL_* values above — confirm against the code that fills uc_packed_0.
+uint uc_cull_mode() {
+	return (draw_call.uc_packed_0 >> 0) & 3U;
+}
+
+#else
+
+// Pull the constants from the pipeline's specialization constants.
+// The three uint words default to 0 and the float word to 2.0 when the pipeline does not override them.
+layout(constant_id = 0) const uint pso_sc_packed_0 = 0;
+layout(constant_id = 1) const uint pso_sc_packed_1 = 0;
+layout(constant_id = 2) const uint pso_sc_packed_2 = 0;
+layout(constant_id = 3) const float pso_sc_packed_3 = 2.0;
+
+uint sc_packed_0() {
+	return pso_sc_packed_0;
+}
+
+uint sc_packed_1() {
+	return pso_sc_packed_1;
+}
+
+uint sc_packed_2() {
+	return pso_sc_packed_2;
+}
+
+float sc_packed_3() {
+	return pso_sc_packed_3;
+}
+
+#endif
+
+// Decoders for the packed option words returned by sc_packed_0() .. sc_packed_3().
+//
+// sc_packed_0: bit 0 light projector | bit 1 light soft shadows | bit 2 directional soft shadows |
+//   bit 3 decal mipmaps | bit 4 projector mipmaps | bit 5 disable fog | bit 6 depth fog |
+//   bit 7 fog aerial perspective | bit 8 fog sun scatter | bit 9 fog height density |
+//   bit 10 lightmap bicubic filter | bit 11 multimesh | bit 12 multimesh 2D format |
+//   bit 13 multimesh has color | bit 14 multimesh has custom data | bit 15 ambient cubemap |
+//   bit 16 reflection cubemap | bit 17 roughness limiter | bits 20-25 soft shadow samples |
+//   bits 26-31 penumbra shadow samples. Bits 18-19 are not read by any decoder here.
+// sc_packed_1: bits 0-5 directional soft shadow samples | bits 6-11 directional penumbra shadow samples |
+//   bits 12-15 omni lights | bits 16-19 spot lights | bits 20-23 reflection probes |
+//   bits 24-27 directional lights | bits 28-31 decals.
+// sc_packed_2: bit i is the blend-split flag for directional light i.
+// sc_packed_3: luminance multiplier (float).
+
+bool sc_use_light_projector() {
+	return (sc_packed_0() & (1U << 0)) != 0U;
+}
+
+bool sc_use_light_soft_shadows() {
+	return (sc_packed_0() & (1U << 1)) != 0U;
+}
+
+bool sc_use_directional_soft_shadows() {
+	return (sc_packed_0() & (1U << 2)) != 0U;
+}
+
+bool sc_decal_use_mipmaps() {
+	return (sc_packed_0() & (1U << 3)) != 0U;
+}
+
+bool sc_projector_use_mipmaps() {
+	return (sc_packed_0() & (1U << 4)) != 0U;
+}
+
+bool sc_disable_fog() {
+	return (sc_packed_0() & (1U << 5)) != 0U;
+}
+
+bool sc_use_depth_fog() {
+	return (sc_packed_0() & (1U << 6)) != 0U;
+}
+
+bool sc_use_fog_aerial_perspective() {
+	return (sc_packed_0() & (1U << 7)) != 0U;
+}
+
+bool sc_use_fog_sun_scatter() {
+	return (sc_packed_0() & (1U << 8)) != 0U;
+}
+
+bool sc_use_fog_height_density() {
+	return (sc_packed_0() & (1U << 9)) != 0U;
+}
+
+bool sc_use_lightmap_bicubic_filter() {
+	return (sc_packed_0() & (1U << 10)) != 0U;
+}
+
+bool sc_multimesh() {
+	return (sc_packed_0() & (1U << 11)) != 0U;
+}
+
+bool sc_multimesh_format_2d() {
+	return (sc_packed_0() & (1U << 12)) != 0U;
+}
+
+bool sc_multimesh_has_color() {
+	return (sc_packed_0() & (1U << 13)) != 0U;
+}
+
+bool sc_multimesh_has_custom_data() {
+	return (sc_packed_0() & (1U << 14)) != 0U;
+}
+
+bool sc_scene_use_ambient_cubemap() {
+	return (sc_packed_0() & (1U << 15)) != 0U;
+}
+
+bool sc_scene_use_reflection_cubemap() {
+	return (sc_packed_0() & (1U << 16)) != 0U;
+}
+
+bool sc_scene_roughness_limiter_enabled() {
+	return (sc_packed_0() & (1U << 17)) != 0U;
+}
+
+// The four sample counts below are 6-bit fields (0..63).
+uint sc_soft_shadow_samples() {
+	return (sc_packed_0() >> 20) & 0x3FU;
+}
+
+uint sc_penumbra_shadow_samples() {
+	return (sc_packed_0() >> 26) & 0x3FU;
+}
+
+uint sc_directional_soft_shadow_samples() {
+	return (sc_packed_1() >> 0) & 0x3FU;
+}
+
+uint sc_directional_penumbra_shadow_samples() {
+	return (sc_packed_1() >> 6) & 0x3FU;
+}
+
+// The five counts below are 4-bit fields (0..15).
+uint sc_omni_lights() {
+	return (sc_packed_1() >> 12) & 0xFU;
+}
+
+uint sc_spot_lights() {
+	return (sc_packed_1() >> 16) & 0xFU;
+}
+
+uint sc_reflection_probes() {
+	return (sc_packed_1() >> 20) & 0xFU;
+}
+
+uint sc_directional_lights() {
+	return (sc_packed_1() >> 24) & 0xFU;
+}
+
+uint sc_decals() {
+	return (sc_packed_1() >> 28) & 0xFU;
+}
+
+// Tests bit `i` of sc_packed_2.
+bool sc_directional_light_blend_split(uint i) {
+	uint shifted = sc_packed_2() >> i;
+	return (shifted & 1U) != 0U;
+}
+
+float sc_luminance_multiplier() {
+	return sc_packed_3();
+}
+
 /* Set 0: Base Pass (never changes) */

 #include "../light_data_inc.glsl"
@ -38,10 +219,6 @@ layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 #define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 9)
 #define INSTANCE_FLAGS_USE_VOXEL_GI (1 << 10)
 #define INSTANCE_FLAGS_PARTICLES (1 << 11)
-#define INSTANCE_FLAGS_MULTIMESH (1 << 12)
-#define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13)
-#define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14)
-#define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15)
 #define INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT 16
 //3 bits of stride
 #define INSTANCE_FLAGS_PARTICLE_TRAIL_MASK 0xFF
@ -69,10 +246,16 @@ directional_lights;
 #define LIGHTMAP_FLAG_USE_DIRECTION 1
 #define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2

+#define LIGHTMAP_SHADOWMASK_MODE_NONE 0
+#define LIGHTMAP_SHADOWMASK_MODE_REPLACE 1
+#define LIGHTMAP_SHADOWMASK_MODE_OVERLAY 2
+#define LIGHTMAP_SHADOWMASK_MODE_ONLY 3
+
+// Per-lightmap record.
+// NOTE(review): assuming the std140 `Lightmaps` buffer declared below stores these structs,
+// vec2 + float + uint occupy the same 16 bytes that the old `vec3 pad` + float did, so the
+// struct size is unchanged — confirm against the CPU-side struct.
 struct Lightmap {
 	mediump mat3 normal_xform;
-	vec3 pad;
+	vec2 light_texture_size; // presumably the lightmap texture dimensions — TODO confirm
 	float exposure_normalization;
+	uint flags; // presumably built from the LIGHTMAP_* defines above — TODO confirm encoding
 };

 layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps {
@ -152,7 +335,7 @@ layout(set = 1, binding = 4) uniform highp texture2D shadow_atlas;
 layout(set = 1, binding = 5) uniform highp texture2D directional_shadow_atlas;

 // this needs to change to providing just the lightmap we're using..
-layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
+layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES * 2];

 #ifdef USE_MULTIVIEW
 layout(set = 1, binding = 9) uniform highp texture2DArray depth_buffer;
--- a/engine/servers/rendering/renderer_rd/shaders/light_data_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/light_data_inc.glsl
@ -41,10 +41,14 @@ struct ReflectionData {
 	uint mask;
 	mediump vec3 ambient; // ambient color
 	mediump float intensity;
+	mediump float blend_distance;
 	bool exterior;
 	bool box_project;
 	uint ambient_mode;
 	float exposure_normalization;
+	float pad0;
+	float pad1;
+	float pad2;
 	//0-8 is intensity,8-9 is ambient, mode
 	highp mat4 local_matrix; // up to here for spot and omni, rest is for directional
 	// notes: for ambientblend, use distance to edge to blend between already existing global environment
--- a/engine/servers/rendering/renderer_rd/shaders/particles.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/particles.glsl
@ -168,11 +168,11 @@ layout(set = 2, binding = 1) uniform texture2D height_field_texture;
 /* SET 3: MATERIAL */

 #ifdef MATERIAL_UNIFORMS_USED
-layout(set = 3, binding = 0, std140) uniform MaterialUniforms{
-
+/* clang-format off */
+layout(set = 3, binding = 0, std140) uniform MaterialUniforms {
 #MATERIAL_UNIFORMS
-
 } material;
+/* clang-format on */
 #endif

 layout(push_constant, std430) uniform Params {
@ -488,7 +488,7 @@ void main() {
 			}
 			amount = pow(amount, FRAME.attractors[i].attenuation);
 			dir = safe_normalize(mix(dir, FRAME.attractors[i].transform[2].xyz, FRAME.attractors[i].directionality));
-			attractor_force -= amount * dir * FRAME.attractors[i].strength;
+			attractor_force -= mass * amount * dir * FRAME.attractors[i].strength;
 		}

 		float particle_size = FRAME.particle_size;
--- a/engine/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl
@ -52,11 +52,11 @@ struct SceneData {
 	uint fog_mode;
 	highp float fog_density;
 	highp float fog_height;
-	highp float fog_height_density;

+	highp float fog_height_density;
 	highp float fog_depth_curve;
-	highp float pad;
 	highp float fog_depth_begin;
+	highp float taa_frame_count;

 	mediump vec3 fog_light_color;
 	highp float fog_depth_end;
--- a/engine/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl
--- a/engine/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl
+++ b/engine/servers/rendering/renderer_rd/shaders/scene_forward_vertex_lights_inc.glsl
@ -0,0 +1,82 @@
+// Simplified versions of light functions intended for the vertex shader.
+
+// Cheap polynomial stand-in, fitted by eye, for `exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25`.
+// Spends a few extra FMA instructions (2x rate) to avoid special instructions (0.25x rate).
+// For roughness in [0, 1] the result lies in about [0.64, 4977]. The original comment quotes the
+// replaced range as "[068,2,221,528]" (presumably [0.68, 2,221,528] — TODO confirm); the reduced
+// range is what makes mediump feasible for the rest of the shader.
+mediump float roughness_to_shininess(mediump float roughness) {
+	mediump float base = 1.2 - roughness;
+	mediump float base_sq = base * base;
+	// base^5, multiplied in the same order as before so rounding is unchanged.
+	mediump float base_pow5 = base * base_sq * base_sq;
+	return base_pow5 * 2000.0;
+}
+
+// Accumulates one light's diffuse and (unless SPECULAR_DISABLED is defined) specular contribution.
+//
+// N, L, V: surface normal, direction towards the light, and view vector. The callers in this
+//   file pass a normalized L; N and V are assumed to be normalized by the caller — TODO confirm.
+// light_color: light color; the callers in this file pass it already scaled by attenuation.
+// is_directional: not read by this simplified vertex path.
+// roughness: surface roughness; doubles as the wrap amount when DIFFUSE_LAMBERT_WRAP is defined.
+// diffuse_light, specular_light: running totals; this light's contribution is added to them.
+void light_compute_vertex(vec3 N, vec3 L, vec3 V, vec3 light_color, bool is_directional, float roughness,
+		inout vec3 diffuse_light, inout vec3 specular_light) {
+	float NdotL = min(dot(N, L), 1.0);
+	float cNdotL = max(NdotL, 0.0); // clamped NdotL
+
+#if defined(DIFFUSE_LAMBERT_WRAP)
+	// Energy conserving lambert wrap shader.
+	// https://web.archive.org/web/20210228210901/http://blog.stevemcauley.com/2011/12/03/energy-conserving-wrapped-diffuse/
+	// The wrap term must use the unclamped NdotL so the light fades to zero at NdotL = -roughness.
+	// Feeding it the clamped value made the outer max() a no-op and gave every back-facing vertex
+	// a constant roughness / (1 + roughness)^2 term.
+	float diffuse_brdf_NL = max(0.0, (NdotL + roughness) / ((1.0 + roughness) * (1.0 + roughness))) * (1.0 / M_PI);
+#else
+	// lambert
+	float diffuse_brdf_NL = cNdotL * (1.0 / M_PI);
+#endif
+
+	diffuse_light += light_color * diffuse_brdf_NL;
+
+#if !defined(SPECULAR_DISABLED)
+	// Normalized blinn always unless disabled.
+	vec3 H = normalize(V + L);
+	float cNdotH = clamp(dot(N, H), 0.0, 1.0);
+	float shininess = roughness_to_shininess(roughness);
+	float blinn = pow(cNdotH, shininess);
+	// Normalization factor (shininess + 2) / (8 * pi); the clamped NdotL keeps back faces dark.
+	blinn *= (shininess + 2.0) * (1.0 / (8.0 * M_PI)) * cNdotL;
+	specular_light += blinn * light_color;
+#endif
+}
+
+// Distance falloff shared by the omni and spot vertex paths:
+//   max(1 - (distance * inv_range)^4, 0)^2 * max(distance, 0.0001)^(-decay)
+// The first factor reaches zero once distance * inv_range >= 1; the floor on distance
+// keeps the power term finite at the light's position.
+float get_omni_attenuation(float distance, float inv_range, float decay) {
+	float ratio = distance * inv_range;
+	float ratio_sq = ratio * ratio;
+	float ratio_pow4 = ratio_sq * ratio_sq;
+	float window = max(1.0 - ratio_pow4, 0.0);
+	float window_sq = window * window;
+	return window_sq * pow(max(distance, 0.0001), -decay);
+}
+
+// Adds omni light `idx`'s contribution at `vertex` to the running diffuse/specular totals.
+// The light color is scaled by get_omni_attenuation() before being handed to light_compute_vertex().
+void light_process_omni_vertex(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, float roughness,
+		inout vec3 diffuse_light, inout vec3 specular_light) {
+	vec3 to_light = omni_lights.data[idx].position - vertex;
+	float dist_to_light = length(to_light);
+	float falloff = get_omni_attenuation(dist_to_light, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation);
+	vec3 attenuated_color = omni_lights.data[idx].color * falloff;
+	vec3 light_dir = normalize(to_light);
+
+	light_compute_vertex(normal, light_dir, eye_vec, attenuated_color, false, roughness, diffuse_light, specular_light);
+}
+
+// Adds spot light `idx`'s contribution at `vertex` to the running diffuse/specular totals.
+// Distance falloff comes from get_omni_attenuation(); the cone falloff is multiplied on top
+// before the attenuated color is handed to light_compute_vertex().
+void light_process_spot_vertex(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, float roughness,
+		inout vec3 diffuse_light,
+		inout vec3 specular_light) {
+	vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
+	float light_length = length(light_rel_vec);
+	// Normalized once and shared by the cone test and the lighting call.
+	vec3 light_dir = normalize(light_rel_vec);
+	float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation);
+	vec3 spot_dir = spot_lights.data[idx].direction;
+
+	// This conversion to a highp float is crucial to prevent light leaking
+	// due to precision errors in the following calculations (cone angle is mediump).
+	// NOTE(review): cone_angle is compared directly against a dot product, so it is presumably stored as a cosine — confirm.
+	highp float cone_angle = spot_lights.data[idx].cone_angle;
+	float scos = max(dot(-light_dir, spot_dir), cone_angle);
+	float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cone_angle));
+
+	spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation);
+	vec3 color = spot_lights.data[idx].color * spot_attenuation;
+
+	light_compute_vertex(normal, light_dir, eye_vec, color, false, roughness,
+			diffuse_light, specular_light);
+}