feat: updated engine version to 4.4-rc1

This commit is contained in:
Sara 2025-02-23 14:38:14 +01:00
parent ee00efde1f
commit 21ba8e33af
5459 changed files with 1128836 additions and 198305 deletions

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
from misc.utility.scons_hints import *
Import("env")
Import("env_modules")
@ -65,18 +66,12 @@ if env["builtin_embree"]:
env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds.
if not env.msvc:
if env["arch"] in ["x86_64", "x86_32"]:
env_raycast.Append(CCFLAGS=["-msse2", "-mxsave"])
if env["platform"] == "windows":
env_raycast.Append(CCFLAGS=["-mstackrealign"])
if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])
env_raycast.Append(CCFLAGS=["-mstackrealign"])
if env.msvc: # Disable bogus warning about intentional struct padding.
env_raycast.Append(CCFLAGS=["/wd4324"])

View file

@ -4,8 +4,8 @@ import re
import shutil
import stat
import subprocess
from types import TracebackType
from typing import Any, Callable, Tuple, Type
import sys
from typing import Any, Callable
git_tag = "v4.3.1"
@ -100,9 +100,7 @@ subprocess.run(["git", "checkout", git_tag])
commit_hash = str(subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True)).strip()
def on_rm_error(
function: Callable[..., Any], path: str, excinfo: Tuple[Type[Exception], Exception, TracebackType]
) -> None:
def on_rm_error(function: Callable[..., Any], path: str, excinfo: Exception) -> None:
"""
Error handler for `shutil.rmtree()`.
@ -113,10 +111,12 @@ def on_rm_error(
os.unlink(path)
# 3.12 Python and beyond should replace `onerror` with `onexc`.
# We remove the .git directory because it contains
# a lot of read-only files that are problematic on Windows.
shutil.rmtree(".git", onerror=on_rm_error)
if sys.version_info >= (3, 12):
shutil.rmtree(".git", onexc=on_rm_error)
else:
shutil.rmtree(".git", onerror=on_rm_error) # type: ignore
all_files = set(cpp_files)

View file

@ -80,30 +80,22 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) {
memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t));
}
void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Vector3 &p_near_bottom_left, const Vector2 &p_near_extents, real_t p_z_far, bool p_cam_orthogonal) {
CameraRayThreadData td;
td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
td.z_near = p_cam_projection.get_z_near();
td.z_far = p_cam_projection.get_z_far() * 1.05f;
td.z_near = -p_near_bottom_left.z;
td.z_far = p_z_far * 1.05f;
td.camera_pos = p_cam_transform.origin;
td.camera_dir = -p_cam_transform.basis.get_column(2);
td.camera_orthogonal = p_cam_orthogonal;
Projection inv_camera_matrix = p_cam_projection.inverse();
Vector3 camera_corner_proj = Vector3(-1.0f, -1.0f, -1.0f);
Vector3 camera_corner_view = inv_camera_matrix.xform(camera_corner_proj);
td.pixel_corner = p_cam_transform.xform(camera_corner_view);
// Calculate the world coordinates of the viewport.
td.pixel_corner = p_cam_transform.xform(p_near_bottom_left);
Vector3 top_corner_world = p_cam_transform.xform(p_near_bottom_left + Vector3(0, p_near_extents.y, 0));
Vector3 right_corner_world = p_cam_transform.xform(p_near_bottom_left + Vector3(p_near_extents.x, 0, 0));
Vector3 top_corner_proj = Vector3(-1.0f, 1.0f, -1.0f);
Vector3 top_corner_view = inv_camera_matrix.xform(top_corner_proj);
Vector3 top_corner_world = p_cam_transform.xform(top_corner_view);
Vector3 left_corner_proj = Vector3(1.0f, -1.0f, -1.0f);
Vector3 left_corner_view = inv_camera_matrix.xform(left_corner_proj);
Vector3 left_corner_world = p_cam_transform.xform(left_corner_view);
td.pixel_u_interp = left_corner_world - td.pixel_corner;
td.pixel_u_interp = right_corner_world - td.pixel_corner;
td.pixel_v_interp = top_corner_world - td.pixel_corner;
debug_tex_range = td.z_far;
@ -140,7 +132,7 @@ void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const CameraRa
Vector3 dir;
if (p_data->camera_orthogonal) {
dir = -p_data->camera_dir;
dir = p_data->camera_dir;
tile.ray.org_x[j] = pixel_pos.x - dir.x * p_data->z_near;
tile.ray.org_y[j] = pixel_pos.y - dir.y * p_data->z_near;
tile.ray.org_z[j] = pixel_pos.z - dir.z * p_data->z_near;
@ -181,17 +173,7 @@ void RaycastOcclusionCull::RaycastHZBuffer::sort_rays(const Vector3 &p_camera_di
}
int k = tile_i * TILE_SIZE + tile_j;
int tile_index = i * tile_grid_size.x + j;
float d = camera_rays[tile_index].ray.tfar[k];
if (!p_orthogonal) {
const float &dir_x = camera_rays[tile_index].ray.dir_x[k];
const float &dir_y = camera_rays[tile_index].ray.dir_y[k];
const float &dir_z = camera_rays[tile_index].ray.dir_z[k];
float cos_theta = p_camera_dir.x * dir_x + p_camera_dir.y * dir_y + p_camera_dir.z * dir_z;
d *= cos_theta;
}
mips[0][y * buffer_size.x + x] = d;
mips[0][y * buffer_size.x + x] = camera_rays[tile_index].ray.tfar[k];
}
}
}
@ -351,10 +333,10 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
int vertices_size = occ->vertices.size();
// Embree requires the last element to be readable by a 16-byte SSE load instruction, so we add padding to be safe.
occ_inst->xformed_vertices.resize(vertices_size + 1);
occ_inst->xformed_vertices.resize(3 * vertices_size + 3);
const Vector3 *read_ptr = occ->vertices.ptr();
Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
float *write_ptr = occ_inst->xformed_vertices.ptr();
if (vertices_size > 1024) {
TransformThreadData td;
@ -382,9 +364,14 @@ void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_threa
_transform_vertices_range(p_data->read, p_data->write, p_data->xform, from, to);
}
void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) {
void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, float *p_write, const Transform3D &p_xform, int p_from, int p_to) {
float *floats_w = p_write + 3 * p_from;
for (int i = p_from; i < p_to; i++) {
p_write[i] = p_xform.xform(p_read[i]);
const Vector3 p = p_xform.xform(p_read[i]);
floats_w[0] = p.x;
floats_w[1] = p.y;
floats_w[2] = p.z;
floats_w += 3;
}
}
@ -475,7 +462,7 @@ void RaycastOcclusionCull::Scenario::update() {
}
RTCGeometry geom = rtcNewGeometry(raycast_singleton->ebr_device, RTC_GEOMETRY_TYPE_TRIANGLE);
rtcSetSharedGeometryBuffer(geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, occ_inst->xformed_vertices.ptr(), 0, sizeof(Vector3), occ_inst->xformed_vertices.size());
rtcSetSharedGeometryBuffer(geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, occ_inst->xformed_vertices.ptr(), 0, sizeof(float) * 3, occ_inst->xformed_vertices.size() / 3);
rtcSetSharedGeometryBuffer(geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, occ_inst->indices.ptr(), 0, sizeof(uint32_t) * 3, occ_inst->indices.size() / 3);
rtcCommitGeometry(geom);
rtcAttachGeometry(next_scene, geom);
@ -538,18 +525,16 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size)
buffers[p_buffer].resize(p_size);
}
Projection RaycastOcclusionCull::_jitter_projection(const Projection &p_cam_projection, const Size2i &p_viewport_size) {
Vector2 RaycastOcclusionCull::_jitter_half_extents(const Vector2 &p_half_extents, const Size2i &p_viewport_size) {
if (!_jitter_enabled) {
return p_cam_projection;
return p_half_extents;
}
// Prevent divide by zero when using NULL viewport.
if ((p_viewport_size.x <= 0) || (p_viewport_size.y <= 0)) {
return p_cam_projection;
return p_half_extents;
}
Projection p = p_cam_projection;
int32_t frame = Engine::get_singleton()->get_frames_drawn();
frame %= 9;
@ -584,16 +569,16 @@ Projection RaycastOcclusionCull::_jitter_projection(const Projection &p_cam_proj
} break;
}
// The multiplier here determines the divergence from center,
// and is to some extent a balancing act.
// Higher divergence gives fewer false hidden, but more false shown.
jitter *= Vector2(p_half_extents.x / (float)p_viewport_size.x, p_half_extents.y / (float)p_viewport_size.y);
// The multiplier here determines the jitter magnitude in pixels.
// A value of 0.66 seems to match the jittering pattern above well, as it generates subpixel samples at 0, 1/3 and 2/3.
// Higher magnitude gives fewer false hidden, but more false shown.
// False hidden is obvious to viewer, false shown is not.
// False shown can lower percentage that are occluded, and therefore performance.
jitter *= Vector2(1 / (float)p_viewport_size.x, 1 / (float)p_viewport_size.y) * 0.05f;
jitter *= 0.66f;
p.add_jitter_offset(jitter);
return p;
return p_half_extents + jitter;
}
void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
@ -610,9 +595,11 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_
Scenario &scenario = scenarios[buffer.scenario_rid];
scenario.update();
Projection jittered_proj = _jitter_projection(p_cam_projection, buffer.get_occlusion_buffer_size());
Vector2 viewport_half = p_cam_projection.get_viewport_half_extents();
Vector2 jitter_viewport_half = _jitter_half_extents(viewport_half, buffer.get_occlusion_buffer_size());
Vector3 near_bottom_left = Vector3(-jitter_viewport_half.x, -jitter_viewport_half.y, -p_cam_projection.get_z_near());
buffer.update_camera_rays(p_cam_transform, jittered_proj, p_cam_orthogonal);
buffer.update_camera_rays(p_cam_transform, near_bottom_left, 2 * viewport_half, p_cam_projection.get_z_far(), p_cam_orthogonal);
scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count);
buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal);

View file

@ -31,13 +31,9 @@
#ifndef RAYCAST_OCCLUSION_CULL_H
#define RAYCAST_OCCLUSION_CULL_H
#include "core/io/image.h"
#include "core/math/projection.h"
#include "core/object/object.h"
#include "core/object/ref_counted.h"
#include "core/templates/local_vector.h"
#include "core/templates/rid_owner.h"
#include "scene/resources/mesh.h"
#include "servers/rendering/renderer_scene_occlusion_cull.h"
#include <embree4/rtcore.h>
@ -76,7 +72,7 @@ public:
virtual void clear() override;
virtual void resize(const Size2i &p_size) override;
void sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal);
void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal);
void update_camera_rays(const Transform3D &p_cam_transform, const Vector3 &p_near_bottom_left, const Vector2 &p_near_extents, real_t p_z_far, bool p_cam_orthogonal);
~RaycastHZBuffer();
};
@ -109,7 +105,7 @@ private:
struct OccluderInstance {
RID occluder;
LocalVector<uint32_t> indices;
LocalVector<Vector3> xformed_vertices;
LocalVector<float> xformed_vertices;
Transform3D xform;
bool enabled = true;
bool removed = false;
@ -126,7 +122,7 @@ private:
uint32_t vertex_count;
Transform3D xform;
const Vector3 *read;
Vector3 *write = nullptr;
float *write = nullptr;
};
Thread *commit_thread = nullptr;
@ -144,7 +140,7 @@ private:
void _update_dirty_instance_thread(int p_idx, RID *p_instances);
void _update_dirty_instance(int p_idx, RID *p_instances);
void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
void _transform_vertices_range(const Vector3 *p_read, float *p_write, const Transform3D &p_xform, int p_from, int p_to);
static void _commit_scene(void *p_ud);
void free();
void update();
@ -166,7 +162,7 @@ private:
bool _jitter_enabled = false;
void _init_embree();
Projection _jitter_projection(const Projection &p_cam_projection, const Size2i &p_viewport_size);
Vector2 _jitter_half_extents(const Vector2 &p_half_extents, const Size2i &p_viewport_size);
public:
virtual bool is_occluder(RID p_rid) override;