feat: modules moved and engine moved to submodule

This commit is contained in:
Jan van der Weide 2025-04-12 18:40:44 +02:00
parent dfb5e645cd
commit c33d2130cc
5136 changed files with 225275 additions and 64485 deletions

View file

@ -22,7 +22,7 @@ thirdparty_sources = [
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
env_metal.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"])
env_metal.Prepend(CPPEXTPATH=[thirdparty_dir, thirdparty_dir + "/include"])
# Must enable exceptions for SPIRV-Cross; otherwise, it will abort the process on errors.
if "-fno-exceptions" in env_metal["CXXFLAGS"]:

View file

@ -28,8 +28,7 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#ifndef INFLECTION_MAP_H
#define INFLECTION_MAP_H
#pragma once
#include "core/templates/hash_map.h"
#include "core/templates/local_vector.h"
@ -121,5 +120,3 @@ protected:
HashMap<KeyType, IndexValue> _inflection_indexes;
IndexValue _linear_indexes[LinearCount];
};
#endif // INFLECTION_MAP_H

View file

@ -28,6 +28,8 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
@ -48,9 +50,6 @@
/* permissions and limitations under the License. */
/**************************************************************************/
#ifndef METAL_DEVICE_PROPERTIES_H
#define METAL_DEVICE_PROPERTIES_H
#import "servers/rendering/rendering_device.h"
#import <Foundation/Foundation.h>
@ -125,6 +124,7 @@ struct MetalLimits {
uint32_t maxVertexInputBindingStride;
uint32_t maxDrawIndexedIndexValue;
uint32_t maxShaderVaryings;
uint32_t maxThreadGroupMemoryAllocation;
double temporalScalerInputContentMinScale;
double temporalScalerInputContentMaxScale;
@ -152,5 +152,3 @@ public:
private:
static const SampleCount sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX];
};
#endif // METAL_DEVICE_PROPERTIES_H

View file

@ -305,6 +305,14 @@ void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
limits.maxVertexInputBindingStride = (2 * KIBI);
limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxThreadGroupMemoryAllocation = 32768;
} else if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
limits.maxThreadGroupMemoryAllocation = 16384;
} else {
limits.maxThreadGroupMemoryAllocation = 16352;
}
#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
limits.minUniformBufferOffsetAlignment = 64;
#endif

View file

@ -28,6 +28,8 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
@ -48,14 +50,11 @@
/* permissions and limitations under the License. */
/**************************************************************************/
#ifndef METAL_OBJECTS_H
#define METAL_OBJECTS_H
#import "metal_device_properties.h"
#import "metal_utils.h"
#import "pixel_formats.h"
#import "servers/rendering/rendering_device_driver.h"
#include "servers/rendering/rendering_device_driver.h"
#import <CommonCrypto/CommonDigest.h>
#import <Foundation/Foundation.h>
@ -787,20 +786,20 @@ struct BoundUniformSet {
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet {
private:
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
public:
uint32_t index;
LocalVector<RDD::BoundUniform> uniforms;
HashMap<MDShader *, BoundUniformSet> bound_uniforms;
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage);
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index);
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline {
@ -975,5 +974,3 @@ auto release(RDD::ID p_id) {
}
} // namespace rid
#endif // METAL_OBJECTS_H

View file

@ -213,36 +213,38 @@ void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, R
DEV_ASSERT(type == MDCommandBufferStateType::Render);
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
if (render.uniform_sets.size() <= set->index) {
if (render.uniform_sets.size() <= p_set_index) {
uint32_t s = render.uniform_sets.size();
render.uniform_sets.resize(set->index + 1);
render.uniform_sets.resize(p_set_index + 1);
// Set intermediate values to null.
std::fill(&render.uniform_sets[s], &render.uniform_sets[set->index] + 1, nullptr);
std::fill(&render.uniform_sets[s], &render.uniform_sets[p_set_index] + 1, nullptr);
}
if (render.uniform_sets[set->index] != set) {
if (render.uniform_sets[p_set_index] != set) {
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
render.uniform_set_mask |= 1ULL << set->index;
render.uniform_sets[set->index] = set;
render.uniform_set_mask |= 1ULL << p_set_index;
render.uniform_sets[p_set_index] = set;
}
}
void MDCommandBuffer::render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
for (size_t i = 0u; i < p_set_count; ++i) {
for (size_t i = 0; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
if (render.uniform_sets.size() <= set->index) {
uint32_t index = p_first_set_index + i;
if (render.uniform_sets.size() <= index) {
uint32_t s = render.uniform_sets.size();
render.uniform_sets.resize(set->index + 1);
render.uniform_sets.resize(index + 1);
// Set intermediate values to null.
std::fill(&render.uniform_sets[s], &render.uniform_sets[set->index] + 1, nullptr);
std::fill(&render.uniform_sets[s], &render.uniform_sets[index] + 1, nullptr);
}
if (render.uniform_sets[set->index] != set) {
if (render.uniform_sets[index] != set) {
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
render.uniform_set_mask |= 1ULL << set->index;
render.uniform_sets[set->index] = set;
render.uniform_set_mask |= 1ULL << index;
render.uniform_sets[index] = set;
}
}
}
@ -474,14 +476,14 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
while (set_uniforms != 0) {
// Find the index of the next set bit.
int index = __builtin_ctzll(set_uniforms);
uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
// Clear the set bit.
set_uniforms &= (set_uniforms - 1);
MDUniformSet *set = render.uniform_sets[index];
if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) {
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
continue;
}
set->bind_uniforms(shader, render);
set->bind_uniforms(shader, render, index);
}
}
@ -610,7 +612,7 @@ void MDCommandBuffer::_render_clear_render_area() {
bool shouldClearStencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true));
if (shouldClearDepth || shouldClearStencil) {
MDAttachment const &attachment = pass.attachments[ds_index];
BitField<RDD::TextureAspectBits> bits;
BitField<RDD::TextureAspectBits> bits = {};
if (shouldClearDepth && attachment.type & MDAttachmentType::Depth) {
bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
}
@ -955,7 +957,7 @@ void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set,
MDShader *shader = (MDShader *)(p_shader.id);
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
set->bind_uniforms(shader, compute);
set->bind_uniforms(shader, compute, p_set_index);
}
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
@ -966,7 +968,7 @@ void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_
// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
for (size_t i = 0u; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
set->bind_uniforms(shader, compute);
set->bind_uniforms(shader, compute, p_first_set_index + i);
}
}
@ -1052,50 +1054,50 @@ void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCo
}
}
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
UniformSet const &set_info = p_shader->sets[index];
UniformSet const &set_info = p_shader->sets[p_set_index];
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
id<MTLDevice> __unsafe_unretained device = enc.device;
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index);
// Set the buffer for the vertex stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
if (offset) {
[enc setVertexBuffer:bus.buffer offset:*offset atIndex:index];
[enc setVertexBuffer:bus.buffer offset:*offset atIndex:p_set_index];
}
}
// Set the buffer for the fragment stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
if (offset) {
[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:index];
[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:p_set_index];
}
}
}
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(!p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
UniformSet const &set = p_shader->sets[index];
UniformSet const &set = p_shader->sets[p_set_index];
for (uint32_t i = 0; i < MIN(uniforms.size(), set.uniforms.size()); i++) {
RDD::BoundUniform const &uniform = uniforms[i];
UniformInfo ui = set.uniforms[i];
const UniformInfo &ui = set.uniforms[i];
static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT };
for (const RDC::ShaderStage stage : stage_usages) {
ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
BindingInfo *bi = ui.bindings.getptr(stage);
const BindingInfo *bi = ui.bindings.getptr(stage);
if (bi == nullptr) {
// No binding for this stage.
continue;
@ -1129,7 +1131,7 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Ren
samplers[j] = sampler;
textures[j] = texture;
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
@ -1175,7 +1177,7 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Ren
[enc setFragmentTexture:obj atIndex:bi->index];
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
id<MTLBuffer> buf = tex.buffer;
@ -1256,47 +1258,47 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Ren
}
}
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
if (p_shader->uses_argument_buffers) {
bind_uniforms_argument_buffers(p_shader, p_state);
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index);
} else {
bind_uniforms_direct(p_shader, p_state);
bind_uniforms_direct(p_shader, p_state, p_set_index);
}
}
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
UniformSet const &set_info = p_shader->sets[index];
UniformSet const &set_info = p_shader->sets[p_set_index];
id<MTLComputeCommandEncoder> enc = p_state.encoder;
id<MTLDevice> device = enc.device;
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index);
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
if (offset) {
[enc setBuffer:bus.buffer offset:*offset atIndex:index];
[enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index];
}
}
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
DEV_ASSERT(!p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
id<MTLComputeCommandEncoder> __unsafe_unretained enc = p_state.encoder;
UniformSet const &set = p_shader->sets[index];
UniformSet const &set = p_shader->sets[p_set_index];
for (uint32_t i = 0; i < uniforms.size(); i++) {
RDD::BoundUniform const &uniform = uniforms[i];
UniformInfo ui = set.uniforms[i];
const UniformInfo &ui = set.uniforms[i];
const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE;
const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage);
BindingInfo *bi = ui.bindings.getptr(stage);
const BindingInfo *bi = ui.bindings.getptr(stage);
if (bi == nullptr) {
// No binding for this stage.
continue;
@ -1326,7 +1328,7 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Com
samplers[j] = sampler;
textures[j] = texture;
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
[enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
}
@ -1352,7 +1354,7 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Com
id<MTLTexture> obj = rid::get(uniform.ids[0]);
[enc setTexture:obj atIndex:bi->index];
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
id<MTLBuffer> buf = tex.buffer;
@ -1407,22 +1409,22 @@ void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::Com
}
}
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index) {
if (p_shader->uses_argument_buffers) {
bind_uniforms_argument_buffers(p_shader, p_state);
bind_uniforms_argument_buffers(p_shader, p_state, p_set_index);
} else {
bind_uniforms_direct(p_shader, p_state);
bind_uniforms_direct(p_shader, p_state, p_set_index);
}
}
BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage) {
BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index) {
BoundUniformSet *sus = bound_uniforms.getptr(p_shader);
if (sus != nullptr) {
sus->merge_into(p_resource_usage);
return *sus;
}
UniformSet const &set = p_shader->sets[index];
UniformSet const &set = p_shader->sets[p_set_index];
HashMap<id<MTLResource>, StageResourceUsage> bound_resources;
auto add_usage = [&bound_resources](id<MTLResource> __unsafe_unretained res, RDD::ShaderStage stage, MTLResourceUsage usage) {

View file

@ -28,11 +28,12 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#ifndef METAL_UTILS_H
#define METAL_UTILS_H
#pragma once
#import <os/log.h>
#import <functional>
#pragma mark - Boolean flags
namespace flags {
@ -101,5 +102,3 @@ private:
extern os_log_t LOG_DRIVER;
// Used for dynamic tracing.
extern os_log_t LOG_INTERVALS;
#endif // METAL_UTILS_H

View file

@ -28,6 +28,8 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
@ -48,16 +50,14 @@
/* permissions and limitations under the License. */
/**************************************************************************/
#ifndef PIXEL_FORMATS_H
#define PIXEL_FORMATS_H
#include "core/typedefs.h"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
#import "inflection_map.h"
#import "metal_device_properties.h"
#import "servers/rendering/rendering_device.h"
#include "servers/rendering/rendering_device.h"
#import <Metal/Metal.h>
@ -408,6 +408,4 @@ protected:
TightLocalVector<MTLFormatDesc> _mtl_vertex_format_descs;
};
#pragma clang diagnostic pop
#endif // PIXEL_FORMATS_H
GODOT_CLANG_WARNING_POP

View file

@ -450,8 +450,7 @@ void PixelFormats::initDataFormatCapabilities() {
addDataFormatDesc(X8_D24_UNORM_PACK32, Invalid, Depth24Unorm_Stencil8, Invalid, Invalid, 1, 1, 4, DepthStencil);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
addDataFormatDesc(BC1_RGB_UNORM_BLOCK, BC1_RGBA, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
addDataFormatDesc(BC1_RGB_SRGB_BLOCK, BC1_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
@ -476,7 +475,7 @@ void PixelFormats::initDataFormatCapabilities() {
addDataFormatDesc(BC7_UNORM_BLOCK, BC7_RGBAUnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
addDataFormatDesc(BC7_SRGB_BLOCK, BC7_RGBAUnorm_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
#pragma clang diagnostic pop
GODOT_CLANG_WARNING_POP
addDataFormatDesc(ETC2_R8G8B8_UNORM_BLOCK, ETC2_RGB8, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
addDataFormatDesc(ETC2_R8G8B8_SRGB_BLOCK, ETC2_RGB8_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
@ -493,32 +492,46 @@ void PixelFormats::initDataFormatCapabilities() {
addDataFormatDesc(EAC_R11G11_SNORM_BLOCK, EAC_RG11Snorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
addDataFormatDesc(ASTC_4x4_UNORM_BLOCK, ASTC_4x4_LDR, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
addDataFormatDesc(ASTC_4x4_SFLOAT_BLOCK, ASTC_4x4_HDR, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
addDataFormatDesc(ASTC_4x4_SRGB_BLOCK, ASTC_4x4_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
addDataFormatDesc(ASTC_5x4_UNORM_BLOCK, ASTC_5x4_LDR, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
addDataFormatDesc(ASTC_5x4_SFLOAT_BLOCK, ASTC_5x4_HDR, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
addDataFormatDesc(ASTC_5x4_SRGB_BLOCK, ASTC_5x4_sRGB, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
addDataFormatDesc(ASTC_5x5_UNORM_BLOCK, ASTC_5x5_LDR, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
addDataFormatDesc(ASTC_5x5_SFLOAT_BLOCK, ASTC_5x5_HDR, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
addDataFormatDesc(ASTC_5x5_SRGB_BLOCK, ASTC_5x5_sRGB, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
addDataFormatDesc(ASTC_6x5_UNORM_BLOCK, ASTC_6x5_LDR, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
addDataFormatDesc(ASTC_6x5_SFLOAT_BLOCK, ASTC_6x5_HDR, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
addDataFormatDesc(ASTC_6x5_SRGB_BLOCK, ASTC_6x5_sRGB, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
addDataFormatDesc(ASTC_6x6_UNORM_BLOCK, ASTC_6x6_LDR, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
addDataFormatDesc(ASTC_6x6_SFLOAT_BLOCK, ASTC_6x6_HDR, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
addDataFormatDesc(ASTC_6x6_SRGB_BLOCK, ASTC_6x6_sRGB, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
addDataFormatDesc(ASTC_8x5_UNORM_BLOCK, ASTC_8x5_LDR, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
addDataFormatDesc(ASTC_8x5_SFLOAT_BLOCK, ASTC_8x5_HDR, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
addDataFormatDesc(ASTC_8x5_SRGB_BLOCK, ASTC_8x5_sRGB, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
addDataFormatDesc(ASTC_8x6_UNORM_BLOCK, ASTC_8x6_LDR, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
addDataFormatDesc(ASTC_8x6_SFLOAT_BLOCK, ASTC_8x6_HDR, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
addDataFormatDesc(ASTC_8x6_SRGB_BLOCK, ASTC_8x6_sRGB, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
addDataFormatDesc(ASTC_8x8_UNORM_BLOCK, ASTC_8x8_LDR, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
addDataFormatDesc(ASTC_8x8_SFLOAT_BLOCK, ASTC_8x8_HDR, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
addDataFormatDesc(ASTC_8x8_SRGB_BLOCK, ASTC_8x8_sRGB, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
addDataFormatDesc(ASTC_10x5_UNORM_BLOCK, ASTC_10x5_LDR, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
addDataFormatDesc(ASTC_10x5_SFLOAT_BLOCK, ASTC_10x5_HDR, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
addDataFormatDesc(ASTC_10x5_SRGB_BLOCK, ASTC_10x5_sRGB, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
addDataFormatDesc(ASTC_10x6_UNORM_BLOCK, ASTC_10x6_LDR, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
addDataFormatDesc(ASTC_10x6_SFLOAT_BLOCK, ASTC_10x6_HDR, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
addDataFormatDesc(ASTC_10x6_SRGB_BLOCK, ASTC_10x6_sRGB, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
addDataFormatDesc(ASTC_10x8_UNORM_BLOCK, ASTC_10x8_LDR, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
addDataFormatDesc(ASTC_10x8_SFLOAT_BLOCK, ASTC_10x8_HDR, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
addDataFormatDesc(ASTC_10x8_SRGB_BLOCK, ASTC_10x8_sRGB, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
addDataFormatDesc(ASTC_10x10_UNORM_BLOCK, ASTC_10x10_LDR, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
addDataFormatDesc(ASTC_10x10_SFLOAT_BLOCK, ASTC_10x10_HDR, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
addDataFormatDesc(ASTC_10x10_SRGB_BLOCK, ASTC_10x10_sRGB, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
addDataFormatDesc(ASTC_12x10_UNORM_BLOCK, ASTC_12x10_LDR, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
addDataFormatDesc(ASTC_12x10_SFLOAT_BLOCK, ASTC_12x10_HDR, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
addDataFormatDesc(ASTC_12x10_SRGB_BLOCK, ASTC_12x10_sRGB, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
addDataFormatDesc(ASTC_12x12_UNORM_BLOCK, ASTC_12x12_LDR, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
addDataFormatDesc(ASTC_12x12_SFLOAT_BLOCK, ASTC_12x12_HDR, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
addDataFormatDesc(ASTC_12x12_SRGB_BLOCK, ASTC_12x12_sRGB, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
addDfFormatDescChromaSubsampling(G8B8G8R8_422_UNORM, GBGR422, 1, 8, 2, 1, 4);
@ -715,8 +728,7 @@ void PixelFormats::initMTLPixelFormatCapabilities() {
addMTLPixelFormatDescSRGB(ASTC_12x12_sRGB, ASTC_12x12, RF, ASTC_12x12_LDR);
addMTLPixelFormatDesc(ASTC_12x12_HDR, ASTC_12x12, RF);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
addMTLPixelFormatDesc(BC1_RGBA, BC1_RGBA, RF);
addMTLPixelFormatDescSRGB(BC1_RGBA_sRGB, BC1_RGBA, RF, BC1_RGBA);
@ -733,7 +745,7 @@ void PixelFormats::initMTLPixelFormatCapabilities() {
addMTLPixelFormatDesc(BC7_RGBAUnorm, BC7_RGBA, RF);
addMTLPixelFormatDescSRGB(BC7_RGBAUnorm_sRGB, BC7_RGBA, RF, BC7_RGBAUnorm);
#pragma clang diagnostic pop
GODOT_CLANG_WARNING_POP
// YUV pixel formats
addMTLPixelFormatDesc(GBGR422, None, RF);
@ -954,8 +966,7 @@ void PixelFormats::modifyMTLFormatCapabilities(const MetalFeatures &p_feat) {
setMTLPixFmtCapsIf(noHDR_ASTC, ASTC_12x10_HDR, None);
setMTLPixFmtCapsIf(noHDR_ASTC, ASTC_12x12_HDR, None);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
bool noBC = !p_feat.supportsBCTextureCompression;
setMTLPixFmtCapsIf(noBC, BC1_RGBA, None);
@ -973,7 +984,7 @@ void PixelFormats::modifyMTLFormatCapabilities(const MetalFeatures &p_feat) {
setMTLPixFmtCapsIf(noBC, BC7_RGBAUnorm, None);
setMTLPixFmtCapsIf(noBC, BC7_RGBAUnorm_sRGB, None);
#pragma clang diagnostic pop
GODOT_CLANG_WARNING_POP
setMTLPixFmtCapsIf(iosOnly2, BGRA10_XR, None);
setMTLPixFmtCapsIf(iosOnly2, BGRA10_XR_sRGB, None);

View file

@ -28,13 +28,12 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#ifndef RENDERING_CONTEXT_DRIVER_METAL_H
#define RENDERING_CONTEXT_DRIVER_METAL_H
#pragma once
#ifdef METAL_ENABLED
#import "servers/rendering/rendering_context_driver.h"
#import "servers/rendering/rendering_device_driver.h"
#include "servers/rendering/rendering_context_driver.h"
#include "servers/rendering/rendering_device_driver.h"
#import <CoreGraphics/CGGeometry.h>
@ -143,5 +142,3 @@ public:
};
#endif // METAL_ENABLED
#endif // RENDERING_CONTEXT_DRIVER_METAL_H

View file

@ -28,12 +28,11 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#ifndef RENDERING_DEVICE_DRIVER_METAL_H
#define RENDERING_DEVICE_DRIVER_METAL_H
#pragma once
#import "metal_objects.h"
#import "servers/rendering/rendering_device_driver.h"
#include "servers/rendering/rendering_device_driver.h"
#import <Metal/Metal.h>
#import <variant>
@ -66,6 +65,8 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
RDD::Capabilities capabilities;
RDD::MultiviewCapabilities multiview_capabilities;
RDD::FragmentShadingRateCapabilities fsr_capabilities;
RDD::FragmentDensityMapCapabilities fdm_capabilities;
id<MTLBinaryArchive> archive = nil;
uint32_t archive_count = 0;
@ -316,7 +317,7 @@ public:
// ----- SUBPASS -----
virtual RenderPassID render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) override final;
virtual RenderPassID render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final;
virtual void render_pass_free(RenderPassID p_render_pass) override final;
// ----- COMMANDS -----
@ -421,6 +422,8 @@ public:
virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
virtual bool has_feature(Features p_feature) override final;
virtual const MultiviewCapabilities &get_multiview_capabilities() override final;
virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final;
virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final;
virtual String get_api_name() const override final { return "Metal"; }
virtual String get_api_version() const override final;
virtual String get_pipeline_cache_uuid() const override final;
@ -444,5 +447,3 @@ public:
RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver);
~RenderingDeviceDriverMetal();
};
#endif // RENDERING_DEVICE_DRIVER_METAL_H

View file

@ -53,10 +53,10 @@
#import "pixel_formats.h"
#import "rendering_context_driver_metal.h"
#import "core/io/compression.h"
#import "core/io/marshalls.h"
#import "core/string/ustring.h"
#import "core/templates/hash_map.h"
#include "core/io/compression.h"
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#import <Metal/MTLTexture.h>
#import <Metal/Metal.h>
@ -405,6 +405,15 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create_from_extension(uint64_
RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {
id<MTLTexture> src_texture = rid::get(p_original_texture);
NSUInteger slices = src_texture.arrayLength;
if (src_texture.textureType == MTLTextureTypeCube) {
// Metal expects Cube textures to have a slice count of 6.
slices = 6;
} else if (src_texture.textureType == MTLTextureTypeCubeArray) {
// Metal expects Cube Array textures to have 6 slices per layer.
slices *= 6;
}
#if DEV_ENABLED
if (src_texture.sampleCount > 1) {
// TODO(sgc): is it ok to create a shared texture from a multi-sample texture?
@ -434,7 +443,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_ori
id<MTLTexture> obj = [src_texture newTextureViewWithPixelFormat:format
textureType:src_texture.textureType
levels:NSMakeRange(0, src_texture.mipmapLevelCount)
slices:NSMakeRange(0, src_texture.arrayLength)
slices:NSMakeRange(0, slices)
swizzle:swizzle];
ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");
return rid::make(obj);
@ -566,7 +575,14 @@ void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture
r_layout->size = get_image_format_required_size(format, sz.width, sz.height, sz.depth, 1, &sbw, &sbh);
r_layout->row_pitch = r_layout->size / ((sbh / bh) * sz.depth);
r_layout->depth_pitch = r_layout->size / sz.depth;
r_layout->layer_pitch = r_layout->size / obj.arrayLength;
uint32_t array_length = obj.arrayLength;
if (obj.textureType == MTLTextureTypeCube) {
array_length = 6;
} else if (obj.textureType == MTLTextureTypeCubeArray) {
array_length *= 6;
}
r_layout->layer_pitch = r_layout->size / array_length;
} else {
CRASH_NOW_MSG("need to calculate layout for shared texture");
}
@ -974,7 +990,7 @@ RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextD
color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT);
subpass.color_references.push_back(color_ref);
RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1);
RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, RDD::AttachmentReference());
ERR_FAIL_COND_V(!render_pass, SwapChainID());
// Create the empty swap chain until it is resized.
@ -2460,6 +2476,8 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
HashMap<ShaderStage, MDLibrary *> libraries;
for (ShaderStageData &shader_data : binary_data.stages) {
r_shader_desc.stages.push_back(shader_data.stage);
SHA256Digest key = SHA256Digest(shader_data.source.ptr(), shader_data.source.length());
if (ShaderCacheEntry **p = _shader_cache.getptr(key); p != nullptr) {
@ -2506,7 +2524,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
su.writable = uniform.writable;
su.length = uniform.length;
su.binding = uniform.binding;
su.stages = uniform.stages;
su.stages = (ShaderStage)(uint8_t)uniform.stages;
uset.write[i] = su;
UniformInfo ui;
@ -2572,7 +2590,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
sc.type = c.type;
sc.constant_id = c.constant_id;
sc.int_value = c.int_value;
sc.stages = c.stages;
sc.stages = (ShaderStage)(uint8_t)c.stages;
r_shader_desc.specialization_constants.write[i] = sc;
}
@ -3044,7 +3062,7 @@ void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_c
String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const {
String path = OS::get_singleton()->get_user_data_dir() + "/metal/pipelines";
path += "." + context_device.name.validate_filename().replace(" ", "_").to_lower();
path += "." + context_device.name.validate_filename().replace_char(' ', '_').to_lower();
if (Engine::get_singleton()->is_editor_hint()) {
path += ".editor";
}
@ -3102,7 +3120,7 @@ Vector<uint8_t> RenderingDeviceDriverMetal::pipeline_cache_serialize() {
// ----- SUBPASS -----
RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) {
RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) {
PixelFormats &pf = *pixel_formats;
size_t subpass_count = p_subpasses.size();
@ -3899,16 +3917,16 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
MetalDeviceProperties const &props = (*device_properties);
MetalLimits const &limits = props.limits;
uint64_t safe_unbounded = ((uint64_t)1 << 30);
#if defined(DEV_ENABLED)
#define UNKNOWN(NAME) \
case NAME: \
WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
return (uint64_t)1 << 30;
return safe_unbounded;
#else
#define UNKNOWN(NAME) \
case NAME: \
return (uint64_t)1 << 30
return safe_unbounded
#endif
// clang-format off
@ -3981,6 +3999,8 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
return limits.maxThreadsPerThreadGroup.height;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxThreadsPerThreadGroup.depth;
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return limits.maxThreadGroupMemoryAllocation;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
return limits.maxViewportDimensionX;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@ -4002,12 +4022,12 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
return (uint64_t)((1.0 / limits.temporalScalerInputContentMinScale) * 1000'000);
case LIMIT_MAX_SHADER_VARYINGS:
return limits.maxShaderVaryings;
UNKNOWN(LIMIT_VRS_TEXEL_WIDTH);
UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
default:
ERR_FAIL_V(0);
default: {
#ifdef DEV_ENABLED
WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
return safe_unbounded;
}
}
// clang-format on
return 0;
@ -4024,17 +4044,8 @@ uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {
bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
switch (p_feature) {
case SUPPORTS_MULTIVIEW:
return multiview_capabilities.is_supported;
case SUPPORTS_FSR_HALF_FLOAT:
return true;
case SUPPORTS_ATTACHMENT_VRS:
// TODO(sgc): Maybe supported via https://developer.apple.com/documentation/metal/render_passes/rendering_at_different_rasterization_rates?language=objc
// See also:
//
// * https://forum.beyond3d.com/threads/variable-rate-shading-vs-variable-rate-rasterization.62243/post-2191363
//
return false;
case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
return true;
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
@ -4052,6 +4063,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capa
return multiview_capabilities;
}
const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverMetal::get_fragment_shading_rate_capabilities() {
return fsr_capabilities;
}
const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragment_density_map_capabilities() {
return fdm_capabilities;
}
String RenderingDeviceDriverMetal::get_api_version() const {
return vformat("%d.%d", version_major, version_minor);
}