Add Betsy to speed up BC6 compression
This commit is contained in:
parent
96be44c0ec
commit
394ea653aa
17 changed files with 1325 additions and 2 deletions
76
modules/betsy/CrossPlatformSettings_piece_all.glsl
Normal file
76
modules/betsy/CrossPlatformSettings_piece_all.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
|
||||
// Three-way min/max built from the two-argument GLSL built-ins.
#define min3(a, b, c) min(a, min(b, c))
#define max3(a, b, c) max(a, max(b, c))

// Vector type aliases mapped onto the GLSL built-in types.
#define float2 vec2
#define float3 vec3
#define float4 vec4

#define int2 ivec2
#define int3 ivec3
#define int4 ivec4

#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4

// Matrix type aliases.
#define float2x2 mat2
#define float3x3 mat3
#define float4x4 mat4
#define ogre_float4x3 mat3x4

// 16-bit unsigned aliases; here they expand to the 32-bit unsigned types.
#define ushort uint
#define ushort3 uint3
#define ushort4 uint4

// Short used for read operations. It is an int in GLSL and HLSL, and a ushort in Metal.
#define rshort int
#define rshort2 int2
#define rint int
// Short used for write operations. It is an int in GLSL, and a ushort in HLSL and Metal.
#define wshort2 int2
#define wshort3 int3

// 3x3 matrix construction helpers.
#define toFloat3x3(x) mat3(x)
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)

// Function-name aliases onto GLSL operators and built-ins.
#define mul(x, y) ((x) * (y))
#define saturate(x) clamp((x), 0.0, 1.0)
#define lerp mix
#define rsqrt inversesqrt
#define INLINE
#define NO_INTERPOLATION_PREFIX flat
#define NO_INTERPOLATION_SUFFIX

// These expand to nothing in this GLSL variant.
#define PARAMS_ARG_DECL
#define PARAMS_ARG

#define reversebits bitfieldReverse

// Texture sampling wrappers. The `sampler` argument is accepted but discarded by the expansion.
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
#define OGRE_ddx(val) dFdx(val)
#define OGRE_ddy(val) dFdy(val)
// Integer-coordinate texel fetches.
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)

#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)

// Single-channel gathers; the last textureGather argument selects the component (0 = red, 1 = green, 2 = blue).
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)

#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x

// Sampler arguments expand to nothing in this GLSL variant.
#define OGRE_SAMPLER_ARG_DECL(samplerName)
#define OGRE_SAMPLER_ARG(samplerName)

#define OGRE_Texture3D_float4 sampler3D
// Output / in-out parameter declarations.
#define OGRE_OUT_REF(declType, variableName) out declType variableName
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
|
||||
24
modules/betsy/SCsub
Normal file
24
modules/betsy/SCsub
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python
|
||||
Import("env")
Import("env_modules")

# Module build environment, cloned from the shared modules environment.
env_betsy = env_modules.Clone()

# Register the shader header for bc6h.glsl and make the generated
# *.glsl.gen.h files depend on the builder script.
env_betsy.GLSL_HEADER("bc6h.glsl")
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])

# Thirdparty source files
thirdparty_dir = "#thirdparty/betsy/"
thirdparty_obj = []
env_betsy.Prepend(CPPPATH=[thirdparty_dir])

# Separate clone with warnings disabled; no thirdparty objects are added
# to `thirdparty_obj` in this script.
env_thirdparty = env_betsy.Clone()
env_thirdparty.disable_warnings()
env.modules_sources += thirdparty_obj

# Godot source files
module_obj = []
env_betsy.add_source_files(module_obj, "*.cpp")
env.modules_sources += module_obj

# Needed to force rebuilding the module files when the thirdparty library is updated.
env.Depends(module_obj, thirdparty_obj)
|
||||
17
modules/betsy/UavCrossPlatform_piece_all.glsl
Normal file
17
modules/betsy/UavCrossPlatform_piece_all.glsl
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
// Image load wrappers; coordinates are converted to the signed integer vector imageLoad expects.
#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv))
#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw))

// 2D image writes. The 1- and 2-component variants pad the value with zeros up to four components.
#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0))
#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0))
#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value)

#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv))

// 3D and 2D-array image writes; `value` is forwarded to imageStore unchanged.
#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value)

#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)

//#define sharedOnlyBarrier memoryBarrierShared();barrier();
|
||||
653
modules/betsy/bc6h.glsl
Normal file
653
modules/betsy/bc6h.glsl
Normal file
|
|
@ -0,0 +1,653 @@
|
|||
#[versions]
|
||||
|
||||
signed = "#define SIGNED";
|
||||
unsigned = "";
|
||||
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
#include "UavCrossPlatform_piece_all.glsl"
|
||||
|
||||
#VERSION_DEFINES
|
||||
#define QUALITY
|
||||
|
||||
//SIGNED macro is WIP
|
||||
//#define SIGNED
|
||||
|
||||
// Returns, per component, the half-float bit pattern of `value` stored as a float
// (the unsigned result of packHalf2x16 converted to floating point).
float3 f32tof16(float3 value) {
	uint halfX = packHalf2x16(float2(value.x, 0.0));
	uint halfY = packHalf2x16(float2(value.y, 0.0));
	uint halfZ = packHalf2x16(float2(value.z, 0.0));
	return float3(halfX, halfY, halfZ);
}
|
||||
|
||||
// Interprets the low 16 bits of each component as a half float and expands it to 32-bit float.
float3 f16tof32(uint3 value) {
	float2 unpackedX = unpackHalf2x16(value.x);
	float2 unpackedY = unpackHalf2x16(value.y);
	float2 unpackedZ = unpackHalf2x16(value.z);
	return float3(unpackedX.x, unpackedY.x, unpackedZ.x);
}
|
||||
|
||||
// Scalar overload: half-float bit pattern of `value`, returned as a float.
float f32tof16(float value) {
	uint halfBits = packHalf2x16(float2(value, 0.0));
	return float(halfBits);
}
|
||||
|
||||
// Scalar overload: expands the half float held in the low 16 bits of `value`.
float f16tof32(uint value) {
	float2 unpacked = unpackHalf2x16(value);
	return unpacked.x;
}
|
||||
|
||||
// Source texture that main() gathers texels from.
layout(binding = 0) uniform sampler2D srcTexture;
// Destination image; main() stores one uint4 per invocation.
layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;

layout(push_constant, std430) uniform Params {
	// Multiplied with the invocation ID in main() to build UVs
	// (presumably 1 / texture size, going by the name -- confirm against the caller).
	float2 p_textureSizeRcp;
	// The two fields below are not referenced by the shader code in this file.
	uint padding0;
	uint padding1;
} params;

// Used as the initial per-channel minimum when building subset bounding boxes.
const float HALF_MAX = 65504.0f;
const uint PATTERN_NUM = 32u;
|
||||
|
||||
// Sum over the three channels of the squared log2 ratio (b + 1) / (a + 1).
float CalcMSLE(float3 a, float3 b) {
	float3 logRatio = log2((b + 1.0f) / (a + 1.0f));
	float3 squared = logRatio * logRatio;
	return squared.x + squared.y + squared.z;
}
|
||||
|
||||
// Returns 8, 2 or 15 for pattern `i`, decided by two 32-bit masks with one bit per pattern.
// The "8" mask takes priority over the "2" mask; 15 is the fallback.
uint PatternFixupID(uint i) {
	const uint FIXUP_IS_2_MASK = 3441033216u;
	const uint FIXUP_IS_8_MASK = 845414400u;

	if (((FIXUP_IS_8_MASK >> i) & 0x1u) != 0u) {
		return 8u;
	}
	if (((FIXUP_IS_2_MASK >> i) & 0x1u) != 0u) {
		return 2u;
	}
	return 15u;
}
|
||||
|
||||
// Returns bit `i` of the 16-bit mask for pattern `p`.
// Two consecutive patterns share one 32-bit word: even patterns use the low
// 16 bits, odd patterns the high 16 bits. Patterns >= 32 yield 0.
uint Pattern(uint p, uint i) {
	const uint PACKED_MASKS[16] = uint[16](
			2290666700u, 3972591342u, 4276930688u, 3967876808u,
			4293707776u, 3892379264u, 4278255592u, 4026597360u,
			9369360u, 147747072u, 1930428556u, 2362323200u,
			823134348u, 913073766u, 267393000u, 966553998u);

	uint wordIndex = p / 2u;
	uint enc = wordIndex < 16u ? PACKED_MASKS[wordIndex] : 0u;

	if ((p & 1u) != 0u) {
		enc = enc >> 16u;
	}
	return (enc >> i) & 0x1u;
}
|
||||
|
||||
#ifndef SIGNED
//UF

// QuantizeN: scales the half-float bit pattern of each component by 2^N / (0x7bff + 1).
float3 Quantize7(float3 x) {
	float3 halfBits = f32tof16(x);
	return (halfBits * 128.0f) / (0x7bff + 1.0f);
}

float3 Quantize9(float3 x) {
	float3 halfBits = f32tof16(x);
	return (halfBits * 512.0f) / (0x7bff + 1.0f);
}

float3 Quantize10(float3 x) {
	float3 halfBits = f32tof16(x);
	return (halfBits * 1024.0f) / (0x7bff + 1.0f);
}

// UnquantizeN: expands an N-bit value as (x * 65536 + 0x8000) / 2^N.
float3 Unquantize7(float3 x) {
	float3 scaled = x * 65536.0f + 0x8000;
	return scaled / 128.0f;
}

float3 Unquantize9(float3 x) {
	float3 scaled = x * 65536.0f + 0x8000;
	return scaled / 512.0f;
}

float3 Unquantize10(float3 x) {
	float3 scaled = x * 65536.0f + 0x8000;
	return scaled / 1024.0f;
}

// Blends two unquantized endpoints with `weight` (out of 64), rescales by 31/4096 and
// reinterprets the truncated result as half-float bits.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
	float3 blended = endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f;
	float3 comp = blended * (31.0f / 4096.0f);
	return f16tof32(uint3(comp));
}
#else
//SF

// Per-component sign that maps zero to +1 (unlike the built-in sign()).
float3 cmpSign(float3 value) {
	return float3(
			value.x >= 0.0f ? 1.0f : -1.0f,
			value.y >= 0.0f ? 1.0f : -1.0f,
			value.z >= 0.0f ? 1.0f : -1.0f);
}

// Signed QuantizeN: quantizes the magnitude with one bit less range and re-applies the sign.
float3 Quantize7(float3 x) {
	float3 scaledMagnitude = f32tof16(abs(x)) * 64.0f;
	return cmpSign(x) * scaledMagnitude / (0x7bff + 1.0f);
}

float3 Quantize9(float3 x) {
	float3 scaledMagnitude = f32tof16(abs(x)) * 256.0f;
	return cmpSign(x) * scaledMagnitude / (0x7bff + 1.0f);
}

float3 Quantize10(float3 x) {
	float3 scaledMagnitude = f32tof16(abs(x)) * 512.0f;
	return cmpSign(x) * scaledMagnitude / (0x7bff + 1.0f);
}

// Signed UnquantizeN: expands the magnitude, re-applies the sign, and forces any
// component whose magnitude reaches the top of the range to 32767.0.
float3 Unquantize7(float3 x) {
	float3 signVal = sign(x);
	float3 magnitude = abs(x);
	float3 finalVal = signVal * (magnitude * 32768.0f + 0x4000) / 64.0f;
	if (magnitude.x >= 64.0f) {
		finalVal.x = 32767.0;
	}
	if (magnitude.y >= 64.0f) {
		finalVal.y = 32767.0;
	}
	if (magnitude.z >= 64.0f) {
		finalVal.z = 32767.0;
	}
	return finalVal;
}

float3 Unquantize9(float3 x) {
	float3 signVal = sign(x);
	float3 magnitude = abs(x);
	float3 finalVal = signVal * (magnitude * 32768.0f + 0x4000) / 256.0f;
	if (magnitude.x >= 256.0f) {
		finalVal.x = 32767.0;
	}
	if (magnitude.y >= 256.0f) {
		finalVal.y = 32767.0;
	}
	if (magnitude.z >= 256.0f) {
		finalVal.z = 32767.0;
	}
	return finalVal;
}

float3 Unquantize10(float3 x) {
	float3 signVal = sign(x);
	float3 magnitude = abs(x);
	float3 finalVal = signVal * (magnitude * 32768.0f + 0x4000) / 512.0f;
	if (magnitude.x >= 512.0f) {
		finalVal.x = 32767.0;
	}
	if (magnitude.y >= 512.0f) {
		finalVal.y = 32767.0;
	}
	if (magnitude.z >= 512.0f) {
		finalVal.z = 32767.0;
	}
	return finalVal;
}

// Signed variant of the endpoint blend (rescale factor 31/2048).
// NOTE: the sign is not folded into the half bit pattern here; a sign-aware
// version existed only as commented-out code (signVal = comp >= 0 ? 0 : 0x8000,
// then f16tof32(uint3(signVal + abs(comp)))). The SIGNED path is marked WIP.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
	float3 blended = endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f;
	float3 comp = blended * (31.0f / 2048.0f);
	return f16tof32(uint3(comp));
}
#endif
|
||||
|
||||
// Exchanges two float3 values in place.
void Swap(inout float3 a, inout float3 b) {
	float3 previousA = a;
	a = b;
	b = previousA;
}
|
||||
|
||||
// Exchanges two scalar floats in place.
void Swap(inout float a, inout float b) {
	float previousA = a;
	a = b;
	b = previousA;
}
|
||||
|
||||
// Maps a texel's projected position onto a 3-bit index (0..7) between the two endpoint positions.
//
// When both endpoint positions are equal the original expression divides by zero.
// For texelPos != endPoint0Pos that gives +/-Inf, which clamps to 7 or 0, and that
// result is preserved here. For texelPos == endPoint0Pos it gives 0/0 = NaN, and
// clamp()/uint() of NaN is undefined in GLSL; that case now returns 0.
uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos) {
	float span = endPoint1Pos - endPoint0Pos;
	float offset = texelPos - endPoint0Pos;
	if (span == 0.0f) {
		return offset > 0.0f ? 7u : 0u;
	}
	float r = offset / span;
	return uint(clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f));
}
|
||||
|
||||
// Maps a texel's projected position onto a 4-bit index (0..15) between the two endpoint positions.
//
// When both endpoint positions are equal the original expression divides by zero.
// For texelPos != endPoint0Pos that gives +/-Inf, which clamps to 15 or 0, and that
// result is preserved here. For texelPos == endPoint0Pos it gives 0/0 = NaN, and
// clamp()/uint() of NaN is undefined in GLSL; that case now returns 0.
uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
	float span = endPoint1Pos - endPoint0Pos;
	float offset = texelPos - endPoint0Pos;
	if (span == 0.0f) {
		return offset > 0.0f ? 15u : 0u;
	}
	float r = offset / span;
	return uint(clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f));
}
|
||||
|
||||
// Converts each component of `v1` to an integer, keeps the bits selected by `mask`,
// and ORs in `signFlag` when the integer was negative. The result is written back as floats.
void SignExtend(inout float3 v1, uint mask, uint signFlag) {
	int keepBits = int(mask);
	int signBit = int(signFlag);
	int3 raw = int3(v1);

	int3 encoded;
	encoded.x = (raw.x & keepBits) | (raw.x < 0 ? signBit : 0);
	encoded.y = (raw.y & keepBits) | (raw.y < 0 ? signBit : 0);
	encoded.z = (raw.z & keepBits) | (raw.z < 0 ? signBit : 0);

	v1 = encoded;
}
|
||||
|
||||
// Encodes the 16 texels as a single-subset block (two 10-bit endpoints per channel,
// 4-bit indices), writes the packed bits into `block` and the summed MSLE of the
// encoding into `blockMSLE`.
//
// `block` is only OR-ed into for .y/.z/.w, so the caller must pass it zeroed.
//
// Changes versus the previous revision:
//  * The endpoint refinement now selects per channel. In GLSL `vecA == vecB` yields a
//    single bool (true only if all components match), so the old
//    `texels[i] == blockMin ? a : b` skipped a texel only when it equalled the bbox
//    corner in all three channels; equal()/mix() restore the component-wise select.
//  * A uniform block (zero bbox extent) no longer divides 0 by 0 when normalising the
//    block direction; its indices are set to zero directly.
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
	// compute endpoints (min/max RGB bbox)
	float3 blockMin = texels[0];
	float3 blockMax = texels[0];
	for (uint i = 1u; i < 16u; ++i) {
		blockMin = min(blockMin, texels[i]);
		blockMax = max(blockMax, texels[i]);
	}

	// refine endpoints in log2 RGB space
	// Per channel: smallest value above the bbox min and largest value below the bbox max.
	float3 refinedBlockMin = blockMax;
	float3 refinedBlockMax = blockMin;
	for (uint i = 0u; i < 16u; ++i) {
		refinedBlockMin = min(refinedBlockMin, mix(texels[i], refinedBlockMin, equal(texels[i], blockMin)));
		refinedBlockMax = max(refinedBlockMax, mix(texels[i], refinedBlockMax, equal(texels[i], blockMax)));
	}

	float3 logBlockMax = log2(blockMax + 1.0f);
	float3 logBlockMin = log2(blockMin + 1.0f);
	float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
	float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);
	// Move each endpoint inwards by at most 1/32 of the log-space extent.
	float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
	logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
	logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
	blockMin = exp2(logBlockMin) - 1.0f;
	blockMax = exp2(logBlockMax) - 1.0f;

	float3 blockDir = blockMax - blockMin;
	float blockDirSum = blockDir.x + blockDir.y + blockDir.z;
	// A zero sum means there is no direction to project onto (all texels equal).
	bool flatBlock = blockDirSum == 0.0f;
	blockDir = flatBlock ? float3(0.0f, 0.0f, 0.0f) : blockDir / blockDirSum;

	float3 endpoint0 = Quantize10(blockMin);
	float3 endpoint1 = Quantize10(blockMax);
	float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
	float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

	// check if endpoint swap is required
	// (texel 0's index is stored with one bit less, so it must stay below 8)
	float fixupTexelPos = f32tof16(dot(texels[0], blockDir));
	uint fixupIndex = flatBlock ? 0u : ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos);
	if (fixupIndex > 7) {
		Swap(endPoint0Pos, endPoint1Pos);
		Swap(endpoint0, endpoint1);
	}

	// compute indices
	uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
	for (uint i = 0u; i < 16u; ++i) {
		float texelPos = f32tof16(dot(texels[i], blockDir));
		indices[i] = flatBlock ? 0u : ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);
	}

	// compute compression error (MSLE)
	float3 endpoint0Unq = Unquantize10(endpoint0);
	float3 endpoint1Unq = Unquantize10(endpoint1);
	float msle = 0.0f;
	for (uint i = 0u; i < 16u; ++i) {
		float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
		float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);

		msle += CalcMSLE(texels[i], texelUnc);
	}

	// encode block for mode 11
	blockMSLE = msle;
	block.x = 0x03;

	// endpoints
	block.x |= uint(endpoint0.x) << 5u;
	block.x |= uint(endpoint0.y) << 15u;
	block.x |= uint(endpoint0.z) << 25u;
	block.y |= uint(endpoint0.z) >> 7u;
	block.y |= uint(endpoint1.x) << 3u;
	block.y |= uint(endpoint1.y) << 13u;
	block.y |= uint(endpoint1.z) << 23u;
	block.z |= uint(endpoint1.z) >> 9u;

	// indices
	block.z |= indices[0] << 1u;
	block.z |= indices[1] << 4u;
	block.z |= indices[2] << 8u;
	block.z |= indices[3] << 12u;
	block.z |= indices[4] << 16u;
	block.z |= indices[5] << 20u;
	block.z |= indices[6] << 24u;
	block.z |= indices[7] << 28u;
	block.w |= indices[8] << 0u;
	block.w |= indices[9] << 4u;
	block.w |= indices[10] << 8u;
	block.w |= indices[11] << 12u;
	block.w |= indices[12] << 16u;
	block.w |= indices[13] << 20u;
	block.w |= indices[14] << 24u;
	block.w |= indices[15] << 28u;
}
|
||||
|
||||
// Squared length of the part of (Point - PointOnLine) that remains after removing
// its projection onto LineDirection. LineDirection is used as given (not normalised here).
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) {
	float3 toPoint = Point - PointOnLine;
	float alongLine = dot(toPoint, LineDirection);
	float3 offLine = toPoint - alongLine * LineDirection;

	return dot(offLine, offLine);
}
|
||||
|
||||
// Scores a two-subset partition `pattern`: for each subset, takes the line from the
// subset's bbox minimum along its bbox diagonal and sums the squared distance of every
// texel to the line of its own subset. Lower is better.
//
// A subset whose texels are all identical has a zero-length diagonal. normalize() of a
// zero vector produces NaN, which made the whole score NaN; main() compares scores with
// `<`, so such patterns could never win (and a NaN for pattern 0 blocked every other
// pattern). A zero direction is used instead, which makes the distance for that subset
// the distance to its single point.
float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
	float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
	float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
	float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
	float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

	// Per-subset RGB bounding boxes.
	for (uint i = 0; i < 16; ++i) {
		uint paletteID = Pattern(pattern, i);
		if (paletteID == 0) {
			p0BlockMin = min(p0BlockMin, texels[i]);
			p0BlockMax = max(p0BlockMax, texels[i]);
		} else {
			p1BlockMin = min(p1BlockMin, texels[i]);
			p1BlockMax = max(p1BlockMax, texels[i]);
		}
	}

	float3 p0Extent = p0BlockMax - p0BlockMin;
	float3 p1Extent = p1BlockMax - p1BlockMin;
	// Only normalise when there is a direction to normalise.
	float3 p0BlockDir = dot(p0Extent, p0Extent) > 0.0f ? normalize(p0Extent) : float3(0.0f, 0.0f, 0.0f);
	float3 p1BlockDir = dot(p1Extent, p1Extent) > 0.0f ? normalize(p1Extent) : float3(0.0f, 0.0f, 0.0f);

	float sqDistanceFromLine = 0.0f;

	for (uint i = 0; i < 16; ++i) {
		uint paletteID = Pattern(pattern, i);
		if (paletteID == 0) {
			sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]);
		} else {
			sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]);
		}
	}

	return sqDistanceFromLine;
}
|
||||
|
||||
// Encodes the 16 texels as a two-subset block using partition `pattern`, trying both
// the "7.6" and the "9.5" endpoint layouts (base endpoint plus clamped signed deltas).
// `block` and `blockMSLE` are overwritten only if the better of the two layouts has a
// lower MSLE than the value already in `blockMSLE`.
//
// Change versus the previous revision: a subset whose texels are all identical has a
// zero bbox extent, and normalising its direction divided 0 by 0. The resulting NaN
// positions led to undefined index values. Such a subset now gets a zero direction and
// index 0 for all of its texels.
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) {
	float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
	float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
	float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
	float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

	// Per-subset RGB bounding boxes.
	for (uint i = 0u; i < 16u; ++i) {
		uint paletteID = Pattern(pattern, i);
		if (paletteID == 0) {
			p0BlockMin = min(p0BlockMin, texels[i]);
			p0BlockMax = max(p0BlockMax, texels[i]);
		} else {
			p1BlockMin = min(p1BlockMin, texels[i]);
			p1BlockMax = max(p1BlockMax, texels[i]);
		}
	}

	float3 p0BlockDir = p0BlockMax - p0BlockMin;
	float3 p1BlockDir = p1BlockMax - p1BlockMin;
	float p0DirSum = p0BlockDir.x + p0BlockDir.y + p0BlockDir.z;
	float p1DirSum = p1BlockDir.x + p1BlockDir.y + p1BlockDir.z;
	// Extents are non-negative per channel, so a zero sum means a zero extent.
	bool p0Flat = p0DirSum == 0.0f;
	bool p1Flat = p1DirSum == 0.0f;
	p0BlockDir = p0Flat ? float3(0.0f, 0.0f, 0.0f) : p0BlockDir / p0DirSum;
	p1BlockDir = p1Flat ? float3(0.0f, 0.0f, 0.0f) : p1BlockDir / p1DirSum;

	float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir));
	float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir));
	float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir));
	float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir));

	// The fixup texel of each subset is stored with one bit less, so its index must
	// stay below 4; swap that subset's endpoints otherwise.
	uint fixupID = PatternFixupID(pattern);
	float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir));
	float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir));
	uint p0FixupIndex = p0Flat ? 0u : ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
	uint p1FixupIndex = p1Flat ? 0u : ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos);
	if (p0FixupIndex > 3u) {
		Swap(p0Endpoint0Pos, p0Endpoint1Pos);
		Swap(p0BlockMin, p0BlockMax);
	}
	if (p1FixupIndex > 3u) {
		Swap(p1Endpoint0Pos, p1Endpoint1Pos);
		Swap(p1BlockMin, p1BlockMax);
	}

	uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
	for (uint i = 0u; i < 16u; ++i) {
		float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir));
		float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir));
		uint p0Index = p0Flat ? 0u : ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
		uint p1Index = p1Flat ? 0u : ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos);

		uint paletteID = Pattern(pattern, i);
		indices[i] = paletteID == 0u ? p0Index : p1Index;
	}

	// Quantize all four endpoints for both layouts (76x = 7-bit base, 95x = 9-bit base).
	float3 endpoint760 = floor(Quantize7(p0BlockMin));
	float3 endpoint761 = floor(Quantize7(p0BlockMax));
	float3 endpoint762 = floor(Quantize7(p1BlockMin));
	float3 endpoint763 = floor(Quantize7(p1BlockMax));

	float3 endpoint950 = floor(Quantize9(p0BlockMin));
	float3 endpoint951 = floor(Quantize9(p0BlockMax));
	float3 endpoint952 = floor(Quantize9(p1BlockMin));
	float3 endpoint953 = floor(Quantize9(p1BlockMax));

	// Endpoints 1..3 are stored as deltas from endpoint 0.
	endpoint761 = endpoint761 - endpoint760;
	endpoint762 = endpoint762 - endpoint760;
	endpoint763 = endpoint763 - endpoint760;

	endpoint951 = endpoint951 - endpoint950;
	endpoint952 = endpoint952 - endpoint950;
	endpoint953 = endpoint953 - endpoint950;

	// Clamp the deltas to what the 6-bit / 5-bit signed fields can hold.
	int maxVal76 = 0x1F;
	endpoint761 = clamp(endpoint761, -maxVal76, maxVal76);
	endpoint762 = clamp(endpoint762, -maxVal76, maxVal76);
	endpoint763 = clamp(endpoint763, -maxVal76, maxVal76);

	int maxVal95 = 0xF;
	endpoint951 = clamp(endpoint951, -maxVal95, maxVal95);
	endpoint952 = clamp(endpoint952, -maxVal95, maxVal95);
	endpoint953 = clamp(endpoint953, -maxVal95, maxVal95);

	// Reconstruct the endpoints from the clamped deltas to measure the error.
	float3 endpoint760Unq = Unquantize7(endpoint760);
	float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
	float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
	float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
	float3 endpoint950Unq = Unquantize9(endpoint950);
	float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
	float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
	float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);

	float msle76 = 0.0f;
	float msle95 = 0.0f;
	for (uint i = 0u; i < 16u; ++i) {
		uint paletteID = Pattern(pattern, i);

		float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
		float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
		float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
		float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;

		float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
		float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
		float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);

		msle76 += CalcMSLE(texels[i], texelUnc76);
		msle95 += CalcMSLE(texels[i], texelUnc95);
	}

	// Convert the signed deltas to their stored bit patterns.
	SignExtend(endpoint761, 0x1F, 0x20);
	SignExtend(endpoint762, 0x1F, 0x20);
	SignExtend(endpoint763, 0x1F, 0x20);

	SignExtend(endpoint951, 0xF, 0x10);
	SignExtend(endpoint952, 0xF, 0x10);
	SignExtend(endpoint953, 0xF, 0x10);

	// encode block
	float p2MSLE = min(msle76, msle95);
	if (p2MSLE < blockMSLE) {
		blockMSLE = p2MSLE;
		block = uint4(0u, 0u, 0u, 0u);

		if (p2MSLE == msle76) {
			// 7.6
			block.x = 0x1u;
			block.x |= (uint(endpoint762.y) & 0x20u) >> 3u;
			block.x |= (uint(endpoint763.y) & 0x10u) >> 1u;
			block.x |= (uint(endpoint763.y) & 0x20u) >> 1u;
			block.x |= uint(endpoint760.x) << 5u;
			block.x |= (uint(endpoint763.z) & 0x01u) << 12u;
			block.x |= (uint(endpoint763.z) & 0x02u) << 12u;
			block.x |= (uint(endpoint762.z) & 0x10u) << 10u;
			block.x |= uint(endpoint760.y) << 15u;
			block.x |= (uint(endpoint762.z) & 0x20u) << 17u;
			block.x |= (uint(endpoint763.z) & 0x04u) << 21u;
			block.x |= (uint(endpoint762.y) & 0x10u) << 20u;
			block.x |= uint(endpoint760.z) << 25u;
			block.y |= (uint(endpoint763.z) & 0x08u) >> 3u;
			block.y |= (uint(endpoint763.z) & 0x20u) >> 4u;
			block.y |= (uint(endpoint763.z) & 0x10u) >> 2u;
			block.y |= uint(endpoint761.x) << 3u;
			block.y |= (uint(endpoint762.y) & 0x0Fu) << 9u;
			block.y |= uint(endpoint761.y) << 13u;
			block.y |= (uint(endpoint763.y) & 0x0Fu) << 19u;
			block.y |= uint(endpoint761.z) << 23u;
			block.y |= (uint(endpoint762.z) & 0x07u) << 29u;
			block.z |= (uint(endpoint762.z) & 0x08u) >> 3u;
			block.z |= uint(endpoint762.x) << 1u;
			block.z |= uint(endpoint763.x) << 7u;
		} else {
			// 9.5
			block.x = 0xEu;
			block.x |= uint(endpoint950.x) << 5u;
			block.x |= (uint(endpoint952.z) & 0x10u) << 10u;
			block.x |= uint(endpoint950.y) << 15u;
			block.x |= (uint(endpoint952.y) & 0x10u) << 20u;
			block.x |= uint(endpoint950.z) << 25u;
			block.y |= uint(endpoint950.z) >> 7u;
			block.y |= (uint(endpoint953.z) & 0x10u) >> 2u;
			block.y |= uint(endpoint951.x) << 3u;
			block.y |= (uint(endpoint953.y) & 0x10u) << 4u;
			block.y |= (uint(endpoint952.y) & 0x0Fu) << 9u;
			block.y |= uint(endpoint951.y) << 13u;
			block.y |= (uint(endpoint953.z) & 0x01u) << 18u;
			block.y |= (uint(endpoint953.y) & 0x0Fu) << 19u;
			block.y |= uint(endpoint951.z) << 23u;
			block.y |= (uint(endpoint953.z) & 0x02u) << 27u;
			block.y |= uint(endpoint952.z) << 29u;
			block.z |= (uint(endpoint952.z) & 0x08u) >> 3u;
			block.z |= uint(endpoint952.x) << 1u;
			block.z |= (uint(endpoint953.z) & 0x04u) << 4u;
			block.z |= uint(endpoint953.x) << 7u;
			block.z |= (uint(endpoint953.z) & 0x08u) << 9u;
		}

		block.z |= pattern << 13u;
		// Index layout depends on which texel is the second subset's fixup texel:
		// texel 0 and the fixup texel take 2 bits, all others 3 bits.
		uint blockFixupID = PatternFixupID(pattern);
		if (blockFixupID == 15u) {
			block.z |= indices[0] << 18u;
			block.z |= indices[1] << 20u;
			block.z |= indices[2] << 23u;
			block.z |= indices[3] << 26u;
			block.z |= indices[4] << 29u;
			block.w |= indices[5] << 0u;
			block.w |= indices[6] << 3u;
			block.w |= indices[7] << 6u;
			block.w |= indices[8] << 9u;
			block.w |= indices[9] << 12u;
			block.w |= indices[10] << 15u;
			block.w |= indices[11] << 18u;
			block.w |= indices[12] << 21u;
			block.w |= indices[13] << 24u;
			block.w |= indices[14] << 27u;
			block.w |= indices[15] << 30u;
		} else if (blockFixupID == 2u) {
			block.z |= indices[0] << 18u;
			block.z |= indices[1] << 20u;
			block.z |= indices[2] << 23u;
			block.z |= indices[3] << 25u;
			block.z |= indices[4] << 28u;
			// Index 5 straddles the z/w boundary: lowest bit in z, upper bits in w.
			block.z |= indices[5] << 31u;
			block.w |= indices[5] >> 1u;
			block.w |= indices[6] << 2u;
			block.w |= indices[7] << 5u;
			block.w |= indices[8] << 8u;
			block.w |= indices[9] << 11u;
			block.w |= indices[10] << 14u;
			block.w |= indices[11] << 17u;
			block.w |= indices[12] << 20u;
			block.w |= indices[13] << 23u;
			block.w |= indices[14] << 26u;
			block.w |= indices[15] << 29u;
		} else {
			block.z |= indices[0] << 18u;
			block.z |= indices[1] << 20u;
			block.z |= indices[2] << 23u;
			block.z |= indices[3] << 26u;
			block.z |= indices[4] << 29u;
			block.w |= indices[5] << 0u;
			block.w |= indices[6] << 3u;
			block.w |= indices[7] << 6u;
			block.w |= indices[8] << 9u;
			block.w |= indices[9] << 11u;
			block.w |= indices[10] << 14u;
			block.w |= indices[11] << 17u;
			block.w |= indices[12] << 20u;
			block.w |= indices[13] << 23u;
			block.w |= indices[14] << 26u;
			block.w |= indices[15] << 29u;
		}
	}
}
|
||||
|
||||
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// One invocation encodes one 4x4 texel block and stores the resulting uint4 at the
// invocation's own xy coordinate in dstTexture.
void main() {
	// gather texels for current 4x4 block
	// 0 1 2 3
	// 4 5 6 7
	// 8 9 10 11
	// 12 13 14 15
	float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
	float stepX = 2.0f * params.p_textureSizeRcp.x;
	float stepY = 2.0f * params.p_textureSizeRcp.y;

	// One gather location per 2x2 quadrant: top-left, top-right, bottom-left, bottom-right.
	float2 quadUV[4];
	quadUV[0] = uv;
	quadUV[1] = uv + float2(stepX, 0.0f);
	quadUV[2] = uv + float2(0.0f, stepY);
	quadUV[3] = uv + float2(stepX, stepY);

	float3 texels[16];
	for (uint quad = 0u; quad < 4u; ++quad) {
		float4 reds = OGRE_GatherRed(srcTexture, pointSampler, quadUV[quad]);
		float4 greens = OGRE_GatherGreen(srcTexture, pointSampler, quadUV[quad]);
		float4 blues = OGRE_GatherBlue(srcTexture, pointSampler, quadUV[quad]);

		// Index of the quadrant's first texel in the 4x4 layout above: 0, 2, 8, 10.
		uint first = (quad & 1u) * 2u + (quad >> 1u) * 8u;
		// Gather components map to the quadrant as: w, z on the first row; x, y on the second.
		texels[first] = float3(reds.w, greens.w, blues.w);
		texels[first + 1u] = float3(reds.z, greens.z, blues.z);
		texels[first + 4u] = float3(reds.x, greens.x, blues.x);
		texels[first + 5u] = float3(reds.y, greens.y, blues.y);
	}

	uint4 block = uint4(0u, 0u, 0u, 0u);
	float blockMSLE = 0.0f;

	// Single-subset encoding first; it sets both `block` and `blockMSLE`.
	EncodeP1(block, blockMSLE, texels);

#ifdef QUALITY
	// Pick the two-subset pattern with the lowest score, then let EncodeP2Pattern
	// replace the block only if it beats the single-subset error.
	uint bestPattern = 0u;
	float bestScore = EvaluateP2Pattern(0, texels);

	for (uint candidate = 1u; candidate < 32u; ++candidate) {
		float candidateScore = EvaluateP2Pattern(candidate, texels);

		if (candidateScore < bestScore) {
			bestScore = candidateScore;
			bestPattern = candidate;
		}
	}

	EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
#endif

	imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
}
|
||||
6
modules/betsy/config.py
Normal file
6
modules/betsy/config.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
def can_build(env, platform):
    """Return whether this module should be built: the value of ``env.editor_build``.

    ``platform`` is accepted but not consulted.
    """
    is_editor_build = env.editor_build
    return is_editor_build
|
||||
|
||||
|
||||
def configure(env):
    """No-op: this module makes no changes to ``env``."""
    return None
|
||||
354
modules/betsy/image_compress_betsy.cpp
Normal file
354
modules/betsy/image_compress_betsy.cpp
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
/**************************************************************************/
|
||||
/* image_compress_betsy.cpp */
|
||||
/**************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/**************************************************************************/
|
||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/**************************************************************************/
|
||||
|
||||
#include "image_compress_betsy.h"
|
||||
|
||||
#include "servers/rendering/rendering_device_binds.h"
|
||||
#include "servers/rendering/rendering_server_default.h"
|
||||
|
||||
#if defined(VULKAN_ENABLED)
|
||||
#include "drivers/vulkan/rendering_context_driver_vulkan.h"
|
||||
#endif
|
||||
|
||||
#include "bc6h.glsl.gen.h"
|
||||
|
||||
// Push constant block handed to the BC6H compute shader.
// _compress_betsy() fills sizeX/sizeY with 1/width and 1/height of the
// mip level being encoded; the two padding words are written as zero.
struct BC6PushConstant {
	float sizeX; // Reciprocal of the source mip width.
	float sizeY; // Reciprocal of the source mip height.
	uint32_t padding[2]; // Unused filler words.
};
|
||||
|
||||
// Rounds `n` up to the nearest multiple of `m`.
//
// A value that is already a multiple of `m` is returned unchanged; the
// previous formula returned `n + m` in that case, which made the compute
// dispatch launch a superfluous workgroup row/column for sizes that are
// exact multiples of the workgroup footprint.
//
// Expects n >= 0 and m > 0.
static int get_next_multiple(int n, int m) {
	const int remainder = n % m;
	if (remainder == 0) {
		return n;
	}
	return n + (m - remainder);
}
|
||||
|
||||
static bool is_image_signed(const Image *r_img) {
|
||||
if (r_img->get_format() >= Image::FORMAT_RH && r_img->get_format() <= Image::FORMAT_RGBAH) {
|
||||
const uint16_t *img_data = reinterpret_cast<const uint16_t *>(r_img->ptr());
|
||||
const uint64_t img_size = r_img->get_data_size() / 2;
|
||||
|
||||
for (uint64_t i = 0; i < img_size; i++) {
|
||||
if ((img_data[i] & 0x8000) != 0 && (img_data[i] & 0x7fff) != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (r_img->get_format() >= Image::FORMAT_RF && r_img->get_format() <= Image::FORMAT_RGBAF) {
|
||||
const uint32_t *img_data = reinterpret_cast<const uint32_t *>(r_img->ptr());
|
||||
const uint64_t img_size = r_img->get_data_size() / 4;
|
||||
|
||||
for (uint64_t i = 0; i < img_size; i++) {
|
||||
if ((img_data[i] & 0x80000000) != 0 && (img_data[i] & 0x7fffffff) != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
uint64_t start_time = OS::get_singleton()->get_ticks_msec();
|
||||
|
||||
if (r_img->is_compressed()) {
|
||||
return ERR_INVALID_DATA;
|
||||
}
|
||||
|
||||
ERR_FAIL_COND_V_MSG(r_img->get_format() < Image::FORMAT_RF || r_img->get_format() > Image::FORMAT_RGBE9995, ERR_INVALID_DATA, "Image is not an HDR image.");
|
||||
|
||||
Error err = OK;
|
||||
|
||||
// Create local RD.
|
||||
RenderingContextDriver *rcd = nullptr;
|
||||
RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
|
||||
|
||||
if (rd == nullptr) {
|
||||
#if defined(RD_ENABLED)
|
||||
#if defined(VULKAN_ENABLED)
|
||||
rcd = memnew(RenderingContextDriverVulkan);
|
||||
rd = memnew(RenderingDevice);
|
||||
#endif
|
||||
#endif
|
||||
if (rcd != nullptr && rd != nullptr) {
|
||||
err = rcd->initialize();
|
||||
if (err == OK) {
|
||||
err = rd->initialize(rcd);
|
||||
}
|
||||
|
||||
if (err != OK) {
|
||||
memdelete(rd);
|
||||
memdelete(rcd);
|
||||
rd = nullptr;
|
||||
rcd = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ERR_FAIL_NULL_V_MSG(rd, err, "Unable to create a local RenderingDevice.");
|
||||
|
||||
Ref<RDShaderFile> compute_shader;
|
||||
compute_shader.instantiate();
|
||||
|
||||
// Destination format.
|
||||
Image::Format dest_format = Image::FORMAT_MAX;
|
||||
|
||||
String version = "";
|
||||
|
||||
switch (p_format) {
|
||||
case BETSY_FORMAT_BC6: {
|
||||
err = compute_shader->parse_versions_from_text(bc6h_shader_glsl);
|
||||
|
||||
if (is_image_signed(r_img)) {
|
||||
dest_format = Image::FORMAT_BPTC_RGBF;
|
||||
version = "signed";
|
||||
} else {
|
||||
dest_format = Image::FORMAT_BPTC_RGBFU;
|
||||
version = "unsigned";
|
||||
}
|
||||
|
||||
} break;
|
||||
|
||||
default:
|
||||
err = ERR_INVALID_PARAMETER;
|
||||
break;
|
||||
}
|
||||
|
||||
if (err != OK) {
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// Compile the shader, return early if invalid.
|
||||
RID shader = rd->shader_create_from_spirv(compute_shader->get_spirv_stages(version));
|
||||
|
||||
if (shader.is_null()) {
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
RID pipeline = rd->compute_pipeline_create(shader);
|
||||
|
||||
// src_texture format information.
|
||||
RD::TextureFormat src_texture_format;
|
||||
{
|
||||
src_texture_format.array_layers = 1;
|
||||
src_texture_format.depth = 1;
|
||||
src_texture_format.mipmaps = 1;
|
||||
src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
|
||||
src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
|
||||
}
|
||||
|
||||
switch (r_img->get_format()) {
|
||||
case Image::FORMAT_RH:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGH:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R16G16_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBH:
|
||||
r_img->convert(Image::FORMAT_RGBAH);
|
||||
src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBAH:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RF:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R32_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGF:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R32G32_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBF:
|
||||
r_img->convert(Image::FORMAT_RGBAF);
|
||||
src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBAF:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBE9995:
|
||||
src_texture_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
|
||||
break;
|
||||
|
||||
default: {
|
||||
rd->free(shader);
|
||||
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the sampler state.
|
||||
RD::SamplerState src_sampler_state;
|
||||
{
|
||||
src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
RID src_sampler = rd->sampler_create(src_sampler_state);
|
||||
|
||||
// For the destination format just copy the source format and change the usage bits.
|
||||
RD::TextureFormat dst_texture_format = src_texture_format;
|
||||
dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
|
||||
dst_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
|
||||
|
||||
const int mip_count = r_img->get_mipmap_count() + 1;
|
||||
|
||||
// Container for the compressed data.
|
||||
Vector<uint8_t> dst_data;
|
||||
dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps()));
|
||||
uint8_t *dst_data_ptr = dst_data.ptrw();
|
||||
|
||||
Vector<Vector<uint8_t>> src_images;
|
||||
src_images.push_back(Vector<uint8_t>());
|
||||
Vector<uint8_t> *src_image_ptr = src_images.ptrw();
|
||||
|
||||
// Compress each mipmap.
|
||||
for (int i = 0; i < mip_count; i++) {
|
||||
int64_t ofs, size;
|
||||
int width, height;
|
||||
r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height);
|
||||
|
||||
// Set the source texture width and size.
|
||||
src_texture_format.height = height;
|
||||
src_texture_format.width = width;
|
||||
|
||||
// Set the destination texture width and size.
|
||||
dst_texture_format.height = (height + 3) >> 2;
|
||||
dst_texture_format.width = (width + 3) >> 2;
|
||||
|
||||
// Create a buffer filled with the source mip layer data.
|
||||
src_image_ptr[0].resize(size);
|
||||
memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size);
|
||||
|
||||
// Create the textures on the GPU.
|
||||
RID src_texture = rd->texture_create(src_texture_format, RD::TextureView(), src_images);
|
||||
RID dst_texture = rd->texture_create(dst_texture_format, RD::TextureView());
|
||||
|
||||
if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) {
|
||||
BC6PushConstant push_constant;
|
||||
push_constant.sizeX = 1.0f / width;
|
||||
push_constant.sizeY = 1.0f / height;
|
||||
push_constant.padding[0] = 0;
|
||||
push_constant.padding[1] = 0;
|
||||
|
||||
Vector<RD::Uniform> uniforms;
|
||||
{
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
||||
u.binding = 0;
|
||||
u.append_id(src_sampler);
|
||||
u.append_id(src_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
||||
u.binding = 1;
|
||||
u.append_id(dst_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
}
|
||||
|
||||
RID uniform_set = rd->uniform_set_create(uniforms, shader, 0);
|
||||
RD::ComputeListID compute_list = rd->compute_list_begin();
|
||||
|
||||
rd->compute_list_bind_compute_pipeline(compute_list, pipeline);
|
||||
rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
||||
rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
|
||||
rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
||||
rd->compute_list_end();
|
||||
}
|
||||
|
||||
rd->submit();
|
||||
rd->sync();
|
||||
|
||||
// Copy data from the GPU to the buffer.
|
||||
const Vector<uint8_t> texture_data = rd->texture_get_data(dst_texture, 0);
|
||||
int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i);
|
||||
|
||||
memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
|
||||
|
||||
// Free the source and dest texture.
|
||||
rd->free(dst_texture);
|
||||
rd->free(src_texture);
|
||||
}
|
||||
|
||||
src_images.clear();
|
||||
|
||||
// Set the compressed data to the image.
|
||||
r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data);
|
||||
|
||||
// Free the shader (dependencies will be cleared automatically).
|
||||
rd->free(src_sampler);
|
||||
rd->free(shader);
|
||||
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
// BPTC compression entry point registered with Image.
// Only HDR source formats (FORMAT_RF..FORMAT_RGBE9995) are handled, and they
// are encoded as BC6; every other format yields ERR_UNAVAILABLE.
// `p_channels` is not consulted.
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
	const Image::Format source_format = r_img->get_format();
	const bool is_hdr = source_format >= Image::FORMAT_RF && source_format <= Image::FORMAT_RGBE9995;

	if (!is_hdr) {
		return ERR_UNAVAILABLE;
	}

	return _compress_betsy(BETSY_FORMAT_BC6, r_img);
}
|
||||
44
modules/betsy/image_compress_betsy.h
Normal file
44
modules/betsy/image_compress_betsy.h
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/**************************************************************************/
|
||||
/* image_compress_betsy.h */
|
||||
/**************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/**************************************************************************/
|
||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/**************************************************************************/
|
||||
|
||||
#ifndef IMAGE_COMPRESS_BETSY_H
|
||||
#define IMAGE_COMPRESS_BETSY_H
|
||||
|
||||
#include "core/io/image.h"
|
||||
|
||||
// Block-compression targets that the Betsy GPU encoder can produce.
enum BetsyFormat {
	BETSY_FORMAT_BC6 = 0, // BC6H (HDR), signed or unsigned variant chosen at encode time.
};
|
||||
|
||||
// Compresses `r_img` in place to `p_format` using a local RenderingDevice and the Betsy compute shaders.
// Rejects already-compressed and non-HDR images with ERR_INVALID_DATA.
Error _compress_betsy(BetsyFormat p_format, Image *r_img);
|
||||
|
||||
// BPTC compression hook: encodes HDR images (FORMAT_RF..FORMAT_RGBE9995) as BC6 via _compress_betsy(),
// and returns ERR_UNAVAILABLE for any other source format.
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);
|
||||
|
||||
#endif // IMAGE_COMPRESS_BETSY_H
|
||||
47
modules/betsy/register_types.cpp
Normal file
47
modules/betsy/register_types.cpp
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
/**************************************************************************/
|
||||
/* register_types.cpp */
|
||||
/**************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/**************************************************************************/
|
||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/**************************************************************************/
|
||||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "image_compress_betsy.h"
|
||||
|
||||
// Module init hook: at the scene initialization level, installs the Betsy
// encoder as Image's RenderingDevice-based BPTC compression function.
// Other initialization levels are ignored.
void initialize_betsy_module(ModuleInitializationLevel p_level) {
	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
		Image::_image_compress_bptc_rd_func = _betsy_compress_bptc;
	}
}
|
||||
|
||||
// Module teardown hook. Nothing is released at any level; the compression
// callback installed during initialization is left in place.
void uninitialize_betsy_module(ModuleInitializationLevel p_level) {
	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
		// Nothing to clean up.
	}
}
|
||||
39
modules/betsy/register_types.h
Normal file
39
modules/betsy/register_types.h
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/**************************************************************************/
|
||||
/* register_types.h */
|
||||
/**************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/**************************************************************************/
|
||||
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
||||
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/**************************************************************************/
|
||||
|
||||
#ifndef BETSY_REGISTER_TYPES_H
|
||||
#define BETSY_REGISTER_TYPES_H
|
||||
|
||||
#include "modules/register_module_types.h"
|
||||
|
||||
// Registers the Betsy BPTC compressor with Image when called at the scene initialization level.
void initialize_betsy_module(ModuleInitializationLevel p_level);
|
||||
// Module teardown counterpart of initialize_betsy_module(); currently performs no work.
void uninitialize_betsy_module(ModuleInitializationLevel p_level);
|
||||
|
||||
#endif // BETSY_REGISTER_TYPES_H
|
||||
Loading…
Add table
Add a link
Reference in a new issue