generated from hertog/godot-module-template
Initial commit
This commit is contained in:
commit
65227bf3a5
12416 changed files with 6001067 additions and 0 deletions
1159
engine/thirdparty/README.md
vendored
Normal file
1159
engine/thirdparty/README.md
vendored
Normal file
File diff suppressed because it is too large
Load diff
2656
engine/thirdparty/amd-fsr/ffx_a.h
vendored
Normal file
2656
engine/thirdparty/amd-fsr/ffx_a.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1199
engine/thirdparty/amd-fsr/ffx_fsr1.h
vendored
Normal file
1199
engine/thirdparty/amd-fsr/ffx_fsr1.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
19
engine/thirdparty/amd-fsr/license.txt
vendored
Normal file
19
engine/thirdparty/amd-fsr/license.txt
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
21
engine/thirdparty/amd-fsr2/LICENSE.txt
vendored
Normal file
21
engine/thirdparty/amd-fsr2/LICENSE.txt
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
FidelityFX Super Resolution 2.2
|
||||
=================================
|
||||
Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
81
engine/thirdparty/amd-fsr2/ffx_assert.cpp
vendored
Normal file
81
engine/thirdparty/amd-fsr2/ffx_assert.cpp
vendored
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "ffx_assert.h"
|
||||
#include <stdlib.h> // for malloc()
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h> // required for OutputDebugString()
|
||||
#include <stdio.h> // required for sprintf_s
|
||||
#endif // #ifndef _WIN32
|
||||
|
||||
static FfxAssertCallback s_assertCallback;
|
||||
|
||||
// set the printing callback function
|
||||
void ffxAssertSetPrintingCallback(FfxAssertCallback callback)
|
||||
{
|
||||
s_assertCallback = callback;
|
||||
return;
|
||||
}
|
||||
|
||||
// implementation of assert reporting
|
||||
bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* message)
|
||||
{
|
||||
if (!file) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// form the final assertion string and output to the TTY.
|
||||
const size_t bufferSize = static_cast<size_t>(snprintf(nullptr, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, message ? message : condition)) + 1;
|
||||
char* tempBuf = static_cast<char*>(malloc(bufferSize));
|
||||
if (!tempBuf) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!message) {
|
||||
sprintf_s(tempBuf, bufferSize, "%s(%d): ASSERTION FAILED. %s\n", file, line, condition);
|
||||
} else {
|
||||
sprintf_s(tempBuf, bufferSize, "%s(%d): ASSERTION FAILED. %s\n", file, line, message);
|
||||
}
|
||||
|
||||
if (!s_assertCallback) {
|
||||
OutputDebugStringA(tempBuf);
|
||||
} else {
|
||||
s_assertCallback(tempBuf);
|
||||
}
|
||||
|
||||
// free the buffer.
|
||||
free(tempBuf);
|
||||
|
||||
#else
|
||||
FFX_UNUSED(line);
|
||||
FFX_UNUSED(condition);
|
||||
FFX_UNUSED(message);
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
132
engine/thirdparty/amd-fsr2/ffx_assert.h
vendored
Normal file
132
engine/thirdparty/amd-fsr2/ffx_assert.h
vendored
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ffx_types.h"
|
||||
#include "ffx_util.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // #ifdef __cplusplus
|
||||
|
||||
#ifdef _DEBUG
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifdef DISABLE_FFX_DEBUG_BREAK
|
||||
#define FFX_DEBUG_BREAK \
|
||||
{ \
|
||||
}
|
||||
#else
|
||||
/// Macro to force the debugger to break at this point in the code.
|
||||
#define FFX_DEBUG_BREAK __debugbreak();
|
||||
#endif
|
||||
#else
|
||||
#define FFX_DEBUG_BREAK \
|
||||
{ \
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
// don't allow debug break in release builds.
|
||||
#define FFX_DEBUG_BREAK
|
||||
#endif
|
||||
|
||||
/// A typedef for the callback function for assert printing.
|
||||
///
|
||||
/// This can be used to re-route printing of assert messages from the FFX backend
|
||||
/// to another destination. For example instead of the default behaviour of printing
|
||||
/// the assert messages to the debugger's TTY the message can be re-routed to a
|
||||
/// MessageBox in a GUI application.
|
||||
///
|
||||
/// @param [in] message The message generated by the assert.
|
||||
///
|
||||
typedef void (*FfxAssertCallback)(const char* message);
|
||||
|
||||
/// Function to report an assert.
|
||||
///
|
||||
/// @param [in] file The name of the file as a string.
|
||||
/// @param [in] line The index of the line in the file.
|
||||
/// @param [in] condition The boolean condition that was tested.
|
||||
/// @param [in] msg The optional message to print.
|
||||
///
|
||||
/// @returns
|
||||
/// Always returns true.
|
||||
///
|
||||
FFX_API bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* msg);
|
||||
|
||||
/// Provides the ability to set a callback for assert messages.
|
||||
///
|
||||
/// @param [in] callback The callback function that will receive assert messages.
|
||||
///
|
||||
FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
|
||||
|
||||
#ifdef _DEBUG
|
||||
/// Standard assert macro.
|
||||
#define FFX_ASSERT(condition) \
|
||||
do \
|
||||
{ \
|
||||
if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, NULL)) \
|
||||
FFX_DEBUG_BREAK \
|
||||
} while (0)
|
||||
|
||||
/// Assert macro with message.
|
||||
#define FFX_ASSERT_MESSAGE(condition, msg) \
|
||||
do \
|
||||
{ \
|
||||
if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, msg)) \
|
||||
FFX_DEBUG_BREAK \
|
||||
} while (0)
|
||||
|
||||
/// Assert macro that always fails.
|
||||
#define FFX_ASSERT_FAIL(message) \
|
||||
do \
|
||||
{ \
|
||||
ffxAssertReport(__FILE__, __LINE__, NULL, message); \
|
||||
FFX_DEBUG_BREAK \
|
||||
} while (0)
|
||||
#else
|
||||
// asserts disabled
|
||||
#define FFX_ASSERT(condition) \
|
||||
do \
|
||||
{ \
|
||||
FFX_UNUSED(condition); \
|
||||
} while (0)
|
||||
|
||||
#define FFX_ASSERT_MESSAGE(condition, message) \
|
||||
do \
|
||||
{ \
|
||||
FFX_UNUSED(condition); \
|
||||
FFX_UNUSED(message); \
|
||||
} while (0)
|
||||
|
||||
#define FFX_ASSERT_FAIL(message) \
|
||||
do \
|
||||
{ \
|
||||
FFX_UNUSED(message); \
|
||||
} while (0)
|
||||
#endif // #if _DEBUG
|
||||
|
||||
/// Simple static assert.
|
||||
#define FFX_STATIC_ASSERT(condition) static_assert(condition, #condition)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // #ifdef __cplusplus
|
||||
59
engine/thirdparty/amd-fsr2/ffx_error.h
vendored
Normal file
59
engine/thirdparty/amd-fsr2/ffx_error.h
vendored
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ffx_types.h"
|
||||
|
||||
/// Typedef for error codes returned from functions in the FidelityFX SDK.
|
||||
typedef int32_t FfxErrorCode;
|
||||
|
||||
static const FfxErrorCode FFX_OK = 0; ///< The operation completed successfully.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_POINTER = 0x80000000; ///< The operation failed due to an invalid pointer.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_ALIGNMENT = 0x80000001; ///< The operation failed due to an invalid alignment.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_SIZE = 0x80000002; ///< The operation failed due to an invalid size.
|
||||
static const FfxErrorCode FFX_EOF = 0x80000003; ///< The end of the file was encountered.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_PATH = 0x80000004; ///< The operation failed because the specified path was invalid.
|
||||
static const FfxErrorCode FFX_ERROR_EOF = 0x80000005; ///< The operation failed because end of file was reached.
|
||||
static const FfxErrorCode FFX_ERROR_MALFORMED_DATA = 0x80000006; ///< The operation failed because of some malformed data.
|
||||
static const FfxErrorCode FFX_ERROR_OUT_OF_MEMORY = 0x80000007; ///< The operation failed because it ran out memory.
|
||||
static const FfxErrorCode FFX_ERROR_INCOMPLETE_INTERFACE = 0x80000008; ///< The operation failed because the interface was not fully configured.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_ENUM = 0x80000009; ///< The operation failed because of an invalid enumeration value.
|
||||
static const FfxErrorCode FFX_ERROR_INVALID_ARGUMENT = 0x8000000a; ///< The operation failed because an argument was invalid.
|
||||
static const FfxErrorCode FFX_ERROR_OUT_OF_RANGE = 0x8000000b; ///< The operation failed because a value was out of range.
|
||||
static const FfxErrorCode FFX_ERROR_NULL_DEVICE = 0x8000000c; ///< The operation failed because a device was null.
|
||||
static const FfxErrorCode FFX_ERROR_BACKEND_API_ERROR = 0x8000000d; ///< The operation failed because the backend API returned an error code.
|
||||
static const FfxErrorCode FFX_ERROR_INSUFFICIENT_MEMORY = 0x8000000e; ///< The operation failed because there was not enough memory.
|
||||
|
||||
/// Helper macro to return error code y from a function when a specific condition, x, is not met.
|
||||
#define FFX_RETURN_ON_ERROR(x, y) \
|
||||
if (!(x)) \
|
||||
{ \
|
||||
return (y); \
|
||||
}
|
||||
|
||||
/// Helper macro to return error code x from a function when it is not FFX_OK.
|
||||
#define FFX_VALIDATE(x) \
|
||||
{ \
|
||||
FfxErrorCode ret = x; \
|
||||
FFX_RETURN_ON_ERROR(ret == FFX_OK, ret); \
|
||||
}
|
||||
|
||||
1369
engine/thirdparty/amd-fsr2/ffx_fsr2.cpp
vendored
Normal file
1369
engine/thirdparty/amd-fsr2/ffx_fsr2.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
455
engine/thirdparty/amd-fsr2/ffx_fsr2.h
vendored
Normal file
455
engine/thirdparty/amd-fsr2/ffx_fsr2.h
vendored
Normal file
|
|
@ -0,0 +1,455 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
// @defgroup FSR2
|
||||
|
||||
#pragma once
|
||||
|
||||
// Include the interface for the backend of the FSR2 API.
|
||||
#include "ffx_fsr2_interface.h"
|
||||
|
||||
/// FidelityFX Super Resolution 2 major version.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
#define FFX_FSR2_VERSION_MAJOR (2)
|
||||
|
||||
/// FidelityFX Super Resolution 2 minor version.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
#define FFX_FSR2_VERSION_MINOR (2)
|
||||
|
||||
/// FidelityFX Super Resolution 2 patch version.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
#define FFX_FSR2_VERSION_PATCH (1)
|
||||
|
||||
/// The size of the context specified in 32bit values.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
#define FFX_FSR2_CONTEXT_SIZE (16536)
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif // #if defined(__cplusplus)
|
||||
|
||||
/// An enumeration of all the quality modes supported by FidelityFX Super
|
||||
/// Resolution 2 upscaling.
|
||||
///
|
||||
/// In order to provide a consistent user experience across multiple
|
||||
/// applications which implement FSR2. It is strongly recommended that the
|
||||
/// following preset scaling factors are made available through your
|
||||
/// application's user interface.
|
||||
///
|
||||
/// If your application does not expose the notion of preset scaling factors
|
||||
/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
|
||||
/// is immutable) or implementing a more dynamic scaling scheme (such as
|
||||
/// dynamic resolution scaling), then there is no need to use these presets.
|
||||
///
|
||||
/// Please note that <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> is
|
||||
/// an optional mode which may introduce significant quality degradation in the
|
||||
/// final image. As such it is recommended that you evaluate the final results
|
||||
/// of using this scaling mode before deciding if you should include it in your
|
||||
/// application.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef enum FfxFsr2QualityMode {
|
||||
|
||||
FFX_FSR2_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
|
||||
FFX_FSR2_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
|
||||
FFX_FSR2_QUALITY_MODE_PERFORMANCE = 3, ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
|
||||
FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE = 4 ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
|
||||
} FfxFsr2QualityMode;
|
||||
|
||||
/// An enumeration of bit flags used when creating a
|
||||
/// <c><i>FfxFsr2Context</i></c>. See <c><i>FfxFsr2ContextDescription</i></c>.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef enum FfxFsr2InitializationFlagBits {
|
||||
|
||||
FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range.
|
||||
FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<1), ///< A bit indicating if the motion vectors are rendered at display resolution.
|
||||
FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION = (1<<2), ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
|
||||
FFX_FSR2_ENABLE_DEPTH_INVERTED = (1<<3), ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
|
||||
FFX_FSR2_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
|
||||
FFX_FSR2_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data.
|
||||
FFX_FSR2_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling.
|
||||
FFX_FSR2_ENABLE_TEXTURE1D_USAGE = (1<<7), ///< A bit indicating that the backend should use 1D textures.
|
||||
FFX_FSR2_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues.
|
||||
} FfxFsr2InitializationFlagBits;
|
||||
|
||||
/// A structure encapsulating the parameters required to initialize FidelityFX
|
||||
/// Super Resolution 2 upscaling.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef struct FfxFsr2ContextDescription {
|
||||
|
||||
uint32_t flags; ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c>.
|
||||
FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at.
|
||||
FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process.
|
||||
FfxFsr2Interface callbacks; ///< A set of pointers to the backend implementation for FSR 2.0.
|
||||
FfxDevice device; ///< The abstracted device which is passed to some callback functions.
|
||||
|
||||
FfxFsr2Message fpMessage; ///< A pointer to a function that can recieve messages from the runtime.
|
||||
} FfxFsr2ContextDescription;
|
||||
|
||||
/// A structure encapsulating the parameters for dispatching the various passes
|
||||
/// of FidelityFX Super Resolution 2.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef struct FfxFsr2DispatchDescription {
|
||||
|
||||
FfxCommandList commandList; ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
|
||||
FfxResource color; ///< A <c><i>FfxResource</i></c> containing the color buffer for the current frame (at render resolution).
|
||||
FfxResource depth; ///< A <c><i>FfxResource</i></c> containing 32bit depth values for the current frame (at render resolution).
|
||||
FfxResource motionVectors; ///< A <c><i>FfxResource</i></c> containing 2-dimensional motion vectors (at render resolution if <c><i>FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS</i></c> is not set).
|
||||
FfxResource exposure; ///< A optional <c><i>FfxResource</i></c> containing a 1x1 exposure value.
|
||||
FfxResource reactive; ///< A optional <c><i>FfxResource</i></c> containing alpha value of reactive objects in the scene.
|
||||
FfxResource transparencyAndComposition; ///< A optional <c><i>FfxResource</i></c> containing alpha value of special objects in the scene.
|
||||
FfxResource output; ///< A <c><i>FfxResource</i></c> containing the output color buffer for the current frame (at presentation resolution).
|
||||
FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera.
|
||||
FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
|
||||
FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
|
||||
bool enableSharpening; ///< Enable an additional sharpening pass.
|
||||
float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
|
||||
float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds).
|
||||
float preExposure; ///< The pre exposure value (must be > 0.0f)
|
||||
bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
|
||||
float cameraNear; ///< The distance to the near plane of the camera.
|
||||
float cameraFar; ///< The distance to the far plane of the camera.
|
||||
float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians).
|
||||
float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters
|
||||
|
||||
// EXPERIMENTAL reactive mask generation parameters
|
||||
bool enableAutoReactive; ///< A boolean value to indicate internal reactive autogeneration should be used
|
||||
FfxResource colorOpaqueOnly; ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
|
||||
float autoTcThreshold; ///< Cutoff value for TC
|
||||
float autoTcScale; ///< A value to scale the transparency and composition mask
|
||||
float autoReactiveScale; ///< A value to scale the reactive mask
|
||||
float autoReactiveMax; ///< A value to clamp the reactive mask
|
||||
|
||||
float reprojectionMatrix[16]; ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth.
|
||||
} FfxFsr2DispatchDescription;
|
||||
|
||||
/// A structure encapsulating the parameters for automatic generation of a reactive mask
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef struct FfxFsr2GenerateReactiveDescription {
|
||||
|
||||
FfxCommandList commandList; ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
|
||||
FfxResource colorOpaqueOnly; ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
|
||||
FfxResource colorPreUpscale; ///< A <c><i>FfxResource</i></c> containing the opaque+translucent color buffer for the current frame (at render resolution).
|
||||
FfxResource outReactive; ///< A <c><i>FfxResource</i></c> containing the surface to generate the reactive mask into.
|
||||
FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
|
||||
float scale; ///< A value to scale the output
|
||||
float cutoffThreshold; ///< A threshold value to generate a binary reactive mask
|
||||
float binaryValue; ///< A value to set for the binary reactive mask
|
||||
uint32_t flags; ///< Flags to determine how to generate the reactive mask
|
||||
} FfxFsr2GenerateReactiveDescription;
|
||||
|
||||
/// A structure encapsulating the FidelityFX Super Resolution 2 context.
|
||||
///
|
||||
/// This sets up an object which contains all persistent internal data and
|
||||
/// resources that are required by FSR2.
|
||||
///
|
||||
/// The <c><i>FfxFsr2Context</i></c> object should have a lifetime matching
|
||||
/// your use of FSR2. Before destroying the FSR2 context care should be taken
|
||||
/// to ensure the GPU is not accessing the resources created or used by FSR2.
|
||||
/// It is therefore recommended that the GPU is idle before destroying the
|
||||
/// FSR2 context.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef struct FfxFsr2Context {
|
||||
|
||||
uint32_t data[FFX_FSR2_CONTEXT_SIZE]; ///< An opaque set of <c>uint32_t</c> which contain the data for the context.
|
||||
} FfxFsr2Context;
|
||||
|
||||
/// Create a FidelityFX Super Resolution 2 context from the parameters
|
||||
/// programmed to the <c><i>FfxFsr2CreateParams</i></c> structure.
|
||||
///
|
||||
/// The context structure is the main object used to interact with the FSR2
|
||||
/// API, and is responsible for the management of the internal resources used
|
||||
/// by the FSR2 algorithm. When this API is called, multiple calls will be
|
||||
/// made via the pointers contained in the <c><i>callbacks</i></c> structure.
|
||||
/// These callbacks will attempt to retreive the device capabilities, and
|
||||
/// create the internal resources, and pipelines required by FSR2's
|
||||
/// frame-to-frame function. Depending on the precise configuration used when
|
||||
/// creating the <c><i>FfxFsr2Context</i></c> a different set of resources and
|
||||
/// pipelines might be requested via the callback functions.
|
||||
///
|
||||
/// The flags included in the <c><i>flags</i></c> field of
|
||||
/// <c><i>FfxFsr2Context</i></c> how match the configuration of your
|
||||
/// application as well as the intended use of FSR2. It is important that these
|
||||
/// flags are set correctly (as well as a correct programmed
|
||||
/// <c><i>FfxFsr2DispatchDescription</i></c>) to ensure correct operation. It is
|
||||
/// recommended to consult the overview documentation for further details on
|
||||
/// how FSR2 should be integerated into an application.
|
||||
///
|
||||
/// When the <c><i>FfxFsr2Context</i></c> is created, you should use the
|
||||
/// <c><i>ffxFsr2ContextDispatch</i></c> function each frame where FSR2
|
||||
/// upscaling should be applied. See the documentation of
|
||||
/// <c><i>ffxFsr2ContextDispatch</i></c> for more details.
|
||||
///
|
||||
/// The <c><i>FfxFsr2Context</i></c> should be destroyed when use of it is
|
||||
/// completed, typically when an application is unloaded or FSR2 upscaling is
|
||||
/// disabled by a user. To destroy the FSR2 context you should call
|
||||
/// <c><i>ffxFsr2ContextDestroy</i></c>.
|
||||
///
|
||||
/// @param [out] context A pointer to a <c><i>FfxFsr2Context</i></c> structure to populate.
|
||||
/// @param [in] contextDescription A pointer to a <c><i>FfxFsr2ContextDescription</i></c> structure.
|
||||
///
|
||||
/// @retval
|
||||
/// FFX_OK The operation completed successfully.
|
||||
/// @retval
|
||||
/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either <c><i>context</i></c> or <c><i>contextDescription</i></c> was <c><i>NULL</i></c>.
|
||||
/// @retval
|
||||
/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the <c><i>FfxFsr2ContextDescription.callbacks</i></c> was not fully specified.
|
||||
/// @retval
|
||||
/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription);
|
||||
|
||||
/// Dispatch the various passes that constitute FidelityFX Super Resolution 2.
///
/// FSR2 is a composite effect, meaning that it is comprised of multiple
/// constituent passes (implemented as one or more clears, copies and compute
/// dispatches). The <c><i>ffxFsr2ContextDispatch</i></c> function is the
/// function which (via the use of the functions contained in the
/// <c><i>callbacks</i></c> field of the <c><i>FfxFsr2Context</i></c>
/// structure) ultimately generates the sequence of graphics API calls required
/// each frame.
///
/// As with the creation of the <c><i>FfxFsr2Context</i></c> correctly
/// programming the <c><i>FfxFsr2DispatchDescription</i></c> is key to ensuring
/// the correct operation of FSR2. It is particularly important to ensure that
/// camera jitter is correctly applied to your application's projection matrix
/// (or camera origin for raytraced applications). FSR2 provides the
/// <c><i>ffxFsr2GetJitterPhaseCount</i></c> and
/// <c><i>ffxFsr2GetJitterOffset</i></c> entry points to help applications
/// correctly compute the camera jitter. Whatever jitter pattern is used by the
/// application it should be correctly programmed to the
/// <c><i>jitterOffset</i></c> field of the <c><i>dispatchDescription</i></c>
/// structure. For more guidance on camera jitter please consult the
/// documentation for <c><i>ffxFsr2GetJitterOffset</i></c> as well as the
/// accompanying overview documentation for FSR2.
///
/// @param [in] context A pointer to a <c><i>FfxFsr2Context</i></c> structure.
/// @param [in] dispatchDescription A pointer to a <c><i>FfxFsr2DispatchDescription</i></c> structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either <c><i>context</i></c> or <c><i>dispatchDescription</i></c> was <c><i>NULL</i></c>.
/// @retval
/// FFX_ERROR_OUT_OF_RANGE The operation failed because <c><i>dispatchDescription.renderSize</i></c> was larger than the maximum render resolution.
/// @retval
/// FFX_ERROR_NULL_DEVICE The operation failed because the device inside the context was <c><i>NULL</i></c>.
/// @retval
/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
///
/// @ingroup FSR2
FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchDescription);
|
||||
|
||||
/// A helper function to generate a reactive mask from an opaque-only texture
/// and one containing translucent objects.
///
/// @param [in] context A pointer to a <c><i>FfxFsr2Context</i></c> structure.
/// @param [in] params A pointer to a <c><i>FfxFsr2GenerateReactiveDescription</i></c> structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
///
/// @ingroup FSR2
FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params);
|
||||
|
||||
/// Destroy the FidelityFX Super Resolution context.
///
/// @param [out] context A pointer to a <c><i>FfxFsr2Context</i></c> structure to destroy.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_CODE_NULL_POINTER The operation failed because <c><i>context</i></c> was <c><i>NULL</i></c>.
///
/// @ingroup FSR2
FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context);
|
||||
|
||||
/// Get the upscale ratio from the quality mode.
///
/// The following table enumerates the mapping of the quality modes to
/// per-dimension scaling ratios.
///
/// Quality preset                                        | Scale factor
/// ----------------------------------------------------- | -------------
/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x
/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x
/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x
/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x
///
/// Passing an invalid <c><i>qualityMode</i></c> will return 0.0f.
///
/// @param [in] qualityMode The quality mode preset.
///
/// @returns
/// The per-dimension upscaling ratio for
/// <c><i>qualityMode</i></c> according to the table above.
///
/// @ingroup FSR2
FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode);
|
||||
|
||||
/// A helper function to calculate the rendering resolution from a target
/// resolution and desired quality level.
///
/// This function applies the scaling factor returned by
/// <c><i>ffxFsr2GetUpscaleRatioFromQualityMode</i></c> to each dimension.
///
/// @param [out] renderWidth A pointer to a <c>uint32_t</c> which will hold the calculated render resolution width.
/// @param [out] renderHeight A pointer to a <c>uint32_t</c> which will hold the calculated render resolution height.
/// @param [in] displayWidth The target display resolution width.
/// @param [in] displayHeight The target display resolution height.
/// @param [in] qualityMode The desired quality mode for FSR 2 upscaling.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_INVALID_POINTER Either <c><i>renderWidth</i></c> or <c><i>renderHeight</i></c> was <c>NULL</c>.
/// @retval
/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified.
///
/// @ingroup FSR2
FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
    uint32_t* renderWidth,
    uint32_t* renderHeight,
    uint32_t displayWidth,
    uint32_t displayHeight,
    FfxFsr2QualityMode qualityMode);
|
||||
|
||||
/// A helper function to calculate the jitter phase count from display
/// resolution.
///
/// For more detailed information about the application of camera jitter to
/// your application's rendering please refer to the
/// <c><i>ffxFsr2GetJitterOffset</i></c> function.
///
/// The table below shows the jitter phase count which this function
/// would return for each of the quality presets.
///
/// Quality preset                                        | Scale factor  | Phase count
/// ----------------------------------------------------- | ------------- | ---------------
/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x          | 18
/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x          | 23
/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x          | 32
/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x          | 72
/// Custom                                                | [1..n]x       | ceil(8*n^2)
///
/// @param [in] renderWidth The render resolution width.
/// @param [in] displayWidth The display resolution width.
///
/// @returns
/// The jitter phase count for the scaling factor between <c><i>renderWidth</i></c> and <c><i>displayWidth</i></c>.
///
/// @ingroup FSR2
FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
|
||||
|
||||
/// A helper function to calculate the subpixel jitter offset.
///
/// FSR2 relies on the application to apply sub-pixel jittering while rendering.
/// This is typically included in the projection matrix of the camera. To make
/// the application of camera jitter simple, the FSR2 API provides a small set
/// of utility function which computes the sub-pixel jitter offset for a
/// particular frame within a sequence of separate jitter offsets. To begin, the
/// index within the jitter phase must be computed. To calculate the
/// sequence's length, you can call the <c><i>ffxFsr2GetJitterPhaseCount</i></c>
/// function. The index should be a value which is incremented each frame modulo
/// the length of the sequence computed by <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
/// The index within the jitter phase is passed to
/// <c><i>ffxFsr2GetJitterOffset</i></c> via the <c><i>index</i></c> parameter.
///
/// This function uses a Halton(2,3) sequence to compute the jitter offset.
/// The ultimate index used for the sequence is <c><i>index</i></c> %
/// <c><i>phaseCount</i></c>.
///
/// It is important to understand that the values returned from the
/// <c><i>ffxFsr2GetJitterOffset</i></c> function are in unit pixel space, and
/// in order to composite this correctly into a projection matrix we must
/// convert them into projection offsets. This is done as per the pseudo code
/// listing which is shown below.
///
///     const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(renderWidth, displayWidth);
///
///     float jitterX = 0;
///     float jitterY = 0;
///     ffxFsr2GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
///
///     const float jitterOffsetX = 2.0f * jitterX / (float)renderWidth;
///     const float jitterOffsetY = -2.0f * jitterY / (float)renderHeight;
///     const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(jitterOffsetX, jitterOffsetY, 0));
///     const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
///
/// Jitter should be applied to all rendering. This includes opaque, alpha
/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
/// jittering values calculated by the <c><i>ffxFsr2GetJitterOffset</i></c>
/// function can be applied to the camera projection matrix which is ultimately
/// used to perform transformations during vertex shading. For raytraced
/// rendering, the sub-pixel jitter should be applied to the ray's origin,
/// often the camera's position.
///
/// Whether you elect to use the <c><i>ffxFsr2GetJitterOffset</i></c> function
/// or your own sequence generator, you must program the
/// <c><i>jitterOffset</i></c> field of the
/// <c><i>FfxFsr2DispatchParameters</i></c> structure in order to inform FSR2
/// of the jitter offset that has been applied in order to render each frame.
///
/// If not using the recommended <c><i>ffxFsr2GetJitterOffset</i></c> function,
/// care should be taken that your jitter sequence never generates a null vector;
/// that is, a value of 0 in both the X and Y dimensions.
///
/// @param [out] outX A pointer to a <c>float</c> which will contain the subpixel jitter offset for the x dimension.
/// @param [out] outY A pointer to a <c>float</c> which will contain the subpixel jitter offset for the y dimension.
/// @param [in] index The index within the jitter sequence.
/// @param [in] phaseCount The length of jitter phase. See <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_INVALID_POINTER Either <c><i>outX</i></c> or <c><i>outY</i></c> was <c>NULL</c>.
/// @retval
/// FFX_ERROR_INVALID_ARGUMENT Argument <c><i>phaseCount</i></c> must be greater than 0.
///
/// @ingroup FSR2
FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount);
|
||||
|
||||
/// A helper function to check if a resource is
/// <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
///
/// @param [in] resource A <c><i>FfxResource</i></c>.
///
/// @returns
/// true The <c><i>resource</i></c> was not <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
/// @returns
/// false The <c><i>resource</i></c> was <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
///
/// @ingroup FSR2
FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif // #if defined(__cplusplus)
|
||||
395
engine/thirdparty/amd-fsr2/ffx_fsr2_interface.h
vendored
Normal file
395
engine/thirdparty/amd-fsr2/ffx_fsr2_interface.h
vendored
Normal file
|
|
@ -0,0 +1,395 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ffx_assert.h"
|
||||
#include "ffx_types.h"
|
||||
#include "ffx_error.h"
|
||||
|
||||
// Include the FSR2 resources defined in the HLSL code. This shared here to avoid getting out of sync.
|
||||
#define FFX_CPU
|
||||
#include "shaders/ffx_fsr2_resources.h"
|
||||
#include "shaders/ffx_fsr2_common.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif // #if defined(__cplusplus)
|
||||
|
||||
FFX_FORWARD_DECLARE(FfxFsr2Interface);
|
||||
|
||||
/// An enumeration of all the passes which constitute the FSR2 algorithm.
///
/// FSR2 is implemented as a composite of several compute passes each
/// computing a key part of the final result. Each call to the
/// <c><i>FfxFsr2ScheduleGpuJobFunc</i></c> callback function will
/// correspond to a single pass included in <c><i>FfxFsr2Pass</i></c>. For a
/// more comprehensive description of each pass, please refer to the FSR2
/// reference documentation.
///
/// Please note in some cases e.g.: <c><i>FFX_FSR2_PASS_ACCUMULATE</i></c>
/// and <c><i>FFX_FSR2_PASS_ACCUMULATE_SHARPEN</i></c> either one pass or the
/// other will be used (they are mutually exclusive). The choice of which will
/// depend on the way the <c><i>FfxFsr2Context</i></c> is created and the
/// precise contents of <c><i>FfxFsr2DispatchParameters</i></c> each time a call
/// is made to <c><i>ffxFsr2ContextDispatch</i></c>.
///
/// @ingroup FSR2
typedef enum FfxFsr2Pass {

    FFX_FSR2_PASS_DEPTH_CLIP = 0, ///< A pass which performs depth clipping.
    FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1, ///< A pass which performs reconstruction of previous frame's depth.
    FFX_FSR2_PASS_LOCK = 2, ///< A pass which calculates pixel locks.
    FFX_FSR2_PASS_ACCUMULATE = 3, ///< A pass which performs upscaling.
    FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4, ///< A pass which performs upscaling when sharpening is used.
    FFX_FSR2_PASS_RCAS = 5, ///< A pass which performs sharpening.
    FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6, ///< A pass which generates the luminance mipmap chain for the current frame.
    FFX_FSR2_PASS_GENERATE_REACTIVE = 7, ///< An optional pass to generate a reactive mask.
    FFX_FSR2_PASS_TCR_AUTOGENERATE = 8, ///< An optional pass to generate texture-and-composition and reactive masks.

    FFX_FSR2_PASS_COUNT ///< The number of passes performed by FSR2.
} FfxFsr2Pass;
|
||||
|
||||
/// An enumeration of the message severities which may be passed to the
/// <c><i>FfxFsr2Message</i></c> debug message callback.
///
/// @ingroup FSR2
typedef enum FfxFsr2MsgType {
    FFX_FSR2_MESSAGE_TYPE_ERROR = 0,
    FFX_FSR2_MESSAGE_TYPE_WARNING = 1,
    FFX_FSR2_MESSAGE_TYPE_COUNT
} FfxFsr2MsgType;
|
||||
|
||||
/// Create and initialize the backend context.
///
/// The callback function sets up the backend context for rendering.
/// It will create or reference the device and create required internal data structures.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] device The FfxDevice obtained by ffxGetDevice(DX12/VK/...).
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2CreateBackendContextFunc)(
    FfxFsr2Interface* backendInterface,
    FfxDevice device);
|
||||
|
||||
/// Get a list of capabilities of the device.
///
/// When creating an <c><i>FfxFsr2Context</i></c> it is desirable for the FSR2
/// core implementation to be aware of certain characteristics of the platform
/// that is being targeted. This is because some optimizations which FSR2
/// attempts to perform are more effective on certain classes of hardware than
/// others, or are not supported by older hardware. In order to avoid cases
/// where optimizations actually have the effect of decreasing performance, or
/// reduce the breadth of support provided by FSR2, FSR2 queries the
/// capabilities of the device to make such decisions.
///
/// For target platforms with fixed hardware support you need not implement
/// this callback function by querying the device, but instead may hardcode
/// what features are available on the platform.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [out] outDeviceCapabilities The device capabilities structure to fill out.
/// @param [in] device The device to query for capabilities.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode(*FfxFsr2GetDeviceCapabilitiesFunc)(
    FfxFsr2Interface* backendInterface,
    FfxDeviceCapabilities* outDeviceCapabilities,
    FfxDevice device);
|
||||
|
||||
/// Destroy the backend context and dereference the device.
///
/// This function is called when the <c><i>FfxFsr2Context</i></c> is destroyed.
///
/// @param [in] backendInterface A pointer to the backend interface.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode(*FfxFsr2DestroyBackendContextFunc)(
    FfxFsr2Interface* backendInterface);
|
||||
|
||||
/// Create a resource.
///
/// This callback is intended for the backend to create internal resources.
///
/// Please note: It is also possible that the creation of resources might
/// itself cause additional resources to be created by simply calling the
/// <c><i>FfxFsr2CreateResourceFunc</i></c> function pointer again. This is
/// useful when handling the initial creation of resources which must be
/// initialized. The flow in such a case would be an initial call to create the
/// CPU-side resource, another to create the GPU-side resource, and then a call
/// to schedule a copy render job to move the data between the two. Typically
/// this type of function call flow is only seen during the creation of an
/// <c><i>FfxFsr2Context</i></c>.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] createResourceDescription A pointer to a <c><i>FfxCreateResourceDescription</i></c>.
/// @param [out] outResource A pointer to a <c><i>FfxResource</i></c> object.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2CreateResourceFunc)(
    FfxFsr2Interface* backendInterface,
    const FfxCreateResourceDescription* createResourceDescription,
    FfxResourceInternal* outResource);
|
||||
|
||||
/// Register a resource in the backend for the current frame.
///
/// Since FSR2 and the backend are not aware how many different
/// resources will get passed to FSR2 over time, it's not safe
/// to register all resources simultaneously in the backend.
/// Also passed resources may not be valid after the dispatch call.
/// As a result it's safest to register them as FfxResourceInternal
/// and clear them at the end of the dispatch call.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] inResource A pointer to a <c><i>FfxResource</i></c>.
/// @param [out] outResource A pointer to a <c><i>FfxResourceInternal</i></c> object.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode(*FfxFsr2RegisterResourceFunc)(
    FfxFsr2Interface* backendInterface,
    const FfxResource* inResource,
    FfxResourceInternal* outResource);
|
||||
|
||||
/// Unregister all temporary FfxResourceInternal from the backend.
///
/// Unregister FfxResourceInternal referencing resources passed to
/// a function as a parameter.
///
/// @param [in] backendInterface A pointer to the backend interface.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode(*FfxFsr2UnregisterResourcesFunc)(
    FfxFsr2Interface* backendInterface);
|
||||
|
||||
/// Retrieve a <c><i>FfxResourceDescription</i></c> matching a
/// <c><i>FfxResource</i></c> structure.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] resource A pointer to a <c><i>FfxResource</i></c> object.
///
/// @returns
/// A description of the resource.
///
/// @ingroup FSR2
typedef FfxResourceDescription (*FfxFsr2GetResourceDescriptionFunc)(
    FfxFsr2Interface* backendInterface,
    FfxResourceInternal resource);
|
||||
|
||||
/// Destroy a resource.
///
/// This callback is intended for the backend to release an internal resource.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] resource A pointer to a <c><i>FfxResource</i></c> object.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2DestroyResourceFunc)(
    FfxFsr2Interface* backendInterface,
    FfxResourceInternal resource);
|
||||
|
||||
/// Create a render pipeline.
///
/// A rendering pipeline contains the shader as well as resource bindpoints
/// and samplers.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] pass The identifier for the pass.
/// @param [in] pipelineDescription A pointer to a <c><i>FfxPipelineDescription</i></c> describing the pipeline to be created.
/// @param [out] outPipeline A pointer to a <c><i>FfxPipelineState</i></c> structure which should be populated.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2CreatePipelineFunc)(
    FfxFsr2Interface* backendInterface,
    FfxFsr2Pass pass,
    const FfxPipelineDescription* pipelineDescription,
    FfxPipelineState* outPipeline);
|
||||
|
||||
/// Destroy a render pipeline.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [out] pipeline A pointer to a <c><i>FfxPipelineState</i></c> structure which should be released.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2DestroyPipelineFunc)(
    FfxFsr2Interface* backendInterface,
    FfxPipelineState* pipeline);
|
||||
|
||||
/// Schedule a render job to be executed on the next call of
/// <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>.
///
/// Render jobs can perform one of three different tasks: clear, copy or
/// compute dispatches.
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] job A pointer to a <c><i>FfxGpuJobDescription</i></c> structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2ScheduleGpuJobFunc)(
    FfxFsr2Interface* backendInterface,
    const FfxGpuJobDescription* job);
|
||||
|
||||
/// Execute scheduled render jobs on the <c><i>commandList</i></c> provided.
///
/// The recording of the graphics API commands should take place in this
/// callback function, the render jobs which were previously enqueued (via
/// callbacks made to <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>) should be
/// processed in the order they were received. Advanced users might choose to
/// reorder the rendering jobs, but should do so with care to respect the
/// resource dependencies.
///
/// Depending on the precise contents of <c><i>FfxFsr2DispatchDescription</i></c> a
/// different number of render jobs might have previously been enqueued (for
/// example if sharpening is toggled on and off).
///
/// @param [in] backendInterface A pointer to the backend interface.
/// @param [in] commandList A pointer to a <c><i>FfxCommandList</i></c> structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// Anything else The operation failed.
///
/// @ingroup FSR2
typedef FfxErrorCode (*FfxFsr2ExecuteGpuJobsFunc)(
    FfxFsr2Interface* backendInterface,
    FfxCommandList commandList);
|
||||
|
||||
/// Pass a string message.
///
/// Used for debug messages.
///
/// @param [in] type The type of message.
/// @param [in] message A string message to pass.
///
///
/// @ingroup FSR2
typedef void(*FfxFsr2Message)(
    FfxFsr2MsgType type,
    const wchar_t* message);
|
||||
|
||||
/// A structure encapsulating the interface between the core implementation of
/// the FSR2 algorithm and any graphics API that it should ultimately call.
///
/// This set of functions serves as an abstraction layer between FSR2 and the
/// API used to implement it. While FSR2 ships with backends for DirectX12 and
/// Vulkan, it is possible to implement your own backend for other platforms or
/// which sits on top of your engine's own abstraction layer. For details on the
/// expectations of what each function should do you should refer the
/// description of the following function pointer types:
///
/// <c><i>FfxFsr2CreateBackendContextFunc</i></c>
/// <c><i>FfxFsr2GetDeviceCapabilitiesFunc</i></c>
/// <c><i>FfxFsr2DestroyBackendContextFunc</i></c>
/// <c><i>FfxFsr2CreateResourceFunc</i></c>
/// <c><i>FfxFsr2GetResourceDescriptionFunc</i></c>
/// <c><i>FfxFsr2DestroyResourceFunc</i></c>
/// <c><i>FfxFsr2CreatePipelineFunc</i></c>
/// <c><i>FfxFsr2DestroyPipelineFunc</i></c>
/// <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>
/// <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>
///
/// Depending on the graphics API that is abstracted by the backend, it may be
/// required that the backend is to some extent stateful. To ensure that
/// applications retain full control to manage the memory used by FSR2, the
/// <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields are
/// provided. A backend should provide a means of specifying how much scratch
/// memory is required for its internal implementation (e.g: via a function
/// or constant value). The application is then responsible for allocating that
/// memory and providing it when setting up the FSR2 backend. Backends provided
/// with FSR2 do not perform dynamic memory allocations, and instead
/// suballocate all memory from the scratch buffers provided.
///
/// The <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields
/// should be populated according to the requirements of each backend. For
/// example, if using the DirectX 12 backend you should call the
/// <c><i>ffxFsr2GetScratchMemorySizeDX12</i></c> function. It is not required
/// that custom backend implementations use a scratch buffer.
///
/// @ingroup FSR2
typedef struct FfxFsr2Interface {

    FfxFsr2CreateBackendContextFunc fpCreateBackendContext; ///< A callback function to create and initialize the backend context.
    FfxFsr2GetDeviceCapabilitiesFunc fpGetDeviceCapabilities; ///< A callback function to query device capabilities.
    FfxFsr2DestroyBackendContextFunc fpDestroyBackendContext; ///< A callback function to destroy the backend context. This also dereferences the device.
    FfxFsr2CreateResourceFunc fpCreateResource; ///< A callback function to create a resource.
    FfxFsr2RegisterResourceFunc fpRegisterResource; ///< A callback function to register an external resource.
    FfxFsr2UnregisterResourcesFunc fpUnregisterResources; ///< A callback function to unregister external resource.
    FfxFsr2GetResourceDescriptionFunc fpGetResourceDescription; ///< A callback function to retrieve a resource description.
    FfxFsr2DestroyResourceFunc fpDestroyResource; ///< A callback function to destroy a resource.
    FfxFsr2CreatePipelineFunc fpCreatePipeline; ///< A callback function to create a render or compute pipeline.
    FfxFsr2DestroyPipelineFunc fpDestroyPipeline; ///< A callback function to destroy a render or compute pipeline.
    FfxFsr2ScheduleGpuJobFunc fpScheduleGpuJob; ///< A callback function to schedule a render job.
    FfxFsr2ExecuteGpuJobsFunc fpExecuteGpuJobs; ///< A callback function to execute all queued render jobs.

    void* scratchBuffer; ///< A preallocated buffer for memory utilized internally by the backend.
    size_t scratchBufferSize; ///< Size of the buffer pointed to by <c><i>scratchBuffer</i></c>.
} FfxFsr2Interface;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif // #if defined(__cplusplus)
|
||||
46
engine/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
vendored
Normal file
46
engine/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
vendored
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// @internal
|
||||
|
||||
#pragma once
|
||||
|
||||
static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16;
|
||||
static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16;
|
||||
static const float ffxFsr2MaximumBias[] = {
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.976f, 1.841f, 1.774f, 1.737f, 1.716f, 1.71f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.914f, 1.784f, 1.716f, 1.673f, 1.649f, 1.641f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.793f, 1.676f, 1.604f, 1.562f, 1.54f, 1.533f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.619f, 1.536f, 1.492f, 1.467f, 1.454f, 1.449f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.575f, 1.496f, 1.456f, 1.432f, 1.416f, 1.408f, 1.405f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.555f, 1.479f, 1.438f, 1.413f, 1.398f, 1.387f, 1.381f, 1.379f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.555f, 1.474f, 1.43f, 1.404f, 1.387f, 1.376f, 1.368f, 1.363f, 1.362f,
|
||||
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.575f, 1.479f, 1.43f, 1.401f, 1.382f, 1.369f, 1.36f, 1.354f, 1.351f, 1.35f,
|
||||
2.0f, 2.0f, 1.976f, 1.914f, 1.793f, 1.619f, 1.496f, 1.438f, 1.404f, 1.382f, 1.367f, 1.357f, 1.349f, 1.344f, 1.341f, 1.34f,
|
||||
1.876f, 1.869f, 1.841f, 1.784f, 1.676f, 1.536f, 1.456f, 1.413f, 1.387f, 1.369f, 1.357f, 1.347f, 1.341f, 1.336f, 1.333f, 1.332f,
|
||||
1.809f, 1.801f, 1.774f, 1.716f, 1.604f, 1.492f, 1.432f, 1.398f, 1.376f, 1.36f, 1.349f, 1.341f, 1.335f, 1.33f, 1.328f, 1.327f,
|
||||
1.772f, 1.764f, 1.737f, 1.673f, 1.562f, 1.467f, 1.416f, 1.387f, 1.368f, 1.354f, 1.344f, 1.336f, 1.33f, 1.326f, 1.323f, 1.323f,
|
||||
1.753f, 1.745f, 1.716f, 1.649f, 1.54f, 1.454f, 1.408f, 1.381f, 1.363f, 1.351f, 1.341f, 1.333f, 1.328f, 1.323f, 1.321f, 1.32f,
|
||||
1.748f, 1.739f, 1.71f, 1.641f, 1.533f, 1.449f, 1.405f, 1.379f, 1.362f, 1.35f, 1.34f, 1.332f, 1.327f, 1.323f, 1.32f, 1.319f,
|
||||
|
||||
};
|
||||
84
engine/thirdparty/amd-fsr2/ffx_fsr2_private.h
vendored
Normal file
84
engine/thirdparty/amd-fsr2/ffx_fsr2_private.h
vendored
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Constants for FSR2 DX12 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h
|
||||
typedef struct Fsr2Constants {
|
||||
|
||||
int32_t renderSize[2];
|
||||
int32_t maxRenderSize[2];
|
||||
int32_t displaySize[2];
|
||||
int32_t inputColorResourceDimensions[2];
|
||||
int32_t lumaMipDimensions[2];
|
||||
int32_t lumaMipLevelToUse;
|
||||
int32_t frameIndex;
|
||||
|
||||
float deviceToViewDepth[4];
|
||||
float jitterOffset[2];
|
||||
float motionVectorScale[2];
|
||||
float downscaleFactor[2];
|
||||
float motionVectorJitterCancellation[2];
|
||||
float preExposure;
|
||||
float previousFramePreExposure;
|
||||
float tanHalfFOV;
|
||||
float jitterPhaseCount;
|
||||
float deltaTime;
|
||||
float dynamicResChangeFactor;
|
||||
float viewSpaceToMetersFactor;
|
||||
|
||||
float pad;
|
||||
float reprojectionMatrix[16];
|
||||
} Fsr2Constants;
|
||||
|
||||
struct FfxFsr2ContextDescription;
|
||||
struct FfxDeviceCapabilities;
|
||||
struct FfxPipelineState;
|
||||
struct FfxResource;
|
||||
|
||||
// FfxFsr2Context_Private
|
||||
// The private implementation of the FSR2 context.
|
||||
typedef struct FfxFsr2Context_Private {
|
||||
|
||||
FfxFsr2ContextDescription contextDescription;
|
||||
Fsr2Constants constants;
|
||||
FfxDevice device;
|
||||
FfxDeviceCapabilities deviceCapabilities;
|
||||
FfxPipelineState pipelineDepthClip;
|
||||
FfxPipelineState pipelineReconstructPreviousDepth;
|
||||
FfxPipelineState pipelineLock;
|
||||
FfxPipelineState pipelineAccumulate;
|
||||
FfxPipelineState pipelineAccumulateSharpen;
|
||||
FfxPipelineState pipelineRCAS;
|
||||
FfxPipelineState pipelineComputeLuminancePyramid;
|
||||
FfxPipelineState pipelineGenerateReactive;
|
||||
FfxPipelineState pipelineTcrAutogenerate;
|
||||
|
||||
// 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV
|
||||
FfxResourceInternal srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
|
||||
FfxResourceInternal uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
|
||||
|
||||
bool firstExecution;
|
||||
bool refreshPipelineStates;
|
||||
uint32_t resourceFrameIndex;
|
||||
float previousJitterOffset[2];
|
||||
int32_t jitterPhaseCountRemaining;
|
||||
} FfxFsr2Context_Private;
|
||||
365
engine/thirdparty/amd-fsr2/ffx_types.h
vendored
Normal file
365
engine/thirdparty/amd-fsr2/ffx_types.h
vendored
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined (FFX_GCC)
|
||||
/// FidelityFX exported functions
|
||||
#define FFX_API
|
||||
#else
|
||||
/// FidelityFX exported functions
|
||||
#define FFX_API __declspec(dllexport)
|
||||
#endif // #if defined (FFX_GCC)
|
||||
|
||||
/// Maximum supported number of simultaneously bound SRVs.
|
||||
#define FFX_MAX_NUM_SRVS 16
|
||||
|
||||
/// Maximum supported number of simultaneously bound UAVs.
|
||||
#define FFX_MAX_NUM_UAVS 8
|
||||
|
||||
/// Maximum number of constant buffers bound.
|
||||
#define FFX_MAX_NUM_CONST_BUFFERS 2
|
||||
|
||||
/// Maximum size of bound constant buffers.
|
||||
#define FFX_MAX_CONST_SIZE 64
|
||||
|
||||
/// Off by default warnings
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable : 4365 4710 4820 5039)
|
||||
#elif defined(__clang__)
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
|
||||
#pragma clang diagnostic ignored "-Wsign-compare"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
#pragma clang diagnostic ignored "-Wignored-qualifiers"
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // #ifdef __cplusplus
|
||||
|
||||
/// An enumeration of surface formats.
|
||||
typedef enum FfxSurfaceFormat {
|
||||
|
||||
FFX_SURFACE_FORMAT_UNKNOWN, ///< Unknown format
|
||||
FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format
|
||||
FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format
|
||||
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format
|
||||
FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format
|
||||
FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format
|
||||
FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format
|
||||
FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format
|
||||
FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format
|
||||
FFX_SURFACE_FORMAT_R11G11B10_FLOAT, ///< 32 bit 3 channel float format
|
||||
FFX_SURFACE_FORMAT_R16G16_FLOAT, ///< 16 bit per channel, 2 channel float format
|
||||
FFX_SURFACE_FORMAT_R16G16_UINT, ///< 16 bit per channel, 2 channel unsigned int format
|
||||
FFX_SURFACE_FORMAT_R16_FLOAT, ///< 16 bit per channel, 1 channel float format
|
||||
FFX_SURFACE_FORMAT_R16_UINT, ///< 16 bit per channel, 1 channel unsigned int format
|
||||
FFX_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format
|
||||
FFX_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per channel, 1 channel signed normalized format
|
||||
FFX_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format
|
||||
FFX_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel unsigned int format
|
||||
FFX_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format
|
||||
FFX_SURFACE_FORMAT_R32_FLOAT ///< 32 bit per channel, 1 channel float format
|
||||
} FfxSurfaceFormat;
|
||||
|
||||
/// An enumeration of resource usage.
|
||||
typedef enum FfxResourceUsage {
|
||||
|
||||
FFX_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only.
|
||||
FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target.
|
||||
FFX_RESOURCE_USAGE_UAV = (1<<1), ///< Indicates a resource will be used as UAV.
|
||||
} FfxResourceUsage;
|
||||
|
||||
/// An enumeration of resource states.
|
||||
typedef enum FfxResourceStates {
|
||||
|
||||
FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1<<0), ///< Indicates a resource is in the state to be used as UAV.
|
||||
FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 1), ///< Indicates a resource is in the state to be read by compute shaders.
|
||||
FFX_RESOURCE_STATE_COPY_SRC = (1 << 2), ///< Indicates a resource is in the state to be used as source in a copy command.
|
||||
FFX_RESOURCE_STATE_COPY_DEST = (1 << 3), ///< Indicates a resource is in the state to be used as destination in a copy command.
|
||||
FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in generic (slow) read state.
|
||||
} FfxResourceStates;
|
||||
|
||||
/// An enumeration of surface dimensions.
|
||||
typedef enum FfxResourceDimension {
|
||||
|
||||
FFX_RESOURCE_DIMENSION_TEXTURE_1D, ///< A resource with a single dimension.
|
||||
FFX_RESOURCE_DIMENSION_TEXTURE_2D, ///< A resource with two dimensions.
|
||||
} FfxResourceDimension;
|
||||
|
||||
/// An enumeration of surface dimensions.
|
||||
typedef enum FfxResourceFlags {
|
||||
|
||||
FFX_RESOURCE_FLAGS_NONE = 0, ///< No flags.
|
||||
FFX_RESOURCE_FLAGS_ALIASABLE = (1<<0), ///< A bit indicating a resource does not need to persist across frames.
|
||||
} FfxResourceFlags;
|
||||
|
||||
/// An enumeration of all resource view types.
|
||||
typedef enum FfxResourceViewType {
|
||||
|
||||
FFX_RESOURCE_VIEW_UNORDERED_ACCESS, ///< The resource view is an unordered access view (UAV).
|
||||
FFX_RESOURCE_VIEW_SHADER_READ, ///< The resource view is a shader resource view (SRV).
|
||||
} FfxResourceViewType;
|
||||
|
||||
/// The type of filtering to perform when reading a texture.
|
||||
typedef enum FfxFilterType {
|
||||
|
||||
FFX_FILTER_TYPE_POINT, ///< Point sampling.
|
||||
FFX_FILTER_TYPE_LINEAR ///< Sampling with interpolation.
|
||||
} FfxFilterType;
|
||||
|
||||
/// An enumeration of all supported shader models.
|
||||
typedef enum FfxShaderModel {
|
||||
|
||||
FFX_SHADER_MODEL_5_1, ///< Shader model 5.1.
|
||||
FFX_SHADER_MODEL_6_0, ///< Shader model 6.0.
|
||||
FFX_SHADER_MODEL_6_1, ///< Shader model 6.1.
|
||||
FFX_SHADER_MODEL_6_2, ///< Shader model 6.2.
|
||||
FFX_SHADER_MODEL_6_3, ///< Shader model 6.3.
|
||||
FFX_SHADER_MODEL_6_4, ///< Shader model 6.4.
|
||||
FFX_SHADER_MODEL_6_5, ///< Shader model 6.5.
|
||||
FFX_SHADER_MODEL_6_6, ///< Shader model 6.6.
|
||||
FFX_SHADER_MODEL_6_7, ///< Shader model 6.7.
|
||||
} FfxShaderModel;
|
||||
|
||||
// An enumeration for different resource types
|
||||
typedef enum FfxResourceType {
|
||||
|
||||
FFX_RESOURCE_TYPE_BUFFER, ///< The resource is a buffer.
|
||||
FFX_RESOURCE_TYPE_TEXTURE1D, ///< The resource is a 1-dimensional texture.
|
||||
FFX_RESOURCE_TYPE_TEXTURE2D, ///< The resource is a 2-dimensional texture.
|
||||
FFX_RESOURCE_TYPE_TEXTURE3D, ///< The resource is a 3-dimensional texture.
|
||||
} FfxResourceType;
|
||||
|
||||
/// An enumeration for different heap types
|
||||
typedef enum FfxHeapType {
|
||||
|
||||
FFX_HEAP_TYPE_DEFAULT = 0, ///< Local memory.
|
||||
FFX_HEAP_TYPE_UPLOAD ///< Heap used for uploading resources.
|
||||
} FfxHeapType;
|
||||
|
||||
/// An enumberation for different render job types
|
||||
typedef enum FfxGpuJobType {
|
||||
|
||||
FFX_GPU_JOB_CLEAR_FLOAT = 0, ///< The GPU job is performing a floating-point clear.
|
||||
FFX_GPU_JOB_COPY = 1, ///< The GPU job is performing a copy.
|
||||
FFX_GPU_JOB_COMPUTE = 2, ///< The GPU job is performing a compute dispatch.
|
||||
} FfxGpuJobType;
|
||||
|
||||
/// A typedef representing the graphics device.
|
||||
typedef void* FfxDevice;
|
||||
|
||||
/// A typedef representing a command list or command buffer.
|
||||
typedef void* FfxCommandList;
|
||||
|
||||
/// A typedef for a root signature.
|
||||
typedef void* FfxRootSignature;
|
||||
|
||||
/// A typedef for a pipeline state object.
|
||||
typedef void* FfxPipeline;
|
||||
|
||||
/// A structure encapasulating a collection of device capabilities.
|
||||
typedef struct FfxDeviceCapabilities {
|
||||
|
||||
FfxShaderModel minimumSupportedShaderModel; ///< The minimum shader model supported by the device.
|
||||
uint32_t waveLaneCountMin; ///< The minimum supported wavefront width.
|
||||
uint32_t waveLaneCountMax; ///< The maximum supported wavefront width.
|
||||
bool fp16Supported; ///< The device supports FP16 in hardware.
|
||||
bool raytracingSupported; ///< The device supports raytracing.
|
||||
} FfxDeviceCapabilities;
|
||||
|
||||
/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers.
|
||||
typedef struct FfxDimensions2D {
|
||||
|
||||
uint32_t width; ///< The width of a 2-dimensional range.
|
||||
uint32_t height; ///< The height of a 2-dimensional range.
|
||||
} FfxDimensions2D;
|
||||
|
||||
/// A structure encapsulating a 2-dimensional point,
|
||||
typedef struct FfxIntCoords2D {
|
||||
|
||||
int32_t x; ///< The x coordinate of a 2-dimensional point.
|
||||
int32_t y; ///< The y coordinate of a 2-dimensional point.
|
||||
} FfxIntCoords2D;
|
||||
|
||||
/// A structure encapsulating a 2-dimensional set of floating point coordinates.
|
||||
typedef struct FfxFloatCoords2D {
|
||||
|
||||
float x; ///< The x coordinate of a 2-dimensional point.
|
||||
float y; ///< The y coordinate of a 2-dimensional point.
|
||||
} FfxFloatCoords2D;
|
||||
|
||||
/// A structure describing a resource.
|
||||
typedef struct FfxResourceDescription {
|
||||
|
||||
FfxResourceType type; ///< The type of the resource.
|
||||
FfxSurfaceFormat format; ///< The surface format.
|
||||
uint32_t width; ///< The width of the resource.
|
||||
uint32_t height; ///< The height of the resource.
|
||||
uint32_t depth; ///< The depth of the resource.
|
||||
uint32_t mipCount; ///< Number of mips (or 0 for full mipchain).
|
||||
FfxResourceFlags flags; ///< A set of <c><i>FfxResourceFlags</i></c> flags.
|
||||
} FfxResourceDescription;
|
||||
|
||||
/// An outward facing structure containing a resource
|
||||
typedef struct FfxResource {
|
||||
void* resource; ///< pointer to the resource.
|
||||
wchar_t name[64];
|
||||
FfxResourceDescription description;
|
||||
FfxResourceStates state;
|
||||
bool isDepth;
|
||||
uint64_t descriptorData;
|
||||
} FfxResource;
|
||||
|
||||
/// An internal structure containing a handle to a resource and resource views
|
||||
typedef struct FfxResourceInternal {
|
||||
int32_t internalIndex; ///< The index of the resource.
|
||||
} FfxResourceInternal;
|
||||
|
||||
|
||||
/// A structure defining a resource bind point
|
||||
typedef struct FfxResourceBinding
|
||||
{
|
||||
uint32_t slotIndex;
|
||||
uint32_t resourceIdentifier;
|
||||
wchar_t name[64];
|
||||
}FfxResourceBinding;
|
||||
|
||||
/// A structure encapsulating a single pass of an algorithm.
|
||||
typedef struct FfxPipelineState {
|
||||
|
||||
FfxRootSignature rootSignature; ///< The pipelines rootSignature
|
||||
FfxPipeline pipeline; ///< The pipeline object
|
||||
uint32_t uavCount; ///< Count of UAVs used in this pipeline
|
||||
uint32_t srvCount; ///< Count of SRVs used in this pipeline
|
||||
uint32_t constCount; ///< Count of constant buffers used in this pipeline
|
||||
|
||||
FfxResourceBinding uavResourceBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as UAVs
|
||||
FfxResourceBinding srvResourceBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as SRVs
|
||||
FfxResourceBinding cbResourceBindings[FFX_MAX_NUM_CONST_BUFFERS]; ///< Array of ResourceIdentifiers bound as CBs
|
||||
} FfxPipelineState;
|
||||
|
||||
/// A structure containing the data required to create a resource.
|
||||
typedef struct FfxCreateResourceDescription {
|
||||
|
||||
FfxHeapType heapType; ///< The heap type to hold the resource, typically <c><i>FFX_HEAP_TYPE_DEFAULT</i></c>.
|
||||
FfxResourceDescription resourceDescription; ///< A resource description.
|
||||
FfxResourceStates initalState; ///< The initial resource state.
|
||||
uint32_t initDataSize; ///< Size of initial data buffer.
|
||||
void* initData; ///< Buffer containing data to fill the resource.
|
||||
const wchar_t* name; ///< Name of the resource.
|
||||
FfxResourceUsage usage; ///< Resource usage flags.
|
||||
uint32_t id; ///< Internal resource ID.
|
||||
} FfxCreateResourceDescription;
|
||||
|
||||
/// A structure containing the description used to create a
|
||||
/// <c><i>FfxPipeline</i></c> structure.
|
||||
///
|
||||
/// A pipeline is the name given to a shader and the collection of state that
|
||||
/// is required to dispatch it. In the context of FSR2 and its architecture
|
||||
/// this means that a <c><i>FfxPipelineDescription</i></c> will map to either a
|
||||
/// monolithic object in an explicit API (such as a
|
||||
/// <c><i>PipelineStateObject</i></c> in DirectX 12). Or a shader and some
|
||||
/// ancillary API objects (in something like DirectX 11).
|
||||
///
|
||||
/// The <c><i>contextFlags</i></c> field contains a copy of the flags passed
|
||||
/// to <c><i>ffxFsr2ContextCreate</i></c> via the <c><i>flags</i></c> field of
|
||||
/// the <c><i>FfxFsr2InitializationParams</i></c> structure. These flags are
|
||||
/// used to determine which permutation of a pipeline for a specific
|
||||
/// <c><i>FfxFsr2Pass</i></c> should be used to implement the features required
|
||||
/// by each application, as well as to acheive the best performance on specific
|
||||
/// target hardware configurations.
|
||||
///
|
||||
/// When using one of the provided backends for FSR2 (such as DirectX 12 or
|
||||
/// Vulkan) the data required to create a pipeline is compiled offline and
|
||||
/// included into the backend library that you are using. For cases where the
|
||||
/// backend interface is overriden by providing custom callback function
|
||||
/// implementations care should be taken to respect the contents of the
|
||||
/// <c><i>contextFlags</i></c> field in order to correctly support the options
|
||||
/// provided by FSR2, and acheive best performance.
|
||||
///
|
||||
/// @ingroup FSR2
|
||||
typedef struct FfxPipelineDescription {
|
||||
|
||||
uint32_t contextFlags; ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c> which were passed to the context.
|
||||
FfxFilterType* samplers; ///< Array of static samplers.
|
||||
size_t samplerCount; ///< The number of samples contained inside <c><i>samplers</i></c>.
|
||||
const uint32_t* rootConstantBufferSizes; ///< Array containing the sizes of the root constant buffers (count of 32 bit elements).
|
||||
uint32_t rootConstantBufferCount; ///< The number of root constants contained within <c><i>rootConstantBufferSizes</i></c>.
|
||||
} FfxPipelineDescription;
|
||||
|
||||
/// A structure containing a constant buffer.
|
||||
typedef struct FfxConstantBuffer {
|
||||
|
||||
uint32_t uint32Size; ///< Size of 32 bit chunks used in the constant buffer
|
||||
uint32_t data[FFX_MAX_CONST_SIZE]; ///< Constant buffer data
|
||||
}FfxConstantBuffer;
|
||||
|
||||
/// A structure describing a clear render job.
|
||||
typedef struct FfxClearFloatJobDescription {
|
||||
|
||||
float color[4]; ///< The clear color of the resource.
|
||||
FfxResourceInternal target; ///< The resource to be cleared.
|
||||
} FfxClearFloatJobDescription;
|
||||
|
||||
/// A structure describing a compute render job.
|
||||
typedef struct FfxComputeJobDescription {
|
||||
|
||||
FfxPipelineState pipeline; ///< Compute pipeline for the render job.
|
||||
uint32_t dimensions[3]; ///< Dispatch dimensions.
|
||||
FfxResourceInternal srvs[FFX_MAX_NUM_SRVS]; ///< SRV resources to be bound in the compute job.
|
||||
wchar_t srvNames[FFX_MAX_NUM_SRVS][64];
|
||||
FfxResourceInternal uavs[FFX_MAX_NUM_UAVS]; ///< UAV resources to be bound in the compute job.
|
||||
uint32_t uavMip[FFX_MAX_NUM_UAVS]; ///< Mip level of UAV resources to be bound in the compute job.
|
||||
wchar_t uavNames[FFX_MAX_NUM_UAVS][64];
|
||||
FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the compute job.
|
||||
wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][64];
|
||||
uint32_t cbSlotIndex[FFX_MAX_NUM_CONST_BUFFERS]; ///< Slot index in the descriptor table
|
||||
} FfxComputeJobDescription;
|
||||
|
||||
/// A structure describing a copy render job.
|
||||
typedef struct FfxCopyJobDescription
|
||||
{
|
||||
FfxResourceInternal src; ///< Source resource for the copy.
|
||||
FfxResourceInternal dst; ///< Destination resource for the copy.
|
||||
} FfxCopyJobDescription;
|
||||
|
||||
/// A structure describing a single render job.
|
||||
typedef struct FfxGpuJobDescription{
|
||||
|
||||
FfxGpuJobType jobType; ///< Type of the job.
|
||||
|
||||
union {
|
||||
FfxClearFloatJobDescription clearJobDescriptor; ///< Clear job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_CLEAR_FLOAT</i></c>.
|
||||
FfxCopyJobDescription copyJobDescriptor; ///< Copy job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_COPY</i></c>.
|
||||
FfxComputeJobDescription computeJobDescriptor; ///< Compute job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_COMPUTE</i></c>.
|
||||
};
|
||||
} FfxGpuJobDescription;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // #ifdef __cplusplus
|
||||
78
engine/thirdparty/amd-fsr2/ffx_util.h
vendored
Normal file
78
engine/thirdparty/amd-fsr2/ffx_util.h
vendored
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ffx_types.h"
|
||||
|
||||
/// The value of Pi.
|
||||
const float FFX_PI = 3.141592653589793f;
|
||||
|
||||
/// An epsilon value for floating point numbers.
|
||||
const float FFX_EPSILON = 1e-06f;
|
||||
|
||||
/// Helper macro to create the version number.
|
||||
#define FFX_MAKE_VERSION(major, minor, patch) ((major << 22) | (minor << 12) | patch)
|
||||
|
||||
///< Use this to specify no version.
|
||||
#define FFX_UNSPECIFIED_VERSION 0xFFFFAD00
|
||||
|
||||
/// Helper macro to avoid warnings about unused variables.
|
||||
#define FFX_UNUSED(x) ((void)(x))
|
||||
|
||||
/// Helper macro to align an integer to the specified power of 2 boundary
|
||||
#define FFX_ALIGN_UP(x, y) (((x) + ((y)-1)) & ~((y)-1))
|
||||
|
||||
/// Helper macro to check if a value is aligned.
|
||||
#define FFX_IS_ALIGNED(x) (((x) != 0) && ((x) & ((x)-1)))
|
||||
|
||||
/// Helper macro to stringify a value.
|
||||
#define FFX_STR(s) FFX_XSTR(s)
|
||||
#define FFX_XSTR(s) #s
|
||||
|
||||
/// Helper macro to forward declare a structure.
|
||||
#define FFX_FORWARD_DECLARE(x) typedef struct x x
|
||||
|
||||
/// Helper macro to return the maximum of two values.
|
||||
#define FFX_MAXIMUM(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
/// Helper macro to return the minimum of two values.
|
||||
#define FFX_MINIMUM(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
/// Helper macro to do safe free on a pointer.
|
||||
#define FFX_SAFE_FREE(x) \
|
||||
if (x) \
|
||||
free(x)
|
||||
|
||||
/// Helper macro to return the abs of an integer value.
|
||||
#define FFX_ABSOLUTE(x) (((x) < 0) ? (-(x)) : (x))
|
||||
|
||||
/// Helper macro to return sign of a value.
|
||||
#define FFX_SIGN(x) (((x) < 0) ? -1 : 1)
|
||||
|
||||
/// Helper macro to work out the number of elements in an array.
|
||||
#define FFX_ARRAY_ELEMENTS(x) (int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
|
||||
|
||||
/// The maximum length of a path that can be specified to the FidelityFX API.
|
||||
#define FFX_MAXIMUM_PATH (260)
|
||||
|
||||
/// Helper macro to check if the specified key is set in a bitfield.
|
||||
#define FFX_CONTAINS_FLAG(options, key) ((options & key) == key)
|
||||
136
engine/thirdparty/amd-fsr2/patches/0001-build-fixes.patch
vendored
Normal file
136
engine/thirdparty/amd-fsr2/patches/0001-build-fixes.patch
vendored
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
index 051018e437..3970aa7f5b 100644
|
||||
--- a/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
@@ -36,6 +36,15 @@
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
#endif
|
||||
|
||||
+#ifndef _countof
|
||||
+#define _countof(array) (sizeof(array) / sizeof(array[0]))
|
||||
+#endif
|
||||
+
|
||||
+#ifndef _MSC_VER
|
||||
+#include <wchar.h>
|
||||
+#define wcscpy_s wcscpy
|
||||
+#endif
|
||||
+
|
||||
// max queued frames for descriptor management
|
||||
static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16;
|
||||
|
||||
diff --git a/thirdparty/amd-fsr2/ffx_types.h b/thirdparty/amd-fsr2/ffx_types.h
|
||||
index 74edd192c4..f71b259cce 100644
|
||||
--- a/thirdparty/amd-fsr2/ffx_types.h
|
||||
+++ b/thirdparty/amd-fsr2/ffx_types.h
|
||||
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
+#include <stdlib.h>
|
||||
|
||||
#if defined (FFX_GCC)
|
||||
/// FidelityFX exported functions
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
index ebbe610ffa..31d68292d4 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
|
||||
index 7ae41cf0c1..3b86c17d4d 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
|
||||
index 15186e3bb6..8439c4e9d4 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
|
||||
index fcb2b76528..45ec5bdb86 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
|
||||
index f7cad59c20..7c3a4c2740 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
|
||||
index f0823c2bc8..8b4ebc6afc 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
|
||||
index 20e17eef8c..be4395aaed 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
index bebca91099..7d6a66b8ac 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
@@ -19,7 +19,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
-#version 450
|
||||
+//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
121
engine/thirdparty/amd-fsr2/patches/0002-godot-fsr2-options.patch
vendored
Normal file
121
engine/thirdparty/amd-fsr2/patches/0002-godot-fsr2-options.patch
vendored
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
index 3970aa7f5b..ec571b9cd2 100644
|
||||
--- a/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp
|
||||
@@ -952,6 +952,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
|
||||
context->constants.lumaMipDimensions[0] = uint32_t(context->constants.maxRenderSize[0] / mipDiv);
|
||||
context->constants.lumaMipDimensions[1] = uint32_t(context->constants.maxRenderSize[1] / mipDiv);
|
||||
|
||||
+ memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix));
|
||||
+
|
||||
// reactive mask bias
|
||||
const int32_t threadGroupWorkRegionDim = 8;
|
||||
const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
|
||||
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.h b/thirdparty/amd-fsr2/ffx_fsr2.h
|
||||
index 2a1c74abb1..dfcd4caf35 100644
|
||||
--- a/thirdparty/amd-fsr2/ffx_fsr2.h
|
||||
+++ b/thirdparty/amd-fsr2/ffx_fsr2.h
|
||||
@@ -146,6 +146,7 @@ typedef struct FfxFsr2DispatchDescription {
|
||||
float autoReactiveScale; ///< A value to scale the reactive mask
|
||||
float autoReactiveMax; ///< A value to clamp the reactive mask
|
||||
|
||||
+ float reprojectionMatrix[16]; ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth.
|
||||
} FfxFsr2DispatchDescription;
|
||||
|
||||
/// A structure encapsulating the parameters for automatic generation of a reactive mask
|
||||
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_private.h b/thirdparty/amd-fsr2/ffx_fsr2_private.h
|
||||
index 6b5fbc5117..8a9aec5778 100644
|
||||
--- a/thirdparty/amd-fsr2/ffx_fsr2_private.h
|
||||
+++ b/thirdparty/amd-fsr2/ffx_fsr2_private.h
|
||||
@@ -44,6 +44,9 @@ typedef struct Fsr2Constants {
|
||||
float deltaTime;
|
||||
float dynamicResChangeFactor;
|
||||
float viewSpaceToMetersFactor;
|
||||
+
|
||||
+ float pad;
|
||||
+ float reprojectionMatrix[16];
|
||||
} Fsr2Constants;
|
||||
|
||||
struct FfxFsr2ContextDescription;
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
index 31d68292d4..2e98c8a6c5 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
|
||||
@@ -35,7 +35,7 @@
|
||||
#endif
|
||||
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3
|
||||
#define FSR2_BIND_SRV_LOCK_STATUS 4
|
||||
-#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5
|
||||
+//#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5
|
||||
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6
|
||||
#define FSR2_BIND_SRV_LUMA_INSTABILITY 7
|
||||
#define FSR2_BIND_SRV_LANCZOS_LUT 8
|
||||
@@ -52,6 +52,10 @@
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 18
|
||||
|
||||
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
+#define FSR2_BIND_SRV_INPUT_DEPTH 5
|
||||
+#endif
|
||||
+
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
|
||||
index 10da13fb81..b610037cc6 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
|
||||
@@ -52,6 +52,9 @@
|
||||
FfxFloat32 fDeltaTime;
|
||||
FfxFloat32 fDynamicResChangeFactor;
|
||||
FfxFloat32 fViewSpaceToMetersFactor;
|
||||
+
|
||||
+ FfxFloat32 fPad;
|
||||
+ mat4 mReprojectionMatrix;
|
||||
} cbFSR2;
|
||||
#endif
|
||||
|
||||
@@ -317,7 +320,11 @@ FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
|
||||
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
|
||||
{
|
||||
+#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
|
||||
+ return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f);
|
||||
+#else
|
||||
return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
|
||||
+#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -354,6 +361,16 @@ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
|
||||
{
|
||||
FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
|
||||
|
||||
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
+ bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f)));
|
||||
+ if (bInvalidMotionVector)
|
||||
+ {
|
||||
+ FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos);
|
||||
+ FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
|
||||
+ fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
|
||||
|
||||
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
|
||||
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
index 7d6a66b8ac..5c042c332a 100644
|
||||
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
|
||||
@@ -40,6 +40,10 @@
|
||||
#define FSR2_BIND_CB_FSR2 11
|
||||
#define FSR2_BIND_CB_REACTIVE 12
|
||||
|
||||
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
+#define FSR2_BIND_SRV_INPUT_DEPTH 13
|
||||
+#endif
|
||||
+
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
429
engine/thirdparty/amd-fsr2/shaders/ffx_common_types.h
vendored
Normal file
429
engine/thirdparty/amd-fsr2/shaders/ffx_common_types.h
vendored
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
#ifndef FFX_COMMON_TYPES_H
|
||||
#define FFX_COMMON_TYPES_H
|
||||
|
||||
#if defined(FFX_CPU)
|
||||
#define FFX_PARAMETER_IN
|
||||
#define FFX_PARAMETER_OUT
|
||||
#define FFX_PARAMETER_INOUT
|
||||
#elif defined(FFX_HLSL)
|
||||
#define FFX_PARAMETER_IN in
|
||||
#define FFX_PARAMETER_OUT out
|
||||
#define FFX_PARAMETER_INOUT inout
|
||||
#elif defined(FFX_GLSL)
|
||||
#define FFX_PARAMETER_IN in
|
||||
#define FFX_PARAMETER_OUT out
|
||||
#define FFX_PARAMETER_INOUT inout
|
||||
#endif // #if defined(FFX_CPU)
|
||||
|
||||
#if defined(FFX_CPU)
|
||||
/// A typedef for a boolean value.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef bool FfxBoolean;
|
||||
|
||||
/// A typedef for a unsigned 8bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint8_t FfxUInt8;
|
||||
|
||||
/// A typedef for a unsigned 16bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint16_t FfxUInt16;
|
||||
|
||||
/// A typedef for a unsigned 32bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint32_t FfxUInt32;
|
||||
|
||||
/// A typedef for a unsigned 64bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint64_t FfxUInt64;
|
||||
|
||||
/// A typedef for a signed 8bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef int8_t FfxInt8;
|
||||
|
||||
/// A typedef for a signed 16bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef int16_t FfxInt16;
|
||||
|
||||
/// A typedef for a signed 32bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef int32_t FfxInt32;
|
||||
|
||||
/// A typedef for a signed 64bit integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef int64_t FfxInt64;
|
||||
|
||||
/// A typedef for a floating point value.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef float FfxFloat32;
|
||||
|
||||
/// A typedef for a 2-dimensional floating point value.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef float FfxFloat32x2[2];
|
||||
|
||||
/// A typedef for a 3-dimensional floating point value.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef float FfxFloat32x3[3];
|
||||
|
||||
/// A typedef for a 4-dimensional floating point value.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef float FfxFloat32x4[4];
|
||||
|
||||
/// A typedef for a 2-dimensional 32bit unsigned integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint32_t FfxUInt32x2[2];
|
||||
|
||||
/// A typedef for a 3-dimensional 32bit unsigned integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint32_t FfxUInt32x3[3];
|
||||
|
||||
/// A typedef for a 4-dimensional 32bit unsigned integer.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
typedef uint32_t FfxUInt32x4[4];
|
||||
#endif // #if defined(FFX_CPU)
|
||||
|
||||
#if defined(FFX_HLSL)
|
||||
/// A typedef for a boolean value.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef bool FfxBoolean;
|
||||
|
||||
#if FFX_HLSL_6_2
|
||||
typedef float32_t FfxFloat32;
|
||||
typedef float32_t2 FfxFloat32x2;
|
||||
typedef float32_t3 FfxFloat32x3;
|
||||
typedef float32_t4 FfxFloat32x4;
|
||||
|
||||
/// A typedef for a unsigned 32bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef uint32_t FfxUInt32;
|
||||
typedef uint32_t2 FfxUInt32x2;
|
||||
typedef uint32_t3 FfxUInt32x3;
|
||||
typedef uint32_t4 FfxUInt32x4;
|
||||
typedef int32_t FfxInt32;
|
||||
typedef int32_t2 FfxInt32x2;
|
||||
typedef int32_t3 FfxInt32x3;
|
||||
typedef int32_t4 FfxInt32x4;
|
||||
#else
|
||||
#define FfxFloat32 float
|
||||
#define FfxFloat32x2 float2
|
||||
#define FfxFloat32x3 float3
|
||||
#define FfxFloat32x4 float4
|
||||
|
||||
/// A typedef for a unsigned 32bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef uint FfxUInt32;
|
||||
typedef uint2 FfxUInt32x2;
|
||||
typedef uint3 FfxUInt32x3;
|
||||
typedef uint4 FfxUInt32x4;
|
||||
typedef int FfxInt32;
|
||||
typedef int2 FfxInt32x2;
|
||||
typedef int3 FfxInt32x3;
|
||||
typedef int4 FfxInt32x4;
|
||||
#endif // #if defined(FFX_HLSL_6_2)
|
||||
|
||||
#if FFX_HALF
|
||||
#if FFX_HLSL_6_2
|
||||
typedef float16_t FfxFloat16;
|
||||
typedef float16_t2 FfxFloat16x2;
|
||||
typedef float16_t3 FfxFloat16x3;
|
||||
typedef float16_t4 FfxFloat16x4;
|
||||
|
||||
/// A typedef for an unsigned 16bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef uint16_t FfxUInt16;
|
||||
typedef uint16_t2 FfxUInt16x2;
|
||||
typedef uint16_t3 FfxUInt16x3;
|
||||
typedef uint16_t4 FfxUInt16x4;
|
||||
|
||||
/// A typedef for a signed 16bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef int16_t FfxInt16;
|
||||
typedef int16_t2 FfxInt16x2;
|
||||
typedef int16_t3 FfxInt16x3;
|
||||
typedef int16_t4 FfxInt16x4;
|
||||
#else
|
||||
typedef min16float FfxFloat16;
|
||||
typedef min16float2 FfxFloat16x2;
|
||||
typedef min16float3 FfxFloat16x3;
|
||||
typedef min16float4 FfxFloat16x4;
|
||||
|
||||
/// A typedef for an unsigned 16bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef min16uint FfxUInt16;
|
||||
typedef min16uint2 FfxUInt16x2;
|
||||
typedef min16uint3 FfxUInt16x3;
|
||||
typedef min16uint4 FfxUInt16x4;
|
||||
|
||||
/// A typedef for a signed 16bit integer.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
typedef min16int FfxInt16;
|
||||
typedef min16int2 FfxInt16x2;
|
||||
typedef min16int3 FfxInt16x3;
|
||||
typedef min16int4 FfxInt16x4;
|
||||
#endif // FFX_HLSL_6_2
|
||||
#endif // FFX_HALF
|
||||
#endif // #if defined(FFX_HLSL)
|
||||
|
||||
#if defined(FFX_GLSL)
|
||||
/// A typedef for a boolean value.
|
||||
///
|
||||
/// @ingroup GPU
|
||||
#define FfxBoolean bool
|
||||
#define FfxFloat32 float
|
||||
#define FfxFloat32x2 vec2
|
||||
#define FfxFloat32x3 vec3
|
||||
#define FfxFloat32x4 vec4
|
||||
#define FfxUInt32 uint
|
||||
#define FfxUInt32x2 uvec2
|
||||
#define FfxUInt32x3 uvec3
|
||||
#define FfxUInt32x4 uvec4
|
||||
#define FfxInt32 int
|
||||
#define FfxInt32x2 ivec2
|
||||
#define FfxInt32x3 ivec3
|
||||
#define FfxInt32x4 ivec4
|
||||
#if FFX_HALF
|
||||
#define FfxFloat16 float16_t
|
||||
#define FfxFloat16x2 f16vec2
|
||||
#define FfxFloat16x3 f16vec3
|
||||
#define FfxFloat16x4 f16vec4
|
||||
#define FfxUInt16 uint16_t
|
||||
#define FfxUInt16x2 u16vec2
|
||||
#define FfxUInt16x3 u16vec3
|
||||
#define FfxUInt16x4 u16vec4
|
||||
#define FfxInt16 int16_t
|
||||
#define FfxInt16x2 i16vec2
|
||||
#define FfxInt16x3 i16vec3
|
||||
#define FfxInt16x4 i16vec4
|
||||
#endif // FFX_HALF
|
||||
#endif // #if defined(FFX_GLSL)
|
||||
|
||||
// Global toggles:
|
||||
// #define FFX_HALF (1)
|
||||
// #define FFX_HLSL_6_2 (1)
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
#if FFX_HLSL_6_2
|
||||
|
||||
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
|
||||
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
|
||||
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
|
||||
|
||||
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
|
||||
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
|
||||
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
|
||||
|
||||
#else //FFX_HLSL_6_2
|
||||
|
||||
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName;
|
||||
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<min16##BaseComponentType, COL> TypeName;
|
||||
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
|
||||
|
||||
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType );
|
||||
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
|
||||
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
|
||||
|
||||
#endif //FFX_HLSL_6_2
|
||||
|
||||
#else //FFX_HALF
|
||||
|
||||
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
|
||||
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
|
||||
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
|
||||
|
||||
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
|
||||
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
|
||||
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
|
||||
|
||||
#endif //FFX_HALF
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
// Common typedefs:
|
||||
#if defined(FFX_HLSL)
|
||||
FFX_MIN16_SCALAR( FFX_MIN16_F , float );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 );
|
||||
|
||||
FFX_MIN16_SCALAR( FFX_MIN16_I, int );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 );
|
||||
|
||||
FFX_MIN16_SCALAR( FFX_MIN16_U, uint );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 );
|
||||
FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 );
|
||||
|
||||
FFX_16BIT_SCALAR( FFX_F16_t , float );
|
||||
FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 );
|
||||
FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 );
|
||||
FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 );
|
||||
|
||||
FFX_16BIT_SCALAR( FFX_I16_t, int );
|
||||
FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 );
|
||||
FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 );
|
||||
FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 );
|
||||
|
||||
FFX_16BIT_SCALAR( FFX_U16_t, uint );
|
||||
FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 );
|
||||
FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 );
|
||||
FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 );
|
||||
|
||||
#define TYPEDEF_MIN16_TYPES(Prefix) \
|
||||
typedef FFX_MIN16_F Prefix##_F; \
|
||||
typedef FFX_MIN16_F2 Prefix##_F2; \
|
||||
typedef FFX_MIN16_F3 Prefix##_F3; \
|
||||
typedef FFX_MIN16_F4 Prefix##_F4; \
|
||||
typedef FFX_MIN16_I Prefix##_I; \
|
||||
typedef FFX_MIN16_I2 Prefix##_I2; \
|
||||
typedef FFX_MIN16_I3 Prefix##_I3; \
|
||||
typedef FFX_MIN16_I4 Prefix##_I4; \
|
||||
typedef FFX_MIN16_U Prefix##_U; \
|
||||
typedef FFX_MIN16_U2 Prefix##_U2; \
|
||||
typedef FFX_MIN16_U3 Prefix##_U3; \
|
||||
typedef FFX_MIN16_U4 Prefix##_U4;
|
||||
|
||||
#define TYPEDEF_16BIT_TYPES(Prefix) \
|
||||
typedef FFX_16BIT_F Prefix##_F; \
|
||||
typedef FFX_16BIT_F2 Prefix##_F2; \
|
||||
typedef FFX_16BIT_F3 Prefix##_F3; \
|
||||
typedef FFX_16BIT_F4 Prefix##_F4; \
|
||||
typedef FFX_16BIT_I Prefix##_I; \
|
||||
typedef FFX_16BIT_I2 Prefix##_I2; \
|
||||
typedef FFX_16BIT_I3 Prefix##_I3; \
|
||||
typedef FFX_16BIT_I4 Prefix##_I4; \
|
||||
typedef FFX_16BIT_U Prefix##_U; \
|
||||
typedef FFX_16BIT_U2 Prefix##_U2; \
|
||||
typedef FFX_16BIT_U3 Prefix##_U3; \
|
||||
typedef FFX_16BIT_U4 Prefix##_U4;
|
||||
|
||||
#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \
|
||||
typedef FfxFloat32 Prefix##_F; \
|
||||
typedef FfxFloat32x2 Prefix##_F2; \
|
||||
typedef FfxFloat32x3 Prefix##_F3; \
|
||||
typedef FfxFloat32x4 Prefix##_F4; \
|
||||
typedef FfxInt32 Prefix##_I; \
|
||||
typedef FfxInt32x2 Prefix##_I2; \
|
||||
typedef FfxInt32x3 Prefix##_I3; \
|
||||
typedef FfxInt32x4 Prefix##_I4; \
|
||||
typedef FfxUInt32 Prefix##_U; \
|
||||
typedef FfxUInt32x2 Prefix##_U2; \
|
||||
typedef FfxUInt32x3 Prefix##_U3; \
|
||||
typedef FfxUInt32x4 Prefix##_U4;
|
||||
#endif // #if defined(FFX_HLSL)
|
||||
|
||||
#if defined(FFX_GLSL)
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
#define FFX_MIN16_F float16_t
|
||||
#define FFX_MIN16_F2 f16vec2
|
||||
#define FFX_MIN16_F3 f16vec3
|
||||
#define FFX_MIN16_F4 f16vec4
|
||||
|
||||
#define FFX_MIN16_I int16_t
|
||||
#define FFX_MIN16_I2 i16vec2
|
||||
#define FFX_MIN16_I3 i16vec3
|
||||
#define FFX_MIN16_I4 i16vec4
|
||||
|
||||
#define FFX_MIN16_U uint16_t
|
||||
#define FFX_MIN16_U2 u16vec2
|
||||
#define FFX_MIN16_U3 u16vec3
|
||||
#define FFX_MIN16_U4 u16vec4
|
||||
|
||||
#define FFX_16BIT_F float16_t
|
||||
#define FFX_16BIT_F2 f16vec2
|
||||
#define FFX_16BIT_F3 f16vec3
|
||||
#define FFX_16BIT_F4 f16vec4
|
||||
|
||||
#define FFX_16BIT_I int16_t
|
||||
#define FFX_16BIT_I2 i16vec2
|
||||
#define FFX_16BIT_I3 i16vec3
|
||||
#define FFX_16BIT_I4 i16vec4
|
||||
|
||||
#define FFX_16BIT_U uint16_t
|
||||
#define FFX_16BIT_U2 u16vec2
|
||||
#define FFX_16BIT_U3 u16vec3
|
||||
#define FFX_16BIT_U4 u16vec4
|
||||
|
||||
#else // FFX_HALF
|
||||
|
||||
#define FFX_MIN16_F float
|
||||
#define FFX_MIN16_F2 vec2
|
||||
#define FFX_MIN16_F3 vec3
|
||||
#define FFX_MIN16_F4 vec4
|
||||
|
||||
#define FFX_MIN16_I int
|
||||
#define FFX_MIN16_I2 ivec2
|
||||
#define FFX_MIN16_I3 ivec3
|
||||
#define FFX_MIN16_I4 ivec4
|
||||
|
||||
#define FFX_MIN16_U uint
|
||||
#define FFX_MIN16_U2 uvec2
|
||||
#define FFX_MIN16_U3 uvec3
|
||||
#define FFX_MIN16_U4 uvec4
|
||||
|
||||
#define FFX_16BIT_F float
|
||||
#define FFX_16BIT_F2 vec2
|
||||
#define FFX_16BIT_F3 vec3
|
||||
#define FFX_16BIT_F4 vec4
|
||||
|
||||
#define FFX_16BIT_I int
|
||||
#define FFX_16BIT_I2 ivec2
|
||||
#define FFX_16BIT_I3 ivec3
|
||||
#define FFX_16BIT_I4 ivec4
|
||||
|
||||
#define FFX_16BIT_U uint
|
||||
#define FFX_16BIT_U2 uvec2
|
||||
#define FFX_16BIT_U3 uvec3
|
||||
#define FFX_16BIT_U4 uvec4
|
||||
|
||||
#endif // FFX_HALF
|
||||
|
||||
#endif // #if defined(FFX_GLSL)
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
#endif // #ifndef FFX_COMMON_TYPES_H
|
||||
52
engine/thirdparty/amd-fsr2/shaders/ffx_core.h
vendored
Normal file
52
engine/thirdparty/amd-fsr2/shaders/ffx_core.h
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/// @defgroup Core
|
||||
/// @defgroup HLSL
|
||||
/// @defgroup GLSL
|
||||
/// @defgroup GPU
|
||||
/// @defgroup CPU
|
||||
/// @defgroup CAS
|
||||
/// @defgroup FSR1
|
||||
|
||||
#if !defined(FFX_CORE_H)
|
||||
#define FFX_CORE_H
|
||||
|
||||
#include "ffx_common_types.h"
|
||||
|
||||
#if defined(FFX_CPU)
|
||||
#include "ffx_core_cpu.h"
|
||||
#endif // #if defined(FFX_CPU)
|
||||
|
||||
#if defined(FFX_GLSL) && defined(FFX_GPU)
|
||||
#include "ffx_core_glsl.h"
|
||||
#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_HLSL) && defined(FFX_GPU)
|
||||
#include "ffx_core_hlsl.h"
|
||||
#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#include "ffx_core_gpu_common.h"
|
||||
#include "ffx_core_gpu_common_half.h"
|
||||
#include "ffx_core_portability.h"
|
||||
#endif // #if defined(FFX_GPU)
|
||||
#endif // #if !defined(FFX_CORE_H)
|
||||
332
engine/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
vendored
Normal file
332
engine/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
vendored
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/// A define for a true value in a boolean expression.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
#define FFX_TRUE (1)
|
||||
|
||||
/// A define for a false value in a boolean expression.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
#define FFX_FALSE (0)
|
||||
|
||||
#if !defined(FFX_STATIC)
|
||||
/// A define to abstract declaration of static variables and functions.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
#define FFX_STATIC static
|
||||
#endif // #if !defined(FFX_STATIC)
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
#endif
|
||||
|
||||
/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer.
|
||||
///
|
||||
/// @param [in] x A 32bit floating value.
|
||||
///
|
||||
/// @returns
|
||||
/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
|
||||
{
|
||||
union
|
||||
{
|
||||
FfxFloat32 f;
|
||||
FfxUInt32 u;
|
||||
} bits;
|
||||
|
||||
bits.f = x;
|
||||
return bits.u;
|
||||
}
|
||||
|
||||
FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
|
||||
{
|
||||
return a[0] * b[0] + a[1] * b[1];
|
||||
}
|
||||
|
||||
FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
|
||||
{
|
||||
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
|
||||
}
|
||||
|
||||
FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
|
||||
{
|
||||
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
|
||||
}
|
||||
|
||||
/// Compute the linear interopation between two values.
|
||||
///
|
||||
/// Implemented by calling the GLSL <c><i>mix</i></c> instrinsic function. Implements the
|
||||
/// following math:
|
||||
///
|
||||
/// (1 - t) * x + t * y
|
||||
///
|
||||
/// @param [in] x The first value to lerp between.
|
||||
/// @param [in] y The second value to lerp between.
|
||||
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
|
||||
///
|
||||
/// @returns
|
||||
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
|
||||
{
|
||||
return y * t + (-x * t + x);
|
||||
}
|
||||
|
||||
/// Compute the reciprocal of a value.
|
||||
///
|
||||
/// @param [in] x The value to compute the reciprocal for.
|
||||
///
|
||||
/// @returns
|
||||
/// The reciprocal value of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
|
||||
{
|
||||
return 1.0f / a;
|
||||
}
|
||||
|
||||
/// Compute the square root of a value.
|
||||
///
|
||||
/// @param [in] x The first value to compute the min of.
|
||||
///
|
||||
/// @returns
|
||||
/// The the square root of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
|
||||
{
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
|
||||
{
|
||||
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
|
||||
}
|
||||
|
||||
/// Compute the fractional part of a decimal value.
|
||||
///
|
||||
/// This function calculates <c><i>x - floor(x)</i></c>.
|
||||
///
|
||||
/// @param [in] x The value to compute the fractional part from.
|
||||
///
|
||||
/// @returns
|
||||
/// The fractional part of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
{
    // fract(a) == a - floor(a); result is in [0, 1) for finite inputs.
    const FfxFloat32 wholePart = floor(a);
    return a - wholePart;
}
|
||||
|
||||
/// Compute the reciprocal square root of a value.
|
||||
///
|
||||
/// @param [in] x The value to compute the reciprocal for.
|
||||
///
|
||||
/// @returns
|
||||
/// The reciprocal square root value of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
{
    // Composed from the exact sqrt and reciprocal helpers; this CPU
    // reference path does not use a fast inverse-sqrt approximation.
    const FfxFloat32 root = ffxSqrt(a);
    return ffxReciprocal(root);
}
|
||||
|
||||
/// Return the smaller of two 32-bit floating point values.
FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
{
    if (x < y) {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Return the smaller of two 32-bit unsigned integer values.
FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
{
    if (x < y) {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Return the larger of two 32-bit floating point values.
FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
{
    if (x > y) {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Return the larger of two 32-bit unsigned integer values.
FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
{
    if (x > y) {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Clamp a value to a [0..1] range.
|
||||
///
|
||||
/// @param [in] x The value to clamp to [0..1] range.
|
||||
///
|
||||
/// @returns
|
||||
/// The clamped version of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
{
    // Clamp to the lower bound first, then the upper bound.
    const FfxFloat32 clampedLow = ffxMax(0.0f, a);
    return ffxMin(1.0f, clampedLow);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Component-wise add of a scalar to a 3-component vector: d = a + b.
FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    for (int component = 0; component < 3; ++component) {
        d[component] = a[component] + b;
    }
}
|
||||
|
||||
/// Component-wise copy of a 3-component vector: d = a.
FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    for (int component = 0; component < 3; ++component) {
        d[component] = a[component];
    }
}
|
||||
|
||||
/// Component-wise multiply of two 3-component vectors: d = a * b.
FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
    for (int component = 0; component < 3; ++component) {
        d[component] = a[component] * b[component];
    }
}
|
||||
|
||||
/// Component-wise multiply of a 3-component vector by a scalar: d = a * b.
FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    for (int component = 0; component < 3; ++component) {
        d[component] = a[component] * b;
    }
}
|
||||
|
||||
/// Component-wise reciprocal of a 3-component vector: d = 1 / a.
FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    for (int component = 0; component < 3; ++component) {
        d[component] = ffxReciprocal(a[component]);
    }
}
|
||||
|
||||
/// Convert FfxFloat32 to half (in lower 16-bits of output).
|
||||
///
|
||||
/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
|
||||
///
|
||||
/// The function supports denormals.
|
||||
///
|
||||
/// Some conversion rules are to make computations possibly "safer" on the GPU,
|
||||
/// -INF & -NaN -> -65504
|
||||
/// +INF & +NaN -> +65504
|
||||
///
|
||||
/// @param [in] f The 32bit floating point value to convert.
|
||||
///
|
||||
/// @returns
|
||||
/// The closest 16bit floating point value to <c><i>f</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
{
    // Table-driven conversion: both tables are indexed by the top 9 bits of
    // the IEEE-754 binary32 value (1 sign bit + 8 exponent bits), giving 512
    // entries. `base` supplies the half-precision sign/exponent pattern (with
    // out-of-range exponents saturated to +/-65504, i.e. 0x7bff/0xfbff), and
    // `shift` supplies how far the 23-bit mantissa must be shifted to land in
    // the 10-bit half mantissa (0x18 == 24 discards the mantissa entirely for
    // zero/saturated cases, 0x0d == 13 is the normal case).
    static FfxUInt16 base[512] = {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
        0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
        0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
        0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
        0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
    };

    // Mantissa shift amounts, same 9-bit (sign|exponent) index as `base`.
    static FfxUInt8 shift[512] = {
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
    };

    // Reinterpret the float's bits as an unsigned integer via a union.
    // NOTE(review): union type punning is well-defined in C but formally UB in
    // C++; it is widely supported by compilers, which is presumably why the
    // original relies on it here — confirm against the project's toolchain.
    union
    {
        FfxFloat32 f;
        FfxUInt32 u;
    } bits;

    bits.f = f;
    FfxUInt32 u = bits.u;
    // Top 9 bits (sign + exponent) select the table entries.
    FfxUInt32 i = u >> 23;
    // Combine the half sign/exponent pattern with the shifted 23-bit mantissa.
    return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
}
|
||||
|
||||
/// Pack 2x32-bit floating point values in a single 32bit value.
|
||||
///
|
||||
/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
|
||||
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
|
||||
/// 32bit unsigned integer respectively.
|
||||
///
|
||||
/// @param [in] value A 2-dimensional floating point value to convert and pack.
|
||||
///
|
||||
/// @returns
|
||||
/// A packed 32bit value containing 2 16bit floating point values.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
{
    // X component occupies the low 16 bits, Y the high 16 bits, matching
    // the GLSL packHalf2x16 layout.
    const FfxUInt32 lowHalf  = f32tof16(a[0]);
    const FfxUInt32 highHalf = f32tof16(a[1]);
    return lowHalf + (highHalf << 16);
}
|
||||
1669
engine/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
vendored
Normal file
1669
engine/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
2784
engine/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
vendored
Normal file
2784
engine/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
2978
engine/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
vendored
Normal file
2978
engine/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1502
engine/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
vendored
Normal file
1502
engine/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
50
engine/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
vendored
Normal file
50
engine/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/// Portability shim: add a scalar to every component of a 3-vector.
FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    // `d` is taken by value, so returning directly is equivalent to the
    // assign-then-return form.
    return a + ffxBroadcast3(b);
}
|
||||
|
||||
/// Portability shim: copy a 3-vector.
FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    // `d` is taken by value, so returning `a` directly is equivalent.
    return a;
}
|
||||
|
||||
/// Portability shim: component-wise multiply of two 3-vectors.
FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
    // `d` is taken by value, so returning directly is equivalent.
    return a * b;
}
|
||||
|
||||
/// Portability shim: multiply every component of a 3-vector by a scalar.
FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    // `d` is taken by value, so returning directly is equivalent.
    return a * ffxBroadcast3(b);
}
|
||||
|
||||
/// Portability shim: component-wise reciprocal of a 3-vector.
FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    // `d` is taken by value, so returning directly is equivalent.
    return rcp(a);
}
|
||||
1250
engine/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
vendored
Normal file
1250
engine/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
295
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
vendored
Normal file
295
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
vendored
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_ACCUMULATE_H
|
||||
#define FFX_FSR2_ACCUMULATE_H
|
||||
|
||||
/// Convert a UV-space motion vector to a speed in display-resolution pixels.
FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
{
    const FfxFloat32x2 fPixelMotion = fMotionVector * DisplaySize();
    return length(fPixelMotion);
}
|
||||
#if FFX_HALF
|
||||
/// Half-precision variant: motion-vector speed in display pixels.
FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
{
    const FFX_MIN16_F2 fPixelMotion = fMotionVector * FFX_MIN16_F2(DisplaySize());
    return length(fPixelMotion);
}
|
||||
#endif
|
||||
|
||||
/// Blend the upsampled color for this frame into the accumulated history color.
///
/// @param [in]     params                   Per-pixel common parameters for the accumulation pass.
/// @param [in,out] fHistoryColor            Reprojected history color (YCoCg in, RGB out).
/// @param [in]     fAccumulation            Per-channel accumulation weight so far.
/// @param [in]     fUpsampledColorAndWeight Upsampled color (xyz, YCoCg) and its weight (w).
void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
{
    // Avoid invalid values when accumulation and upsampled weight is 0
    fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation)
    fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
#endif

    // Blend factor is this frame's weight relative to the total accumulation.
    const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation;
    fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);

    // Convert the blended result back from YCoCg to RGB for output/storage.
    fHistoryColor = YCoCgToRGB(fHistoryColor);

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fHistoryColor = InverseTonemap(fHistoryColor);
#endif
}
|
||||
|
||||
/// Rectify (clamp/relax) the history color against the current-frame
/// neighborhood clipping box, and scale down accumulation when the history
/// falls outside the box.
///
/// Fix: renamed the misspelled local `fVecolityFactor` to `fVelocityFactor`;
/// all computations are otherwise unchanged.
///
/// @param [in]     params                     Per-pixel common parameters.
/// @param [in]     clippingBox                Neighborhood rectification box (center, extents, AABB).
/// @param [in,out] fHistoryColor              History color to rectify (YCoCg).
/// @param [in,out] fAccumulation              Accumulation weight, reduced on rectification.
/// @param [in]     fLockContributionThisFrame Lock contribution, lets locked pixels keep history.
/// @param [in]     fTemporalReactiveFactor    Temporal reactive factor (currently unused here).
/// @param [in]     fLumaInstabilityFactor     Luma instability, also lets history survive clamping.
void RectifyHistory(
    const AccumulationPassCommonParams params,
    RectificationBox clippingBox,
    FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
    FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
    FfxFloat32 fLockContributionThisFrame,
    FfxFloat32 fTemporalReactiveFactor,
    FfxFloat32 fLumaInstabilityFactor)
{
    // Larger upscale ratios widen the box (capped at 20x) so history is
    // clamped less aggressively when fewer real samples exist per pixel.
    FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));

    // Fast motion, depth clips, or reactive masks pull the box back toward 1x.
    const FfxFloat32 fVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
    const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVelocityFactor));
    FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);

    FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
    FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
    FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
    FfxFloat32x3 boxCenter = clippingBox.boxCenter;
    FfxFloat32 boxVecSize = length(clippingBox.boxVec);

    // Never exceed the true neighborhood AABB.
    boxMin = ffxMax(clippingBox.aabbMin, boxMin);
    boxMax = ffxMin(clippingBox.aabbMax, boxMax);

    if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) {

        const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);

        // Locks and luma instability allow the unclamped history to contribute.
        FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;

        const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
        const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f);
        fHistoryContribution *= fReactiveContribution;

        // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
        fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution));

        // Scale accumulation using rectification info
        const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f));
        fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
    }
}
|
||||
|
||||
/// Commit the final upscaled color for this display-resolution pixel.
void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
{
    StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
}
|
||||
|
||||
/// Age or kill the pixel lock based on predicted motion, then store it.
///
/// @param [in] params           Per-pixel common parameters.
/// @param [in] fLockStatus      Current lock status (lifetime, luma).
/// @param [in] fUpsampledWeight This frame's upsampled sample weight.
void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
{
    // We expect similar motion next frame: predict where this pixel lands and
    // kill the lock if that lands off-screen, so locks don't clamp to borders.
    FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
    if (IsUvInside(fEstimatedUvNextFrame)) {
        // Decrease lock lifetime proportionally to this frame's sample weight.
        const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
        const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax);
        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
    } else {
        KillLock(fLockStatus);
    }

    StoreLockStatus(params.iPxHrPos, fLockStatus);
}
|
||||
|
||||
|
||||
/// Compute the base per-channel accumulation weight for the history blend.
///
/// @param [in] params                 Per-pixel common parameters.
/// @param [in] fThisFrameReactiveFactor Combined reactive factor for this frame.
/// @param [in] bInMotionLastFrame     Whether the pixel was moving last frame.
/// @param [in] fUpsampledWeight       This frame's upsampled sample weight.
/// @param [in] lockState              Lock state (currently unused in this function).
///
/// @returns
///     The accumulation weight broadcast to three channels.
FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
{
    // Always assume max accumulation was reached
    // Reactivity, depth clips, and missing history samples all cut the weight.
    FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);

    // Moving pixels (now or last frame) lean more on the current sample weight.
    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)))));

    // At high velocity, fall back toward the raw upsampled weight entirely.
    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20))));

    return fBaseAccumulation.xxx;
}
|
||||
|
||||
/// Detect temporal luma instability (e.g. shimmer) by comparing the current
/// frame's luma against a 4-entry per-pixel luma history, and update that
/// history.
///
/// @param [in] params                   Per-pixel common parameters.
/// @param [in] clippingBox              Neighborhood box; boxCenter.x is the mean luma (YCoCg Y).
/// @param [in] fThisFrameReactiveFactor Combined reactive factor for this frame.
/// @param [in] fLuminanceDiff           Luminance difference from lock processing.
///
/// @returns
///     Instability factor in [0, 1]; 0 until a full 4-frame history exists.
FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
{
    // One 8-bit quantization step; differences below this are noise.
    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;
    // History slot indices: N_MINUS_1 is the most recent frame.
    const FfxInt32 N_MINUS_1 = 0;
    const FfxInt32 N_MINUS_2 = 1;
    const FfxInt32 N_MINUS_3 = 2;
    const FfxInt32 N_MINUS_4 = 3;

    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    // Compress HDR luma into [0, 1) so it survives 8-bit quantization below.
    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
#endif

    // Quantize to 8-bit steps to match the stored history precision.
    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;

    // Only trust history when the pixel is stable (no clip/mask/luma change).
    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
    FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);

    FfxFloat32 fLumaInstability = 0.0f;
    FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);

    FfxFloat32 fMin = abs(fDiffs0);

    if (fMin >= fUnormThreshold)
    {
        // Look for an older frame with a same-signed, smaller deviation —
        // that indicates oscillation rather than a genuine luma change.
        for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
            FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);

            if (sign(fDiffs0) == sign(fDiffs1)) {

                // Scale difference to protect historically similar values
                const FfxFloat32 fMinBias = 1.0f;
                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
            }
        }

        // Larger neighborhood boxes (more local contrast) amplify the signal.
        const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);

        // Instability fires when an older frame matched better than the
        // newest one; the comparison below binarizes the result.
        fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);

        // Suppress for reactive or accumulation-masked pixels.
        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
    }

    //shift history
    fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
    fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
    fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
    fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;

    StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory);

    // A zero oldest entry means the history isn't full yet — report stable.
    return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
}
|
||||
|
||||
/// Compute the temporal reactive factor to carry to the next frame.
///
/// @param [in] params                  Per-pixel common parameters.
/// @param [in] fTemporalReactiveFactor This frame's combined reactive factor.
///
/// @returns
///     The new factor; negated (with at least epsilon magnitude) for
///     fast-moving pixels — presumably a sign-encoded "in motion" flag
///     decoded on the next frame's reprojection (confirm against
///     ReprojectHistoryColor).
FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
{
    // Keep strictly below 1 so accumulation never fully stalls.
    FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor);

    // Moving pixels get at least a 0.4-biased reactivity.
    fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity)));

    // Square for decay, but keep a floor from depth clips / reactive masks.
    fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor));

    // Force reactive factor for new samples
    fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;

    // Encode "fast motion this frame" in the sign (magnitude >= epsilon).
    if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) {
        fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f;
    }

    return fNewFactor;
}
|
||||
|
||||
/// Gather all per-pixel inputs the accumulation pass needs into one struct.
///
/// @param [in] iPxHrPos The display-resolution pixel position.
///
/// @returns
///     A fully-populated AccumulationPassCommonParams for this pixel.
AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos)
{
    AccumulationPassCommonParams params;

    params.iPxHrPos = iPxHrPos;
    // Pixel-center UV in display space.
    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
    params.fHrUv = fHrUv;

    // Jittered render-resolution UV, clamped for safe hardware sampling.
    const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
    params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());

    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);

    // Produces the reprojected history UV and whether a history sample exists.
    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);

    params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));

    // x = dilated reactive mask, y = accumulation mask.
    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
    params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
    params.fAccumulationMask = fDilatedReactiveMasks.y;
    params.bIsResetFrame = (0 == FrameIndex());

    // New sample: no usable history (off-screen reprojection or first frame).
    params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame);

    return params;
}
|
||||
|
||||
/// Entry point of the FSR2 accumulation pass for one display-resolution pixel:
/// reprojects history, upsamples the current frame, rectifies and blends them,
/// and stores all per-pixel state for the next frame.
///
/// @param [in] iPxHrPos The display-resolution pixel position.
void Accumulate(FfxInt32x2 iPxHrPos)
{
    const AccumulationPassCommonParams params = InitParams(iPxHrPos);

    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
    FfxFloat32x2 fLockStatus;
    InitializeNewLockSample(fLockStatus);

    // Reproject last frame's color and lock state when history is usable.
    FfxFloat32 fTemporalReactiveFactor = 0.0f;
    FfxBoolean bInMotionLastFrame = FFX_FALSE;
    LockState lockState = { FFX_FALSE , FFX_FALSE };
    if (params.bIsExistingSample && !params.bIsResetFrame) {
        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
    }

    FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);

    FfxFloat32 fLuminanceDiff = 0.0f;
    FfxFloat32 fLockContributionThisFrame = 0.0f;
    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);

    // Load upsampled input color
    RectificationBox clippingBox;
    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);

    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);


    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);

    if (params.bIsNewSample) {
        // No history: the upsampled color becomes the history directly.
        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
    }
    else {
        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);

        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
    }

    // Undo input color preparation (exposure) before storing/outputting.
    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());

    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);

    // Get new temporal reactive factor
    fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);

    StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));

    // Output final color when RCAS is disabled
#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
    WriteUpscaledOutput(iPxHrPos, fHistoryColor);
#endif
    // Clear the new-locks buffer entry for this pixel for the next frame.
    StoreNewLocks(iPxHrPos, 0);
}
|
||||
|
||||
#endif // FFX_FSR2_ACCUMULATE_H
|
||||
91
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
vendored
Normal file
91
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
// Needed for rw_upscaled_output declaration
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
|
||||
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
|
||||
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2
|
||||
#else
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
|
||||
#endif
|
||||
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3
|
||||
#define FSR2_BIND_SRV_LOCK_STATUS 4
|
||||
//#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5
|
||||
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6
|
||||
#define FSR2_BIND_SRV_LUMA_INSTABILITY 7
|
||||
#define FSR2_BIND_SRV_LANCZOS_LUT 8
|
||||
#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9
|
||||
#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 10
|
||||
#define FSR2_BIND_SRV_AUTO_EXPOSURE 11
|
||||
#define FSR2_BIND_SRV_LUMA_HISTORY 12
|
||||
|
||||
#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13
|
||||
#define FSR2_BIND_UAV_LOCK_STATUS 14
|
||||
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15
|
||||
#define FSR2_BIND_UAV_NEW_LOCKS 16
|
||||
#define FSR2_BIND_UAV_LUMA_HISTORY 17
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 18
|
||||
|
||||
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 5
|
||||
#endif
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_upsample.h"
|
||||
#include "ffx_fsr2_postprocess_lock_status.h"
|
||||
#include "ffx_fsr2_reproject.h"
|
||||
#include "ffx_fsr2_accumulate.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
void main()
|
||||
{
|
||||
uvec2 uGroupId = gl_WorkGroupID.xy;
|
||||
const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT;
|
||||
uGroupId.y = GroupRows - uGroupId.y - 1;
|
||||
|
||||
uvec2 uDispatchThreadId = uGroupId * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
|
||||
|
||||
Accumulate(ivec2(uDispatchThreadId));
|
||||
}
|
||||
93
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
vendored
Normal file
93
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 1
|
||||
#define FSR2_BIND_UAV_AUTOREACTIVE 2
|
||||
#define FSR2_BIND_CB_REACTIVE 3
|
||||
#define FSR2_BIND_CB_FSR2 4
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha;
|
||||
// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha;
|
||||
// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask;
|
||||
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
#if defined(FSR2_BIND_CB_REACTIVE)
|
||||
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
|
||||
{
|
||||
float scale;
|
||||
float threshold;
|
||||
float binaryValue;
|
||||
uint flags;
|
||||
} cbGenerateReactive;
|
||||
#endif
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
void main()
|
||||
{
|
||||
FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy;
|
||||
|
||||
FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb;
|
||||
FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb;
|
||||
|
||||
if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0)
|
||||
{
|
||||
ColorPreAlpha = Tonemap(ColorPreAlpha);
|
||||
ColorPostAlpha = Tonemap(ColorPostAlpha);
|
||||
}
|
||||
|
||||
if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0)
|
||||
{
|
||||
ColorPreAlpha = InverseTonemap(ColorPreAlpha);
|
||||
ColorPostAlpha = InverseTonemap(ColorPostAlpha);
|
||||
}
|
||||
|
||||
FfxFloat32 out_reactive_value = 0.f;
|
||||
FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha);
|
||||
|
||||
out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta);
|
||||
out_reactive_value *= cbGenerateReactive.scale;
|
||||
|
||||
out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 0 : cbGenerateReactive.binaryValue) : out_reactive_value;
|
||||
|
||||
imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value));
|
||||
}
|
||||
698
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
vendored
Normal file
698
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
vendored
Normal file
|
|
@ -0,0 +1,698 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
#include "ffx_fsr2_resources.h"
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#include "ffx_core.h"
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifndef FFX_FSR2_PREFER_WAVE64
|
||||
#define FFX_FSR2_PREFER_WAVE64
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FSR2_BIND_CB_FSR2)
|
||||
layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t
|
||||
{
|
||||
FfxInt32x2 iRenderSize;
|
||||
FfxInt32x2 iMaxRenderSize;
|
||||
FfxInt32x2 iDisplaySize;
|
||||
FfxInt32x2 iInputColorResourceDimensions;
|
||||
FfxInt32x2 iLumaMipDimensions;
|
||||
FfxInt32 iLumaMipLevelToUse;
|
||||
FfxInt32 iFrameIndex;
|
||||
|
||||
FfxFloat32x4 fDeviceToViewDepth;
|
||||
FfxFloat32x2 fJitter;
|
||||
FfxFloat32x2 fMotionVectorScale;
|
||||
FfxFloat32x2 fDownscaleFactor;
|
||||
FfxFloat32x2 fMotionVectorJitterCancellation;
|
||||
FfxFloat32 fPreExposure;
|
||||
FfxFloat32 fPreviousFramePreExposure;
|
||||
FfxFloat32 fTanHalfFOV;
|
||||
FfxFloat32 fJitterSequenceLength;
|
||||
FfxFloat32 fDeltaTime;
|
||||
FfxFloat32 fDynamicResChangeFactor;
|
||||
FfxFloat32 fViewSpaceToMetersFactor;
|
||||
|
||||
FfxFloat32 fPad;
|
||||
mat4 mReprojectionMatrix;
|
||||
} cbFSR2;
|
||||
#endif
|
||||
|
||||
FfxInt32x2 RenderSize()
|
||||
{
|
||||
return cbFSR2.iRenderSize;
|
||||
}
|
||||
|
||||
FfxInt32x2 MaxRenderSize()
|
||||
{
|
||||
return cbFSR2.iMaxRenderSize;
|
||||
}
|
||||
|
||||
FfxInt32x2 DisplaySize()
|
||||
{
|
||||
return cbFSR2.iDisplaySize;
|
||||
}
|
||||
|
||||
FfxInt32x2 InputColorResourceDimensions()
|
||||
{
|
||||
return cbFSR2.iInputColorResourceDimensions;
|
||||
}
|
||||
|
||||
FfxInt32x2 LumaMipDimensions()
|
||||
{
|
||||
return cbFSR2.iLumaMipDimensions;
|
||||
}
|
||||
|
||||
FfxInt32 LumaMipLevelToUse()
|
||||
{
|
||||
return cbFSR2.iLumaMipLevelToUse;
|
||||
}
|
||||
|
||||
FfxInt32 FrameIndex()
|
||||
{
|
||||
return cbFSR2.iFrameIndex;
|
||||
}
|
||||
|
||||
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
|
||||
{
|
||||
return cbFSR2.fDeviceToViewDepth;
|
||||
}
|
||||
|
||||
FfxFloat32x2 Jitter()
|
||||
{
|
||||
return cbFSR2.fJitter;
|
||||
}
|
||||
|
||||
FfxFloat32x2 MotionVectorScale()
|
||||
{
|
||||
return cbFSR2.fMotionVectorScale;
|
||||
}
|
||||
|
||||
FfxFloat32x2 DownscaleFactor()
|
||||
{
|
||||
return cbFSR2.fDownscaleFactor;
|
||||
}
|
||||
|
||||
FfxFloat32x2 MotionVectorJitterCancellation()
|
||||
{
|
||||
return cbFSR2.fMotionVectorJitterCancellation;
|
||||
}
|
||||
|
||||
FfxFloat32 PreExposure()
|
||||
{
|
||||
return cbFSR2.fPreExposure;
|
||||
}
|
||||
|
||||
FfxFloat32 PreviousFramePreExposure()
|
||||
{
|
||||
return cbFSR2.fPreviousFramePreExposure;
|
||||
}
|
||||
|
||||
FfxFloat32 TanHalfFoV()
|
||||
{
|
||||
return cbFSR2.fTanHalfFOV;
|
||||
}
|
||||
|
||||
FfxFloat32 JitterSequenceLength()
|
||||
{
|
||||
return cbFSR2.fJitterSequenceLength;
|
||||
}
|
||||
|
||||
FfxFloat32 DeltaTime()
|
||||
{
|
||||
return cbFSR2.fDeltaTime;
|
||||
}
|
||||
|
||||
FfxFloat32 DynamicResChangeFactor()
|
||||
{
|
||||
return cbFSR2.fDynamicResChangeFactor;
|
||||
}
|
||||
|
||||
FfxFloat32 ViewSpaceToMetersFactor()
|
||||
{
|
||||
return cbFSR2.fViewSpaceToMetersFactor;
|
||||
}
|
||||
|
||||
layout (set = 0, binding = 0) uniform sampler s_PointClamp;
|
||||
layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
|
||||
|
||||
// SRVs
|
||||
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
|
||||
layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
|
||||
#endif
|
||||
#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
|
||||
layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
|
||||
layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_RCAS_INPUT)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
|
||||
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
|
||||
layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
|
||||
layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
|
||||
#endif
|
||||
|
||||
// UAV
|
||||
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_DEPTH
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_LOCK_STATUS
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status;
|
||||
#endif
|
||||
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_NEW_LOCKS
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
|
||||
layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
|
||||
#endif
|
||||
|
||||
#if defined FSR2_BIND_UAV_AUTOREACTIVE
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
|
||||
layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
|
||||
FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel)
|
||||
{
|
||||
return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
|
||||
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel)
|
||||
{
|
||||
return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
|
||||
FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_input_depth, iPxPos, 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
|
||||
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
|
||||
{
|
||||
#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
|
||||
return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f);
|
||||
#else
|
||||
return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
|
||||
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
|
||||
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
|
||||
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
|
||||
FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_prepared_input_color, iPxPos, 0).xyz;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
|
||||
FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
|
||||
{
|
||||
FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
|
||||
|
||||
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f)));
|
||||
if (bInvalidMotionVector)
|
||||
{
|
||||
FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos);
|
||||
FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
|
||||
fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
|
||||
}
|
||||
#endif
|
||||
|
||||
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
|
||||
|
||||
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
|
||||
fUvMotionVector -= MotionVectorJitterCancellation();
|
||||
#endif
|
||||
|
||||
return fUvMotionVector;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
|
||||
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
|
||||
{
|
||||
return texelFetch(r_internal_upscaled_color, iPxHistory, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
|
||||
void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
|
||||
{
|
||||
imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
|
||||
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
|
||||
void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
|
||||
{
|
||||
imageStore(rw_internal_upscaled_color, iPxHistory, fHistory);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
|
||||
void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
|
||||
{
|
||||
imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
|
||||
void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
|
||||
{
|
||||
imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
|
||||
FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos)
|
||||
{
|
||||
FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg;
|
||||
|
||||
return fLockStatus;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_STATUS)
|
||||
void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus)
|
||||
{
|
||||
imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
|
||||
FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_lock_input_luma, iPxPos, 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
|
||||
void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
|
||||
{
|
||||
imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
|
||||
FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_new_locks, iPxPos, 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
|
||||
FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return imageLoad(rw_new_locks, iPxPos).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
|
||||
void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
|
||||
{
|
||||
imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
|
||||
void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
|
||||
{
|
||||
imageStore(rw_prepared_input_color, iPxPos, fTonemapped);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
|
||||
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
|
||||
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
|
||||
{
|
||||
FfxFloat32x2 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg;
|
||||
return fLockStatus;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DEPTH)
|
||||
FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput)
|
||||
{
|
||||
return texelFetch(r_input_depth, iPxInput, 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
|
||||
FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
|
||||
void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
|
||||
{
|
||||
FfxUInt32 uDepth = floatBitsToUint(fDepth);
|
||||
|
||||
#if FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||
imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth);
|
||||
#else
|
||||
imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
|
||||
void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
|
||||
{
|
||||
imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
|
||||
void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
|
||||
{
|
||||
//FfxUInt32 uDepth = f32tof16(fDepth);
|
||||
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
|
||||
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
|
||||
{
|
||||
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
|
||||
FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
|
||||
{
|
||||
return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
|
||||
FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
|
||||
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput)
|
||||
{
|
||||
return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg;
|
||||
}
|
||||
|
||||
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
|
||||
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
|
||||
{
|
||||
return texelFetch(r_dilatedDepth, iPxInput, 0).r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
|
||||
FfxFloat32 Exposure()
|
||||
{
|
||||
FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
|
||||
|
||||
if (exposure == 0.0f) {
|
||||
exposure = 1.0f;
|
||||
}
|
||||
|
||||
return exposure;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
|
||||
FfxFloat32 AutoExposure()
|
||||
{
|
||||
FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x;
|
||||
|
||||
if (exposure == 0.0f) {
|
||||
exposure = 1.0f;
|
||||
}
|
||||
|
||||
return exposure;
|
||||
}
|
||||
#endif
|
||||
|
||||
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
|
||||
{
|
||||
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
|
||||
return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x;
|
||||
#else
|
||||
return 0.f;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
|
||||
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
|
||||
{
|
||||
// Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
|
||||
return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
|
||||
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
|
||||
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
|
||||
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
|
||||
{
|
||||
imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FFX_INTERNAL)
|
||||
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
|
||||
{
|
||||
return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
|
||||
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
|
||||
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
|
||||
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_AUTOREACTIVE)
|
||||
#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION)
|
||||
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
|
||||
{
|
||||
imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f));
|
||||
|
||||
imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
|
||||
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
|
||||
{
|
||||
imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
|
||||
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
|
||||
{
|
||||
imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
799
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
vendored
Normal file
799
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
vendored
Normal file
|
|
@ -0,0 +1,799 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "ffx_fsr2_resources.h"
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifdef __hlsl_dx_compiler
|
||||
#pragma dxc diagnostic push
|
||||
#pragma dxc diagnostic ignored "-Wambig-lit-shift"
|
||||
#endif //__hlsl_dx_compiler
|
||||
#include "ffx_core.h"
|
||||
#ifdef __hlsl_dx_compiler
|
||||
#pragma dxc diagnostic pop
|
||||
#endif //__hlsl_dx_compiler
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifndef FFX_FSR2_PREFER_WAVE64
|
||||
#define FFX_FSR2_PREFER_WAVE64
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#pragma warning(disable: 3205) // conversion from larger type to smaller
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
|
||||
#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
|
||||
#define DECLARE_CB_REGISTER(regIndex) b##regIndex
|
||||
#define FFX_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
|
||||
#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
|
||||
#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
|
||||
|
||||
#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
|
||||
cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
|
||||
{
|
||||
FfxInt32x2 iRenderSize;
|
||||
FfxInt32x2 iMaxRenderSize;
|
||||
FfxInt32x2 iDisplaySize;
|
||||
FfxInt32x2 iInputColorResourceDimensions;
|
||||
FfxInt32x2 iLumaMipDimensions;
|
||||
FfxInt32 iLumaMipLevelToUse;
|
||||
FfxInt32 iFrameIndex;
|
||||
|
||||
FfxFloat32x4 fDeviceToViewDepth;
|
||||
FfxFloat32x2 fJitter;
|
||||
FfxFloat32x2 fMotionVectorScale;
|
||||
FfxFloat32x2 fDownscaleFactor;
|
||||
FfxFloat32x2 fMotionVectorJitterCancellation;
|
||||
FfxFloat32 fPreExposure;
|
||||
FfxFloat32 fPreviousFramePreExposure;
|
||||
FfxFloat32 fTanHalfFOV;
|
||||
FfxFloat32 fJitterSequenceLength;
|
||||
FfxFloat32 fDeltaTime;
|
||||
FfxFloat32 fDynamicResChangeFactor;
|
||||
FfxFloat32 fViewSpaceToMetersFactor;
|
||||
};
|
||||
|
||||
#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
|
||||
#endif
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
|
||||
#define FFX_FSR2_ROOTSIG_STR(p) #p
|
||||
#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
|
||||
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
|
||||
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
|
||||
|
||||
#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
|
||||
|
||||
#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
|
||||
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
|
||||
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
|
||||
#if defined(FFX_FSR2_EMBED_ROOTSIG)
|
||||
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
|
||||
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
|
||||
#else
|
||||
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
|
||||
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
|
||||
#endif // #if FFX_FSR2_EMBED_ROOTSIG
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
/* Define getter functions in the order they are defined in the CB! */
|
||||
FfxInt32x2 RenderSize()
|
||||
{
|
||||
return iRenderSize;
|
||||
}
|
||||
|
||||
FfxInt32x2 MaxRenderSize()
|
||||
{
|
||||
return iMaxRenderSize;
|
||||
}
|
||||
|
||||
FfxInt32x2 DisplaySize()
|
||||
{
|
||||
return iDisplaySize;
|
||||
}
|
||||
|
||||
FfxInt32x2 InputColorResourceDimensions()
|
||||
{
|
||||
return iInputColorResourceDimensions;
|
||||
}
|
||||
|
||||
FfxInt32x2 LumaMipDimensions()
|
||||
{
|
||||
return iLumaMipDimensions;
|
||||
}
|
||||
|
||||
FfxInt32 LumaMipLevelToUse()
|
||||
{
|
||||
return iLumaMipLevelToUse;
|
||||
}
|
||||
|
||||
FfxInt32 FrameIndex()
|
||||
{
|
||||
return iFrameIndex;
|
||||
}
|
||||
|
||||
FfxFloat32x2 Jitter()
|
||||
{
|
||||
return fJitter;
|
||||
}
|
||||
|
||||
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
|
||||
{
|
||||
return fDeviceToViewDepth;
|
||||
}
|
||||
|
||||
FfxFloat32x2 MotionVectorScale()
|
||||
{
|
||||
return fMotionVectorScale;
|
||||
}
|
||||
|
||||
FfxFloat32x2 DownscaleFactor()
|
||||
{
|
||||
return fDownscaleFactor;
|
||||
}
|
||||
|
||||
FfxFloat32x2 MotionVectorJitterCancellation()
|
||||
{
|
||||
return fMotionVectorJitterCancellation;
|
||||
}
|
||||
|
||||
FfxFloat32 PreExposure()
|
||||
{
|
||||
return fPreExposure;
|
||||
}
|
||||
|
||||
FfxFloat32 PreviousFramePreExposure()
|
||||
{
|
||||
return fPreviousFramePreExposure;
|
||||
}
|
||||
|
||||
FfxFloat32 TanHalfFoV()
|
||||
{
|
||||
return fTanHalfFOV;
|
||||
}
|
||||
|
||||
FfxFloat32 JitterSequenceLength()
|
||||
{
|
||||
return fJitterSequenceLength;
|
||||
}
|
||||
|
||||
FfxFloat32 DeltaTime()
|
||||
{
|
||||
return fDeltaTime;
|
||||
}
|
||||
|
||||
FfxFloat32 DynamicResChangeFactor()
|
||||
{
|
||||
return fDynamicResChangeFactor;
|
||||
}
|
||||
|
||||
FfxFloat32 ViewSpaceToMetersFactor()
|
||||
{
|
||||
return fViewSpaceToMetersFactor;
|
||||
}
|
||||
|
||||
|
||||
SamplerState s_PointClamp : register(s0);
|
||||
SamplerState s_LinearClamp : register(s1);
|
||||
|
||||
// SRVs
|
||||
#if defined(FFX_INTERNAL)
|
||||
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
|
||||
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
|
||||
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
|
||||
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
|
||||
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
|
||||
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
|
||||
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
|
||||
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
|
||||
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
|
||||
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
|
||||
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
|
||||
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
|
||||
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
||||
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
||||
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
|
||||
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
|
||||
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
||||
Texture2D<FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
||||
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
|
||||
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
|
||||
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
|
||||
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
|
||||
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
|
||||
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
|
||||
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
|
||||
|
||||
Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
|
||||
|
||||
// UAV declarations
|
||||
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
|
||||
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
|
||||
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
|
||||
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
||||
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
||||
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
|
||||
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
|
||||
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
||||
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
||||
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
|
||||
|
||||
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
|
||||
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
|
||||
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
|
||||
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
|
||||
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
|
||||
RWTexture2D<FfxFloat32x4> rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
|
||||
|
||||
RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
|
||||
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
|
||||
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
|
||||
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
|
||||
|
||||
#else // #if defined(FFX_INTERNAL)
|
||||
#if defined FSR2_BIND_SRV_INPUT_COLOR
|
||||
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
|
||||
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS
|
||||
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_INPUT_DEPTH
|
||||
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_INPUT_EXPOSURE
|
||||
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_AUTO_EXPOSURE
|
||||
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_REACTIVE_MASK
|
||||
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
|
||||
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
|
||||
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS
|
||||
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
|
||||
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_DILATED_DEPTH
|
||||
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_INTERNAL_UPSCALED
|
||||
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_LOCK_STATUS
|
||||
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA
|
||||
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_NEW_LOCKS
|
||||
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
|
||||
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_LUMA_HISTORY
|
||||
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_RCAS_INPUT
|
||||
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_LANCZOS_LUT
|
||||
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
|
||||
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
|
||||
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
|
||||
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
|
||||
#endif
|
||||
|
||||
#if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR
|
||||
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
|
||||
#endif
|
||||
#if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR
|
||||
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
|
||||
#endif
|
||||
|
||||
// UAV declarations
|
||||
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
|
||||
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
|
||||
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_DEPTH
|
||||
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
|
||||
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_LOCK_STATUS
|
||||
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA
|
||||
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_NEW_LOCKS
|
||||
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
||||
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
||||
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
|
||||
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
|
||||
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
|
||||
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
|
||||
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_EXPOSURE
|
||||
RWTexture2D<FfxFloat32x2> rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
|
||||
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
|
||||
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
|
||||
#endif
|
||||
|
||||
#if defined FSR2_BIND_UAV_AUTOREACTIVE
|
||||
RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
|
||||
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
|
||||
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR);
|
||||
#endif
|
||||
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
|
||||
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
|
||||
#endif
|
||||
#endif // #if defined(FFX_INTERNAL)
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
|
||||
{
|
||||
return r_imgMips.mips[mipLevel][iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
|
||||
{
|
||||
return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_input_depth[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
|
||||
{
|
||||
return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_reactive_mask[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_transparency_and_composition_mask[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_input_color_jittered[iPxPos].rgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
|
||||
{
|
||||
return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_prepared_input_color[iPxPos].xyz;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
|
||||
{
|
||||
FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
|
||||
|
||||
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
|
||||
|
||||
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
|
||||
fUvMotionVector -= MotionVectorJitterCancellation();
|
||||
#endif
|
||||
|
||||
return fUvMotionVector;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
|
||||
{
|
||||
return r_internal_upscaled_color[iPxHistory];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
|
||||
void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
|
||||
{
|
||||
rw_luma_history[iPxPos] = fLumaHistory;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
|
||||
{
|
||||
return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FFX_INTERNAL)
|
||||
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
|
||||
{
|
||||
return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
|
||||
void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
|
||||
{
|
||||
rw_internal_upscaled_color[iPxHistory] = fHistory;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
|
||||
void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
|
||||
{
|
||||
rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
|
||||
void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
|
||||
{
|
||||
rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
|
||||
}
|
||||
#endif
|
||||
|
||||
//LOCK_LIFETIME_REMAINING == 0
|
||||
//Should make LockInitialLifetime() return a const 1.0f later
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_lock_status[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
|
||||
void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
|
||||
{
|
||||
rw_lock_status[iPxPos] = fLockStatus;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_lock_input_luma[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
|
||||
void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma)
|
||||
{
|
||||
rw_lock_input_luma[iPxPos] = fLuma;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return r_new_locks[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return rw_new_locks[iPxPos];
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
|
||||
void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
|
||||
{
|
||||
rw_new_locks[iPxPos] = newLock;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
|
||||
void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
|
||||
{
|
||||
rw_prepared_input_color[iPxPos] = fTonemapped;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
|
||||
{
|
||||
return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
|
||||
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
|
||||
{
|
||||
FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
|
||||
return fLockStatus;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
|
||||
FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
|
||||
{
|
||||
return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
|
||||
void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
|
||||
{
|
||||
FfxUInt32 uDepth = asuint(fDepth);
|
||||
|
||||
#if FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||
InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth);
|
||||
#else
|
||||
InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
// Non-atomic store of a raw uint value into the reconstructed-depth buffer
// (used to initialize/clear before the atomic scatter pass).
void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
{
    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
// Stores one texel of the dilated depth buffer.
void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
    rw_dilatedDepth[iPxPos] = fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Stores one texel of the dilated motion-vector buffer.
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
    rw_dilated_motion_vectors[iPxPos] = fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Loads one texel of the current frame's dilated motion vectors.
FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
{
    return r_dilated_motion_vectors[iPxInput].xy;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Loads one texel of the previous frame's dilated motion vectors.
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
{
    return r_previous_dilated_motion_vectors[iPxInput].xy;
}

// Bilinearly samples the previous frame's dilated motion vectors (mip 0).
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
{
    return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
// Loads one texel of the dilated depth buffer.
FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
{
    return r_dilatedDepth[iPxInput];
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
// Reads the application-provided 1x1 exposure value. A value of exactly
// zero means "unset" and is mapped to a neutral exposure of 1.
FfxFloat32 Exposure()
{
    const FfxFloat32 fExposure = r_input_exposure[FfxUInt32x2(0, 0)].x;

    return (fExposure == 0.0f) ? 1.0f : fExposure;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
// Reads the computed 1x1 auto-exposure value. A value of exactly zero
// means "not yet computed" and is mapped to a neutral exposure of 1.
FfxFloat32 AutoExposure()
{
    const FfxFloat32 fExposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;

    return (fExposure == 0.0f) ? 1.0f : fExposure;
}
#endif
|
||||
|
||||
// Looks up the Lanczos-2 kernel weight for distance x from a precomputed
// LUT when the resource is bound; returns 0 when it is unavailable.
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
    // The LUT covers distances in [0, 2]; x / 2 maps that range onto the
    // [0, 1] texture coordinate space.
    return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0);
#else
    return 0.f;
#endif
}
|
||||
|
||||
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
// Samples the upsample maximum-bias LUT. abs(uv) exploits the LUT's
// symmetry so only one quadrant needs to be stored.
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
    return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Bilinearly samples the dilated reactive masks (mip 0) at the given UV.
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
    return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Loads one texel of the dilated reactive masks.
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
{
    return r_dilated_reactive_masks[iPxPos];
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Stores one texel of the dilated reactive masks.
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
    rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
// Loads the opaque-only input color (scene before transparency was drawn).
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    return r_input_opaque_only[iPxPos].xyz;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Loads the previous frame's color before alpha/transparency compositing.
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    return r_input_prev_color_pre_alpha[iPxPos];
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Loads the previous frame's color after alpha/transparency compositing.
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    return r_input_prev_color_post_alpha[iPxPos];
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)
#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL)
// Splits the generated (reactive, composition) pair into its two output
// UAVs: .x to the auto-reactive mask, .y to the auto-composition mask.
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
    rw_output_autoreactive[iPxPos] = fReactive.x;

    rw_output_autocomposition[iPxPos] = fReactive.y;
}
#endif
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Stores the current frame's pre-alpha color for use as "previous" next frame.
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    rw_output_prev_color_pre_alpha[iPxPos] = color;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Stores the current frame's post-alpha color for use as "previous" next frame.
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    rw_output_prev_color_post_alpha[iPxPos] = color;
}
#endif
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
565
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
vendored
Normal file
565
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
vendored
Normal file
|
|
@ -0,0 +1,565 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#if !defined(FFX_FSR2_COMMON_H)
|
||||
#define FFX_FSR2_COMMON_H
|
||||
|
||||
#if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
//Locks
|
||||
#define LOCK_LIFETIME_REMAINING 0
|
||||
#define LOCK_TEMPORAL_LUMA 1
|
||||
#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f;
|
||||
FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f;
|
||||
FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f;
|
||||
FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX;
|
||||
FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f;
|
||||
FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f;
|
||||
|
||||
// treat vector truncation warnings as errors
|
||||
#pragma warning(error: 3206)
|
||||
|
||||
// suppress warnings
|
||||
#pragma warning(disable: 3205) // conversion from larger type to smaller
|
||||
#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative
|
||||
|
||||
// Reconstructed depth usage
|
||||
FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
|
||||
|
||||
// Accumulation
|
||||
FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
|
||||
FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
|
||||
FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
|
||||
FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
|
||||
|
||||
// Auto exposure
|
||||
FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
|
||||
|
||||
struct AccumulationPassCommonParams
|
||||
{
|
||||
FfxInt32x2 iPxHrPos;
|
||||
FfxFloat32x2 fHrUv;
|
||||
FfxFloat32x2 fLrUv_HwSampler;
|
||||
FfxFloat32x2 fMotionVector;
|
||||
FfxFloat32x2 fReprojectedHrUv;
|
||||
FfxFloat32 fHrVelocity;
|
||||
FfxFloat32 fDepthClipFactor;
|
||||
FfxFloat32 fDilatedReactiveFactor;
|
||||
FfxFloat32 fAccumulationMask;
|
||||
|
||||
FfxBoolean bIsResetFrame;
|
||||
FfxBoolean bIsExistingSample;
|
||||
FfxBoolean bIsNewSample;
|
||||
};
|
||||
|
||||
struct LockState
|
||||
{
|
||||
FfxBoolean NewLock; //Set for both unique new and re-locked new
|
||||
FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
|
||||
};
|
||||
|
||||
void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus)
|
||||
{
|
||||
fLockStatus = FfxFloat32x2(0, 0);
|
||||
}
|
||||
|
||||
#if FFX_HALF
|
||||
void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus)
|
||||
{
|
||||
fLockStatus = FFX_MIN16_F2(0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus)
|
||||
{
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] = 0;
|
||||
}
|
||||
|
||||
#if FFX_HALF
|
||||
void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus)
|
||||
{
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct RectificationBox
|
||||
{
|
||||
FfxFloat32x3 boxCenter;
|
||||
FfxFloat32x3 boxVec;
|
||||
FfxFloat32x3 aabbMin;
|
||||
FfxFloat32x3 aabbMax;
|
||||
FfxFloat32 fBoxCenterWeight;
|
||||
};
|
||||
#if FFX_HALF
|
||||
struct RectificationBoxMin16
|
||||
{
|
||||
FFX_MIN16_F3 boxCenter;
|
||||
FFX_MIN16_F3 boxVec;
|
||||
FFX_MIN16_F3 aabbMin;
|
||||
FFX_MIN16_F3 aabbMax;
|
||||
FFX_MIN16_F fBoxCenterWeight;
|
||||
};
|
||||
#endif
|
||||
|
||||
// Resets a rectification-box accumulator: zero weight and moments, and an
// inverted (empty) AABB so the first min/max comparison always wins.
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
    rectificationBox.fBoxCenterWeight = FfxFloat32(0);

    rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0);
    rectificationBox.boxVec = FfxFloat32x3(0, 0, 0);
    rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
    rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
}
|
||||
#if FFX_HALF
|
||||
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
|
||||
{
|
||||
rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
|
||||
|
||||
rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0);
|
||||
rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0);
|
||||
rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
|
||||
rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Seeds the rectification box with its first color sample: the AABB
// collapses to the sample and the weighted moments are initialized.
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
    rectificationBox.aabbMin = colorSample;
    rectificationBox.aabbMax = colorSample;

    FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
    rectificationBox.boxCenter = weightedSample;            // running sum of w * c (first moment)
    rectificationBox.boxVec = colorSample * weightedSample; // running sum of w * c^2 (second moment)
    rectificationBox.fBoxCenterWeight = fSampleWeight;
}
|
||||
|
||||
// Accumulates one weighted color sample into the rectification box.
// The first sample seeds the box; subsequent samples grow the AABB and add
// to the weighted first/second moments consumed later by
// RectificationBoxComputeVarianceBoxData().
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
    if (bInitialSample) {
        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
    } else {
        rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
        rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);

        FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
        rectificationBox.boxCenter += weightedSample;            // first moment
        rectificationBox.boxVec += colorSample * weightedSample; // second moment
        rectificationBox.fBoxCenterWeight += fSampleWeight;
    }
}
|
||||
#if FFX_HALF
|
||||
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
|
||||
{
|
||||
rectificationBox.aabbMin = colorSample;
|
||||
rectificationBox.aabbMax = colorSample;
|
||||
|
||||
FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
|
||||
rectificationBox.boxCenter = weightedSample;
|
||||
rectificationBox.boxVec = colorSample * weightedSample;
|
||||
rectificationBox.fBoxCenterWeight = fSampleWeight;
|
||||
}
|
||||
|
||||
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
|
||||
{
|
||||
if (bInitialSample) {
|
||||
RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
|
||||
} else {
|
||||
rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
|
||||
rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
|
||||
|
||||
FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
|
||||
rectificationBox.boxCenter += weightedSample;
|
||||
rectificationBox.boxVec += colorSample * weightedSample;
|
||||
rectificationBox.fBoxCenterWeight += fSampleWeight;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Finalizes the accumulated moments: normalizes by total weight (guarding
// against a near-zero weight) and converts the second moment into a
// per-channel standard deviation, boxVec = sqrt(E[c^2] - E[c]^2).
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
    rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f));
    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
    rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
    FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
    rectificationBox.boxVec = stdDev;
}
|
||||
#if FFX_HALF
|
||||
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
|
||||
{
|
||||
rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f));
|
||||
rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
|
||||
rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
|
||||
FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
|
||||
rectificationBox.boxVec = stdDev;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Component-wise reciprocal of v. Note the all() over the inequality:
// if ANY component is zero the whole zero vector is returned, which avoids
// any divide-by-zero.
FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
{
    return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0);
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v)
|
||||
{
|
||||
return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ratio of the smaller to the larger of two values.
// Returns 0 when the larger value is 0, avoiding a division by zero.
FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
{
    const FfxFloat32 fSmaller = ffxMin(v0, v1);
    const FfxFloat32 fLarger = ffxMax(v0, v1);
    return (fLarger != 0) ? fSmaller / fLarger : 0;
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
|
||||
{
|
||||
const FFX_MIN16_F m = ffxMax(v0, v1);
|
||||
return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts a YCoCg color back to RGB (inverse of RGBToYCoCg).
FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
{
    const FfxFloat32 fY = fYCoCg.x;
    const FfxFloat32 fCo = fYCoCg.y;
    const FfxFloat32 fCg = fYCoCg.z;

    return FfxFloat32x3(
        fY + fCo - fCg,  // R
        fY + fCg,        // G
        fY - fCo - fCg); // B
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
|
||||
{
|
||||
FFX_MIN16_F3 fRgb;
|
||||
|
||||
fRgb = FFX_MIN16_F3(
|
||||
fYCoCg.x + fYCoCg.y - fYCoCg.z,
|
||||
fYCoCg.x + fYCoCg.z,
|
||||
fYCoCg.x - fYCoCg.y - fYCoCg.z);
|
||||
|
||||
return fRgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts an RGB color to YCoCg (luma, orange chroma, green chroma).
// Inverse of YCoCgToRGB.
FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
{
    const FfxFloat32 fY = 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b;
    const FfxFloat32 fCo = 0.5f * fRgb.r - 0.5f * fRgb.b;
    const FfxFloat32 fCg = -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b;

    return FfxFloat32x3(fY, fCo, fCg);
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
|
||||
{
|
||||
FFX_MIN16_F3 fYCoCg;
|
||||
|
||||
fYCoCg = FFX_MIN16_F3(
|
||||
0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,
|
||||
0.5 * fRgb.r - 0.5 * fRgb.b,
|
||||
-0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b);
|
||||
|
||||
return fYCoCg;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Linear luminance from linear RGB using the Rec. 709 coefficients.
FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
{
    return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f));
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
|
||||
{
|
||||
return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Perceptual lightness from linear RGB: luminance is mapped through the
// CIE L* curve (linear segment below 216/24389, cube-root law above),
// then rescaled from [0, 100] to [0, 1] by the final * 0.01.
FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
{
    FfxFloat32 fLuminance = RGBToLuma(fLinearRgb);

    FfxFloat32 fPercievedLuminance = 0;
    if (fLuminance <= 216.0f / 24389.0f) {
        fPercievedLuminance = fLuminance * (24389.0f / 27.0f);
    }
    else {
        fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f;
    }

    return fPercievedLuminance * 0.01f;
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
|
||||
{
|
||||
FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb);
|
||||
|
||||
FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0);
|
||||
if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) {
|
||||
fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f);
|
||||
}
|
||||
else {
|
||||
fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f);
|
||||
}
|
||||
|
||||
return fPercievedLuminance * FFX_MIN16_F(0.01f);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reversible tonemapper: scales the color by 1 / (1 + max component),
// mapping [0, inf) into [0, 1). Undone by InverseTonemap().
FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
{
    return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx;
}
|
||||
|
||||
// Inverse of Tonemap(): expands [0, 1) back to [0, inf). The denominator is
// clamped to FSR2_TONEMAP_EPSILON to avoid dividing by zero as the
// max component approaches 1.
FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
{
    return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
|
||||
{
|
||||
return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx;
|
||||
}
|
||||
|
||||
FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
|
||||
{
|
||||
return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Offsets a texel position and clamps it to the texture bounds, but only in
// the direction of the offset: a zero offset never clamps, and a positive
// (negative) offset is only clamped against the upper (lower) edge.
FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 result = iPxSample + iPxOffset;
    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x;
    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x;
    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y;
    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y;
    return result;

    // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1));
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
|
||||
{
|
||||
FFX_MIN16_I2 result = iPxSample + iPxOffset;
|
||||
result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
|
||||
result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
|
||||
result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
|
||||
result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
|
||||
return result;
|
||||
|
||||
// return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Clamps a UV so its bilinear footprint stays at least half a texel inside
// the active iTextureSize region, then renormalizes against the physical
// resource size iResourceSize (which may be larger than the active region).
FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
{
    const FfxFloat32x2 fSampleLocation = fUv * iTextureSize;
    const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f)));
    const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize);

    return fClampedUv;
}
|
||||
|
||||
// True when pos lies inside [0, size) on both axes. The cast to unsigned
// folds the pos < 0 check into the single upper-bound comparison, since
// negative values wrap to very large unsigned numbers.
FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
{
    return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size)));
}
|
||||
#if FFX_HALF
|
||||
FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
|
||||
{
|
||||
return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size)));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts the stored average log-luminance into an exposure multiplier
// using the standard saturation-based photometric exposure model
// (ISO S = 100, meter calibration K = 12.5, lens efficiency q = 0.65).
// The input is log(Lavg), hence the initial exp().
FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
{
    Lavg = exp(Lavg);

    const FfxFloat32 S = 100.0f; //ISO arithmetic speed
    const FfxFloat32 K = 12.5f;
    FfxFloat32 ExposureISO100 = log2((Lavg * S) / K);

    const FfxFloat32 q = 0.65f;
    FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100);

    // Scale colors so the maximum representable luminance maps to 1.
    return 1 / Lmax;
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
|
||||
{
|
||||
Lavg = exp(Lavg);
|
||||
|
||||
const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed
|
||||
const FFX_MIN16_F K = FFX_MIN16_F(12.5f);
|
||||
const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K);
|
||||
|
||||
const FFX_MIN16_F q = FFX_MIN16_F(0.65f);
|
||||
const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100);
|
||||
|
||||
return FFX_MIN16_F(1) / Lmax;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Maps a low-resolution (render) pixel to the high-resolution (display)
// pixel that its jittered sample center lands in.
FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
{
    FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); // un-jittered sample center
    FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize();
    FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr));
    return iPxHrPos;
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
|
||||
{
|
||||
FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
|
||||
FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
|
||||
FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr));
|
||||
return iPxHrPos;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts a pixel position to normalized device coordinates in [-1, 1],
// flipping Y so that +Y points up in NDC while pixel rows grow downward.
FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
{
    return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
}
|
||||
|
||||
// Converts a device (post-projection) depth value into a linear view-space
// depth using the precomputed projection factors.
FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
{
    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();

    // fDeviceToViewDepth details found in ffx_fsr2.cpp
    return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0]));
}
|
||||
|
||||
FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
|
||||
{
|
||||
return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor();
|
||||
}
|
||||
|
||||
// Reconstructs the view-space position of a viewport pixel from its device
// depth: linearize depth to Z, then unproject the NDC x/y with the stored
// inverse-projection factors.
FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
{
    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();

    const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth);

    const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize);
    const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z;
    const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z;

    return FfxFloat32x3(X, Y, Z);
}
|
||||
|
||||
FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
|
||||
{
|
||||
return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor();
|
||||
}
|
||||
|
||||
// Returns the view-space distance of the far plane in meters. The far plane
// sits at device depth 0 when inverted depth is enabled, at 1 otherwise.
FfxFloat32 GetMaxDistanceInMeters()
{
#if FFX_FSR2_OPTION_INVERTED_DEPTH
    return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor();
#else
    return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor();
#endif
}
|
||||
|
||||
// Normalizes an input color for accumulation: removes the application's
// pre-exposure, applies the current exposure, and clamps to the FP16 range
// so half-precision storage cannot overflow.
FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
{
    fRgb /= fPreExposure;
    fRgb *= fExposure;

    fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX);

    return fRgb;
}
|
||||
|
||||
FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
|
||||
{
|
||||
fRgb /= fExposure;
|
||||
fRgb *= PreExposure();
|
||||
|
||||
return fRgb;
|
||||
}
|
||||
|
||||
|
||||
struct BilinearSamplingData
|
||||
{
|
||||
FfxInt32x2 iOffsets[4];
|
||||
FfxFloat32 fWeights[4];
|
||||
FfxInt32x2 iBasePos;
|
||||
};
|
||||
|
||||
// Computes the data needed for a manual bilinear fetch at fUv on a texture
// of iSize texels: the top-left base texel, the four texel offsets of the
// 2x2 footprint, and their bilinear weights (which sum to 1).
BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
{
    BilinearSamplingData data;

    // Shift by half a texel so the footprint is centered on the sample point.
    FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
    data.iBasePos = FfxInt32x2(floor(fPxSample));
    FfxFloat32x2 fPxFrac = ffxFract(fPxSample);

    data.iOffsets[0] = FfxInt32x2(0, 0);
    data.iOffsets[1] = FfxInt32x2(1, 0);
    data.iOffsets[2] = FfxInt32x2(0, 1);
    data.iOffsets[3] = FfxInt32x2(1, 1);

    data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y);
    data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y);
    data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y);
    data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y);

    return data;
}
|
||||
|
||||
struct PlaneData
|
||||
{
|
||||
FfxFloat32x3 fNormal;
|
||||
FfxFloat32 fDistanceFromOrigin;
|
||||
};
|
||||
|
||||
// Builds a plane (unit normal + signed distance from origin) through three
// points, in the implicit form dot(n, p) + d = 0.
PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
{
    PlaneData plane;

    FfxFloat32x3 v0 = fP0 - fP1;
    FfxFloat32x3 v1 = fP0 - fP2;
    plane.fNormal = normalize(cross(v0, v1));
    plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal);

    return plane;
}
|
||||
|
||||
// Unsigned distance from a point to a plane (normal assumed unit length,
// as produced by GetPlaneFromPoints).
FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
{
    return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin);
}
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#endif //!defined(FFX_FSR2_COMMON_H)
|
||||
189
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
vendored
Normal file
189
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
vendored
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
FFX_GROUPSHARED FfxUInt32 spdCounter;
|
||||
|
||||
#ifndef SPD_PACKED_ONLY
|
||||
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
|
||||
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
|
||||
FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
|
||||
FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
|
||||
|
||||
// SPD source-load callback for the luminance pyramid: samples the jittered
// input color, removes pre-exposure, and returns its log-luminance in .r.
// Out-of-screen texels return 0 so they contribute nothing to the average.
FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
{
    FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize();
    fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions());
    FfxFloat32x3 fRgb = SampleInputColor(fUv);

    fRgb /= PreExposure();

    //compute log luma
    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); // epsilon guards log(0)

    // Make sure out of screen pixels contribute no value to the end result
    const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f;

    return FfxFloat32x4(result, 0, 0, 0);
}
|
||||
|
||||
FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
|
||||
{
|
||||
return SPD_LoadMipmap5(tex);
|
||||
}
|
||||
|
||||
// SPD mip-store callback for the luminance pyramid.
// Persists only the mip levels FSR2 consumes (LumaMipLevelToUse() and mip 5),
// and on the final 1x1 reduction blends the new average log-luma into the
// exposure buffer with an exponential, delta-time-based smoothing factor,
// then writes (exposure, average log-luma).
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
{
    if (index == LumaMipLevelToUse() || index == 5)
    {
        SPD_SetMipmap(pix, index, outValue.r);
    }

    if (index == MipCount() - 1) { //accumulate on 1x1 level

        // Only the single surviving thread at (0,0) updates the exposure buffer.
        if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
        {
            FfxFloat32 prev = SPD_LoadExposureBuffer().y; // previous average log-luma
            FfxFloat32 result = outValue.r;

            if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
            {
                FfxFloat32 rate = 1.0f;
                // Exponential moving average: prev + (new - prev) * (1 - e^(-dt * rate))
                result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate));
            }
            FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result);
            SPD_SetExposureBuffer(spdOutput);
        }
    }
}
|
||||
|
||||
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
|
||||
{
|
||||
SPD_IncreaseAtomicCounter(spdCounter);
|
||||
}
|
||||
|
||||
FfxUInt32 SpdGetAtomicCounter()
|
||||
{
|
||||
return spdCounter;
|
||||
}
|
||||
|
||||
void SpdResetAtomicCounter(FfxUInt32 slice)
|
||||
{
|
||||
SPD_ResetAtomicCounter();
|
||||
}
|
||||
|
||||
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
|
||||
{
|
||||
return FfxFloat32x4(
|
||||
spdIntermediateR[x][y],
|
||||
spdIntermediateG[x][y],
|
||||
spdIntermediateB[x][y],
|
||||
spdIntermediateA[x][y]);
|
||||
}
|
||||
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
|
||||
{
|
||||
spdIntermediateR[x][y] = value.x;
|
||||
spdIntermediateG[x][y] = value.y;
|
||||
spdIntermediateB[x][y] = value.z;
|
||||
spdIntermediateA[x][y] = value.w;
|
||||
}
|
||||
// SPD reduction callback: averages four child values into one parent value
// for the next pyramid level.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
    const FfxFloat32x4 fSum = v0 + v1 + v2 + v3;
    return fSum * 0.25f;
}
|
||||
#endif
|
||||
|
||||
// define fetch and store functions Packed
|
||||
#if FFX_HALF
|
||||
#error Callback must be implemented
|
||||
|
||||
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
|
||||
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
|
||||
|
||||
// FP16 SPD callback: fetches a source texel from mip 0 of the destination
// array (slice `slice`) and narrows it to half precision.
// NOTE(review): this lives under "#error Callback must be implemented" and
// never compiles as-is — it is a template for the FFX_HALF path.
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
{
    return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);
}

// FP16 SPD callback: reads back a texel of the mip-6 surface.
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
{
    return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);
}
|
||||
// FP16 SPD callback: stores a reduced value either into the persistent
// mip surface (shading-change mip / mip 5) or into the next level of the
// downsample chain.
// FIX: the original body tested an undeclared identifier `index`; this
// function's mip parameter is named `mip` (the non-half SpdStore() names the
// same parameter `index`, which is where the stale name came from).
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
    if (mip == LumaMipLevelToUse() || mip == 5)
    {
        imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
        return;
    }
    imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
}
|
||||
// FP16-path counter callbacks.
// NOTE(review): this section sits under "#error Callback must be implemented"
// and uses HLSL-style intrinsics (InterlockedAdd) rather than the GLSL image
// atomics used in the non-half path above — template/dead code until the
// callback is actually implemented for GLSL.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
}

// Returns the counter value captured by the last increment on this thread.
FfxUInt32 SpdGetAtomicCounter()
{
    return spdCounter;
}

// Clears the per-slice counter for the next dispatch.
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
}
|
||||
// FP16 SPD callback: reads one texel from the packed groupshared planes
// (RG pairs and BA pairs — half2 packing halves the shared-memory footprint).
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    return FfxFloat16x4(
        spdIntermediateRG[x][y].x,
        spdIntermediateRG[x][y].y,
        spdIntermediateBA[x][y].x,
        spdIntermediateBA[x][y].y);
}

// FP16 SPD callback: stores one texel into the packed groupshared planes.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
    spdIntermediateRG[x][y] = value.xy;
    spdIntermediateBA[x][y] = value.zw;
}

// FP16 SPD callback: 2x2 reduction kernel — plain average of the four inputs.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
    return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
}
|
||||
#endif
|
||||
|
||||
#include "ffx_spd.h"
|
||||
|
||||
// Entry point for the luminance-pyramid / auto-exposure pass: runs the AMD
// Single Pass Downsampler over the luma image. The SpdStore callback above
// persists the shading-change mip, mip 5, and the auto-exposure value.
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
#if FFX_HALF
    SpdDownsampleH(
        FfxUInt32x2(WorkGroupId.xy),
        FfxUInt32(LocalThreadIndex),
        FfxUInt32(MipCount()),
        FfxUInt32(NumWorkGroups()),
        FfxUInt32(WorkGroupId.z),
        FfxUInt32x2(WorkGroupOffset()));
#else
    SpdDownsample(
        FfxUInt32x2(WorkGroupId.xy),
        FfxUInt32(LocalThreadIndex),
        FfxUInt32(MipCount()),
        FfxUInt32(NumWorkGroups()),
        FfxUInt32(WorkGroupId.z),
        FfxUInt32x2(WorkGroupOffset()));
#endif
}
|
||||
134
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
vendored
Normal file
134
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 0
|
||||
#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1
|
||||
#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2
|
||||
#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3
|
||||
#define FSR2_BIND_UAV_AUTO_EXPOSURE 4
|
||||
#define FSR2_BIND_CB_FSR2 5
|
||||
#define FSR2_BIND_CB_SPD 6
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
#if defined(FSR2_BIND_CB_SPD)
// Constant buffer driving the single-pass downsampler: mip count, number of
// dispatched workgroups, workgroup offset, and the render area it covers.
layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
{
    uint mips;
    uint numWorkGroups;
    uvec2 workGroupOffset;
    uvec2 renderSize;
} cbSPD;

// Number of mip levels SPD produces.
uint MipCount()
{
    return cbSPD.mips;
}

// Total workgroup count of the dispatch (used for last-group election).
uint NumWorkGroups()
{
    return cbSPD.numWorkGroups;
}

// Offset applied to workgroup ids (for partial-surface downsampling).
uvec2 WorkGroupOffset()
{
    return cbSPD.workGroupOffset;
}

// Render-resolution area covered by the downsample.
uvec2 SPD_RenderSize()
{
    return cbSPD.renderSize;
}
#endif
|
||||
|
||||
// Reads the packed auto-exposure state from texel (0,0):
// x = exposure, y = smoothed average luma (Lavg).
vec2 SPD_LoadExposureBuffer()
{
    return imageLoad(rw_auto_exposure, ivec2(0,0)).xy;
}

// Writes the packed auto-exposure state (exposure, average luma).
void SPD_SetExposureBuffer(vec2 value)
{
    imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f));
}

// Reads back one texel of the persistent single-channel mip-5 surface,
// widened to vec4 for the SPD callback interface.
vec4 SPD_LoadMipmap5(ivec2 iPxPos)
{
    return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
}
|
||||
|
||||
// Routes a downsampled luma value to the correct persistent mip surface.
// Only the shading-change mip and mip 5 are backed by UAVs; for any other
// `slice` value the default branch performs a self-copy so that flattened
// control flow on some compilers produces no visible side effect.
void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
{
    switch (slice)
    {
    case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
        imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
        break;
    case 5:
        imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
        break;
    default:

        // avoid flattened side effect
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
        imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
        imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
#endif
        break;
    }
}
|
||||
|
||||
// Atomically increments the global SPD work counter; `spdCounter` receives
// the pre-increment value (imageAtomicAdd returns the old value), which SPD
// uses to elect the last active workgroup.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
    spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}

// Clears the global atomic counter for the next dispatch.
void SPD_ResetAtomicCounter()
{
    imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0));
}
|
||||
|
||||
#include "ffx_fsr2_compute_luminance_pyramid.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 256
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
void main()
{
    // Single dispatch builds the full luma pyramid + auto-exposure via SPD.
    ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex);
}
|
||||
258
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
vendored
Normal file
258
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
vendored
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_DEPTH_CLIP_H
|
||||
#define FFX_FSR2_DEPTH_CLIP_H
|
||||
|
||||
// Base scale for depth-clip values (consumed by callers of this header).
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;

// Estimates how strongly the current depth disagrees with the reprojected
// previous-frame depth at fUvSample. Returns 1 when depths agree (history
// can be kept) down to 0 on disocclusion (history must be clipped).
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
{
    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());

    FfxFloat32 fDilatedSum = 0.0f; // NOTE(review): unused in this function
    FfxFloat32 fDepth = 0.0f;
    FfxFloat32 fWeightSum = 0.0f;
    // Visit the 2x2 bilinear footprint of the reprojected sample.
    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {

        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;

        if (IsOnScreen(iSamplePos, RenderSize())) {
            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {

                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);

                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

                // Only depth receding relative to the previous frame counts
                // as a potential disocclusion.
                if (fDepthDiff > 0.0f) {

#if FFX_FSR2_OPTION_INVERTED_DEPTH
                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
#else
                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
#endif

                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);

                    const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);

                    // Required separation scales with FOV, viewport size and
                    // distance; Ksep is an empirical constant.
                    const FfxFloat32 Ksep = 1.37e-05f;
                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;

                    const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f)));
                    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
                    fWeightSum += fWeight;
                }
            }
        }
    }

    // No valid neighbor contributed -> treat as fully disoccluded.
    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
}
|
||||
|
||||
// Measures directional disagreement of motion vectors in the 3x3
// neighborhood of iPxPos. Returns 0 when all vectors point the same way,
// approaching 1 for diverging motion (scaled by overall velocity).
// FIX: the original clamped the per-sample divisor to the running maximum
// (`fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv)`), which — since
// fMaxVelocityUv had just been raised to at least fVelocityUv — always made
// the divisor equal to the running max, breaking the normalization of the
// direction dot product. The divisor must instead be clamped to the small
// epsilon so each vector is normalized by (approximately) its own length.
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
    FfxFloat32 minconvergence = 1.0f;

    FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
    FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
    FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);

    const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;

    // Skip the neighborhood scan entirely for (near-)static pixels.
    if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
        for (FfxInt32 y = -1; y <= 1; ++y) {
            for (FfxInt32 x = -1; x <= 1; ++x) {

                FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize);

                FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp);
                FfxFloat32 fVelocityUv = length(fMotionVector);

                fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
                // Guard against division by ~0 while keeping per-sample
                // normalization intact.
                fVelocityUv = ffxMax(fVelocityUv, MotionVectorVelocityEpsilon);
                minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
            }
        }
    }

    // High divergence only matters when the motion itself is significant.
    return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
}
|
||||
|
||||
// Relative min/max spread of dilated depth (in meters) over the 3x3
// neighborhood of iPxPos; forced to 0 when any sample hits the far-distance
// clamp (sky / background should not count as divergence).
FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
    FfxFloat32 fDepthMax = 0.0f;
    FfxFloat32 fDepthMin = fMaxDistInMeters;

    FfxInt32 iMaxDistFound = 0;

    for (FfxInt32 y = -1; y < 2; y++) {
        for (FfxInt32 x = -1; x < 2; x++) {

            const FfxInt32x2 iOffset = FfxInt32x2(x, y);
            const FfxInt32x2 iSamplePos = iPxPos + iOffset;

            // Off-screen samples contribute depth 0 (pulls fDepthMin to 0).
            const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f;
            FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor;

            // Remember whether any sample sits exactly at the far clamp.
            iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth);

            fDepthMin = ffxMin(fDepthMin, fDepth);
            fDepthMax = ffxMax(fDepthMax, fDepth);
        }
    }

    return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f);
}
|
||||
|
||||
// Measures how much the dilated motion vector shrank versus the previous
// frame's reprojected motion vector. Only contributes for motion longer than
// one display pixel, ramping in with (distance/20)^3.
FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();

    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
    FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
    fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize());
    FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);

    float fPxDistance = length(fMotionVector * DisplaySize());
    return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0;
}
|
||||
|
||||
// Builds the dilated reactive mask for iPxLrPos: combines motion divergence
// with the reactive and transparency/composition masks of the 3x3
// neighborhood, weighted by color similarity to the center pixel.
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
    // Compensate for bilinear sampling in accumulation pass

    FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
    // .x accumulates reactive, .y transparency/composition (seeded with
    // the motion divergence).
    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);

    float fMasksSum = 0.0f;

    FfxFloat32x3 fColorSamples[9];
    FfxFloat32 fReactiveSamples[9];
    FfxFloat32 fTransparencyAndCompositionSamples[9];

    // First pass: gather the 3x3 neighborhood into local arrays.
    FFX_UNROLL
    for (FfxInt32 y = -1; y < 2; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -1; x < 2; x++) {

            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;

            FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
            FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
            FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);

            fColorSamples[sampleIdx] = fColorSample;
            fReactiveSamples[sampleIdx] = fReactiveSample;
            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;

            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
        }
    }

    // Second pass is only needed when any mask is set at all.
    if (fMasksSum > 0)
    {
        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
        {
            FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
            FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
            FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];

            // Similarity in [0,1]: 1 when the neighbor matches the center color.
            const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
            const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;

            // Increase power for non-similar samples
            const FfxFloat32 fPowerBiasMax = 6.0f;
            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);

            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
        }
    }

    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
}
|
||||
|
||||
// Converts the input color to the exposure-normalized YCoCg space consumed
// by the accumulation pass.
FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
{
    //We assume linear data. if non-linear input (sRGB, ...),
    //then we should convert to linear first and back to sRGB on output.
    FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));

    fRgb = PrepareRgb(fRgb, Exposure(), PreExposure());

    const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb);

    return fPreparedYCoCg;
}

// Vertical 3-tap test on reconstructed previous depth: returns 0 when the
// column forms a consistent >1% depth step (likely a sloped surface rather
// than a disocclusion), 1 otherwise.
// NOTE(review): fMotionVector is currently unused here.
FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
{
    FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
    FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
    FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));

    return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f)));
}
|
||||
|
||||
// Per-pixel entry point of the depth-clip pass: computes the disocclusion
// (depth-clip) factor, the prepared YCoCg color, and the dilated reactive
// masks for pixel iPxPos.
void DepthClip(FfxInt32x2 iPxPos)
{
    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);

    // Discard tiny mvs
    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);

    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos));

    // Compute prepared input color and depth clip
    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));

    // Compute dilated reactive mask
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    FfxInt32x2 iSamplePos = iPxPos;
#else
    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
#endif

    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));

    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_DEPTH_CLIPH )
|
||||
66
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
vendored
Normal file
66
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0
|
||||
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1
|
||||
#define FSR2_BIND_SRV_DILATED_DEPTH 2
|
||||
#define FSR2_BIND_SRV_REACTIVE_MASK 3
|
||||
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4
|
||||
#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 8
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 9
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 10
|
||||
|
||||
#define FSR2_BIND_UAV_DEPTH_CLIP 11
|
||||
#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12
|
||||
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 14
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_depth_clip.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
void main()
{
    // One invocation per render-resolution pixel.
    DepthClip(ivec2(gl_GlobalInvocationID.xy));
}
|
||||
1
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
vendored
Normal file
1
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
// This file doesn't exist in this version of FSR.
|
||||
1
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
vendored
Normal file
1
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
// This file doesn't exist in this version of FSR.
|
||||
115
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
vendored
Normal file
115
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
vendored
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_LOCK_H
|
||||
#define FFX_FSR2_LOCK_H
|
||||
|
||||
// Resets the reconstructed-previous-depth surface at iPxHrPos to "far" so
// next frame's depth reconstruction starts from a clean slate.
void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
{
    if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize()))))
    {
#if FFX_FSR2_OPTION_INVERTED_DEPTH
        const FfxUInt32 farZ = 0x0;
#else
        const FfxUInt32 farZ = 0x3f800000; // raw bits of 1.0f
#endif
        SetReconstructedDepth(iPxHrPos, farZ);
    }
}
|
||||
|
||||
// Detects pixel-thin features (ridges) around `pos` by luma similarity over
// a 3x3 neighborhood. Returns true when the nucleus stands out from its
// dissimilar neighbors AND no 2x2 quadrant is entirely "similar" (which
// would mean the pixel belongs to a larger area, not a thin feature).
FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
{
    const FfxInt32 RADIUS = 1;

    FfxFloat32 fNucleus = LoadLockInputLuma(pos);

    FfxFloat32 similar_threshold = 1.05f;
    FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
    FfxFloat32 dissimilarLumaMax = 0;

    /*
    0 1 2
    3 4 5
    6 7 8
    */

#define SETBIT(x) (1U << x)

    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar

    // Each mask covers one 2x2 quadrant of the 3x3 window (bit 4 = nucleus).
    const FfxUInt32 uNumRejectionMasks = 4;
    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
    };

    FfxInt32 idx = 0;
    FFX_UNROLL
    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
            if (x == 0 && y == 0) continue;

            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
            // Ratio-based similarity: within 5% of the nucleus counts as similar.
            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);

            if (difference > 0 && (difference < similar_threshold)) {
                mask |= SETBIT(idx);
            } else {
                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
            }
        }
    }

    // Ridge: nucleus is brighter or darker than every dissimilar neighbor.
    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;

    if (FFX_FALSE == isRidge) {

        return false;
    }

    // Reject if any full quadrant is similar — not a thin feature then.
    FFX_UNROLL
    for (FfxInt32 i = 0; i < 4; i++) {

        if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
// Per-pixel entry point of the lock pass: creates a new lock at the
// high-resolution position when a thin feature is detected, then clears
// per-frame resources for this pixel.
void ComputeLock(FfxInt32x2 iPxLrPos)
{
    if (ComputeThinFeatureConfidence(iPxLrPos))
    {
        StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
    }

    ClearResourcesForNextFrame(iPxLrPos);
}
|
||||
|
||||
#endif // FFX_FSR2_LOCK_H
|
||||
56
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
vendored
Normal file
56
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0
|
||||
#define FSR2_BIND_UAV_NEW_LOCKS 1
|
||||
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2
|
||||
#define FSR2_BIND_CB_FSR2 3
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_lock.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
void main()
{
    // Reconstruct the dispatch-thread id manually (GLSL equivalent of HLSL
    // SV_DispatchThreadID).
    uvec2 uDispatchThreadId = gl_WorkGroupID.xy * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;

    ComputeLock(ivec2(uDispatchThreadId));
}
|
||||
106
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
vendored
Normal file
106
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
vendored
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
|
||||
#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
|
||||
|
||||
FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
|
||||
{
|
||||
return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
|
||||
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
|
||||
{
|
||||
return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
|
||||
#else
|
||||
DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
|
||||
#endif
|
||||
DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples)
|
||||
|
||||
FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
|
||||
{
|
||||
FfxFloat32 fShadingChangeLuma = 0;
|
||||
|
||||
#if 0
|
||||
fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
|
||||
#else
|
||||
|
||||
const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse());
|
||||
FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv);
|
||||
|
||||
fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions());
|
||||
fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse())));
|
||||
#endif
|
||||
|
||||
fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f);
|
||||
|
||||
return fShadingChangeLuma;
|
||||
}
|
||||
|
||||
void UpdateLockStatus(AccumulationPassCommonParams params,
|
||||
FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
|
||||
FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
|
||||
FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
|
||||
FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) {
|
||||
|
||||
const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
|
||||
|
||||
//init temporal shading change factor, init to -1 or so in reproject to know if "true new"?
|
||||
fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA];
|
||||
|
||||
FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
|
||||
|
||||
fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);
|
||||
|
||||
if (state.NewLock) {
|
||||
fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
|
||||
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
|
||||
}
|
||||
else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
|
||||
fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f);
|
||||
}
|
||||
else {
|
||||
if (fLuminanceDiff > 0.1f) {
|
||||
KillLock(fLockStatus);
|
||||
}
|
||||
}
|
||||
|
||||
fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
|
||||
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
|
||||
fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
|
||||
|
||||
// Compute this frame lock contribution
|
||||
const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
|
||||
const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
|
||||
|
||||
fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution);
|
||||
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H )
|
||||
67
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
vendored
Normal file
67
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
vendored
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#define GROUP_SIZE 8
|
||||
|
||||
#define FSR_RCAS_DENOISE 1
|
||||
|
||||
void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
|
||||
{
|
||||
StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor);
|
||||
}
|
||||
|
||||
#define FSR_RCAS_F
|
||||
FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
|
||||
{
|
||||
FfxFloat32x4 fColor = LoadRCAS_Input(p);
|
||||
|
||||
fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure());
|
||||
|
||||
return fColor;
|
||||
}
|
||||
|
||||
void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {}
|
||||
|
||||
#include "ffx_fsr1.h"
|
||||
|
||||
|
||||
void CurrFilter(FFX_MIN16_U2 pos)
|
||||
{
|
||||
FfxFloat32x3 c;
|
||||
FsrRcasF(c.r, c.g, c.b, pos, RCASConfig());
|
||||
|
||||
c = UnprepareRgb(c, Exposure());
|
||||
|
||||
WriteUpscaledOutput(pos, c);
|
||||
}
|
||||
|
||||
void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
|
||||
{
|
||||
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
|
||||
FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
|
||||
CurrFilter(FFX_MIN16_U2(gxy));
|
||||
gxy.x += 8u;
|
||||
CurrFilter(FFX_MIN16_U2(gxy));
|
||||
gxy.y += 8u;
|
||||
CurrFilter(FFX_MIN16_U2(gxy));
|
||||
gxy.x -= 8u;
|
||||
CurrFilter(FFX_MIN16_U2(gxy));
|
||||
}
|
||||
80
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
vendored
Normal file
80
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
// Needed for rw_upscaled_output declaration
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
|
||||
#define FSR2_BIND_SRV_RCAS_INPUT 1
|
||||
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2
|
||||
#define FSR2_BIND_CB_FSR2 3
|
||||
#define FSR2_BIND_CB_RCAS 4
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
//Move to prototype shader!
|
||||
#if defined(FSR2_BIND_CB_RCAS)
|
||||
layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
|
||||
{
|
||||
uvec4 rcasConfig;
|
||||
} cbRCAS;
|
||||
|
||||
uvec4 RCASConfig()
|
||||
{
|
||||
return cbRCAS.rcasConfig;
|
||||
}
|
||||
#else
|
||||
uvec4 RCASConfig()
|
||||
{
|
||||
return uvec4(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
|
||||
{
|
||||
return texelFetch(r_rcas_input, iPxPos, 0);
|
||||
}
|
||||
|
||||
#include "ffx_fsr2_rcas.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 64
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
void main()
|
||||
{
|
||||
RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz);
|
||||
}
|
||||
145
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
vendored
Normal file
145
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
vendored
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
|
||||
#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
|
||||
|
||||
void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
|
||||
{
|
||||
fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
|
||||
|
||||
FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
|
||||
FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
|
||||
|
||||
BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
|
||||
|
||||
// Project current depth into previous frame locations.
|
||||
// Push to all pixels having some contribution if reprojection is using bilinear logic.
|
||||
for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
|
||||
|
||||
const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
|
||||
FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
|
||||
|
||||
if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
|
||||
|
||||
FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
|
||||
if (IsOnScreen(iStorePos, iPxDepthSize)) {
|
||||
StoreReconstructedDepth(iStorePos, fDepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
|
||||
{
|
||||
const FfxInt32 iSampleCount = 9;
|
||||
const FfxInt32x2 iSampleOffsets[iSampleCount] = {
|
||||
FfxInt32x2(+0, +0),
|
||||
FfxInt32x2(+1, +0),
|
||||
FfxInt32x2(+0, +1),
|
||||
FfxInt32x2(+0, -1),
|
||||
FfxInt32x2(-1, +0),
|
||||
FfxInt32x2(-1, +1),
|
||||
FfxInt32x2(+1, +1),
|
||||
FfxInt32x2(-1, -1),
|
||||
FfxInt32x2(+1, -1),
|
||||
};
|
||||
|
||||
// pull out the depth loads to allow SC to batch them
|
||||
FfxFloat32 depth[9];
|
||||
FfxInt32 iSampleIndex = 0;
|
||||
FFX_UNROLL
|
||||
for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
|
||||
|
||||
FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
|
||||
depth[iSampleIndex] = LoadInputDepth(iPos);
|
||||
}
|
||||
|
||||
// find closest depth
|
||||
fNearestDepthCoord = iPxPos;
|
||||
fNearestDepth = depth[0];
|
||||
FFX_UNROLL
|
||||
for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
|
||||
|
||||
FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
|
||||
if (IsOnScreen(iPos, iPxSize)) {
|
||||
|
||||
FfxFloat32 fNdDepth = depth[iSampleIndex];
|
||||
#if FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||
if (fNdDepth > fNearestDepth) {
|
||||
#else
|
||||
if (fNdDepth < fNearestDepth) {
|
||||
#endif
|
||||
fNearestDepthCoord = iPos;
|
||||
fNearestDepth = fNdDepth;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
|
||||
{
|
||||
//We assume linear data. if non-linear input (sRGB, ...),
|
||||
//then we should convert to linear first and back to sRGB on output.
|
||||
FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
|
||||
|
||||
// Use internal auto exposure for locking logic
|
||||
fRgb /= PreExposure();
|
||||
fRgb *= Exposure();
|
||||
|
||||
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
|
||||
fRgb = Tonemap(fRgb);
|
||||
#endif
|
||||
|
||||
//compute luma used to lock pixels, if used elsewhere the ffxPow must be moved!
|
||||
const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0));
|
||||
|
||||
return fLockInputLuma;
|
||||
}
|
||||
|
||||
void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
|
||||
{
|
||||
FfxFloat32 fDilatedDepth;
|
||||
FfxInt32x2 iNearestDepthCoord;
|
||||
|
||||
FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
|
||||
|
||||
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
|
||||
FfxInt32x2 iSamplePos = iPxLrPos;
|
||||
FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
|
||||
#else
|
||||
FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos);
|
||||
FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
|
||||
#endif
|
||||
|
||||
FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
|
||||
|
||||
StoreDilatedDepth(iPxLrPos, fDilatedDepth);
|
||||
StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);
|
||||
|
||||
ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
|
||||
|
||||
FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
|
||||
StoreLockInputLuma(iPxLrPos, fLockInputLuma);
|
||||
}
|
||||
|
||||
|
||||
#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
|
||||
65
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
vendored
Normal file
65
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 1
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 2
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 3
|
||||
#define FSR2_BIND_SRV_LUMA_HISTORY 4
|
||||
|
||||
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5
|
||||
#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6
|
||||
#define FSR2_BIND_UAV_DILATED_DEPTH 7
|
||||
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8
|
||||
#define FSR2_BIND_UAV_LUMA_HISTORY 9
|
||||
#define FSR2_BIND_UAV_LUMA_INSTABILITY 10
|
||||
#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 12
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
void main()
|
||||
{
|
||||
ReconstructAndDilate(FFX_MIN16_I2(gl_GlobalInvocationID.xy));
|
||||
}
|
||||
136
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
vendored
Normal file
136
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
vendored
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_REPROJECT_H
|
||||
#define FFX_FSR2_REPROJECT_H
|
||||
|
||||
#ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
|
||||
#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
|
||||
#endif
|
||||
|
||||
FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
|
||||
{
|
||||
return LoadHistory(iPxSample);
|
||||
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
|
||||
{
|
||||
return FFX_MIN16_F4(LoadHistory(iPxSample));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
|
||||
DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
|
||||
#else
|
||||
DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
|
||||
DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
|
||||
#endif
|
||||
|
||||
FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
|
||||
{
|
||||
FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
|
||||
return fSample;
|
||||
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
|
||||
{
|
||||
FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
|
||||
|
||||
return fSample;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
|
||||
#else
|
||||
DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
|
||||
#endif
|
||||
#else
|
||||
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
|
||||
#else
|
||||
DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
|
||||
{
|
||||
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
|
||||
FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize()));
|
||||
#else
|
||||
FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos);
|
||||
#endif
|
||||
|
||||
return fDilatedMotionVector;
|
||||
}
|
||||
|
||||
FfxBoolean IsUvInside(FfxFloat32x2 fUv)
|
||||
{
|
||||
return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f);
|
||||
}
|
||||
|
||||
void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
|
||||
{
|
||||
fReprojectedHrUv = params.fHrUv + params.fMotionVector;
|
||||
|
||||
bIsExistingSample = IsUvInside(fReprojectedHrUv);
|
||||
}
|
||||
|
||||
void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
|
||||
{
|
||||
FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
|
||||
|
||||
fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure());
|
||||
|
||||
fHistoryColor = RGBToYCoCg(fHistoryColor);
|
||||
|
||||
//Compute temporal reactivity info
|
||||
fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w));
|
||||
bInMotionLastFrame = (fHistory.w < 0.0f);
|
||||
}
|
||||
|
||||
LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
|
||||
{
|
||||
LockState state = { FFX_FALSE, FFX_FALSE };
|
||||
const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
|
||||
state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
|
||||
|
||||
FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0;
|
||||
|
||||
fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
|
||||
|
||||
if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
|
||||
state.WasLockedPrevFrame = true;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_REPROJECT_H )
|
||||
105
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
vendored
Normal file
105
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
vendored
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_RESOURCES_H
|
||||
#define FFX_FSR2_RESOURCES_H
|
||||
|
||||
#if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19
|
||||
#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20
|
||||
// Internal FSR2 resource identifiers. History resources that are read from one frame
// and written the next are ping-ponged via the _1/_2 suffixed pairs.
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26
// NOTE(review): "IDENTITIER" is a typo ("IDENTIFIER") carried over from the upstream
// AMD sources; renaming it would break external references, so it is kept as-is.
#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28
// The scene-luminance resource doubles as mip 0 of its own mip chain below.
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION 45

// Previous-frame pre/post-alpha color history (with ping-pong pairs).
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 53
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 54
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 55
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 56
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 57

// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE)

// One past the highest resource identifier defined above.
#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 58

// Constant buffer binding identifiers, one per FSR2 pass family.
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3

// Bit flags controlling the auto-reactive mask generation pass.
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4
#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8

#endif // #if defined(FFX_CPU) || defined(FFX_GPU)

#endif //!defined( FFX_FSR2_RESOURCES_H )
|
||||
605
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
vendored
Normal file
605
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
vendored
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_SAMPLE_H
|
||||
#define FFX_FSR2_SAMPLE_H
|
||||
|
||||
// suppress warnings
|
||||
#ifdef FFX_HLSL
|
||||
#pragma warning(disable: 4008) // potentially divide by zero
|
||||
#endif //FFX_HLSL
|
||||
|
||||
// 2x2 quad of texels fetched for bilinear filtering.
// fColorXY holds the texel at offset (X, Y) from the base sample position.
struct FetchedBilinearSamples {

    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;

    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
};
|
||||
|
||||
// 4x4 footprint of texels fetched for bicubic/Lanczos filtering.
// fColorXY holds the texel at offset (X - 1, Y - 1) from the base sample position,
// i.e. offsets -1..+2 on each axis; fColor11..fColor22 are the central 2x2 quad.
struct FetchedBicubicSamples {

    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;
    FfxFloat32x4 fColor20;
    FfxFloat32x4 fColor30;

    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
    FfxFloat32x4 fColor21;
    FfxFloat32x4 fColor31;

    FfxFloat32x4 fColor02;
    FfxFloat32x4 fColor12;
    FfxFloat32x4 fColor22;
    FfxFloat32x4 fColor32;

    FfxFloat32x4 fColor03;
    FfxFloat32x4 fColor13;
    FfxFloat32x4 fColor23;
    FfxFloat32x4 fColor33;
};
|
||||
|
||||
#if FFX_HALF
// Half-precision (min16 float) variant of FetchedBilinearSamples.
struct FetchedBilinearSamplesMin16 {

    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;

    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
};

// Half-precision (min16 float) variant of FetchedBicubicSamples.
struct FetchedBicubicSamplesMin16 {

    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;
    FFX_MIN16_F4 fColor20;
    FFX_MIN16_F4 fColor30;

    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
    FFX_MIN16_F4 fColor21;
    FFX_MIN16_F4 fColor31;

    FFX_MIN16_F4 fColor02;
    FFX_MIN16_F4 fColor12;
    FFX_MIN16_F4 fColor22;
    FFX_MIN16_F4 fColor32;

    FFX_MIN16_F4 fColor03;
    FFX_MIN16_F4 fColor13;
    FFX_MIN16_F4 fColor23;
    FFX_MIN16_F4 fColor33;
};
#else //FFX_HALF
// Without half-precision support the Min16 names alias the full-precision structs.
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
#endif //FFX_HALF
|
||||
|
||||
// Linear interpolation between A and B by factor t (t == 0 yields A, t == 1 yields B).
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    const FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}
|
||||
|
||||
// Bilinear interpolation of a 2x2 texel quad at fractional position fPxFrac.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    // Interpolate along x on both rows, then along y between the two row results.
    const FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    const FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
|
||||
{
|
||||
return A + (B - A) * t;
|
||||
}
|
||||
|
||||
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
|
||||
{
|
||||
FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
|
||||
return fColorXY;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Exact two-lobe Lanczos kernel weight at distance x (unclamped; caller must
// keep |x| within the kernel support). Guards the removable singularity at
// x == 0, where the kernel value is 1.
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
    return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x));
}
|
||||
|
||||
// Two-lobe Lanczos weight with the input distance folded to its magnitude and
// clamped to the kernel support [0, 2].
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
}
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
#if 0
|
||||
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
|
||||
{
|
||||
const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
|
||||
return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
|
||||
}
|
||||
#endif
|
||||
|
||||
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
|
||||
{
|
||||
x = ffxMin(abs(x), FFX_MIN16_F(2.0f));
|
||||
return FFX_MIN16_F(Lanczos2NoClamp(x));
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// FSR1 lanczos approximation. Input is x*x and must be <= 4.
// Polynomial fit of the two-lobe Lanczos kernel evaluated on the squared
// distance, avoiding the transcendental sin() calls of the exact kernel.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    FfxFloat32 a = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 b = (1.0f / 4.0f) * x2 - 1;
    return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b);
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
|
||||
{
|
||||
FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
|
||||
FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
|
||||
return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b);
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Approximate Lanczos-2 weight from a squared distance, with the input clamped
// to the kernel support (x2 <= 4) as required by the approximation.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
|
||||
{
|
||||
x2 = ffxMin(x2, FFX_MIN16_F(4.0f));
|
||||
return Lanczos2ApproxSqNoClamp(x2);
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Approximate Lanczos-2 weight at (unclamped) distance x; squares the distance
// and defers to the squared-distance approximation.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    const FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSqNoClamp(fSquared);
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
|
||||
{
|
||||
return Lanczos2ApproxSqNoClamp(x * x);
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Approximate Lanczos-2 weight at distance x, clamped to the kernel support.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    const FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSq(fSquared);
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
|
||||
{
|
||||
return Lanczos2ApproxSq(x * x);
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Lanczos-2 weight read from the precomputed LUT (via SampleLanczos2Weight).
// The kernel is symmetric, so the lookup uses abs(x).
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    return SampleLanczos2Weight(abs(x));
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
|
||||
{
|
||||
return FFX_MIN16_F(SampleLanczos2Weight(abs(x)));
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Normalized 4-tap Lanczos-2 interpolation along one axis using LUT-based weights.
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    // Tap distances are -1, 0, +1, +2 relative to the fractional position t.
    const FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    const FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    const FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    const FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);
    // Normalize by the weight sum so the taps always blend to unity.
    const FfxFloat32x4 fAccumulated = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    return fAccumulated / (fW0 + fW1 + fW2 + fW3);
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
|
||||
{
|
||||
FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
|
||||
FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
|
||||
FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
|
||||
FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
|
||||
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Normalized 4-tap interpolation along one axis using exact Lanczos-2 weights.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    // Tap distances are -1, 0, +1, +2 relative to the fractional position t.
    const FfxFloat32 fW0 = Lanczos2(-1.f - t);
    const FfxFloat32 fW1 = Lanczos2(-0.f - t);
    const FfxFloat32 fW2 = Lanczos2(+1.f - t);
    const FfxFloat32 fW3 = Lanczos2(+2.f - t);
    // Normalize by the weight sum so the taps always blend to unity.
    const FfxFloat32x4 fAccumulated = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    return fAccumulated / (fW0 + fW1 + fW2 + fW3);
}
|
||||
|
||||
// Separable 4x4 Lanczos-2 resampling of a fetched bicubic footprint at fractional
// position fPxFrac, followed by deringing: the filtered color is clamped to the
// min/max of the central 2x2 texels to suppress overshoot from the kernel's
// negative lobes.
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Filter each of the four rows along x, then filter the row results along y.
    FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    // The deringing bounds come from the central 2x2 texels of the footprint.
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {

        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
|
||||
{
|
||||
FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
|
||||
FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
|
||||
FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
|
||||
FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
|
||||
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
|
||||
}
|
||||
|
||||
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
|
||||
{
|
||||
FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
|
||||
|
||||
// Deringing
|
||||
|
||||
// TODO: only use 4 by checking jitter
|
||||
const FfxInt32 iDeringingSampleCount = 4;
|
||||
const FFX_MIN16_F4 fDeringingSamples[4] = {
|
||||
Samples.fColor11,
|
||||
Samples.fColor21,
|
||||
Samples.fColor12,
|
||||
Samples.fColor22,
|
||||
};
|
||||
|
||||
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
|
||||
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
|
||||
|
||||
FFX_UNROLL
|
||||
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
|
||||
{
|
||||
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
|
||||
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
|
||||
}
|
||||
|
||||
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
|
||||
|
||||
return fColorXY;
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
// Separable 4x4 Lanczos-2 resampling using the LUT-based weight lookup instead of
// the exact kernel, followed by the same deringing clamp to the central 2x2 texels.
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Filter each of the four rows along x, then filter the row results along y.
    FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {

        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
|
||||
{
|
||||
FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
|
||||
|
||||
// Deringing
|
||||
|
||||
// TODO: only use 4 by checking jitter
|
||||
const FfxInt32 iDeringingSampleCount = 4;
|
||||
const FFX_MIN16_F4 fDeringingSamples[4] = {
|
||||
Samples.fColor11,
|
||||
Samples.fColor21,
|
||||
Samples.fColor12,
|
||||
Samples.fColor22,
|
||||
};
|
||||
|
||||
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
|
||||
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
|
||||
|
||||
FFX_UNROLL
|
||||
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
|
||||
{
|
||||
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
|
||||
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
|
||||
}
|
||||
|
||||
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
|
||||
|
||||
return fColorXY;
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
|
||||
// Normalized 4-tap interpolation along one axis using the polynomial Lanczos-2
// approximation for the tap weights.
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    // Tap distances are -1, 0, +1, +2 relative to the fractional position t.
    const FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
    const FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
    const FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
    const FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);
    // Normalize by the weight sum so the taps always blend to unity.
    const FfxFloat32x4 fAccumulated = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    return fAccumulated / (fW0 + fW1 + fW2 + fW3);
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
|
||||
{
|
||||
FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
|
||||
FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
|
||||
FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
|
||||
FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
|
||||
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Separable 4x4 resampling using the polynomial Lanczos-2 approximation,
// followed by the same deringing clamp to the central 2x2 texels.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Filter each of the four rows along x, then filter the row results along y.
    FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
    {
        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
|
||||
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
|
||||
{
|
||||
FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
|
||||
FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
|
||||
|
||||
// Deringing
|
||||
|
||||
// TODO: only use 4 by checking jitter
|
||||
const FfxInt32 iDeringingSampleCount = 4;
|
||||
const FFX_MIN16_F4 fDeringingSamples[4] = {
|
||||
Samples.fColor11,
|
||||
Samples.fColor21,
|
||||
Samples.fColor12,
|
||||
Samples.fColor22,
|
||||
};
|
||||
|
||||
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
|
||||
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
|
||||
|
||||
FFX_UNROLL
|
||||
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
|
||||
{
|
||||
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
|
||||
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
|
||||
}
|
||||
|
||||
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
|
||||
|
||||
return fColorXY;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
|
||||
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
|
||||
{
|
||||
FfxInt32x2 result = iPxSample + iPxOffset;
|
||||
result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x;
|
||||
result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x;
|
||||
result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y;
|
||||
result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y;
|
||||
return result;
|
||||
}
|
||||
#if FFX_HALF
|
||||
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
|
||||
{
|
||||
FFX_MIN16_I2 result = iPxSample + iPxOffset;
|
||||
result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
|
||||
result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
|
||||
result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
|
||||
result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
|
||||
return result;
|
||||
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
// Declares a function `Name` that fetches the 4x4 texel footprint around iPxSample
// (offsets -1..+2 on each axis), edge-clamped via ClampCoord, loading each texel
// with the LoadTexture callable and converting it to TextureType.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
        \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
        \
        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
        \
        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
        \
        return Samples; \
    }

// Full-precision 4x4 fetcher declaration.
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Half-precision 4x4 fetcher declaration (addresses stay 32-bit).
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)

// Declares a function `Name` that fetches the 2x2 texel quad at iPxSample
// (offsets +0..+1 on each axis), edge-clamped via ClampCoord.
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        return Samples; \
    }

// Full-precision 2x2 fetcher declaration.
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Half-precision 2x2 fetcher declaration (addresses stay 32-bit).
#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
|
||||
|
||||
// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
// is common, so iPxSample can "jitter"
// Declares a texture-sampling function `Name` that converts a UV coordinate into a
// texel base position plus fraction, fetches a footprint via FetchSamples, and
// filters it with InterpolateSamples.
// NOTE(review): the base coordinates are clamped to iTextureSize rather than
// iTextureSize - 1; edge texels appear to be handled by ClampCoord inside the
// fetchers -- confirm this is intended.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }

// Half-precision variant: the fraction and filtered result use min16 floats while
// the UV/texel address math stays full precision.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }
|
||||
|
||||
// Token-pasting helpers used to select a 1D Lanczos sampler implementation by index.
#define FFX_FSR2_CONCAT_ID(x, y) x ## y
#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)
// 0: exact kernel, 1: LUT-based weights, 2: polynomial approximation.
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

// Expands to the sampler function named by index x (after macro expansion of x).
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
|
||||
|
||||
#endif //!defined( FFX_FSR2_SAMPLE_H )
|
||||
250
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
vendored
Normal file
250
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
vendored
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// Perform the transparency/composition analysis in YCoCg space rather than RGB.
#define USE_YCOCG 1

// Minimum per-channel color delta treated as a significant alpha contribution.
#define fAutogenEpsilon 0.01f
||||
|
||||
// EXPERIMENTAL
|
||||
|
||||
// EXPERIMENTAL
// Binary transparency & composition estimate: measures how much the alpha
// contribution (post-alpha minus pre-alpha) changed between the current and
// the previous frame and binarizes the result against getTcThreshold().
// Returns 0 or 1.
// Fix: removed dead locals (colorDeltaCurr/colorDeltaPrev and the derived
// hasAlpha/hadAlpha flags were computed but never used).
FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    FfxFloat32x3 X = colorPreAlpha;
    FfxFloat32x3 Y = colorPostAlpha;
    FfxFloat32x3 Z = colorPrevPreAlpha;
    FfxFloat32x3 W = colorPrevPostAlpha;

    // | |post - pre| - |prevPost - prevPre| |: change of the alpha contribution
    // between frames, summed over all channels and saturated.
    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1))));

    // cleanup very small values: binarize against the configured threshold
    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);

    return retVal;
}
|
||||
|
||||
// works ok: thin edges
|
||||
// works ok: thin edges
// Estimates the per-channel alpha contribution A from the frame-to-frame change
// of the pre-alpha and post-alpha colors, then scales by the magnitude of the
// post-alpha color change so only visibly changing pixels are flagged.
// Fix: removed dead locals (delta, deltaPrev, X, YAminusXA were computed but
// never used) and a comment typo.
FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    // Did alpha contribute noticeably this frame / last frame?
    FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha;
    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));

    FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;          // pre-alpha change between frames
    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; // post-alpha change between frames

    // Ratio of post-alpha change to pre-alpha change (epsilon-clamped denominator).
    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0);

    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );

    // only pixels that have significantly changed in color should be considered
    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) );

    return retVal;
}
|
||||
|
||||
// This function computes the TransparencyAndComposition mask:
|
||||
// This mask indicates pixels that should discard locks and apply color clamping.
|
||||
//
|
||||
// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of
|
||||
// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
|
||||
// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
|
||||
//
|
||||
// This function takes a opaque only and a final texture and uses internal copies of those textures from the last frame.
|
||||
// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
|
||||
// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
|
||||
// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
|
||||
//
|
||||
// In the final step it stores the current textures in internal textures for the next frame
|
||||
|
||||
// Combines the two auto-TC estimates: the continuous estimate (ComputeAutoTC_02)
// gates the binary estimate (ComputeAutoTC_01), which produces the final value.
FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    const FFX_MIN16_F fCoarse = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);

    // [branch]
    if (fCoarse <= FFX_MIN16_F(0.01f))
    {
        return fCoarse;
    }
    return ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
}
|
||||
|
||||
// Edge response of the opaque-only color change: for each tap of the 3x3
// neighbourhood, measures the frame-to-frame difference of the pre-alpha color,
// then combines multiplicative gradients around the center tap.
float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    float lum[9];
    int idx = 0;
    for (int dy = -1; dy <= 1; ++dy)
    {
        for (int dx = -1; dx <= 1; ++dx)
        {
            const FFX_MIN16_I2 tap = FFX_MIN16_I2(dx, dy);
            FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + tap).rgb;
            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + tap).rgb;
            lum[idx] = length(curCol - prevCol);
            ++idx;
        }
    }

    // Multiplicative horizontal/vertical gradients around the center (index 4),
    // softened by applying the square root twice.
    const float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]);
    const float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]);
    return sqrt(sqrt(gradX * gradY));
}
|
||||
|
||||
// Edge response of the alpha contribution: for each tap of the 3x3
// neighbourhood, measures the frame-to-frame change of |post-alpha - pre-alpha|,
// then combines multiplicative gradients around the center tap.
float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    float lum[9];
    int idx = 0;
    for (int dy = -1; dy <= 1; ++dy)
    {
        for (int dx = -1; dx <= 1; ++dx)
        {
            const FFX_MIN16_I2 tap = FFX_MIN16_I2(dx, dy);
            FfxFloat32x3 curContrib = abs(LoadInputColor(curPos + tap).rgb - LoadOpaqueOnly(curPos + tap).rgb);
            FfxFloat32x3 prevContrib = abs(LoadPrevPostAlpha(prevPos + tap).rgb - LoadPrevPreAlpha(prevPos + tap).rgb);
            lum[idx] = length(curContrib - prevContrib);
            ++idx;
        }
    }

    // Multiplicative horizontal/vertical gradients around the center (index 4),
    // softened by applying the square root twice.
    const float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]);
    const float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]);
    return sqrt(sqrt(gradX * gradY));
}
|
||||
|
||||
// Builds the 3x3 color AABB of the previous frame's post-alpha colors around
// iPrevIdx, then counts how many channels of the current frame's 3x3 post-alpha
// samples fall outside that box and returns the normalized count.
// Fixes: removed five dead loads (fMotionVector and the four pre/post-alpha
// colors plus their YCoCg conversions were never used) and corrected the
// min/max initializers to match their FfxFloat32x3 declarations (they were
// constructed as FFX_MIN16_F3).
FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FFX_MIN16_F retVal = FFX_MIN16_F(0.f);

    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            W = RGBToYCoCg(W);
#endif
            minPrev = min(minPrev, W);
            maxPrev = max(maxPrev, W);
        }
    }
    // instead of computing the overlap: simply count how many samples are outside
    // set reactive based on that
    FFX_MIN16_F count = FFX_MIN16_F(0.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            Y = RGBToYCoCg(Y);
#endif
            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
        }
    }
    // 9 samples * 3 channels = 27 possible "outside" hits.
    retVal = count / FFX_MIN16_F(27.f);

    return retVal;
}
|
||||
|
||||
|
||||
// This function computes the Reactive mask:
|
||||
// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
|
||||
// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
|
||||
// As a result history would not be trustworthy.
|
||||
// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
|
||||
// For mirrors we may assume the pre-alpha is pretty uniform color.
|
||||
//
|
||||
// This works well generally, but also marks edge pixels
|
||||
// Marks a pixel as reactive when its alpha-contribution edge response exceeds
// its opaque-only edge response. We only get here if alpha has a significant
// contribution and has changed since last frame.
FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // mark pixels with huge variance in alpha as reactive
    const FFX_MIN16_F fAlphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
    const FFX_MIN16_F fOpaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));

    // NOTE: the difference also marks edge pixels due to jitter, so those would
    // need to be cancelled out.
    return ffxSaturate(fAlphaEdge - fOpaqueEdge);
}
|
||||
120
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
vendored
Normal file
120
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
vendored
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 1
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
|
||||
#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3
|
||||
#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4
|
||||
#define FSR2_BIND_SRV_REACTIVE_MASK 5
|
||||
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6
|
||||
|
||||
#define FSR2_BIND_UAV_AUTOREACTIVE 7
|
||||
#define FSR2_BIND_UAV_AUTOCOMPOSITION 8
|
||||
#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9
|
||||
#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 11
|
||||
#define FSR2_BIND_CB_REACTIVE 12
|
||||
|
||||
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 13
|
||||
#endif
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
#ifdef FSR2_BIND_CB_REACTIVE
// Constants controlling the auto-generated reactive and
// transparency & composition (TC) masks.
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
{
    float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
    float fTcScale;     // scale applied to the TC mask before output
    float fReactiveScale; // scale applied to the reactive mask before clamping
    float fReactiveMax;   // upper clamp for the reactive mask
} cbGenerateReactive;

// Threshold below which the auto-TC estimate is binarized to zero.
float getTcThreshold()
{
    return cbGenerateReactive.fTcThreshold;
}

#else
// Fallback when no reactive constant buffer is bound: fixed threshold.
float getTcThreshold()
{
    return 0.05f;
}
#endif
|
||||
|
||||
#include "ffx_fsr2_tcr_autogen.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
// TCR auto-generation pass entry point: computes the reactive (x) and
// transparency & composition (y) masks for one pixel, merges them with the
// application-provided masks, and stores this frame's pre/post-alpha colors
// for next frame's comparison.
void main()
{
    FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy);

    // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV
    // fetch pre- and post-alpha color values
    // Reproject this pixel into the previous frame via the motion vector.
    FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() );
    FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) );
    FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f);

    FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) );
    FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) );

    // x = reactive mask, y = transparency & composition mask
    FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f );

    outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx);

    // Only evaluate the reactive estimate where the TC estimate is significant.
    if (outReactiveMask.y > 0.5f)
    {
        outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx);
        outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale);
        // clamp the reactive value to fReactiveMax
        outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax );
    }

    outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale);

    // Merge with the application-provided masks (take the maximum).
    outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId)));
    outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId)));

    StoreAutoReactive(uDispatchThreadId, outReactiveMask);

    // History for next frame's TCR comparison.
    StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha);
    StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha);
}
|
||||
194
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
vendored
Normal file
194
engine/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
vendored
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_UPSAMPLE_H
|
||||
#define FFX_FSR2_UPSAMPLE_H
|
||||
|
||||
// 16 slots for the 3x3 Lanczos-2 neighbourhood; the sample index uses a
// stride-4 layout (col + (row << 2)) in ComputeUpsampledColorAndWeight.
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16;

// Clamps the upsampled color into the rectification box (clippingBox AABB).
void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
{
    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
|
||||
#if FFX_HALF
// Half-precision variant of Deringing: clamps the color into the
// rectification box AABB.
void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
{
    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
#endif
|
||||
|
||||
#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
#endif

// Evaluates the Lanczos-2 kernel weight for a sample at fSrcSampleOffset from
// the output position. fKernelWeight scales the offset, biasing the effective
// kernel footprint. The evaluation strategy is selected at compile time:
// 0 = reference, 1 = look-up table, 2 = polynomial approximation (default).
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
{
    FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    // Approximate variant works on the squared length, avoiding a sqrt.
    FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
    return fSampleWeight;
}
|
||||
|
||||
#if FFX_HALF
// Half-precision variant of GetUpsampleLanczosWeight; same compile-time
// strategy selection as the FfxFloat32 version.
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
    FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));

    // To Test: Save reciprocal sqrt compute
    // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
    return fSampleWeight;
}
#endif
|
||||
|
||||
// Maximum kernel bias: widens the kernel proportionally to the inverse of the
// downscale factor (x component), capped just below 2.
FfxFloat32 ComputeMaxKernelWeight() {
    const FfxFloat32 fKernelSizeBias = 1.0f;

    const FfxFloat32 fInvScaleMinusOneX = (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x;
    const FfxFloat32 fKernelWeight = FfxFloat32(1) + fInvScaleMinusOneX * FfxFloat32(fKernelSizeBias);

    return ffxMin(FfxFloat32(1.99f), fKernelWeight);
}
|
||||
|
||||
// Upsamples the prepared input color at the high-resolution pixel in `params`
// with a sliced 2-lobe Lanczos kernel over a 3x3 source neighbourhood, and
// simultaneously accumulates the rectification (clipping) box used later for
// color clamping. Returns the weighted color in .xyz and the total kernel
// weight in .w (zero when all samples were off-screen or negligible).
FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
{
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
#endif
    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);      // Destination resolution output pixel center position
    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();                                   // Source resolution output pixel center position
    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos));                                       // TODO: what about weird upscale factors...

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    // 3x3 neighbourhood stored with a stride-4 index (col + (row << 2)).
    FfxFloat32x3 fSamples[iLanczos2SampleCount];

    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0

    // Top-left offset of the 3x3 window, chosen per axis from the jitter direction.
    FfxInt32x2 offsetTL;
    offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
    offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);

    //Load samples
    // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3].
    // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox.
    // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values.
    const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
    const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;

    FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);

    // First pass: gather the (clamped) 3x3 prepared-input colors.
    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {

        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iSampleIndex = col + (row << 2);

            FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;

            const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));

            fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
        }
    }

    FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);

    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);

    // Identify how much of each upsampled color to be used for this frame
    const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
    const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);

    const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
    const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
    const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);

    // Rectification box falloff steepens with high-resolution velocity.
    const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));

    // Second pass: accumulate Lanczos-weighted color and the rectification box.
    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {
        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iSampleIndex = col + (row << 2);

            const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
            FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;

            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;

            // Off-screen samples contribute zero weight.
            const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize())));
            FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));

            fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);

            // Update rectification box
            {
                const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
                // Gaussian-like falloff: closer samples dominate the box.
                const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);

                const FfxBoolean bInitialSample = (row == 0) && (col == 0);
                RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
            }
        }
    }

    RectificationBoxComputeVarianceBoxData(clippingBox);

    // Zero the weight entirely when it is below epsilon.
    fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON);

    if (fColorAndWeight.w > FSR2_EPSILON) {
        // Normalize for deringing (we need to compare colors)
        fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
        fColorAndWeight.w *= fUpsampleLanczosWeightScale;

        Deringing(clippingBox, fColorAndWeight.xyz);
    }

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    return fColorAndWeight;
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_UPSAMPLE_H )
|
||||
936
engine/thirdparty/amd-fsr2/shaders/ffx_spd.h
vendored
Normal file
936
engine/thirdparty/amd-fsr2/shaders/ffx_spd.h
vendored
Normal file
|
|
@ -0,0 +1,936 @@
|
|||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifdef FFX_CPU
|
||||
// Computes the dispatch dimensions and shader constants for a Single Pass
// Downsampler (SPD) dispatch covering the given sub-rectangle.
//
// Fix: the three output parameters were passed by value, so every result this
// function computes was silently discarded; they are outputs and must be
// references (matching the upstream FidelityFX ffx_spd.h). Call sites passing
// lvalues are source-compatible.
FFX_STATIC void SpdSetup(FfxUInt32x2& dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
                         FfxUInt32x2& workGroupOffset,            // GPU side: pass in as constant
                         FfxUInt32x2& numWorkGroupsAndMips,       // GPU side: pass in as constant
                         FfxUInt32x4 rectInfo,                    // left, top, width, height
                         FfxInt32 mips)                           // optional: if -1, calculate based on rect width and height
{
    // Each SPD workgroup covers a 64x64 source tile.
    workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left
    workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top

    FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width
    FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height

    dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0];
    dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1];

    // Total workgroup count, and the number of mips to generate.
    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);

    if (mips >= 0)
    {
        numWorkGroupsAndMips[1] = FfxUInt32(mips);
    }
    else
    {
        // calculate based on rect width and height
        FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]);
        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12))));
    }
}
|
||||
|
||||
// Convenience overload of SpdSetup: derives the mip count from the rectangle
// size (mips = -1).
//
// Fix: output parameters were passed by value (results discarded); they are
// now references, matching the upstream FidelityFX ffx_spd.h.
FFX_STATIC void SpdSetup(FfxUInt32x2& dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
                         FfxUInt32x2& workGroupOffset,            // GPU side: pass in as constant
                         FfxUInt32x2& numWorkGroupsAndMips,       // GPU side: pass in as constant
                         FfxUInt32x4 rectInfo)                    // left, top, width, height
{
    SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1);
}
|
||||
#endif // #ifdef FFX_CPU
|
||||
|
||||
|
||||
//==============================================================================================================================
|
||||
// NON-PACKED VERSION
|
||||
//==============================================================================================================================
|
||||
#ifdef FFX_GPU
|
||||
#ifdef SPD_PACKED_ONLY
// Avoid compiler error
// Stub implementations of the non-packed user callbacks so the non-packed code
// path below still compiles when only the packed path is provided by the user.
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}

FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
}
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
}
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
#endif // #ifdef SPD_PACKED_ONLY
|
||||
|
||||
//_____________________________________________________________/\_______________________________________________________________
|
||||
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
|
||||
#extension GL_KHR_shader_subgroup_quad:require
|
||||
#endif
|
||||
|
||||
// Workgroup-wide execution barrier, mapped to the target language's intrinsic.
void SpdWorkgroupShuffleBarrier()
{
#ifdef FFX_GLSL
    barrier();
#endif
#ifdef FFX_HLSL
    GroupMemoryBarrierWithGroupSync();
#endif
}
|
||||
|
||||
// Only last active workgroup should proceed
// Each workgroup bumps a global atomic counter (thread 0 only); every workgroup
// except the one whose increment made the counter reach numWorkGroups - 1
// returns true (i.e. should exit).
bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
{
    // global atomic counter
    if (localInvocationIndex == 0)
    {
        SpdIncreaseAtomicCounter(slice);
    }

    // Make the incremented counter visible to the whole workgroup.
    SpdWorkgroupShuffleBarrier();
    return (SpdGetAtomicCounter() != (numWorkGroups - 1));
}
|
||||
|
||||
// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
// Reduces the four values held by the lanes of the current subgroup quad into
// one via wave/subgroup swaps. When SPD_NO_WAVE_OPERATIONS is defined (or the
// target language is neither GLSL nor HLSL) the input is returned unchanged.
FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    FfxFloat32x4 v0 = v;
    FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v);
    FfxFloat32x4 v2 = subgroupQuadSwapVertical(v);
    FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v);
    return SpdReduce4(v0, v1, v2, v3);

#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    // requires SM6.0
    // Read the other three lanes of this quad (lane index rounded down to a
    // multiple of 4, then OR'ing in the lane offset).
    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3);
    FfxFloat32x4 v0 = v;
    FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1);
    FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2);
    FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3);
    return SpdReduce4(v0, v1, v2, v3);
    /*
    // if SM6.0 is not available, you can use the AMD shader intrinsics
    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // works for DX11
    FfxFloat32x4 v0 = v;
    FfxFloat32x4 v1;
    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    FfxFloat32x4 v2;
    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    FfxFloat32x4 v3;
    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    return SpdReduce4(v0, v1, v2, v3);
    */
#endif
    return v;
}
|
||||
|
||||
// Fetches four values from the LDS intermediate buffer at the given 2D
// coordinates and folds them with the user-supplied SpdReduce4 reduction.
FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    FfxFloat32x4 a = SpdLoadIntermediate(i0.x, i0.y);
    FfxFloat32x4 b = SpdLoadIntermediate(i1.x, i1.y);
    FfxFloat32x4 c = SpdLoadIntermediate(i2.x, i2.y);
    FfxFloat32x4 d = SpdLoadIntermediate(i3.x, i3.y);
    return SpdReduce4(a, b, c, d);
}
|
||||
|
||||
// Loads four texels from the output mip chain (via the user-supplied SpdLoad)
// at the given coordinates/slice and reduces them with SpdReduce4.
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat32x4 r0 = SpdLoad(FfxInt32x2(i0), slice);
    FfxFloat32x4 r1 = SpdLoad(FfxInt32x2(i1), slice);
    FfxFloat32x4 r2 = SpdLoad(FfxInt32x2(i2), slice);
    FfxFloat32x4 r3 = SpdLoad(FfxInt32x2(i3), slice);
    return SpdReduce4(r0, r1, r2, r3);
}
|
||||
|
||||
// Convenience overload: reduces the 2x2 texel footprint whose top-left
// corner is 'base'.
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 p00 = base + FfxUInt32x2(0, 0);
    FfxUInt32x2 p01 = base + FfxUInt32x2(0, 1);
    FfxUInt32x2 p10 = base + FfxUInt32x2(1, 0);
    FfxUInt32x2 p11 = base + FfxUInt32x2(1, 1);
    return SpdReduceLoad4(p00, p01, p10, p11, slice);
}
|
||||
|
||||
// Loads four texels from the source image (via the user-supplied
// SpdLoadSourceImage) and reduces them with SpdReduce4.
FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat32x4 s0 = SpdLoadSourceImage(FfxInt32x2(i0), slice);
    FfxFloat32x4 s1 = SpdLoadSourceImage(FfxInt32x2(i1), slice);
    FfxFloat32x4 s2 = SpdLoadSourceImage(FfxInt32x2(i2), slice);
    FfxFloat32x4 s3 = SpdLoadSourceImage(FfxInt32x2(i3), slice);
    return SpdReduce4(s0, s1, s2, s3);
}
|
||||
|
||||
// Reduces the 2x2 source-image footprint at 'base'.
// With SPD_LINEAR_SAMPLER defined, a single SpdLoadSourceImage call is used —
// presumably the user callback samples with a bilinear filter so one tap
// averages the 2x2 quad (defined by the integration; confirm in the callback).
// Otherwise four explicit loads are reduced with SpdReduce4.
FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
    return SpdLoadSourceImage(FfxInt32x2(base), slice);
#else
    return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
#endif
}
|
||||
|
||||
// Produces mips 0 and 1 for this workgroup's 64x64 source tile using wave
// quad reductions (SpdReduceQuad) instead of LDS round-trips.
// Each thread reduces one 2x2 source quad in each of the four 32x32
// quadrants of the tile (offsets +0/+32 in source, +0/+16 in mip 0),
// then quad-reduces those results so that one lane per quad
// (localInvocationIndex % 4 == 0) writes mip 1 and caches it in LDS
// for the later mip passes.
void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxFloat32x4 v[4];

    // Quadrant 0: top-left.
    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
    v[0] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[0], 0, slice);

    // Quadrant 1: top-right.
    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
    v[1] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[1], 0, slice);

    // Quadrant 2: bottom-left.
    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
    v[2] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[2], 0, slice);

    // Quadrant 3: bottom-right.
    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
    v[3] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[3], 0, slice);

    // Only mip 0 requested: done.
    if (mip <= 1)
        return;

    // Combine each value with the other three lanes of its wave quad.
    v[0] = SpdReduceQuad(v[0]);
    v[1] = SpdReduceQuad(v[1]);
    v[2] = SpdReduceQuad(v[2]);
    v[3] = SpdReduceQuad(v[3]);

    // One lane per quad writes the mip 1 texel and caches it in LDS.
    if ((localInvocationIndex % 4) == 0)
    {
        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
        SpdStoreIntermediate(x / 2, y / 2, v[0]);

        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
        SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]);

        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
        SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]);

        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
        SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]);
    }
}
|
||||
|
||||
// Produces mips 0 and 1 for this workgroup's 64x64 source tile without wave
// operations: quad reduction for mip 1 goes through the LDS intermediate
// buffer with explicit barriers instead of wave shuffles.
void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxFloat32x4 v[4];

    // Mip 0: each thread reduces one 2x2 source quad per 32x32 quadrant
    // (same addressing as the intrinsics path).
    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
    v[0] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[0], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
    v[1] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[1], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
    v[2] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[2], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
    v[3] = SpdReduceLoadSourceImage(tex, slice);
    SpdStore(pix, v[3], 0, slice);

    if (mip <= 1)
        return;

    // Mip 1: for each quadrant, all threads publish their value to LDS,
    // then the first 64 threads reduce a 2x2 LDS neighborhood and store the
    // result. Barriers on both sides keep the shared writes/reads ordered.
    for (FfxUInt32 i = 0; i < 4; i++)
    {
        SpdStoreIntermediate(x, y, v[i]);
        SpdWorkgroupShuffleBarrier();
        if (localInvocationIndex < 64)
        {
            v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
        }
        SpdWorkgroupShuffleBarrier();
    }

    // Cache the four mip 1 results in LDS for the subsequent mip passes.
    if (localInvocationIndex < 64)
    {
        SpdStoreIntermediate(x + 0, y + 0, v[0]);
        SpdStoreIntermediate(x + 8, y + 0, v[1]);
        SpdStoreIntermediate(x + 0, y + 8, v[2]);
        SpdStoreIntermediate(x + 8, y + 8, v[3]);
    }
}
|
||||
|
||||
// Dispatches the mip 0/1 pass to the LDS fallback or the wave-intrinsics
// implementation, selected at compile time by SPD_NO_WAVE_OPERATIONS.
void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
#else
    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
#endif
}
|
||||
|
||||
|
||||
// Produces the 8x8 mip level (relative to this workgroup's tile) from the
// 16x16 values cached in LDS by the mip 0/1 pass.
// LDS fallback: the first 64 threads each reduce a 2x2 LDS neighborhood.
// Wave path: every thread loads its own cached value and quad-reduces it,
// with one lane per quad storing the result.
// Both paths re-cache the result in LDS using a sparse layout chosen to
// reduce bank conflicts (see diagram), which the mip 3 pass reads back.
void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 64)
    {
        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS, try to reduce bank conflicts
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // ...
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        SpdStoreIntermediate(x * 2 + y % 2, y * 2, v);
    }
#else
    FfxFloat32x4 v = SpdLoadIntermediate(x, y);
    v = SpdReduceQuad(v);
    // quad index 0 stores result
    if (localInvocationIndex % 4 == 0)
    {
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
        SpdStoreIntermediate(x + (y / 2) % 2, y, v);
    }
#endif
}
|
||||
|
||||
// Produces the 4x4 mip level from the sparse LDS layout written by
// SpdDownsampleMip_2. Only 16 (LDS path) or 64 (wave path) threads are
// active; the LDS read offsets mirror the mip 2 store pattern exactly, and
// the result is re-cached in an even sparser layout for the mip 4 pass
// (see diagrams).
void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 16)
    {
        // x 0 x 0
        // 0 0 0 0
        // 0 x 0 x
        // 0 0 0 0
        FfxFloat32x4 v =
            SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS
        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        // ...
        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        // ...
        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        // ...
        SpdStoreIntermediate(x * 4 + y, y * 4, v);
    }
#else
    if (localInvocationIndex < 64)
    {
        // Read back from the mip 2 sparse layout, quad-reduce across the wave.
        FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2);
        v = SpdReduceQuad(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
            SpdStoreIntermediate(x * 2 + y / 2, y * 2, v);
        }
    }
#endif
}
|
||||
|
||||
// Produces the 2x2 mip level from the LDS layout written by
// SpdDownsampleMip_3. Only 4 (LDS path) or 16 (wave path) threads are
// active; the result is packed into the first LDS row (x + y * 2, 0)
// for the final mip 5 pass.
void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 4)
    {
        // x 0 0 0 x 0 0 0
        // ...
        // 0 x 0 0 0 x 0 0
        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
                                               FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
                                               FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
                                               FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS
        // x x x x 0 ...
        // 0 ...
        SpdStoreIntermediate(x + y * 2, 0, v);
    }
#else
    if (localInvocationIndex < 16)
    {
        // Read back from the mip 3 sparse layout, quad-reduce across the wave.
        FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4);
        v = SpdReduceQuad(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
            SpdStoreIntermediate(x / 2 + y, 0, v);
        }
    }
#endif
}
|
||||
|
||||
// Produces the final 1x1 value for this workgroup's tile by reducing the
// four values packed into the first LDS row by SpdDownsampleMip_4.
// LDS path: a single thread reduces all four. Wave path: 4 threads each
// load one value and quad-reduce, with lane 0 storing the result.
void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 1)
    {
        // x x x x 0 ...
        // 0 ...
        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
        SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
    }
#else
    if (localInvocationIndex < 4)
    {
        FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0);
        v = SpdReduceQuad(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
        }
    }
#endif
}
|
||||
|
||||
// Run by the single surviving workgroup: produces mips 6 and 7 from the
// previously written mip (SpdLoad is a user callback — presumably it reads
// mip 5/the last stored level; confirm in the integration).
// Each thread reduces four 2x2 footprints for mip 6, then folds its own
// four results into one mip 7 texel — no barrier needed since only
// values from the same thread are combined.
void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
    FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
    FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice);
    SpdStore(pix, v0, 6, slice);

    tex = FfxInt32x2(x * 4 + 2, y * 4 + 0);
    pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
    FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice);
    SpdStore(pix, v1, 6, slice);

    tex = FfxInt32x2(x * 4 + 0, y * 4 + 2);
    pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
    FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice);
    SpdStore(pix, v2, 6, slice);

    tex = FfxInt32x2(x * 4 + 2, y * 4 + 2);
    pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
    FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice);
    SpdStore(pix, v3, 6, slice);

    if (mips <= 7)
        return;
    // no barrier needed, working on values only from the same thread

    FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3);
    SpdStore(FfxInt32x2(x, y), v, 7, slice);
    SpdStoreIntermediate(x, y, v);
}
|
||||
|
||||
// Emits up to four consecutive mip levels starting at baseMip, stopping as
// soon as 'mips' (the total requested mip count) is reached. A workgroup
// barrier precedes each level because every pass reads LDS data written by
// the previous one.
void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    if (mips <= baseMip)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);

    if (mips <= baseMip + 1)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);

    if (mips <= baseMip + 2)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);

    if (mips <= baseMip + 3)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice);
}
|
||||
|
||||
// Main single-pass downsampler entry point (full-precision path).
// Remaps the flat invocation index into 2D coordinates via
// ffxRemapForWaveReduction (64-thread tiles, with bits 6-7 selecting an
// 8-texel x/y offset), generates mips 0-5 for this workgroup's tile, then
// uses SpdExitWorkgroup so only the last workgroup to finish continues,
// resets the global atomic counter, and produces mips 6+ from the
// remaining up-to-64x64 texels.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2);
    FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7));
    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);

    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    if (mips <= 6)
        return;

    // All workgroups except the last to finish exit here.
    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
        return;

    SpdResetAtomicCounter(slice);

    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
    SpdDownsampleMips_6_7(x, y, mips, slice);

    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
|
||||
|
||||
// Overload that applies a workgroup offset before downsampling — useful when
// the dispatch covers only a sub-rectangle of the image.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
    SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//==============================================================================================================================
|
||||
// PACKED VERSION
|
||||
//==============================================================================================================================
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
#ifdef FFX_GLSL
|
||||
#extension GL_EXT_shader_subgroup_extended_types_float16:require
|
||||
#endif
|
||||
|
||||
// Half-precision quad reduction: combines each lane's value with the other
// three lanes of its 2x2 wave quad.
// GLSL path uses subgroup quad-swap intrinsics; HLSL path (SM6.0+) reads the
// three sibling lanes of the quad via WaveReadLaneAt. When neither wave path
// is compiled in, returns zero.
FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
    FfxFloat16x4 v0 = v;
    FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
    FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
    FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
    return SpdReduce4H(v0, v1, v2, v3);
#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
    // requires SM6.0
    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3);
    FfxFloat16x4 v0 = v;
    FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1);
    FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2);
    FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3);
    return SpdReduce4H(v0, v1, v2, v3);
    /*
    // if SM6.0 is not available, you can use the AMD shader intrinsics
    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // works for DX11
    FfxFloat16x4 v0 = v;
    FfxFloat16x4 v1;
    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    FfxFloat16x4 v2;
    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    FfxFloat16x4 v3;
    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    return SpdReduce4H(v0, v1, v2, v3);
    */
#endif
    return FfxFloat16x4(0.0, 0.0, 0.0, 0.0);
}
|
||||
|
||||
// Half-precision variant: fetches four values from the LDS intermediate
// buffer and folds them with the user-supplied SpdReduce4H reduction.
FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    FfxFloat16x4 a = SpdLoadIntermediateH(i0.x, i0.y);
    FfxFloat16x4 b = SpdLoadIntermediateH(i1.x, i1.y);
    FfxFloat16x4 c = SpdLoadIntermediateH(i2.x, i2.y);
    FfxFloat16x4 d = SpdLoadIntermediateH(i3.x, i3.y);
    return SpdReduce4H(a, b, c, d);
}
|
||||
|
||||
// Half-precision variant: loads four texels from the output mip chain and
// reduces them with SpdReduce4H.
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 r0 = SpdLoadH(FfxInt32x2(i0), slice);
    FfxFloat16x4 r1 = SpdLoadH(FfxInt32x2(i1), slice);
    FfxFloat16x4 r2 = SpdLoadH(FfxInt32x2(i2), slice);
    FfxFloat16x4 r3 = SpdLoadH(FfxInt32x2(i3), slice);
    return SpdReduce4H(r0, r1, r2, r3);
}
|
||||
|
||||
// Half-precision convenience overload: reduces the 2x2 texel footprint
// whose top-left corner is 'base'.
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 p00 = base + FfxUInt32x2(0, 0);
    FfxUInt32x2 p01 = base + FfxUInt32x2(0, 1);
    FfxUInt32x2 p10 = base + FfxUInt32x2(1, 0);
    FfxUInt32x2 p11 = base + FfxUInt32x2(1, 1);
    return SpdReduceLoad4H(p00, p01, p10, p11, slice);
}
|
||||
|
||||
// Half-precision variant: loads four texels from the source image and
// reduces them with SpdReduce4H.
FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 s0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
    FfxFloat16x4 s1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
    FfxFloat16x4 s2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
    FfxFloat16x4 s3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice);
    return SpdReduce4H(s0, s1, s2, s3);
}
|
||||
|
||||
// Half-precision variant of SpdReduceLoadSourceImage: with
// SPD_LINEAR_SAMPLER a single callback tap covers the 2x2 footprint
// (presumably bilinear — defined by the integration's SpdLoadSourceImageH),
// otherwise four explicit loads are reduced.
FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
    return SpdLoadSourceImageH(FfxInt32x2(base), slice);
#else
    return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
#endif
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMips_0_1_Intrinsics: produces mips
// 0 and 1 for the workgroup's 64x64 tile using wave quad reductions.
// Each thread reduces one 2x2 source quad per 32x32 quadrant for mip 0,
// then quad-reduces so one lane per quad (localInvocationIndex % 4 == 0)
// writes mip 1 and caches it in LDS.
void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 v[4];

    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
    v[0] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[0], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
    v[1] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[1], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
    v[2] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[2], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
    v[3] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[3], 0, slice);

    if (mips <= 1)
        return;

    v[0] = SpdReduceQuadH(v[0]);
    v[1] = SpdReduceQuadH(v[1]);
    v[2] = SpdReduceQuadH(v[2]);
    v[3] = SpdReduceQuadH(v[3]);

    // One lane per quad writes the mip 1 texel and caches it in LDS.
    if ((localInvocationIndex % 4) == 0)
    {
        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
        SpdStoreIntermediateH(x / 2, y / 2, v[0]);

        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
        SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]);

        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
        SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]);

        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
        SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]);
    }
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMips_0_1_LDS: produces mips 0 and 1
// without wave operations, routing the mip 1 quad reduction through the LDS
// intermediate buffer with explicit barriers.
void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 v[4];

    // Mip 0: one 2x2 source quad per 32x32 quadrant per thread.
    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
    v[0] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[0], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
    v[1] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[1], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
    v[2] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[2], 0, slice);

    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
    v[3] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[3], 0, slice);

    if (mips <= 1)
        return;

    // Mip 1: publish to LDS, barrier, reduce 2x2 neighborhoods on the first
    // 64 threads, barrier again before the next quadrant reuses the LDS.
    for (FfxInt32 i = 0; i < 4; i++)
    {
        SpdStoreIntermediateH(x, y, v[i]);
        SpdWorkgroupShuffleBarrier();
        if (localInvocationIndex < 64)
        {
            v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
        }
        SpdWorkgroupShuffleBarrier();
    }

    // Cache the four mip 1 results in LDS for the subsequent mip passes.
    if (localInvocationIndex < 64)
    {
        SpdStoreIntermediateH(x + 0, y + 0, v[0]);
        SpdStoreIntermediateH(x + 8, y + 0, v[1]);
        SpdStoreIntermediateH(x + 0, y + 8, v[2]);
        SpdStoreIntermediateH(x + 8, y + 8, v[3]);
    }
}
|
||||
|
||||
// Half-precision dispatcher for the mip 0/1 pass: LDS fallback or wave
// intrinsics, selected at compile time by SPD_NO_WAVE_OPERATIONS.
void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
#else
    SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
#endif
}
|
||||
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMip_2: produces the 8x8 mip level
// from the 16x16 values cached in LDS, re-caching results in a sparse
// bank-conflict-reducing layout (see diagram) for the mip 3 pass.
void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 64)
    {
        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS, try to reduce bank conflicts
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // ...
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v);
    }
#else
    FfxFloat16x4 v = SpdLoadIntermediateH(x, y);
    v = SpdReduceQuadH(v);
    // quad index 0 stores result
    if (localInvocationIndex % 4 == 0)
    {
        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
        SpdStoreIntermediateH(x + (y / 2) % 2, y, v);
    }
#endif
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMip_3: produces the 4x4 mip level
// from the sparse LDS layout written by SpdDownsampleMip_2H, re-caching in
// an even sparser layout (see diagrams) for the mip 4 pass.
void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 16)
    {
        // x 0 x 0
        // 0 0 0 0
        // 0 x 0 x
        // 0 0 0 0
        FfxFloat16x4 v =
            SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
        SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS
        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        // ...
        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        // ...
        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        // ...
        SpdStoreIntermediateH(x * 4 + y, y * 4, v);
    }
#else
    if (localInvocationIndex < 64)
    {
        // Read back from the mip 2 sparse layout, quad-reduce across the wave.
        FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2);
        v = SpdReduceQuadH(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
            SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMip_4: produces the 2x2 mip level
// and packs the results into the first LDS row for the final mip 5 pass.
void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 4)
    {
        // x 0 0 0 x 0 0 0
        // ...
        // 0 x 0 0 0 x 0 0
        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
                                                FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
                                                FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
                                                FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
        SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
        // store to LDS
        // x x x x 0 ...
        // 0 ...
        SpdStoreIntermediateH(x + y * 2, 0, v);
    }
#else
    if (localInvocationIndex < 16)
    {
        // Read back from the mip 3 sparse layout, quad-reduce across the wave.
        FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4);
        v = SpdReduceQuadH(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
            SpdStoreIntermediateH(x / 2 + y, 0, v);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMip_5: reduces the four values
// packed into the first LDS row to the final 1x1 value for this tile.
void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 1)
    {
        // x x x x 0 ...
        // 0 ...
        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
        SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
    }
#else
    if (localInvocationIndex < 4)
    {
        FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0);
        v = SpdReduceQuadH(v);
        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleMips_6_7, run by the single
// surviving workgroup: produces mips 6 and 7 from the previously written
// mip (SpdLoadH is a user callback — presumably it reads the last stored
// level; confirm in the integration). Each thread reduces four 2x2
// footprints for mip 6, then folds its own four results into one mip 7
// texel. Note: guard here is `mips < 8`, equivalent to the float path's
// `mips <= 7`.
void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
    FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
    FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice);
    SpdStoreH(pix, v0, 6, slice);

    tex = FfxInt32x2(x * 4 + 2, y * 4 + 0);
    pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
    FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice);
    SpdStoreH(pix, v1, 6, slice);

    tex = FfxInt32x2(x * 4 + 0, y * 4 + 2);
    pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
    FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice);
    SpdStoreH(pix, v2, 6, slice);

    tex = FfxInt32x2(x * 4 + 2, y * 4 + 2);
    pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
    FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice);
    SpdStoreH(pix, v3, 6, slice);

    if (mips < 8)
        return;
    // no barrier needed, working on values only from the same thread

    FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3);
    SpdStoreH(FfxInt32x2(x, y), v, 7, slice);
    SpdStoreIntermediateH(x, y, v);
}
|
||||
|
||||
// Half-precision mirror of SpdDownsampleNextFour: emits up to four
// consecutive mip levels starting at baseMip, with a workgroup barrier
// before each level because every pass reads LDS data written by the
// previous one.
void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    if (mips <= baseMip)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);

    if (mips <= baseMip + 1)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);

    if (mips <= baseMip + 2)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);

    if (mips <= baseMip + 3)
        return;
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice);
}
|
||||
|
||||
// Half-precision main entry point, mirroring SpdDownsample: remaps the flat
// invocation index to 2D, generates mips 0-5 for this tile, elects the last
// workgroup via SpdExitWorkgroup, resets the atomic counter, and produces
// mips 6+ from the remaining up-to-64x64 texels. Note: the guard `mips < 7`
// is equivalent to the float path's `mips <= 6`.
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2);
    FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7));

    SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);

    SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    if (mips < 7)
        return;

    // All workgroups except the last to finish exit here.
    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
        return;

    SpdResetAtomicCounter(slice);

    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
    SpdDownsampleMips_6_7H(x, y, mips, slice);

    SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
|
||||
|
||||
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
|
||||
{
|
||||
SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
|
||||
}
|
||||
|
||||
#endif // #if FFX_HALF
|
||||
#endif // #ifdef FFX_GPU
|
||||
32
engine/thirdparty/angle/LICENSE
vendored
Normal file
32
engine/thirdparty/angle/LICENSE
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2018 The ANGLE Project Authors.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
//
|
||||
// Neither the name of TransGaming Inc., Google Inc., 3DLabs Inc.
|
||||
// Ltd., nor the names of their contributors may be used to endorse
|
||||
// or promote products derived from this software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
342
engine/thirdparty/angle/include/EGL/egl.h
vendored
Normal file
342
engine/thirdparty/angle/include/EGL/egl.h
vendored
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
#ifndef __egl_h_
|
||||
#define __egl_h_ 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Copyright 2013-2020 The Khronos Group Inc.
|
||||
** SPDX-License-Identifier: Apache-2.0
|
||||
**
|
||||
** This header is generated from the Khronos EGL XML API Registry.
|
||||
** The current version of the Registry, generator scripts
|
||||
** used to make the header, and the header can be found at
|
||||
** http://www.khronos.org/registry/egl
|
||||
**
|
||||
** Khronos $Git commit SHA1: 6fb1daea15 $ on $Git commit date: 2022-05-25 09:41:13 -0600 $
|
||||
*/
|
||||
|
||||
#include <EGL/eglplatform.h>
|
||||
|
||||
#ifndef EGL_EGL_PROTOTYPES
|
||||
#define EGL_EGL_PROTOTYPES 1
|
||||
#endif
|
||||
|
||||
/* Generated on date 20220525 */
|
||||
|
||||
/* Generated C header for:
|
||||
* API: egl
|
||||
* Versions considered: .*
|
||||
* Versions emitted: .*
|
||||
* Default extensions included: None
|
||||
* Additional extensions included: _nomatch_^
|
||||
* Extensions removed: _nomatch_^
|
||||
*/
|
||||
|
||||
#ifndef EGL_VERSION_1_0
|
||||
#define EGL_VERSION_1_0 1
|
||||
typedef unsigned int EGLBoolean;
|
||||
typedef void *EGLDisplay;
|
||||
#include <KHR/khrplatform.h>
|
||||
#include <EGL/eglplatform.h>
|
||||
typedef void *EGLConfig;
|
||||
typedef void *EGLSurface;
|
||||
typedef void *EGLContext;
|
||||
typedef void (*__eglMustCastToProperFunctionPointerType)(void);
|
||||
#define EGL_ALPHA_SIZE 0x3021
|
||||
#define EGL_BAD_ACCESS 0x3002
|
||||
#define EGL_BAD_ALLOC 0x3003
|
||||
#define EGL_BAD_ATTRIBUTE 0x3004
|
||||
#define EGL_BAD_CONFIG 0x3005
|
||||
#define EGL_BAD_CONTEXT 0x3006
|
||||
#define EGL_BAD_CURRENT_SURFACE 0x3007
|
||||
#define EGL_BAD_DISPLAY 0x3008
|
||||
#define EGL_BAD_MATCH 0x3009
|
||||
#define EGL_BAD_NATIVE_PIXMAP 0x300A
|
||||
#define EGL_BAD_NATIVE_WINDOW 0x300B
|
||||
#define EGL_BAD_PARAMETER 0x300C
|
||||
#define EGL_BAD_SURFACE 0x300D
|
||||
#define EGL_BLUE_SIZE 0x3022
|
||||
#define EGL_BUFFER_SIZE 0x3020
|
||||
#define EGL_CONFIG_CAVEAT 0x3027
|
||||
#define EGL_CONFIG_ID 0x3028
|
||||
#define EGL_CORE_NATIVE_ENGINE 0x305B
|
||||
#define EGL_DEPTH_SIZE 0x3025
|
||||
#define EGL_DONT_CARE EGL_CAST(EGLint,-1)
|
||||
#define EGL_DRAW 0x3059
|
||||
#define EGL_EXTENSIONS 0x3055
|
||||
#define EGL_FALSE 0
|
||||
#define EGL_GREEN_SIZE 0x3023
|
||||
#define EGL_HEIGHT 0x3056
|
||||
#define EGL_LARGEST_PBUFFER 0x3058
|
||||
#define EGL_LEVEL 0x3029
|
||||
#define EGL_MAX_PBUFFER_HEIGHT 0x302A
|
||||
#define EGL_MAX_PBUFFER_PIXELS 0x302B
|
||||
#define EGL_MAX_PBUFFER_WIDTH 0x302C
|
||||
#define EGL_NATIVE_RENDERABLE 0x302D
|
||||
#define EGL_NATIVE_VISUAL_ID 0x302E
|
||||
#define EGL_NATIVE_VISUAL_TYPE 0x302F
|
||||
#define EGL_NONE 0x3038
|
||||
#define EGL_NON_CONFORMANT_CONFIG 0x3051
|
||||
#define EGL_NOT_INITIALIZED 0x3001
|
||||
#define EGL_NO_CONTEXT EGL_CAST(EGLContext,0)
|
||||
#define EGL_NO_DISPLAY EGL_CAST(EGLDisplay,0)
|
||||
#define EGL_NO_SURFACE EGL_CAST(EGLSurface,0)
|
||||
#define EGL_PBUFFER_BIT 0x0001
|
||||
#define EGL_PIXMAP_BIT 0x0002
|
||||
#define EGL_READ 0x305A
|
||||
#define EGL_RED_SIZE 0x3024
|
||||
#define EGL_SAMPLES 0x3031
|
||||
#define EGL_SAMPLE_BUFFERS 0x3032
|
||||
#define EGL_SLOW_CONFIG 0x3050
|
||||
#define EGL_STENCIL_SIZE 0x3026
|
||||
#define EGL_SUCCESS 0x3000
|
||||
#define EGL_SURFACE_TYPE 0x3033
|
||||
#define EGL_TRANSPARENT_BLUE_VALUE 0x3035
|
||||
#define EGL_TRANSPARENT_GREEN_VALUE 0x3036
|
||||
#define EGL_TRANSPARENT_RED_VALUE 0x3037
|
||||
#define EGL_TRANSPARENT_RGB 0x3052
|
||||
#define EGL_TRANSPARENT_TYPE 0x3034
|
||||
#define EGL_TRUE 1
|
||||
#define EGL_VENDOR 0x3053
|
||||
#define EGL_VERSION 0x3054
|
||||
#define EGL_WIDTH 0x3057
|
||||
#define EGL_WINDOW_BIT 0x0004
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLCHOOSECONFIGPROC) (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOPYBUFFERSPROC) (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
|
||||
typedef EGLContext (EGLAPIENTRYP PFNEGLCREATECONTEXTPROC) (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPBUFFERSURFACEPROC) (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEPROC) (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEWINDOWSURFACEPROC) (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYCONTEXTPROC) (EGLDisplay dpy, EGLContext ctx);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSURFACEPROC) (EGLDisplay dpy, EGLSurface surface);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCONFIGATTRIBPROC) (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCONFIGSPROC) (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config);
|
||||
typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETCURRENTDISPLAYPROC) (void);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLGETCURRENTSURFACEPROC) (EGLint readdraw);
|
||||
typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETDISPLAYPROC) (EGLNativeDisplayType display_id);
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLGETERRORPROC) (void);
|
||||
typedef __eglMustCastToProperFunctionPointerType (EGLAPIENTRYP PFNEGLGETPROCADDRESSPROC) (const char *procname);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLINITIALIZEPROC) (EGLDisplay dpy, EGLint *major, EGLint *minor);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLMAKECURRENTPROC) (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYCONTEXTPROC) (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value);
|
||||
typedef const char *(EGLAPIENTRYP PFNEGLQUERYSTRINGPROC) (EGLDisplay dpy, EGLint name);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSPROC) (EGLDisplay dpy, EGLSurface surface);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLTERMINATEPROC) (EGLDisplay dpy);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITGLPROC) (void);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITNATIVEPROC) (EGLint engine);
|
||||
#if EGL_EGL_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
|
||||
EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, EGLContext ctx);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config);
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw);
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id);
|
||||
EGLAPI EGLint EGLAPIENTRY eglGetError (void);
|
||||
EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value);
|
||||
EGLAPI const char *EGLAPIENTRY eglQueryString (EGLDisplay dpy, EGLint name);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine);
|
||||
#endif
|
||||
#endif /* EGL_VERSION_1_0 */
|
||||
|
||||
#ifndef EGL_VERSION_1_1
|
||||
#define EGL_VERSION_1_1 1
|
||||
#define EGL_BACK_BUFFER 0x3084
|
||||
#define EGL_BIND_TO_TEXTURE_RGB 0x3039
|
||||
#define EGL_BIND_TO_TEXTURE_RGBA 0x303A
|
||||
#define EGL_CONTEXT_LOST 0x300E
|
||||
#define EGL_MIN_SWAP_INTERVAL 0x303B
|
||||
#define EGL_MAX_SWAP_INTERVAL 0x303C
|
||||
#define EGL_MIPMAP_TEXTURE 0x3082
|
||||
#define EGL_MIPMAP_LEVEL 0x3083
|
||||
#define EGL_NO_TEXTURE 0x305C
|
||||
#define EGL_TEXTURE_2D 0x305F
|
||||
#define EGL_TEXTURE_FORMAT 0x3080
|
||||
#define EGL_TEXTURE_RGB 0x305D
|
||||
#define EGL_TEXTURE_RGBA 0x305E
|
||||
#define EGL_TEXTURE_TARGET 0x3081
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLBINDTEXIMAGEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLRELEASETEXIMAGEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSURFACEATTRIBPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPINTERVALPROC) (EGLDisplay dpy, EGLint interval);
|
||||
#if EGL_EGL_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval);
|
||||
#endif
|
||||
#endif /* EGL_VERSION_1_1 */
|
||||
|
||||
#ifndef EGL_VERSION_1_2
|
||||
#define EGL_VERSION_1_2 1
|
||||
typedef unsigned int EGLenum;
|
||||
typedef void *EGLClientBuffer;
|
||||
#define EGL_ALPHA_FORMAT 0x3088
|
||||
#define EGL_ALPHA_FORMAT_NONPRE 0x308B
|
||||
#define EGL_ALPHA_FORMAT_PRE 0x308C
|
||||
#define EGL_ALPHA_MASK_SIZE 0x303E
|
||||
#define EGL_BUFFER_PRESERVED 0x3094
|
||||
#define EGL_BUFFER_DESTROYED 0x3095
|
||||
#define EGL_CLIENT_APIS 0x308D
|
||||
#define EGL_COLORSPACE 0x3087
|
||||
#define EGL_COLORSPACE_sRGB 0x3089
|
||||
#define EGL_COLORSPACE_LINEAR 0x308A
|
||||
#define EGL_COLOR_BUFFER_TYPE 0x303F
|
||||
#define EGL_CONTEXT_CLIENT_TYPE 0x3097
|
||||
#define EGL_DISPLAY_SCALING 10000
|
||||
#define EGL_HORIZONTAL_RESOLUTION 0x3090
|
||||
#define EGL_LUMINANCE_BUFFER 0x308F
|
||||
#define EGL_LUMINANCE_SIZE 0x303D
|
||||
#define EGL_OPENGL_ES_BIT 0x0001
|
||||
#define EGL_OPENVG_BIT 0x0002
|
||||
#define EGL_OPENGL_ES_API 0x30A0
|
||||
#define EGL_OPENVG_API 0x30A1
|
||||
#define EGL_OPENVG_IMAGE 0x3096
|
||||
#define EGL_PIXEL_ASPECT_RATIO 0x3092
|
||||
#define EGL_RENDERABLE_TYPE 0x3040
|
||||
#define EGL_RENDER_BUFFER 0x3086
|
||||
#define EGL_RGB_BUFFER 0x308E
|
||||
#define EGL_SINGLE_BUFFER 0x3085
|
||||
#define EGL_SWAP_BEHAVIOR 0x3093
|
||||
#define EGL_UNKNOWN EGL_CAST(EGLint,-1)
|
||||
#define EGL_VERTICAL_RESOLUTION 0x3091
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLBINDAPIPROC) (EGLenum api);
|
||||
typedef EGLenum (EGLAPIENTRYP PFNEGLQUERYAPIPROC) (void);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC) (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLRELEASETHREADPROC) (void);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITCLIENTPROC) (void);
|
||||
#if EGL_EGL_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api);
|
||||
EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient (void);
|
||||
#endif
|
||||
#endif /* EGL_VERSION_1_2 */
|
||||
|
||||
#ifndef EGL_VERSION_1_3
|
||||
#define EGL_VERSION_1_3 1
|
||||
#define EGL_CONFORMANT 0x3042
|
||||
#define EGL_CONTEXT_CLIENT_VERSION 0x3098
|
||||
#define EGL_MATCH_NATIVE_PIXMAP 0x3041
|
||||
#define EGL_OPENGL_ES2_BIT 0x0004
|
||||
#define EGL_VG_ALPHA_FORMAT 0x3088
|
||||
#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B
|
||||
#define EGL_VG_ALPHA_FORMAT_PRE 0x308C
|
||||
#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040
|
||||
#define EGL_VG_COLORSPACE 0x3087
|
||||
#define EGL_VG_COLORSPACE_sRGB 0x3089
|
||||
#define EGL_VG_COLORSPACE_LINEAR 0x308A
|
||||
#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020
|
||||
#endif /* EGL_VERSION_1_3 */
|
||||
|
||||
#ifndef EGL_VERSION_1_4
|
||||
#define EGL_VERSION_1_4 1
|
||||
#define EGL_DEFAULT_DISPLAY EGL_CAST(EGLNativeDisplayType,0)
|
||||
#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200
|
||||
#define EGL_MULTISAMPLE_RESOLVE 0x3099
|
||||
#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A
|
||||
#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B
|
||||
#define EGL_OPENGL_API 0x30A2
|
||||
#define EGL_OPENGL_BIT 0x0008
|
||||
#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400
|
||||
typedef EGLContext (EGLAPIENTRYP PFNEGLGETCURRENTCONTEXTPROC) (void);
|
||||
#if EGL_EGL_PROTOTYPES
|
||||
EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void);
|
||||
#endif
|
||||
#endif /* EGL_VERSION_1_4 */
|
||||
|
||||
#ifndef EGL_VERSION_1_5
|
||||
#define EGL_VERSION_1_5 1
|
||||
typedef void *EGLSync;
|
||||
typedef intptr_t EGLAttrib;
|
||||
typedef khronos_utime_nanoseconds_t EGLTime;
|
||||
typedef void *EGLImage;
|
||||
#define EGL_CONTEXT_MAJOR_VERSION 0x3098
|
||||
#define EGL_CONTEXT_MINOR_VERSION 0x30FB
|
||||
#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD
|
||||
#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD
|
||||
#define EGL_NO_RESET_NOTIFICATION 0x31BE
|
||||
#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF
|
||||
#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001
|
||||
#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002
|
||||
#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0
|
||||
#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1
|
||||
#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2
|
||||
#define EGL_OPENGL_ES3_BIT 0x00000040
|
||||
#define EGL_CL_EVENT_HANDLE 0x309C
|
||||
#define EGL_SYNC_CL_EVENT 0x30FE
|
||||
#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF
|
||||
#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0
|
||||
#define EGL_SYNC_TYPE 0x30F7
|
||||
#define EGL_SYNC_STATUS 0x30F1
|
||||
#define EGL_SYNC_CONDITION 0x30F8
|
||||
#define EGL_SIGNALED 0x30F2
|
||||
#define EGL_UNSIGNALED 0x30F3
|
||||
#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001
|
||||
#define EGL_FOREVER 0xFFFFFFFFFFFFFFFFull
|
||||
#define EGL_TIMEOUT_EXPIRED 0x30F5
|
||||
#define EGL_CONDITION_SATISFIED 0x30F6
|
||||
#define EGL_NO_SYNC EGL_CAST(EGLSync,0)
|
||||
#define EGL_SYNC_FENCE 0x30F9
|
||||
#define EGL_GL_COLORSPACE 0x309D
|
||||
#define EGL_GL_COLORSPACE_SRGB 0x3089
|
||||
#define EGL_GL_COLORSPACE_LINEAR 0x308A
|
||||
#define EGL_GL_RENDERBUFFER 0x30B9
|
||||
#define EGL_GL_TEXTURE_2D 0x30B1
|
||||
#define EGL_GL_TEXTURE_LEVEL 0x30BC
|
||||
#define EGL_GL_TEXTURE_3D 0x30B2
|
||||
#define EGL_GL_TEXTURE_ZOFFSET 0x30BD
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8
|
||||
#define EGL_IMAGE_PRESERVED 0x30D2
|
||||
#define EGL_NO_IMAGE EGL_CAST(EGLImage,0)
|
||||
typedef EGLSync (EGLAPIENTRYP PFNEGLCREATESYNCPROC) (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCPROC) (EGLDisplay dpy, EGLSync sync);
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCPROC) (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBPROC) (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value);
|
||||
typedef EGLImage (EGLAPIENTRYP PFNEGLCREATEIMAGEPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEPROC) (EGLDisplay dpy, EGLImage image);
|
||||
typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYPROC) (EGLenum platform, void *native_display, const EGLAttrib *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITSYNCPROC) (EGLDisplay dpy, EGLSync sync, EGLint flags);
|
||||
#if EGL_EGL_PROTOTYPES
|
||||
EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync);
|
||||
EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value);
|
||||
EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image);
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags);
|
||||
#endif
|
||||
#endif /* EGL_VERSION_1_5 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1486
engine/thirdparty/angle/include/EGL/eglext.h
vendored
Normal file
1486
engine/thirdparty/angle/include/EGL/eglext.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
428
engine/thirdparty/angle/include/EGL/eglext_angle.h
vendored
Normal file
428
engine/thirdparty/angle/include/EGL/eglext_angle.h
vendored
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
//
|
||||
// Copyright 2017 The ANGLE Project Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
//
|
||||
// eglext_angle.h: ANGLE modifications to the eglext.h header file.
|
||||
// Currently we don't include this file directly, we patch eglext.h
|
||||
// to include it implicitly so it is visible throughout our code.
|
||||
|
||||
#ifndef INCLUDE_EGL_EGLEXT_ANGLE_
|
||||
#define INCLUDE_EGL_EGLEXT_ANGLE_
|
||||
|
||||
// clang-format off
|
||||
|
||||
#ifndef EGL_ANGLE_robust_resource_initialization
|
||||
#define EGL_ANGLE_robust_resource_initialization 1
|
||||
#define EGL_ROBUST_RESOURCE_INITIALIZATION_ANGLE 0x3453
|
||||
#endif /* EGL_ANGLE_robust_resource_initialization */
|
||||
|
||||
#ifndef EGL_ANGLE_keyed_mutex
|
||||
#define EGL_ANGLE_keyed_mutex 1
|
||||
#define EGL_DXGI_KEYED_MUTEX_ANGLE 0x33A2
|
||||
#endif /* EGL_ANGLE_keyed_mutex */
|
||||
|
||||
#ifndef EGL_ANGLE_d3d_texture_client_buffer
|
||||
#define EGL_ANGLE_d3d_texture_client_buffer 1
|
||||
#define EGL_D3D_TEXTURE_ANGLE 0x33A3
|
||||
#define EGL_TEXTURE_OFFSET_X_ANGLE 0x3490
|
||||
#define EGL_TEXTURE_OFFSET_Y_ANGLE 0x3491
|
||||
#define EGL_D3D11_TEXTURE_PLANE_ANGLE 0x3492
|
||||
#define EGL_D3D11_TEXTURE_ARRAY_SLICE_ANGLE 0x3493
|
||||
#endif /* EGL_ANGLE_d3d_texture_client_buffer */
|
||||
|
||||
#ifndef EGL_ANGLE_software_display
|
||||
#define EGL_ANGLE_software_display 1
|
||||
#define EGL_SOFTWARE_DISPLAY_ANGLE ((EGLNativeDisplayType)-1)
|
||||
#endif /* EGL_ANGLE_software_display */
|
||||
|
||||
#ifndef EGL_ANGLE_direct3d_display
|
||||
#define EGL_ANGLE_direct3d_display 1
|
||||
#define EGL_D3D11_ELSE_D3D9_DISPLAY_ANGLE ((EGLNativeDisplayType)-2)
|
||||
#define EGL_D3D11_ONLY_DISPLAY_ANGLE ((EGLNativeDisplayType)-3)
|
||||
#endif /* EGL_ANGLE_direct3d_display */
|
||||
|
||||
#ifndef EGL_ANGLE_direct_composition
|
||||
#define EGL_ANGLE_direct_composition 1
|
||||
#define EGL_DIRECT_COMPOSITION_ANGLE 0x33A5
|
||||
#endif /* EGL_ANGLE_direct_composition */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle
|
||||
#define EGL_ANGLE_platform_angle 1
|
||||
#define EGL_PLATFORM_ANGLE_ANGLE 0x3202
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_ANGLE 0x3203
|
||||
#define EGL_PLATFORM_ANGLE_MAX_VERSION_MAJOR_ANGLE 0x3204
|
||||
#define EGL_PLATFORM_ANGLE_MAX_VERSION_MINOR_ANGLE 0x3205
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_DEFAULT_ANGLE 0x3206
|
||||
#define EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED_ANGLE 0x3451
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE 0x3209
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE 0x320A
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_NULL_ANGLE 0x345E
|
||||
#define EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE 0x348F
|
||||
#endif /* EGL_ANGLE_platform_angle */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_d3d
|
||||
#define EGL_ANGLE_platform_angle_d3d 1
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_D3D9_ANGLE 0x3207
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_D3D11_ANGLE 0x3208
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_D3D_WARP_ANGLE 0x320B
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_D3D_REFERENCE_ANGLE 0x320C
|
||||
#define EGL_PLATFORM_ANGLE_ENABLE_AUTOMATIC_TRIM_ANGLE 0x320F
|
||||
#endif /* EGL_ANGLE_platform_angle_d3d */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_d3d_luid
|
||||
#define EGL_ANGLE_platform_angle_d3d_luid 1
|
||||
#define EGL_PLATFORM_ANGLE_D3D_LUID_HIGH_ANGLE 0x34A0
|
||||
#define EGL_PLATFORM_ANGLE_D3D_LUID_LOW_ANGLE 0x34A1
|
||||
#endif /* EGL_ANGLE_platform_angle_d3d_luid */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_d3d11on12
|
||||
#define EGL_ANGLE_platform_angle_d3d11on12 1
|
||||
#define EGL_PLATFORM_ANGLE_D3D11ON12_ANGLE 0x3488
|
||||
#endif /* EGL_ANGLE_platform_angle_d3d11on12 */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_opengl
|
||||
#define EGL_ANGLE_platform_angle_opengl 1
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_OPENGL_ANGLE 0x320D
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_OPENGLES_ANGLE 0x320E
|
||||
#define EGL_PLATFORM_ANGLE_EGL_HANDLE_ANGLE 0x3480
|
||||
#endif /* EGL_ANGLE_platform_angle_opengl */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_null
|
||||
#define EGL_ANGLE_platform_angle_null 1
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_NULL_ANGLE 0x33AE
|
||||
#endif /* EGL_ANGLE_platform_angle_null */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_vulkan
|
||||
#define EGL_ANGLE_platform_angle_vulkan 1
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE 0x3450
|
||||
#define EGL_PLATFORM_VULKAN_DISPLAY_MODE_SIMPLE_ANGLE 0x34A4
|
||||
#define EGL_PLATFORM_VULKAN_DISPLAY_MODE_HEADLESS_ANGLE 0x34A5
|
||||
#endif /* EGL_ANGLE_platform_angle_vulkan */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_metal
|
||||
#define EGL_ANGLE_platform_angle_metal 1
|
||||
#define EGL_PLATFORM_ANGLE_TYPE_METAL_ANGLE 0x3489
|
||||
#endif /* EGL_ANGLE_platform_angle_metal */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_device_type_swiftshader
|
||||
#define EGL_ANGLE_platform_angle_device_type_swiftshader
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_SWIFTSHADER_ANGLE 0x3487
|
||||
#endif /* EGL_ANGLE_platform_angle_device_type_swiftshader */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_device_type_egl_angle
|
||||
#define EGL_ANGLE_platform_angle_device_type_egl_angle
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_EGL_ANGLE 0x348E
|
||||
#endif /* EGL_ANGLE_platform_angle_device_type_egl_angle */
|
||||
|
||||
#ifndef EGL_ANGLE_context_virtualization
|
||||
#define EGL_ANGLE_context_virtualization 1
|
||||
#define EGL_CONTEXT_VIRTUALIZATION_GROUP_ANGLE 0x3481
|
||||
#endif /* EGL_ANGLE_context_virtualization */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_device_context_volatile_eagl
|
||||
#define EGL_ANGLE_platform_angle_device_context_volatile_eagl 1
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_CONTEXT_VOLATILE_EAGL_ANGLE 0x34A2
|
||||
#endif /* EGL_ANGLE_platform_angle_device_context_volatile_eagl */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_device_context_volatile_cgl
|
||||
#define EGL_ANGLE_platform_angle_device_context_volatile_cgl 1
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_CONTEXT_VOLATILE_CGL_ANGLE 0x34A3
|
||||
#endif /* EGL_ANGLE_platform_angle_device_context_volatile_cgl */
|
||||
|
||||
#ifndef EGL_ANGLE_platform_angle_device_id
|
||||
#define EGL_ANGLE_platform_angle_device_id
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_ID_HIGH_ANGLE 0x34D6
|
||||
#define EGL_PLATFORM_ANGLE_DEVICE_ID_LOW_ANGLE 0x34D7
|
||||
#define EGL_PLATFORM_ANGLE_DISPLAY_KEY_ANGLE 0x34DC
|
||||
#endif /* EGL_ANGLE_platform_angle_device_id */
|
||||
|
||||
#ifndef EGL_ANGLE_x11_visual
|
||||
#define EGL_ANGLE_x11_visual
|
||||
#define EGL_X11_VISUAL_ID_ANGLE 0x33A3
|
||||
#endif /* EGL_ANGLE_x11_visual */
|
||||
|
||||
#ifndef EGL_ANGLE_surface_orientation
|
||||
#define EGL_ANGLE_surface_orientation
|
||||
#define EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE 0x33A7
|
||||
#define EGL_SURFACE_ORIENTATION_ANGLE 0x33A8
|
||||
#define EGL_SURFACE_ORIENTATION_INVERT_X_ANGLE 0x0001
|
||||
#define EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE 0x0002
|
||||
#endif /* EGL_ANGLE_surface_orientation */
|
||||
|
||||
#ifndef EGL_ANGLE_experimental_present_path
|
||||
#define EGL_ANGLE_experimental_present_path
|
||||
#define EGL_EXPERIMENTAL_PRESENT_PATH_ANGLE 0x33A4
|
||||
#define EGL_EXPERIMENTAL_PRESENT_PATH_FAST_ANGLE 0x33A9
|
||||
#define EGL_EXPERIMENTAL_PRESENT_PATH_COPY_ANGLE 0x33AA
|
||||
#endif /* EGL_ANGLE_experimental_present_path */
|
||||
|
||||
#ifndef EGL_ANGLE_stream_producer_d3d_texture
|
||||
#define EGL_ANGLE_stream_producer_d3d_texture
|
||||
#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x33AB
|
||||
typedef EGLBoolean(EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERD3DTEXTUREANGLEPROC)(EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
|
||||
typedef EGLBoolean(EGLAPIENTRYP PFNEGLSTREAMPOSTD3DTEXTUREANGLEPROC)(EGLDisplay dpy, EGLStreamKHR stream, void *texture, const EGLAttrib *attrib_list);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglCreateStreamProducerD3DTextureANGLE(EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglStreamPostD3DTextureANGLE(EGLDisplay dpy, EGLStreamKHR stream, void *texture, const EGLAttrib *attrib_list);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_stream_producer_d3d_texture */
|
||||
|
||||
#ifndef EGL_ANGLE_create_context_webgl_compatibility
|
||||
#define EGL_ANGLE_create_context_webgl_compatibility 1
|
||||
#define EGL_CONTEXT_WEBGL_COMPATIBILITY_ANGLE 0x33AC
|
||||
#endif /* EGL_ANGLE_create_context_webgl_compatibility */
|
||||
|
||||
#ifndef EGL_ANGLE_display_texture_share_group
|
||||
#define EGL_ANGLE_display_texture_share_group 1
|
||||
#define EGL_DISPLAY_TEXTURE_SHARE_GROUP_ANGLE 0x33AF
|
||||
#endif /* EGL_ANGLE_display_texture_share_group */
|
||||
|
||||
#ifndef EGL_CHROMIUM_create_context_bind_generates_resource
|
||||
#define EGL_CHROMIUM_create_context_bind_generates_resource 1
|
||||
#define EGL_CONTEXT_BIND_GENERATES_RESOURCE_CHROMIUM 0x33AD
|
||||
#endif /* EGL_CHROMIUM_create_context_bind_generates_resource */
|
||||
|
||||
#ifndef EGL_ANGLE_metal_create_context_ownership_identity
|
||||
#define EGL_ANGLE_metal_create_context_ownership_identity 1
|
||||
#define EGL_CONTEXT_METAL_OWNERSHIP_IDENTITY_ANGLE 0x34D2
|
||||
#endif /* EGL_ANGLE_metal_create_context_ownership_identity */
|
||||
|
||||
#ifndef EGL_ANGLE_create_context_client_arrays
|
||||
#define EGL_ANGLE_create_context_client_arrays 1
|
||||
#define EGL_CONTEXT_CLIENT_ARRAYS_ENABLED_ANGLE 0x3452
|
||||
#endif /* EGL_ANGLE_create_context_client_arrays */
|
||||
|
||||
#ifndef EGL_ANGLE_device_creation
|
||||
#define EGL_ANGLE_device_creation 1
|
||||
typedef EGLDeviceEXT(EGLAPIENTRYP PFNEGLCREATEDEVICEANGLEPROC) (EGLint device_type, void *native_device, const EGLAttrib *attrib_list);
|
||||
typedef EGLBoolean(EGLAPIENTRYP PFNEGLRELEASEDEVICEANGLEPROC) (EGLDeviceEXT device);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLDeviceEXT EGLAPIENTRY eglCreateDeviceANGLE(EGLint device_type, void *native_device, const EGLAttrib *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglReleaseDeviceANGLE(EGLDeviceEXT device);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_device_creation */
|
||||
|
||||
#ifndef EGL_ANGLE_program_cache_control
|
||||
#define EGL_ANGLE_program_cache_control 1
|
||||
#define EGL_PROGRAM_CACHE_SIZE_ANGLE 0x3455
|
||||
#define EGL_PROGRAM_CACHE_KEY_LENGTH_ANGLE 0x3456
|
||||
#define EGL_PROGRAM_CACHE_RESIZE_ANGLE 0x3457
|
||||
#define EGL_PROGRAM_CACHE_TRIM_ANGLE 0x3458
|
||||
#define EGL_CONTEXT_PROGRAM_BINARY_CACHE_ENABLED_ANGLE 0x3459
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLPROGRAMCACHEGETATTRIBANGLEPROC) (EGLDisplay dpy, EGLenum attrib);
|
||||
typedef void (EGLAPIENTRYP PFNEGLPROGRAMCACHEQUERYANGLEPROC) (EGLDisplay dpy, EGLint index, void *key, EGLint *keysize, void *binary, EGLint *binarysize);
|
||||
typedef void (EGLAPIENTRYP PFNEGLPROGRAMCACHEPOPULATEANGLEPROC) (EGLDisplay dpy, const void *key, EGLint keysize, const void *binary, EGLint binarysize);
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLPROGRAMCACHERESIZEANGLEPROC) (EGLDisplay dpy, EGLint limit, EGLint mode);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLint EGLAPIENTRY eglProgramCacheGetAttribANGLE(EGLDisplay dpy, EGLenum attrib);
|
||||
EGLAPI void EGLAPIENTRY eglProgramCacheQueryANGLE(EGLDisplay dpy, EGLint index, void *key, EGLint *keysize, void *binary, EGLint *binarysize);
|
||||
EGLAPI void EGLAPIENTRY eglProgramCachePopulateANGLE(EGLDisplay dpy, const void *key, EGLint keysize, const void *binary, EGLint binarysize);
|
||||
EGLAPI EGLint EGLAPIENTRY eglProgramCacheResizeANGLE(EGLDisplay dpy, EGLint limit, EGLint mode);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_program_cache_control */
|
||||
|
||||
#ifndef EGL_ANGLE_iosurface_client_buffer
|
||||
#define EGL_ANGLE_iosurface_client_buffer 1
|
||||
#define EGL_IOSURFACE_ANGLE 0x3454
|
||||
#define EGL_IOSURFACE_PLANE_ANGLE 0x345A
|
||||
#define EGL_TEXTURE_RECTANGLE_ANGLE 0x345B
|
||||
#define EGL_TEXTURE_TYPE_ANGLE 0x345C
|
||||
#define EGL_TEXTURE_INTERNAL_FORMAT_ANGLE 0x345D
|
||||
#define EGL_IOSURFACE_USAGE_HINT_ANGLE 0x348A
|
||||
#define EGL_IOSURFACE_READ_HINT_ANGLE 0x0001
|
||||
#define EGL_IOSURFACE_WRITE_HINT_ANGLE 0x0002
|
||||
#define EGL_BIND_TO_TEXTURE_TARGET_ANGLE 0x348D
|
||||
#endif /* EGL_ANGLE_iosurface_client_buffer */
|
||||
|
||||
#ifndef ANGLE_metal_texture_client_buffer
|
||||
#define ANGLE_metal_texture_client_buffer 1
|
||||
#define EGL_METAL_TEXTURE_ANGLE 0x34A7
|
||||
#endif /* ANGLE_metal_texture_client_buffer */
|
||||
|
||||
#ifndef EGL_ANGLE_create_context_extensions_enabled
|
||||
#define EGL_ANGLE_create_context_extensions_enabled 1
|
||||
#define EGL_EXTENSIONS_ENABLED_ANGLE 0x345F
|
||||
#endif /* EGL_ANGLE_create_context_extensions_enabled */
|
||||
|
||||
#ifndef EGL_CHROMIUM_sync_control
|
||||
#define EGL_CHROMIUM_sync_control 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCVALUESCHROMIUMPROC) (EGLDisplay dpy,
|
||||
EGLSurface surface,
|
||||
EGLuint64KHR *ust,
|
||||
EGLuint64KHR *msc,
|
||||
EGLuint64KHR *sbc);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncValuesCHROMIUM(EGLDisplay dpy,
|
||||
EGLSurface surface,
|
||||
EGLuint64KHR *ust,
|
||||
EGLuint64KHR *msc,
|
||||
EGLuint64KHR *sbc);
|
||||
#endif
|
||||
#endif /* EGL_CHROMIUM_sync_control */
|
||||
|
||||
#ifndef EGL_ANGLE_sync_control_rate
|
||||
#define EGL_ANGLE_sync_control_rate 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETMSCRATEANGLEPROC) (EGLDisplay dpy,
|
||||
EGLSurface surface,
|
||||
EGLint *numerator,
|
||||
EGLint *denominator);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetMscRateANGLE(EGLDisplay dpy,
|
||||
EGLSurface surface,
|
||||
EGLint *numerator,
|
||||
EGLint *denominator);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_sync_control_rate */
|
||||
|
||||
#ifndef EGL_ANGLE_power_preference
|
||||
#define EGL_ANGLE_power_preference 1
|
||||
#define EGL_POWER_PREFERENCE_ANGLE 0x3482
|
||||
#define EGL_LOW_POWER_ANGLE 0x0001
|
||||
#define EGL_HIGH_POWER_ANGLE 0x0002
|
||||
typedef void(EGLAPIENTRYP PFNEGLRELEASEHIGHPOWERGPUANGLEPROC) (EGLDisplay dpy, EGLContext ctx);
|
||||
typedef void(EGLAPIENTRYP PFNEGLREACQUIREHIGHPOWERGPUANGLEPROC) (EGLDisplay dpy, EGLContext ctx);
|
||||
typedef void(EGLAPIENTRYP PFNEGLHANDLEGPUSWITCHANGLEPROC) (EGLDisplay dpy);
|
||||
typedef void(EGLAPIENTRYP PFNEGLFORCEGPUSWITCHANGLEPROC) (EGLDisplay dpy, EGLint gpuIDHigh, EGLint gpuIDLow);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI void EGLAPIENTRY eglReleaseHighPowerGPUANGLE(EGLDisplay dpy, EGLContext ctx);
|
||||
EGLAPI void EGLAPIENTRY eglReacquireHighPowerGPUANGLE(EGLDisplay dpy, EGLContext ctx);
|
||||
EGLAPI void EGLAPIENTRY eglHandleGPUSwitchANGLE(EGLDisplay dpy);
|
||||
EGLAPI void EGLAPIENTRY eglForceGPUSwitchANGLE(EGLDisplay dpy, EGLint gpuIDHigh, EGLint gpuIDLow);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_power_preference */
|
||||
|
||||
#ifndef EGL_ANGLE_wait_until_work_scheduled
|
||||
#define EGL_ANGLE_wait_until_work_scheduled 1
|
||||
typedef void(EGLAPIENTRYP PFNEGLWAITUNTILWORKSCHEDULEDANGLEPROC) (EGLDisplay dpy);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI void EGLAPIENTRY eglWaitUntilWorkScheduledANGLE(EGLDisplay dpy);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_wait_until_work_scheduled */
|
||||
|
||||
#ifndef EGL_ANGLE_feature_control
|
||||
#define EGL_ANGLE_feature_control 1
|
||||
#define EGL_FEATURE_NAME_ANGLE 0x3460
|
||||
#define EGL_FEATURE_CATEGORY_ANGLE 0x3461
|
||||
#define EGL_FEATURE_DESCRIPTION_ANGLE 0x3462
|
||||
#define EGL_FEATURE_BUG_ANGLE 0x3463
|
||||
#define EGL_FEATURE_STATUS_ANGLE 0x3464
|
||||
#define EGL_FEATURE_COUNT_ANGLE 0x3465
|
||||
#define EGL_FEATURE_OVERRIDES_ENABLED_ANGLE 0x3466
|
||||
#define EGL_FEATURE_OVERRIDES_DISABLED_ANGLE 0x3467
|
||||
#define EGL_FEATURE_CONDITION_ANGLE 0x3468
|
||||
#define EGL_FEATURE_ALL_DISABLED_ANGLE 0x3469
|
||||
typedef const char *(EGLAPIENTRYP PFNEGLQUERYSTRINGIANGLEPROC) (EGLDisplay dpy, EGLint name, EGLint index);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBANGLEPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI const char *EGLAPIENTRY eglQueryStringiANGLE(EGLDisplay dpy, EGLint name, EGLint index);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribANGLE(EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_feature_control */
|
||||
|
||||
#ifndef EGL_ANGLE_image_d3d11_texture
|
||||
#define EGL_D3D11_TEXTURE_ANGLE 0x3484
|
||||
#define EGL_TEXTURE_INTERNAL_FORMAT_ANGLE 0x345D
|
||||
#endif /* EGL_ANGLE_image_d3d11_texture */
|
||||
|
||||
#ifndef EGL_ANGLE_create_context_backwards_compatible
|
||||
#define EGL_ANGLE_create_context_backwards_compatible 1
|
||||
#define EGL_CONTEXT_OPENGL_BACKWARDS_COMPATIBLE_ANGLE 0x3483
|
||||
#endif /* EGL_ANGLE_create_context_backwards_compatible */
|
||||
|
||||
#ifndef EGL_ANGLE_device_cgl
|
||||
#define EGL_ANGLE_device_cgl 1
|
||||
#define EGL_CGL_CONTEXT_ANGLE 0x3485
|
||||
#define EGL_CGL_PIXEL_FORMAT_ANGLE 0x3486
|
||||
#endif
|
||||
|
||||
#ifndef EGL_ANGLE_ggp_stream_descriptor
|
||||
#define EGL_ANGLE_ggp_stream_descriptor 1
|
||||
#define EGL_GGP_STREAM_DESCRIPTOR_ANGLE 0x348B
|
||||
#endif /* EGL_ANGLE_ggp_stream_descriptor */
|
||||
|
||||
#ifndef EGL_ANGLE_swap_with_frame_token
|
||||
#define EGL_ANGLE_swap_with_frame_token 1
|
||||
typedef khronos_uint64_t EGLFrameTokenANGLE;
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHFRAMETOKENANGLEPROC)(EGLDisplay dpy, EGLSurface surface, EGLFrameTokenANGLE frametoken);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithFrameTokenANGLE(EGLDisplay dpy, EGLSurface surface, EGLFrameTokenANGLE frametoken);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_swap_with_frame_token */
|
||||
|
||||
#ifndef EGL_ANGLE_prepare_swap_buffers
|
||||
#define EGL_ANGLE_prepare_swap_buffers 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLPREPARESWAPBUFFERSANGLEPROC)(EGLDisplay dpy, EGLSurface surface);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglPrepareSwapBuffersANGLE(EGLDisplay dpy, EGLSurface surface);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_prepare_swap_buffers */
|
||||
|
||||
#ifndef EGL_ANGLE_device_eagl
|
||||
#define EGL_ANGLE_device_eagl 1
|
||||
#define EGL_EAGL_CONTEXT_ANGLE 0x348C
|
||||
#endif
|
||||
|
||||
#ifndef EGL_ANGLE_device_metal
|
||||
#define EGL_ANGLE_device_metal 1
|
||||
#define EGL_METAL_DEVICE_ANGLE 0x34A6
|
||||
#endif /* EGL_ANGLE_device_metal */
|
||||
|
||||
#ifndef EGL_ANGLE_display_semaphore_share_group
|
||||
#define EGL_ANGLE_display_semaphore_share_group 1
|
||||
#define EGL_DISPLAY_SEMAPHORE_SHARE_GROUP_ANGLE 0x348D
|
||||
#endif /* EGL_ANGLE_display_semaphore_share_group */
|
||||
|
||||
#ifndef EGL_ANGLE_external_context_and_surface
|
||||
#define EGL_ANGLE_external_context_and_surface 1
|
||||
#define EGL_EXTERNAL_CONTEXT_ANGLE 0x348E
|
||||
#define EGL_EXTERNAL_SURFACE_ANGLE 0x348F
|
||||
#define EGL_EXTERNAL_CONTEXT_SAVE_STATE_ANGLE 0x3490
|
||||
#endif /* EGL_ANGLE_external_context_and_surface */
|
||||
|
||||
#ifndef EGL_ANGLE_create_surface_swap_interval
|
||||
#define EGL_ANGLE_create_surface_swap_interval 1
|
||||
#define EGL_SWAP_INTERVAL_ANGLE 0x322F
|
||||
#endif /* EGL_ANGLE_create_surface_swap_interval */
|
||||
|
||||
#ifndef EGL_ANGLE_device_vulkan
|
||||
#define EGL_ANGLE_device_vulkan 1
|
||||
#define EGL_VULKAN_VERSION_ANGLE 0x34A8
|
||||
#define EGL_VULKAN_INSTANCE_ANGLE 0x34A9
|
||||
#define EGL_VULKAN_INSTANCE_EXTENSIONS_ANGLE 0x34AA
|
||||
#define EGL_VULKAN_PHYSICAL_DEVICE_ANGLE 0x34AB
|
||||
#define EGL_VULKAN_DEVICE_ANGLE 0x34AC
|
||||
#define EGL_VULKAN_DEVICE_EXTENSIONS_ANGLE 0x34AD
|
||||
#define EGL_VULKAN_FEATURES_ANGLE 0x34AE
|
||||
#define EGL_VULKAN_QUEUE_ANGLE 0x34AF
|
||||
#define EGL_VULKAN_QUEUE_FAMILIY_INDEX_ANGLE 0x34D0
|
||||
#define EGL_VULKAN_GET_INSTANCE_PROC_ADDR 0x34D1
|
||||
#endif /* EGL_ANGLE_device_vulkan */
|
||||
|
||||
#ifndef EGL_ANGLE_vulkan_image
|
||||
#define EGL_ANGLE_vulkan_image
|
||||
#define EGL_VULKAN_IMAGE_ANGLE 0x34D3
|
||||
#define EGL_VULKAN_IMAGE_CREATE_INFO_HI_ANGLE 0x34D4
|
||||
#define EGL_VULKAN_IMAGE_CREATE_INFO_LO_ANGLE 0x34D5
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTVKIMAGEANGLEPROC)(EGLDisplay dpy, EGLImage image, void* vk_image, void* vk_image_create_info);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglExportVkImageANGLE(EGLDisplay dpy, EGLImage image, void* vk_image, void* vk_image_create_info);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_vulkan_image */
|
||||
|
||||
#ifndef EGL_ANGLE_metal_shared_event_sync
|
||||
#define EGL_ANGLE_metal_hared_event_sync 1
|
||||
#define EGL_SYNC_METAL_SHARED_EVENT_ANGLE 0x34D8
|
||||
#define EGL_SYNC_METAL_SHARED_EVENT_OBJECT_ANGLE 0x34D9
|
||||
#define EGL_SYNC_METAL_SHARED_EVENT_SIGNAL_VALUE_LO_ANGLE 0x34DA
|
||||
#define EGL_SYNC_METAL_SHARED_EVENT_SIGNAL_VALUE_HI_ANGLE 0x34DB
|
||||
#define EGL_SYNC_METAL_SHARED_EVENT_SIGNALED_ANGLE 0x34DC
|
||||
typedef void* (EGLAPIENTRYP PFNEGLCOPYMETALSHAREDEVENTANGLEPROC)(EGLDisplay dpy, EGLSync sync);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI void *EGLAPIENTRY eglCopyMetalSharedEventANGLE(EGLDisplay dpy, EGLSync sync);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_metal_shared_event_sync */
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // INCLUDE_EGL_EGLEXT_ANGLE_
|
||||
175
engine/thirdparty/angle/include/EGL/eglplatform.h
vendored
Normal file
175
engine/thirdparty/angle/include/EGL/eglplatform.h
vendored
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
#ifndef __eglplatform_h_
|
||||
#define __eglplatform_h_
|
||||
|
||||
/*
|
||||
** Copyright 2007-2020 The Khronos Group Inc.
|
||||
** SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/* Platform-specific types and definitions for egl.h
|
||||
*
|
||||
* Adopters may modify khrplatform.h and this file to suit their platform.
|
||||
* You are encouraged to submit all modifications to the Khronos group so that
|
||||
* they can be included in future versions of this file. Please submit changes
|
||||
* by filing an issue or pull request on the public Khronos EGL Registry, at
|
||||
* https://www.github.com/KhronosGroup/EGL-Registry/
|
||||
*/
|
||||
|
||||
#include <KHR/khrplatform.h>
|
||||
|
||||
/* Macros used in EGL function prototype declarations.
|
||||
*
|
||||
* EGL functions should be prototyped as:
|
||||
*
|
||||
* EGLAPI return-type EGLAPIENTRY eglFunction(arguments);
|
||||
* typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments);
|
||||
*
|
||||
* KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h
|
||||
*/
|
||||
|
||||
#ifndef EGLAPI
|
||||
#define EGLAPI KHRONOS_APICALL
|
||||
#endif
|
||||
|
||||
#ifndef EGLAPIENTRY
|
||||
#define EGLAPIENTRY KHRONOS_APIENTRY
|
||||
#endif
|
||||
#define EGLAPIENTRYP EGLAPIENTRY*
|
||||
|
||||
/* The types NativeDisplayType, NativeWindowType, and NativePixmapType
|
||||
* are aliases of window-system-dependent types, such as X Display * or
|
||||
* Windows Device Context. They must be defined in platform-specific
|
||||
* code below. The EGL-prefixed versions of Native*Type are the same
|
||||
* types, renamed in EGL 1.3 so all types in the API start with "EGL".
|
||||
*
|
||||
* Khronos STRONGLY RECOMMENDS that you use the default definitions
|
||||
* provided below, since these changes affect both binary and source
|
||||
* portability of applications using EGL running on different EGL
|
||||
* implementations.
|
||||
*/
|
||||
|
||||
#if defined(EGL_NO_PLATFORM_SPECIFIC_TYPES)
|
||||
|
||||
typedef void *EGLNativeDisplayType;
|
||||
typedef void *EGLNativePixmapType;
|
||||
typedef void *EGLNativeWindowType;
|
||||
|
||||
#elif defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
typedef HDC EGLNativeDisplayType;
|
||||
typedef HBITMAP EGLNativePixmapType;
|
||||
|
||||
#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) /* Windows Desktop */
|
||||
typedef HWND EGLNativeWindowType;
|
||||
#else /* Windows Store */
|
||||
#include <inspectable.h>
|
||||
typedef IInspectable* EGLNativeWindowType;
|
||||
#endif
|
||||
|
||||
#elif defined(__EMSCRIPTEN__)
|
||||
|
||||
typedef int EGLNativeDisplayType;
|
||||
typedef int EGLNativePixmapType;
|
||||
typedef int EGLNativeWindowType;
|
||||
|
||||
#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */
|
||||
|
||||
typedef int EGLNativeDisplayType;
|
||||
typedef void *EGLNativePixmapType;
|
||||
typedef void *EGLNativeWindowType;
|
||||
|
||||
#elif defined(WL_EGL_PLATFORM)
|
||||
|
||||
typedef struct wl_display *EGLNativeDisplayType;
|
||||
typedef struct wl_egl_pixmap *EGLNativePixmapType;
|
||||
typedef struct wl_egl_window *EGLNativeWindowType;
|
||||
|
||||
#elif defined(__GBM__)
|
||||
|
||||
typedef struct gbm_device *EGLNativeDisplayType;
|
||||
typedef struct gbm_bo *EGLNativePixmapType;
|
||||
typedef void *EGLNativeWindowType;
|
||||
|
||||
#elif defined(__ANDROID__) || defined(ANDROID)
|
||||
|
||||
struct ANativeWindow;
|
||||
struct egl_native_pixmap_t;
|
||||
|
||||
typedef void* EGLNativeDisplayType;
|
||||
typedef struct egl_native_pixmap_t* EGLNativePixmapType;
|
||||
typedef struct ANativeWindow* EGLNativeWindowType;
|
||||
|
||||
#elif defined(USE_OZONE)
|
||||
|
||||
typedef intptr_t EGLNativeDisplayType;
|
||||
typedef intptr_t EGLNativePixmapType;
|
||||
typedef intptr_t EGLNativeWindowType;
|
||||
|
||||
#elif defined(USE_X11)
|
||||
|
||||
/* X11 (tentative) */
|
||||
#include <X11/Xlib.h>
|
||||
#include <X11/Xutil.h>
|
||||
|
||||
typedef Display *EGLNativeDisplayType;
|
||||
typedef Pixmap EGLNativePixmapType;
|
||||
typedef Window EGLNativeWindowType;
|
||||
|
||||
#elif defined(__unix__)
|
||||
|
||||
typedef void *EGLNativeDisplayType;
|
||||
typedef khronos_uintptr_t EGLNativePixmapType;
|
||||
typedef khronos_uintptr_t EGLNativeWindowType;
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
|
||||
typedef int EGLNativeDisplayType;
|
||||
typedef void *EGLNativePixmapType;
|
||||
typedef void *EGLNativeWindowType;
|
||||
|
||||
#elif defined(__HAIKU__)
|
||||
|
||||
#include <kernel/image.h>
|
||||
|
||||
typedef void *EGLNativeDisplayType;
|
||||
typedef khronos_uintptr_t EGLNativePixmapType;
|
||||
typedef khronos_uintptr_t EGLNativeWindowType;
|
||||
|
||||
#elif defined(__Fuchsia__)
|
||||
|
||||
typedef void *EGLNativeDisplayType;
|
||||
typedef khronos_uintptr_t EGLNativePixmapType;
|
||||
typedef khronos_uintptr_t EGLNativeWindowType;
|
||||
|
||||
#else
|
||||
#error "Platform not recognized"
|
||||
#endif
|
||||
|
||||
/* EGL 1.2 types, renamed for consistency in EGL 1.3 */
|
||||
typedef EGLNativeDisplayType NativeDisplayType;
|
||||
typedef EGLNativePixmapType NativePixmapType;
|
||||
typedef EGLNativeWindowType NativeWindowType;
|
||||
|
||||
|
||||
/* Define EGLint. This must be a signed integral type large enough to contain
|
||||
* all legal attribute names and values passed into and out of EGL, whether
|
||||
* their type is boolean, bitmask, enumerant (symbolic constant), integer,
|
||||
* handle, or other. While in general a 32-bit integer will suffice, if
|
||||
* handles are 64 bit types, then EGLint should be defined as a signed 64-bit
|
||||
* integer type.
|
||||
*/
|
||||
typedef khronos_int32_t EGLint;
|
||||
|
||||
|
||||
/* C++ / C typecast macros for special EGL handle values */
|
||||
#if defined(__cplusplus)
|
||||
#define EGL_CAST(type, value) (static_cast<type>(value))
|
||||
#else
|
||||
#define EGL_CAST(type, value) ((type) (value))
|
||||
#endif
|
||||
|
||||
#endif /* __eglplatform_h */
|
||||
290
engine/thirdparty/angle/include/KHR/khrplatform.h
vendored
Normal file
290
engine/thirdparty/angle/include/KHR/khrplatform.h
vendored
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
#ifndef __khrplatform_h_
|
||||
#define __khrplatform_h_
|
||||
|
||||
/*
|
||||
** Copyright (c) 2008-2018 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
/* Khronos platform-specific types and definitions.
|
||||
*
|
||||
* The master copy of khrplatform.h is maintained in the Khronos EGL
|
||||
* Registry repository at https://github.com/KhronosGroup/EGL-Registry
|
||||
* The last semantic modification to khrplatform.h was at commit ID:
|
||||
* 67a3e0864c2d75ea5287b9f3d2eb74a745936692
|
||||
*
|
||||
* Adopters may modify this file to suit their platform. Adopters are
|
||||
* encouraged to submit platform specific modifications to the Khronos
|
||||
* group so that they can be included in future versions of this file.
|
||||
* Please submit changes by filing pull requests or issues on
|
||||
* the EGL Registry repository linked above.
|
||||
*
|
||||
*
|
||||
* See the Implementer's Guidelines for information about where this file
|
||||
* should be located on your system and for more details of its use:
|
||||
* http://www.khronos.org/registry/implementers_guide.pdf
|
||||
*
|
||||
* This file should be included as
|
||||
* #include <KHR/khrplatform.h>
|
||||
* by Khronos client API header files that use its types and defines.
|
||||
*
|
||||
* The types in khrplatform.h should only be used to define API-specific types.
|
||||
*
|
||||
* Types defined in khrplatform.h:
|
||||
* khronos_int8_t signed 8 bit
|
||||
* khronos_uint8_t unsigned 8 bit
|
||||
* khronos_int16_t signed 16 bit
|
||||
* khronos_uint16_t unsigned 16 bit
|
||||
* khronos_int32_t signed 32 bit
|
||||
* khronos_uint32_t unsigned 32 bit
|
||||
* khronos_int64_t signed 64 bit
|
||||
* khronos_uint64_t unsigned 64 bit
|
||||
* khronos_intptr_t signed same number of bits as a pointer
|
||||
* khronos_uintptr_t unsigned same number of bits as a pointer
|
||||
* khronos_ssize_t signed size
|
||||
* khronos_usize_t unsigned size
|
||||
* khronos_float_t signed 32 bit floating point
|
||||
* khronos_time_ns_t unsigned 64 bit time in nanoseconds
|
||||
* khronos_utime_nanoseconds_t unsigned time interval or absolute time in
|
||||
* nanoseconds
|
||||
* khronos_stime_nanoseconds_t signed time interval in nanoseconds
|
||||
* khronos_boolean_enum_t enumerated boolean type. This should
|
||||
* only be used as a base type when a client API's boolean type is
|
||||
* an enum. Client APIs which use an integer or other type for
|
||||
* booleans cannot use this as the base type for their boolean.
|
||||
*
|
||||
* Tokens defined in khrplatform.h:
|
||||
*
|
||||
* KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
|
||||
*
|
||||
* KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
|
||||
* KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
|
||||
*
|
||||
* Calling convention macros defined in this file:
|
||||
* KHRONOS_APICALL
|
||||
* KHRONOS_APIENTRY
|
||||
* KHRONOS_APIATTRIBUTES
|
||||
*
|
||||
* These may be used in function prototypes as:
|
||||
*
|
||||
* KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
|
||||
* int arg1,
|
||||
* int arg2) KHRONOS_APIATTRIBUTES;
|
||||
*/
|
||||
|
||||
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
|
||||
# define KHRONOS_STATIC 1
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APICALL
|
||||
*-------------------------------------------------------------------------
|
||||
* This precedes the return type of the function in the function prototype.
|
||||
*/
|
||||
#if defined(KHRONOS_STATIC)
|
||||
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
|
||||
* header compatible with static linking. */
|
||||
# define KHRONOS_APICALL
|
||||
#elif defined(_WIN32)
|
||||
# define KHRONOS_APICALL __declspec(dllimport)
|
||||
#elif defined (__SYMBIAN32__)
|
||||
# define KHRONOS_APICALL IMPORT_C
|
||||
#elif defined(__ANDROID__)
|
||||
# define KHRONOS_APICALL __attribute__((visibility("default")))
|
||||
#else
|
||||
# define KHRONOS_APICALL
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIENTRY
|
||||
*-------------------------------------------------------------------------
|
||||
* This follows the return type of the function and precedes the function
|
||||
* name in the function prototype.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
|
||||
/* Win32 but not WinCE */
|
||||
# define KHRONOS_APIENTRY __stdcall
|
||||
#else
|
||||
# define KHRONOS_APIENTRY
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIATTRIBUTES
|
||||
*-------------------------------------------------------------------------
|
||||
* This follows the closing parenthesis of the function prototype arguments.
|
||||
*/
|
||||
#if defined (__ARMCC_2__)
|
||||
#define KHRONOS_APIATTRIBUTES __softfp
|
||||
#else
|
||||
#define KHRONOS_APIATTRIBUTES
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* basic type definitions
|
||||
*-----------------------------------------------------------------------*/
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
|
||||
|
||||
|
||||
/*
|
||||
* Using <stdint.h>
|
||||
*/
|
||||
#include <stdint.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(__VMS ) || defined(__sgi)
|
||||
|
||||
/*
|
||||
* Using <inttypes.h>
|
||||
*/
|
||||
#include <inttypes.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
|
||||
|
||||
/*
|
||||
* Win32
|
||||
*/
|
||||
typedef __int32 khronos_int32_t;
|
||||
typedef unsigned __int32 khronos_uint32_t;
|
||||
typedef __int64 khronos_int64_t;
|
||||
typedef unsigned __int64 khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(__sun__) || defined(__digital__)
|
||||
|
||||
/*
|
||||
* Sun or Digital
|
||||
*/
|
||||
typedef int khronos_int32_t;
|
||||
typedef unsigned int khronos_uint32_t;
|
||||
#if defined(__arch64__) || defined(_LP64)
|
||||
typedef long int khronos_int64_t;
|
||||
typedef unsigned long int khronos_uint64_t;
|
||||
#else
|
||||
typedef long long int khronos_int64_t;
|
||||
typedef unsigned long long int khronos_uint64_t;
|
||||
#endif /* __arch64__ */
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif 0
|
||||
|
||||
/*
|
||||
* Hypothetical platform with no float or int64 support
|
||||
*/
|
||||
typedef int khronos_int32_t;
|
||||
typedef unsigned int khronos_uint32_t;
|
||||
#define KHRONOS_SUPPORT_INT64 0
|
||||
#define KHRONOS_SUPPORT_FLOAT 0
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Generic fallback
|
||||
*/
|
||||
#include <stdint.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Types that are (so far) the same on all platforms
|
||||
*/
|
||||
typedef signed char khronos_int8_t;
|
||||
typedef unsigned char khronos_uint8_t;
|
||||
typedef signed short int khronos_int16_t;
|
||||
typedef unsigned short int khronos_uint16_t;
|
||||
|
||||
/*
|
||||
* Types that differ between LLP64 and LP64 architectures - in LLP64,
|
||||
* pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
|
||||
* to be the only LLP64 architecture in current use.
|
||||
*/
|
||||
#ifdef _WIN64
|
||||
typedef signed long long int khronos_intptr_t;
|
||||
typedef unsigned long long int khronos_uintptr_t;
|
||||
typedef signed long long int khronos_ssize_t;
|
||||
typedef unsigned long long int khronos_usize_t;
|
||||
#else
|
||||
typedef signed long int khronos_intptr_t;
|
||||
typedef unsigned long int khronos_uintptr_t;
|
||||
typedef signed long int khronos_ssize_t;
|
||||
typedef unsigned long int khronos_usize_t;
|
||||
#endif
|
||||
|
||||
#if KHRONOS_SUPPORT_FLOAT
|
||||
/*
|
||||
* Float type
|
||||
*/
|
||||
typedef float khronos_float_t;
|
||||
#endif
|
||||
|
||||
#if KHRONOS_SUPPORT_INT64
|
||||
/* Time types
|
||||
*
|
||||
* These types can be used to represent a time interval in nanoseconds or
|
||||
* an absolute Unadjusted System Time. Unadjusted System Time is the number
|
||||
* of nanoseconds since some arbitrary system event (e.g. since the last
|
||||
* time the system booted). The Unadjusted System Time is an unsigned
|
||||
* 64 bit value that wraps back to 0 every 584 years. Time intervals
|
||||
* may be either signed or unsigned.
|
||||
*/
|
||||
typedef khronos_uint64_t khronos_utime_nanoseconds_t;
|
||||
typedef khronos_int64_t khronos_stime_nanoseconds_t;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Dummy value used to pad enum types to 32 bits.
|
||||
*/
|
||||
#ifndef KHRONOS_MAX_ENUM
|
||||
#define KHRONOS_MAX_ENUM 0x7FFFFFFF
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enumerated boolean type
|
||||
*
|
||||
* Values other than zero should be considered to be true. Therefore
|
||||
* comparisons should not be made against KHRONOS_TRUE.
|
||||
*/
|
||||
typedef enum {
|
||||
KHRONOS_FALSE = 0,
|
||||
KHRONOS_TRUE = 1,
|
||||
KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
|
||||
} khronos_boolean_enum_t;
|
||||
|
||||
#endif /* __khrplatform_h_ */
|
||||
175
engine/thirdparty/astcenc/LICENSE.txt
vendored
Normal file
175
engine/thirdparty/astcenc/LICENSE.txt
vendored
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
860
engine/thirdparty/astcenc/astcenc.h
vendored
Normal file
860
engine/thirdparty/astcenc/astcenc.h
vendored
Normal file
|
|
@ -0,0 +1,860 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2020-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief The core astcenc codec library interface.
|
||||
*
|
||||
* This interface is the entry point to the core astcenc codec. It aims to be easy to use for
|
||||
* non-experts, but also to allow experts to have fine control over the compressor heuristics if
|
||||
* needed. The core codec only handles compression and decompression, transferring all inputs and
|
||||
* outputs via memory buffers. To catch obvious input/output buffer sizing issues, which can cause
|
||||
* security and stability problems, all transfer buffers are explicitly sized.
|
||||
*
|
||||
* While the aim is that we keep this interface mostly stable, it should be viewed as a mutable
|
||||
* interface tied to a specific source version. We are not trying to maintain backwards
|
||||
* compatibility across codec versions.
|
||||
*
|
||||
* The API state management is based around an explicit context object, which is the context for all
|
||||
* allocated memory resources needed to compress and decompress a single image. A context can be
|
||||
* used to sequentially compress multiple images using the same configuration, allowing setup
|
||||
* overheads to be amortized over multiple images, which is particularly important when images are
|
||||
* small.
|
||||
*
|
||||
* Multi-threading can be used two ways.
|
||||
*
|
||||
* * An application wishing to process multiple images in parallel can allocate multiple
|
||||
* contexts and assign each context to a thread.
|
||||
* * An application wishing to process a single image in using multiple threads can configure
|
||||
* contexts for multi-threaded use, and invoke astcenc_compress/decompress() once per thread
|
||||
* for faster processing. The caller is responsible for creating the worker threads, and
|
||||
* synchronizing between images.
|
||||
*
|
||||
* Extended instruction set support
|
||||
* ================================
|
||||
*
|
||||
* This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
|
||||
* enabled at compile time when building the library. There is no runtime checking in the core
|
||||
* library that the instruction sets used are actually available. Checking compatibility is the
|
||||
* responsibility of the calling code.
|
||||
*
|
||||
* Threading
|
||||
* =========
|
||||
*
|
||||
* In pseudo-code, the usage for manual user threading looks like this:
|
||||
*
|
||||
* // Configure the compressor run
|
||||
* astcenc_config my_config;
|
||||
* astcenc_config_init(..., &my_config);
|
||||
*
|
||||
* // Power users can tweak <my_config> settings here ...
|
||||
*
|
||||
* // Allocate working state given config and thread_count
|
||||
* astcenc_context* my_context;
|
||||
* astcenc_context_alloc(&my_config, thread_count, &my_context);
|
||||
*
|
||||
* // Compress each image using these config settings
|
||||
* foreach image:
|
||||
* // For each thread in the thread pool
|
||||
* for i in range(0, thread_count):
|
||||
* astcenc_compress_image(my_context, &my_input, my_output, i);
|
||||
*
|
||||
* astcenc_compress_reset(my_context);
|
||||
*
|
||||
* // Clean up
|
||||
* astcenc_context_free(my_context);
|
||||
*
|
||||
* Images
|
||||
* ======
|
||||
*
|
||||
* The codec supports compressing single images, which can be either 2D images or volumetric 3D
|
||||
* images. Calling code is responsible for any handling of aggregate types, such as mipmap chains,
|
||||
* texture arrays, or sliced 3D textures.
|
||||
*
|
||||
* Images are passed in as an astcenc_image structure. Inputs can be either 8-bit unorm, 16-bit
|
||||
* half-float, or 32-bit float, as indicated by the data_type field.
|
||||
*
|
||||
* Images can be any dimension; there is no requirement to be a multiple of the ASTC block size.
|
||||
*
|
||||
* Data is always passed in as 4 color components, and accessed as an array of 2D image slices. Data
|
||||
* within an image slice is always tightly packed without padding. Addressing looks like this:
|
||||
*
|
||||
* data[z_coord][y_coord * x_dim * 4 + x_coord * 4 ] // Red
|
||||
* data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 1] // Green
|
||||
* data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 2] // Blue
|
||||
* data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 3] // Alpha
|
||||
*
|
||||
* Common compressor usage
|
||||
* =======================
|
||||
*
|
||||
* One of the most important things for coding image quality is to align the input data component
|
||||
* count with the ASTC color endpoint mode. This avoids wasting bits encoding components you don't
|
||||
* actually need in the endpoint colors.
|
||||
*
|
||||
* | Input data | Encoding swizzle | Sampling swizzle |
|
||||
* | ------------ | ---------------- | ---------------- |
|
||||
* | 1 component | RRR1 | .[rgb] |
|
||||
* | 2 components | RRRG | .[rgb]a |
|
||||
* | 3 components | RGB1 | .rgb |
|
||||
* | 4 components | RGBA | .rgba |
|
||||
*
|
||||
* The 1 and 2 component modes recommend sampling from "g" to recover the luminance value as this
|
||||
* provide best compatibility with other texture formats where the green component may be stored at
|
||||
* higher precision than the others, such as RGB565. For ASTC any of the RGB components can be used;
|
||||
* the luminance endpoint component will be returned for all three.
|
||||
*
|
||||
* When using the normal map compression mode ASTC will store normals as a two component X+Y map.
|
||||
* Input images must contain unit-length normalized and should be passed in using a two component
|
||||
* swizzle. The astcenc command line tool defaults to an RRRG swizzle, but some developers prefer
|
||||
* to use GGGR for compatability with BC5n which will work just as well. The Z component can be
|
||||
* recovered programmatically in shader code, using knowledge that the vector is unit length and
|
||||
* that Z must be positive for a tangent-space normal map.
|
||||
*
|
||||
* Decompress-only usage
|
||||
* =====================
|
||||
*
|
||||
* For some use cases it is useful to have a cut-down context and/or library which supports
|
||||
* decompression but not compression.
|
||||
*
|
||||
* A context can be made decompress-only using the ASTCENC_FLG_DECOMPRESS_ONLY flag when the context
|
||||
* is allocated. These contexts have lower dynamic memory footprint than a full context.
|
||||
*
|
||||
* The entire library can be made decompress-only by building the files with the define
|
||||
* ASTCENC_DECOMPRESS_ONLY set. In this build the context will be smaller, and the library will
|
||||
* exclude the functionality which is only needed for compression. This reduces the binary size by
|
||||
* ~180KB. For these builds contexts must be created with the ASTCENC_FLG_DECOMPRESS_ONLY flag.
|
||||
*
|
||||
* Note that context structures returned by a library built as decompress-only are incompatible with
|
||||
* a library built with compression included, and visa versa, as they have different sizes and
|
||||
* memory layout.
|
||||
*
|
||||
* Self-decompress-only usage
|
||||
* ==========================
|
||||
*
|
||||
* ASTC is a complex format with a large search space. The parts of this search space that are
|
||||
* searched is determined by heuristics that are, in part, tied to the quality level used when
|
||||
* creating the context.
|
||||
*
|
||||
* A normal context is capable of decompressing any ASTC texture, including those generated by other
|
||||
* compressors with unknown heuristics. This is the most flexible implementation, but forces the
|
||||
* data tables used by the codec to include entries that are not needed during compression. This
|
||||
* can slow down context creation by a significant amount, especially for the faster compression
|
||||
* modes where few data table entries are actually used. To optimize this use case the context can
|
||||
* be created with the ASTCENC_FLG_SELF_DECOMPRESS_ONLY flag. This tells the compressor that it will
|
||||
* only be asked to decompress images that it compressed itself, allowing the data tables to
|
||||
* exclude entries that are not needed by the current compression configuration. This reduces the
|
||||
* size of the context data tables in memory and improves context creation performance. Note that,
|
||||
* as of the 3.6 release, this flag no longer affects compression performance.
|
||||
*
|
||||
* Using this flag while attempting to decompress an valid image which was created by another
|
||||
* compressor, or even another astcenc compressor version or configuration, may result in blocks
|
||||
* returning as solid magenta or NaN value error blocks.
|
||||
*/
|
||||
|
||||
#ifndef ASTCENC_INCLUDED
|
||||
#define ASTCENC_INCLUDED
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#if defined(ASTCENC_DYNAMIC_LIBRARY)
|
||||
#if defined(_MSC_VER)
|
||||
#define ASTCENC_PUBLIC extern "C" __declspec(dllexport)
|
||||
#else
|
||||
#define ASTCENC_PUBLIC extern "C" __attribute__ ((visibility ("default")))
|
||||
#endif
|
||||
#else
|
||||
#define ASTCENC_PUBLIC
|
||||
#endif
|
||||
|
||||
/* ============================================================================
|
||||
Data declarations
|
||||
============================================================================ */
|
||||
|
||||
/**
|
||||
* @brief An opaque structure; see astcenc_internal.h for definition.
|
||||
*/
|
||||
struct astcenc_context;
|
||||
|
||||
/**
|
||||
* @brief A codec API error code.
|
||||
*/
|
||||
enum astcenc_error {
|
||||
/** @brief The call was successful. */
|
||||
ASTCENC_SUCCESS = 0,
|
||||
/** @brief The call failed due to low memory, or undersized I/O buffers. */
|
||||
ASTCENC_ERR_OUT_OF_MEM,
|
||||
/** @brief The call failed due to the build using fast math. */
|
||||
ASTCENC_ERR_BAD_CPU_FLOAT,
|
||||
/** @brief The call failed due to an out-of-spec parameter. */
|
||||
ASTCENC_ERR_BAD_PARAM,
|
||||
/** @brief The call failed due to an out-of-spec block size. */
|
||||
ASTCENC_ERR_BAD_BLOCK_SIZE,
|
||||
/** @brief The call failed due to an out-of-spec color profile. */
|
||||
ASTCENC_ERR_BAD_PROFILE,
|
||||
/** @brief The call failed due to an out-of-spec quality value. */
|
||||
ASTCENC_ERR_BAD_QUALITY,
|
||||
/** @brief The call failed due to an out-of-spec component swizzle. */
|
||||
ASTCENC_ERR_BAD_SWIZZLE,
|
||||
/** @brief The call failed due to an out-of-spec flag set. */
|
||||
ASTCENC_ERR_BAD_FLAGS,
|
||||
/** @brief The call failed due to the context not supporting the operation. */
|
||||
ASTCENC_ERR_BAD_CONTEXT,
|
||||
/** @brief The call failed due to unimplemented functionality. */
|
||||
ASTCENC_ERR_NOT_IMPLEMENTED,
|
||||
/** @brief The call failed due to an out-of-spec decode mode flag set. */
|
||||
ASTCENC_ERR_BAD_DECODE_MODE,
|
||||
#if defined(ASTCENC_DIAGNOSTICS)
|
||||
/** @brief The call failed due to an issue with diagnostic tracing. */
|
||||
ASTCENC_ERR_DTRACE_FAILURE,
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A codec color profile.
|
||||
*/
|
||||
enum astcenc_profile {
|
||||
/** @brief The LDR sRGB color profile. */
|
||||
ASTCENC_PRF_LDR_SRGB = 0,
|
||||
/** @brief The LDR linear color profile. */
|
||||
ASTCENC_PRF_LDR,
|
||||
/** @brief The HDR RGB with LDR alpha color profile. */
|
||||
ASTCENC_PRF_HDR_RGB_LDR_A,
|
||||
/** @brief The HDR RGBA color profile. */
|
||||
ASTCENC_PRF_HDR
|
||||
};
|
||||
|
||||
/** @brief The fastest, lowest quality, search preset. */
|
||||
static const float ASTCENC_PRE_FASTEST = 0.0f;
|
||||
|
||||
/** @brief The fast search preset. */
|
||||
static const float ASTCENC_PRE_FAST = 10.0f;
|
||||
|
||||
/** @brief The medium quality search preset. */
|
||||
static const float ASTCENC_PRE_MEDIUM = 60.0f;
|
||||
|
||||
/** @brief The thorough quality search preset. */
|
||||
static const float ASTCENC_PRE_THOROUGH = 98.0f;
|
||||
|
||||
/** @brief The thorough quality search preset. */
|
||||
static const float ASTCENC_PRE_VERYTHOROUGH = 99.0f;
|
||||
|
||||
/** @brief The exhaustive, highest quality, search preset. */
|
||||
static const float ASTCENC_PRE_EXHAUSTIVE = 100.0f;
|
||||
|
||||
/**
|
||||
* @brief A codec component swizzle selector.
|
||||
*/
|
||||
enum astcenc_swz
|
||||
{
|
||||
/** @brief Select the red component. */
|
||||
ASTCENC_SWZ_R = 0,
|
||||
/** @brief Select the green component. */
|
||||
ASTCENC_SWZ_G = 1,
|
||||
/** @brief Select the blue component. */
|
||||
ASTCENC_SWZ_B = 2,
|
||||
/** @brief Select the alpha component. */
|
||||
ASTCENC_SWZ_A = 3,
|
||||
/** @brief Use a constant zero component. */
|
||||
ASTCENC_SWZ_0 = 4,
|
||||
/** @brief Use a constant one component. */
|
||||
ASTCENC_SWZ_1 = 5,
|
||||
/** @brief Use a reconstructed normal vector Z component. */
|
||||
ASTCENC_SWZ_Z = 6
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A texel component swizzle.
|
||||
*/
|
||||
struct astcenc_swizzle
|
||||
{
|
||||
/** @brief The red component selector. */
|
||||
astcenc_swz r;
|
||||
/** @brief The green component selector. */
|
||||
astcenc_swz g;
|
||||
/** @brief The blue component selector. */
|
||||
astcenc_swz b;
|
||||
/** @brief The alpha component selector. */
|
||||
astcenc_swz a;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A texel component data format.
|
||||
*/
|
||||
enum astcenc_type
|
||||
{
|
||||
/** @brief Unorm 8-bit data per component. */
|
||||
ASTCENC_TYPE_U8 = 0,
|
||||
/** @brief 16-bit float per component. */
|
||||
ASTCENC_TYPE_F16 = 1,
|
||||
/** @brief 32-bit float per component. */
|
||||
ASTCENC_TYPE_F32 = 2
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Function pointer type for compression progress reporting callback.
|
||||
*/
|
||||
extern "C" typedef void (*astcenc_progress_callback)(float);
|
||||
|
||||
/**
|
||||
* @brief Enable normal map compression.
|
||||
*
|
||||
* Input data will be treated a two component normal map, storing X and Y, and the codec will
|
||||
* optimize for angular error rather than simple linear PSNR. In this mode the input swizzle should
|
||||
* be e.g. rrrg (the default ordering for ASTC normals on the command line) or gggr (the ordering
|
||||
* used by BC5n).
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0;
|
||||
|
||||
/**
|
||||
* @brief Enable compression heuristics that assume use of decode_unorm8 decode mode.
|
||||
*
|
||||
* The decode_unorm8 decode mode rounds differently to the decode_fp16 decode mode, so enabling this
|
||||
* flag during compression will allow the compressor to use the correct rounding when selecting
|
||||
* encodings. This will improve the compressed image quality if your application is using the
|
||||
* decode_unorm8 decode mode, but will reduce image quality if using decode_fp16.
|
||||
*
|
||||
* Note that LDR_SRGB images will always use decode_unorm8 for the RGB channels, irrespective of
|
||||
* this setting.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1;
|
||||
|
||||
/**
|
||||
* @brief Enable alpha weighting.
|
||||
*
|
||||
* The input alpha value is used for transparency, so errors in the RGB components are weighted by
|
||||
* the transparency level. This allows the codec to more accurately encode the alpha value in areas
|
||||
* where the color value is less significant.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2;
|
||||
|
||||
/**
|
||||
* @brief Enable perceptual error metrics.
|
||||
*
|
||||
* This mode enables perceptual compression mode, which will optimize for perceptual error rather
|
||||
* than best PSNR. Only some input modes support perceptual error metrics.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3;
|
||||
|
||||
/**
|
||||
* @brief Create a decompression-only context.
|
||||
*
|
||||
* This mode disables support for compression. This enables context allocation to skip some
|
||||
* transient buffer allocation, resulting in lower memory usage.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4;
|
||||
|
||||
/**
|
||||
* @brief Create a self-decompression context.
|
||||
*
|
||||
* This mode configures the compressor so that it is only guaranteed to be able to decompress images
|
||||
* that were actually created using the current context. This is the common case for compression use
|
||||
* cases, and setting this flag enables additional optimizations, but does mean that the context
|
||||
* cannot reliably decompress arbitrary ASTC images.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5;
|
||||
|
||||
/**
|
||||
* @brief Enable RGBM map compression.
|
||||
*
|
||||
* Input data will be treated as HDR data that has been stored in an LDR RGBM-encoded wrapper
|
||||
* format. Data must be preprocessed by the user to be in LDR RGBM format before calling the
|
||||
* compression function, this flag is only used to control the use of RGBM-specific heuristics and
|
||||
* error metrics.
|
||||
*
|
||||
* IMPORTANT: The ASTC format is prone to bad failure modes with unconstrained RGBM data; very small
|
||||
* M values can round to zero due to quantization and result in black or white pixels. It is highly
|
||||
* recommended that the minimum value of M used in the encoding is kept above a lower threshold (try
|
||||
* 16 or 32). Applying this threshold reduces the number of very dark colors that can be
|
||||
* represented, but is still higher precision than 8-bit LDR.
|
||||
*
|
||||
* When this flag is set the value of @c rgbm_m_scale in the context must be set to the RGBM scale
|
||||
* factor used during reconstruction. This defaults to 5 when in RGBM mode.
|
||||
*
|
||||
* It is recommended that the value of @c cw_a_weight is set to twice the value of the multiplier
|
||||
* scale, ensuring that the M value is accurately encoded. This defaults to 10 when in RGBM mode,
|
||||
* matching the default scale factor.
|
||||
*/
|
||||
static const unsigned int ASTCENC_FLG_MAP_RGBM = 1 << 6;
|
||||
|
||||
/**
|
||||
* @brief The bit mask of all valid flags.
|
||||
*/
|
||||
static const unsigned int ASTCENC_ALL_FLAGS =
|
||||
ASTCENC_FLG_MAP_NORMAL |
|
||||
ASTCENC_FLG_MAP_RGBM |
|
||||
ASTCENC_FLG_USE_ALPHA_WEIGHT |
|
||||
ASTCENC_FLG_USE_PERCEPTUAL |
|
||||
ASTCENC_FLG_USE_DECODE_UNORM8 |
|
||||
ASTCENC_FLG_DECOMPRESS_ONLY |
|
||||
ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
|
||||
|
||||
/**
|
||||
* @brief The config structure.
|
||||
*
|
||||
* This structure will initially be populated by a call to astcenc_config_init, but power users may
|
||||
* modify it before calling astcenc_context_alloc. See astcenccli_toplevel_help.cpp for full user
|
||||
* documentation of the power-user settings.
|
||||
*
|
||||
* Note for any settings which are associated with a specific color component, the value in the
|
||||
* config applies to the component that exists after any compression data swizzle is applied.
|
||||
*/
|
||||
struct astcenc_config
{
	/** @brief The color profile. */
	astcenc_profile profile;

	/** @brief The set of enabled @c ASTCENC_FLG_* flag bits. */
	unsigned int flags;

	/** @brief The ASTC block size X dimension. */
	unsigned int block_x;

	/** @brief The ASTC block size Y dimension. */
	unsigned int block_y;

	/** @brief The ASTC block size Z dimension. */
	unsigned int block_z;

	/** @brief The red component weight scale for error weighting (-cw). */
	float cw_r_weight;

	/** @brief The green component weight scale for error weighting (-cw). */
	float cw_g_weight;

	/** @brief The blue component weight scale for error weighting (-cw). */
	float cw_b_weight;

	/** @brief The alpha component weight scale for error weighting (-cw). */
	float cw_a_weight;

	/**
	 * @brief The radius for any alpha-weight scaling (-a).
	 *
	 * It is recommended that this is set to 1 when using FLG_USE_ALPHA_WEIGHT on a texture that
	 * will be sampled using linear texture filtering to minimize color bleed out of transparent
	 * texels that are adjacent to non-transparent texels.
	 */
	unsigned int a_scale_radius;

	/** @brief The RGBM scale factor for the shared multiplier (-rgbm). */
	float rgbm_m_scale;

	/**
	 * @brief The maximum number of partitions searched (-partitioncountlimit).
	 *
	 * Valid values are between 1 and 4.
	 */
	unsigned int tune_partition_count_limit;

	/**
	 * @brief The maximum number of 2-partition indices searched (-2partitionindexlimit).
	 *
	 * Valid values are between 1 and 1024.
	 */
	unsigned int tune_2partition_index_limit;

	/**
	 * @brief The maximum number of 3-partition indices searched (-3partitionindexlimit).
	 *
	 * Valid values are between 1 and 1024.
	 */
	unsigned int tune_3partition_index_limit;

	/**
	 * @brief The maximum number of 4-partition indices searched (-4partitionindexlimit).
	 *
	 * Valid values are between 1 and 1024.
	 */
	unsigned int tune_4partition_index_limit;

	/**
	 * @brief The maximum centile for block modes searched (-blockmodelimit).
	 *
	 * Valid values are between 1 and 100.
	 */
	unsigned int tune_block_mode_limit;

	/**
	 * @brief The maximum iterative refinements applied (-refinementlimit).
	 *
	 * Valid values are between 1 and N; there is no technical upper limit
	 * but little benefit is expected after N=4.
	 */
	unsigned int tune_refinement_limit;

	/**
	 * @brief The number of trial candidates per mode search (-candidatelimit).
	 *
	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES.
	 */
	unsigned int tune_candidate_limit;

	/**
	 * @brief The number of trial partitionings per search (-2partitioncandidatelimit).
	 *
	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
	 */
	unsigned int tune_2partitioning_candidate_limit;

	/**
	 * @brief The number of trial partitionings per search (-3partitioncandidatelimit).
	 *
	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
	 */
	unsigned int tune_3partitioning_candidate_limit;

	/**
	 * @brief The number of trial partitionings per search (-4partitioncandidatelimit).
	 *
	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
	 */
	unsigned int tune_4partitioning_candidate_limit;

	/**
	 * @brief The dB threshold for stopping block search (-dblimit).
	 *
	 * This option is ineffective for HDR textures.
	 */
	float tune_db_limit;

	/**
	 * @brief The amount of MSE overshoot needed to early-out trials.
	 *
	 * The first early-out is for 1 partition, 1 plane trials, where we try a minimal encode using
	 * the high probability block modes. This can short-cut compression for simple blocks.
	 *
	 * The second early-out is for refinement trials, where we can exit refinement once quality is
	 * reached.
	 */
	float tune_mse_overshoot;

	/**
	 * @brief The threshold for skipping 3.1/4.1 trials (-2partitionlimitfactor).
	 *
	 * This option is further scaled for normal maps, so it skips less often.
	 */
	float tune_2partition_early_out_limit_factor;

	/**
	 * @brief The threshold for skipping 4.1 trials (-3partitionlimitfactor).
	 *
	 * This option is further scaled for normal maps, so it skips less often.
	 */
	float tune_3partition_early_out_limit_factor;

	/**
	 * @brief The threshold for skipping two weight planes (-2planelimitcorrelation).
	 *
	 * This option is ineffective for normal maps.
	 */
	float tune_2plane_early_out_limit_correlation;

	/**
	 * @brief The config enable for the mode0 fast-path search.
	 *
	 * If this is set to TUNE_MIN_TEXELS_MODE0 or higher then the early-out fast mode0
	 * search is enabled. This option is ineffective for 3D block sizes.
	 */
	float tune_search_mode0_enable;

	/**
	 * @brief The progress callback, can be @c nullptr.
	 *
	 * If this is specified the codec will periodically report progress for
	 * compression as a percentage between 0 and 100. The callback is called from one
	 * of the compressor threads, so doing significant work in the callback will
	 * reduce compression performance.
	 */
	astcenc_progress_callback progress_callback;

#if defined(ASTCENC_DIAGNOSTICS)
	/**
	 * @brief The path to save the diagnostic trace data to.
	 *
	 * This option is not part of the public API, and requires special builds
	 * of the library.
	 */
	const char* trace_file_path;
#endif
};
|
||||
|
||||
/**
 * @brief An uncompressed 2D or 3D image.
 *
 * 3D images are passed in as an array of 2D slices. Each slice has identical
 * size and color format.
 */
struct astcenc_image
{
	/** @brief The X dimension of the image, in texels. */
	unsigned int dim_x;

	/** @brief The Y dimension of the image, in texels. */
	unsigned int dim_y;

	/** @brief The Z dimension of the image, in texels. */
	unsigned int dim_z;

	/** @brief The data type per component. */
	astcenc_type data_type;

	/** @brief The array of 2D slices, of length @c dim_z. */
	void** data;
};
|
||||
|
||||
/**
 * @brief A block encoding metadata query result.
 *
 * If the block is an error block or a constant color block, all fields other than
 * the profile, block dimensions, and error/constant indicator will be zero.
 */
struct astcenc_block_info
{
	/** @brief The block encoding color profile. */
	astcenc_profile profile;

	/** @brief The number of texels in the X dimension. */
	unsigned int block_x;

	/** @brief The number of texels in the Y dimension. */
	unsigned int block_y;

	/** @brief The number of texels in the Z dimension. */
	unsigned int block_z;

	/** @brief The number of texels in the block. */
	unsigned int texel_count;

	/** @brief True if this block is an error block. */
	bool is_error_block;

	/** @brief True if this block is a constant color block. */
	bool is_constant_block;

	/** @brief True if this block is an HDR block. */
	bool is_hdr_block;

	/** @brief True if this block uses two weight planes. */
	bool is_dual_plane_block;

	/** @brief The number of partitions if not constant color. */
	unsigned int partition_count;

	/** @brief The partition index if 2 - 4 partitions used. */
	unsigned int partition_index;

	/** @brief The component index of the second plane if dual plane. */
	unsigned int dual_plane_component;

	/** @brief The color endpoint encoding mode for each partition. */
	unsigned int color_endpoint_modes[4];

	/** @brief The number of color endpoint quantization levels. */
	unsigned int color_level_count;

	/** @brief The number of weight quantization levels. */
	unsigned int weight_level_count;

	/** @brief The number of weights in the X dimension. */
	unsigned int weight_x;

	/** @brief The number of weights in the Y dimension. */
	unsigned int weight_y;

	/** @brief The number of weights in the Z dimension. */
	unsigned int weight_z;

	/** @brief The unpacked color endpoints for each partition. */
	float color_endpoints[4][2][4];

	/** @brief The per-texel interpolation weights for plane 1 of the block. */
	float weight_values_plane1[216];

	/** @brief The per-texel interpolation weights for plane 2 of the block. */
	float weight_values_plane2[216];

	/** @brief The per-texel partition assignments for the block. */
	uint8_t partition_assignment[216];
};
|
||||
|
||||
/**
 * @brief Populate a codec config based on default settings.
 *
 * Power users can edit the returned config struct to fine tune before allocating the context.
 *
 * @param      profile   Color profile.
 * @param      block_x   ASTC block size X dimension.
 * @param      block_y   ASTC block size Y dimension.
 * @param      block_z   ASTC block size Z dimension.
 * @param      quality   Search quality preset / effort level. Either an
 *                       @c ASTCENC_PRE_* value, or an effort level between 0
 *                       and 100. Performance is not linear between 0 and 100.
 * @param      flags     A valid set of @c ASTCENC_FLG_* flag bits.
 * @param[out] config    Output config struct to populate.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if the inputs are invalid
 *         either individually, or in combination.
 */
ASTCENC_PUBLIC astcenc_error astcenc_config_init(
	astcenc_profile profile,
	unsigned int block_x,
	unsigned int block_y,
	unsigned int block_z,
	float quality,
	unsigned int flags,
	astcenc_config* config);
|
||||
|
||||
/**
 * @brief Allocate a new codec context based on a config.
 *
 * This function allocates all of the memory resources and threads needed by the codec. This can be
 * slow, so it is recommended that contexts are reused to serially compress or decompress multiple
 * images to amortize setup cost.
 *
 * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY
 * flag when creating the configuration. The compression functions will fail if invoked. For a
 * decompress-only library build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating
 * any context.
 *
 * @param[in]  config         Codec config.
 * @param      thread_count   Thread count to configure for.
 * @param[out] context        Location to store an opaque context pointer.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if context creation failed.
 */
ASTCENC_PUBLIC astcenc_error astcenc_context_alloc(
	const astcenc_config* config,
	unsigned int thread_count,
	astcenc_context** context);
|
||||
|
||||
/**
 * @brief Compress an image.
 *
 * A single context can only compress or decompress a single image at a time.
 *
 * For a context configured for multi-threading, any set of the N threads can call this function.
 * Work will be dynamically scheduled across the threads available. Each thread must have a unique
 * @c thread_index.
 *
 * @param         context        Codec context.
 * @param[in,out] image          An input image, in 2D slices.
 * @param         swizzle        Compression data swizzle, applied before compression.
 * @param[out]    data_out       Pointer to output data array.
 * @param         data_len       Length of the output data array.
 * @param         thread_index   Thread index [0..N-1] of calling thread.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if compression failed.
 */
ASTCENC_PUBLIC astcenc_error astcenc_compress_image(
	astcenc_context* context,
	astcenc_image* image,
	const astcenc_swizzle* swizzle,
	uint8_t* data_out,
	size_t data_len,
	unsigned int thread_index);
|
||||
|
||||
/**
 * @brief Reset the codec state for a new compression.
 *
 * The caller is responsible for synchronizing threads in the worker thread pool. This function must
 * only be called when all threads have exited the @c astcenc_compress_image() function for image N,
 * but before any thread enters it for image N + 1.
 *
 * Calling this is not required (but won't hurt), if the context is created for single threaded use.
 *
 * @param context   Codec context.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if reset failed.
 */
ASTCENC_PUBLIC astcenc_error astcenc_compress_reset(
	astcenc_context* context);
|
||||
|
||||
/**
 * @brief Decompress an image.
 *
 * @param         context        Codec context.
 * @param[in]     data           Pointer to compressed data.
 * @param         data_len       Length of the compressed data, in bytes.
 * @param[in,out] image_out      Output image.
 * @param         swizzle        Decompression data swizzle, applied after decompression.
 * @param         thread_index   Thread index [0..N-1] of calling thread.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if decompression failed.
 */
ASTCENC_PUBLIC astcenc_error astcenc_decompress_image(
	astcenc_context* context,
	const uint8_t* data,
	size_t data_len,
	astcenc_image* image_out,
	const astcenc_swizzle* swizzle,
	unsigned int thread_index);
|
||||
|
||||
/**
 * @brief Reset the codec state for a new decompression.
 *
 * The caller is responsible for synchronizing threads in the worker thread pool. This function must
 * only be called when all threads have exited the @c astcenc_decompress_image() function for image
 * N, but before any thread enters it for image N + 1.
 *
 * Calling this is not required (but won't hurt), if the context is created for single threaded use.
 *
 * @param context   Codec context.
 *
 * @return @c ASTCENC_SUCCESS on success, or an error if reset failed.
 */
ASTCENC_PUBLIC astcenc_error astcenc_decompress_reset(
	astcenc_context* context);
|
||||
|
||||
/**
 * @brief Free the compressor context.
 *
 * @param context   The codec context.
 */
ASTCENC_PUBLIC void astcenc_context_free(
	astcenc_context* context);
|
||||
|
||||
/**
 * @brief Provide a high level summary of a block's encoding.
 *
 * This feature is primarily useful for codec developers but may be useful for developers building
 * advanced content packaging pipelines.
 *
 * @param context   Codec context.
 * @param data      One block of compressed ASTC data.
 * @param info      The output info structure to populate.
 *
 * @return @c ASTCENC_SUCCESS if the block was decoded, or an error otherwise. Note that this
 *         function will return success even if the block itself was an error block encoding, as the
 *         decode was correctly handled.
 */
ASTCENC_PUBLIC astcenc_error astcenc_get_block_info(
	astcenc_context* context,
	const uint8_t data[16],
	astcenc_block_info* info);
|
||||
|
||||
/**
 * @brief Get a printable string for a specific status code.
 *
 * @param status   The status value.
 *
 * @return A human readable nul-terminated string.
 */
ASTCENC_PUBLIC const char* astcenc_get_error_string(
	astcenc_error status);
|
||||
|
||||
#endif
|
||||
948
engine/thirdparty/astcenc/astcenc_averages_and_directions.cpp
vendored
Normal file
948
engine/thirdparty/astcenc/astcenc_averages_and_directions.cpp
vendored
Normal file
|
|
@ -0,0 +1,948 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for finding dominant direction of a set of colors.
|
||||
*/
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/**
 * @brief Compute the average RGB color of each partition.
 *
 * The algorithm here uses a vectorized sequential scan and per-partition
 * color accumulators, using select() to mask texel lanes in other partitions.
 *
 * We only accumulate sums for N-1 partitions during the scan; the value for
 * the last partition can be computed given that we know the block-wide average
 * already.
 *
 * Because of this we could reduce the loop iteration count so it "just" spans
 * the max texel index needed for the N-1 partitions, which could need fewer
 * iterations than the full block texel count. However, this makes the loop
 * count erratic and causes more branch mispredictions so is a net loss.
 *
 * @param      pi         The partitioning to use.
 * @param      blk        The block data to process.
 * @param[out] averages   The output averages. Unused partition indices will
 *                        not be initialized, and lane<3> will be zero.
 */
static void compute_partition_averages_rgb(
	const partition_info& pi,
	const image_block& blk,
	vfloat4 averages[BLOCK_MAX_PARTITIONS]
) {
	unsigned int partition_count = pi.partition_count;
	unsigned int texel_count = blk.texel_count;
	promise(texel_count > 0);

	// For 1 partition just use the precomputed mean
	if (partition_count == 1)
	{
		averages[0] = blk.data_mean.swz<0, 1, 2>();
	}
	// For 2 partitions scan results for partition 0, compute partition 1
	else if (partition_count == 2)
	{
		// Per-component accumulators for partition 0 only
		vfloatacc pp_avg_rgb[3] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgb[0], data_r, p0_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgb[1], data_g, p0_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgb[2], data_b, p0_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean.swz<0, 1, 2>() * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat3(hadd_s(pp_avg_rgb[0]),
		                           hadd_s(pp_avg_rgb[1]),
		                           hadd_s(pp_avg_rgb[2]));

		// Partition 1 total is whatever remains of the block total
		vfloat4 p1_total = block_total - p0_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
	}
	// For 3 partitions scan results for partition 0/1, compute partition 2
	else if (partition_count == 3)
	{
		// Per-partition, per-component accumulators for partitions 0 and 1
		vfloatacc pp_avg_rgb[2][3] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));
			vmask p1_mask = lane_mask & (texel_partition == vint(1));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgb[0][0], data_r, p0_mask);
			haccumulate(pp_avg_rgb[1][0], data_r, p1_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgb[0][1], data_g, p0_mask);
			haccumulate(pp_avg_rgb[1][1], data_g, p1_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgb[0][2], data_b, p0_mask);
			haccumulate(pp_avg_rgb[1][2], data_b, p1_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean.swz<0, 1, 2>() * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat3(hadd_s(pp_avg_rgb[0][0]),
		                           hadd_s(pp_avg_rgb[0][1]),
		                           hadd_s(pp_avg_rgb[0][2]));

		vfloat4 p1_total = vfloat3(hadd_s(pp_avg_rgb[1][0]),
		                           hadd_s(pp_avg_rgb[1][1]),
		                           hadd_s(pp_avg_rgb[1][2]));

		// Partition 2 total is whatever remains of the block total
		vfloat4 p2_total = block_total - p0_total - p1_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
		averages[2] = p2_total / static_cast<float>(pi.partition_texel_count[2]);
	}
	else
	{
		// For 4 partitions scan results for partition 0/1/2, compute partition 3
		vfloatacc pp_avg_rgb[3][3] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));
			vmask p1_mask = lane_mask & (texel_partition == vint(1));
			vmask p2_mask = lane_mask & (texel_partition == vint(2));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgb[0][0], data_r, p0_mask);
			haccumulate(pp_avg_rgb[1][0], data_r, p1_mask);
			haccumulate(pp_avg_rgb[2][0], data_r, p2_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgb[0][1], data_g, p0_mask);
			haccumulate(pp_avg_rgb[1][1], data_g, p1_mask);
			haccumulate(pp_avg_rgb[2][1], data_g, p2_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgb[0][2], data_b, p0_mask);
			haccumulate(pp_avg_rgb[1][2], data_b, p1_mask);
			haccumulate(pp_avg_rgb[2][2], data_b, p2_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean.swz<0, 1, 2>() * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat3(hadd_s(pp_avg_rgb[0][0]),
		                           hadd_s(pp_avg_rgb[0][1]),
		                           hadd_s(pp_avg_rgb[0][2]));

		vfloat4 p1_total = vfloat3(hadd_s(pp_avg_rgb[1][0]),
		                           hadd_s(pp_avg_rgb[1][1]),
		                           hadd_s(pp_avg_rgb[1][2]));

		vfloat4 p2_total = vfloat3(hadd_s(pp_avg_rgb[2][0]),
		                           hadd_s(pp_avg_rgb[2][1]),
		                           hadd_s(pp_avg_rgb[2][2]));

		// Partition 3 total is whatever remains of the block total
		vfloat4 p3_total = block_total - p0_total - p1_total- p2_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
		averages[2] = p2_total / static_cast<float>(pi.partition_texel_count[2]);
		averages[3] = p3_total / static_cast<float>(pi.partition_texel_count[3]);
	}
}
|
||||
|
||||
/**
 * @brief Compute the average RGBA color of each partition.
 *
 * The algorithm here uses a vectorized sequential scan and per-partition
 * color accumulators, using select() to mask texel lanes in other partitions.
 *
 * We only accumulate sums for N-1 partitions during the scan; the value for
 * the last partition can be computed given that we know the block-wide average
 * already.
 *
 * Because of this we could reduce the loop iteration count so it "just" spans
 * the max texel index needed for the N-1 partitions, which could need fewer
 * iterations than the full block texel count. However, this makes the loop
 * count erratic and causes more branch mispredictions so is a net loss.
 *
 * @param      pi         The partitioning to use.
 * @param      blk        The block data to process.
 * @param[out] averages   The output averages. Unused partition indices will
 *                        not be initialized.
 */
static void compute_partition_averages_rgba(
	const partition_info& pi,
	const image_block& blk,
	vfloat4 averages[BLOCK_MAX_PARTITIONS]
) {
	unsigned int partition_count = pi.partition_count;
	unsigned int texel_count = blk.texel_count;
	promise(texel_count > 0);

	// For 1 partition just use the precomputed mean
	if (partition_count == 1)
	{
		averages[0] = blk.data_mean;
	}
	// For 2 partitions scan results for partition 0, compute partition 1
	else if (partition_count == 2)
	{
		// Per-component accumulators for partition 0 only
		vfloat4 pp_avg_rgba[4] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgba[0], data_r, p0_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgba[1], data_g, p0_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgba[2], data_b, p0_mask);

			vfloat data_a = loada(blk.data_a + i);
			haccumulate(pp_avg_rgba[3], data_a, p0_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat4(hadd_s(pp_avg_rgba[0]),
		                           hadd_s(pp_avg_rgba[1]),
		                           hadd_s(pp_avg_rgba[2]),
		                           hadd_s(pp_avg_rgba[3]));

		// Partition 1 total is whatever remains of the block total
		vfloat4 p1_total = block_total - p0_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
	}
	// For 3 partitions scan results for partition 0/1, compute partition 2
	else if (partition_count == 3)
	{
		// Per-partition, per-component accumulators for partitions 0 and 1
		vfloat4 pp_avg_rgba[2][4] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));
			vmask p1_mask = lane_mask & (texel_partition == vint(1));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgba[0][0], data_r, p0_mask);
			haccumulate(pp_avg_rgba[1][0], data_r, p1_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgba[0][1], data_g, p0_mask);
			haccumulate(pp_avg_rgba[1][1], data_g, p1_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgba[0][2], data_b, p0_mask);
			haccumulate(pp_avg_rgba[1][2], data_b, p1_mask);

			vfloat data_a = loada(blk.data_a + i);
			haccumulate(pp_avg_rgba[0][3], data_a, p0_mask);
			haccumulate(pp_avg_rgba[1][3], data_a, p1_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat4(hadd_s(pp_avg_rgba[0][0]),
		                           hadd_s(pp_avg_rgba[0][1]),
		                           hadd_s(pp_avg_rgba[0][2]),
		                           hadd_s(pp_avg_rgba[0][3]));

		vfloat4 p1_total = vfloat4(hadd_s(pp_avg_rgba[1][0]),
		                           hadd_s(pp_avg_rgba[1][1]),
		                           hadd_s(pp_avg_rgba[1][2]),
		                           hadd_s(pp_avg_rgba[1][3]));

		// Partition 2 total is whatever remains of the block total
		vfloat4 p2_total = block_total - p0_total - p1_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
		averages[2] = p2_total / static_cast<float>(pi.partition_texel_count[2]);
	}
	else
	{
		// For 4 partitions scan results for partition 0/1/2, compute partition 3
		vfloat4 pp_avg_rgba[3][4] {};

		vint lane_id = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vint texel_partition(pi.partition_of_texel + i);

			// Disable lanes past the end of the block in the final vector
			vmask lane_mask = lane_id < vint(texel_count);
			lane_id += vint(ASTCENC_SIMD_WIDTH);

			vmask p0_mask = lane_mask & (texel_partition == vint(0));
			vmask p1_mask = lane_mask & (texel_partition == vint(1));
			vmask p2_mask = lane_mask & (texel_partition == vint(2));

			vfloat data_r = loada(blk.data_r + i);
			haccumulate(pp_avg_rgba[0][0], data_r, p0_mask);
			haccumulate(pp_avg_rgba[1][0], data_r, p1_mask);
			haccumulate(pp_avg_rgba[2][0], data_r, p2_mask);

			vfloat data_g = loada(blk.data_g + i);
			haccumulate(pp_avg_rgba[0][1], data_g, p0_mask);
			haccumulate(pp_avg_rgba[1][1], data_g, p1_mask);
			haccumulate(pp_avg_rgba[2][1], data_g, p2_mask);

			vfloat data_b = loada(blk.data_b + i);
			haccumulate(pp_avg_rgba[0][2], data_b, p0_mask);
			haccumulate(pp_avg_rgba[1][2], data_b, p1_mask);
			haccumulate(pp_avg_rgba[2][2], data_b, p2_mask);

			vfloat data_a = loada(blk.data_a + i);
			haccumulate(pp_avg_rgba[0][3], data_a, p0_mask);
			haccumulate(pp_avg_rgba[1][3], data_a, p1_mask);
			haccumulate(pp_avg_rgba[2][3], data_a, p2_mask);
		}

		// Reconstruct the block-wide sum from the precomputed mean
		vfloat4 block_total = blk.data_mean * static_cast<float>(blk.texel_count);

		vfloat4 p0_total = vfloat4(hadd_s(pp_avg_rgba[0][0]),
		                           hadd_s(pp_avg_rgba[0][1]),
		                           hadd_s(pp_avg_rgba[0][2]),
		                           hadd_s(pp_avg_rgba[0][3]));

		vfloat4 p1_total = vfloat4(hadd_s(pp_avg_rgba[1][0]),
		                           hadd_s(pp_avg_rgba[1][1]),
		                           hadd_s(pp_avg_rgba[1][2]),
		                           hadd_s(pp_avg_rgba[1][3]));

		vfloat4 p2_total = vfloat4(hadd_s(pp_avg_rgba[2][0]),
		                           hadd_s(pp_avg_rgba[2][1]),
		                           hadd_s(pp_avg_rgba[2][2]),
		                           hadd_s(pp_avg_rgba[2][3]));

		// Partition 3 total is whatever remains of the block total
		vfloat4 p3_total = block_total - p0_total - p1_total- p2_total;

		averages[0] = p0_total / static_cast<float>(pi.partition_texel_count[0]);
		averages[1] = p1_total / static_cast<float>(pi.partition_texel_count[1]);
		averages[2] = p2_total / static_cast<float>(pi.partition_texel_count[2]);
		averages[3] = p3_total / static_cast<float>(pi.partition_texel_count[3]);
	}
}
|
||||
|
||||
/* See header for documentation. */
void compute_avgs_and_dirs_4_comp(
	const partition_info& pi,
	const image_block& blk,
	partition_metrics pm[BLOCK_MAX_PARTITIONS]
) {
	int partition_count = pi.partition_count;
	promise(partition_count > 0);

	// Pre-compute partition_averages
	vfloat4 partition_averages[BLOCK_MAX_PARTITIONS];
	compute_partition_averages_rgba(pi, blk, partition_averages);

	for (int partition = 0; partition < partition_count; partition++)
	{
		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
		unsigned int texel_count = pi.partition_texel_count[partition];
		promise(texel_count > 0);

		vfloat4 average = partition_averages[partition];
		pm[partition].avg = average;

		// Candidate direction accumulators; sum_xp collects mean-relative texels whose
		// component 0 is positive, sum_yp component 1, sum_zp component 2, sum_wp component 3
		vfloat4 sum_xp = vfloat4::zero();
		vfloat4 sum_yp = vfloat4::zero();
		vfloat4 sum_zp = vfloat4::zero();
		vfloat4 sum_wp = vfloat4::zero();

		for (unsigned int i = 0; i < texel_count; i++)
		{
			unsigned int iwt = texel_indexes[i];
			vfloat4 texel_datum = blk.texel(iwt);
			// Center the texel on the partition average
			texel_datum = texel_datum - average;

			vfloat4 zero = vfloat4::zero();

			vmask4 tdm0 = texel_datum.swz<0,0,0,0>() > zero;
			sum_xp += select(zero, texel_datum, tdm0);

			vmask4 tdm1 = texel_datum.swz<1,1,1,1>() > zero;
			sum_yp += select(zero, texel_datum, tdm1);

			vmask4 tdm2 = texel_datum.swz<2,2,2,2>() > zero;
			sum_zp += select(zero, texel_datum, tdm2);

			vmask4 tdm3 = texel_datum.swz<3,3,3,3>() > zero;
			sum_wp += select(zero, texel_datum, tdm3);
		}

		// Squared length of each candidate direction
		vfloat4 prod_xp = dot(sum_xp, sum_xp);
		vfloat4 prod_yp = dot(sum_yp, sum_yp);
		vfloat4 prod_zp = dot(sum_zp, sum_zp);
		vfloat4 prod_wp = dot(sum_wp, sum_wp);

		// Keep the candidate with the largest squared length as the dominant direction
		vfloat4 best_vector = sum_xp;
		vfloat4 best_sum = prod_xp;

		vmask4 mask = prod_yp > best_sum;
		best_vector = select(best_vector, sum_yp, mask);
		best_sum = select(best_sum, prod_yp, mask);

		mask = prod_zp > best_sum;
		best_vector = select(best_vector, sum_zp, mask);
		best_sum = select(best_sum, prod_zp, mask);

		mask = prod_wp > best_sum;
		best_vector = select(best_vector, sum_wp, mask);

		pm[partition].dir = best_vector;
	}
}
|
||||
|
||||
/* See header for documentation. */
void compute_avgs_and_dirs_3_comp(
	const partition_info& pi,
	const image_block& blk,
	unsigned int omitted_component,
	partition_metrics pm[BLOCK_MAX_PARTITIONS]
) {
	// Pre-compute partition_averages
	vfloat4 partition_averages[BLOCK_MAX_PARTITIONS];
	compute_partition_averages_rgba(pi, blk, partition_averages);

	// Channel pointers for the three components that are kept; defaults
	// assume alpha is the omitted component
	const float* data_vr = blk.data_r;
	const float* data_vg = blk.data_g;
	const float* data_vb = blk.data_b;

	// Swizzle the averages and remap the channel pointers so the three
	// retained components land in lanes 0/1/2, dropping omitted_component
	// TODO: Data-driven permute would be useful to avoid this ...
	if (omitted_component == 0)
	{
		partition_averages[0] = partition_averages[0].swz<1, 2, 3>();
		partition_averages[1] = partition_averages[1].swz<1, 2, 3>();
		partition_averages[2] = partition_averages[2].swz<1, 2, 3>();
		partition_averages[3] = partition_averages[3].swz<1, 2, 3>();

		data_vr = blk.data_g;
		data_vg = blk.data_b;
		data_vb = blk.data_a;
	}
	else if (omitted_component == 1)
	{
		partition_averages[0] = partition_averages[0].swz<0, 2, 3>();
		partition_averages[1] = partition_averages[1].swz<0, 2, 3>();
		partition_averages[2] = partition_averages[2].swz<0, 2, 3>();
		partition_averages[3] = partition_averages[3].swz<0, 2, 3>();

		data_vg = blk.data_b;
		data_vb = blk.data_a;
	}
	else if (omitted_component == 2)
	{
		partition_averages[0] = partition_averages[0].swz<0, 1, 3>();
		partition_averages[1] = partition_averages[1].swz<0, 1, 3>();
		partition_averages[2] = partition_averages[2].swz<0, 1, 3>();
		partition_averages[3] = partition_averages[3].swz<0, 1, 3>();

		data_vb = blk.data_a;
	}
	else
	{
		partition_averages[0] = partition_averages[0].swz<0, 1, 2>();
		partition_averages[1] = partition_averages[1].swz<0, 1, 2>();
		partition_averages[2] = partition_averages[2].swz<0, 1, 2>();
		partition_averages[3] = partition_averages[3].swz<0, 1, 2>();
	}

	unsigned int partition_count = pi.partition_count;
	promise(partition_count > 0);

	for (unsigned int partition = 0; partition < partition_count; partition++)
	{
		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
		unsigned int texel_count = pi.partition_texel_count[partition];
		promise(texel_count > 0);

		vfloat4 average = partition_averages[partition];
		pm[partition].avg = average;

		// Per-component sums of deviations with a positive matching lane;
		// these form the candidate direction vectors
		vfloat4 sum_xp = vfloat4::zero();
		vfloat4 sum_yp = vfloat4::zero();
		vfloat4 sum_zp = vfloat4::zero();

		for (unsigned int i = 0; i < texel_count; i++)
		{
			unsigned int iwt = texel_indexes[i];

			vfloat4 texel_datum = vfloat3(data_vr[iwt],
			                              data_vg[iwt],
			                              data_vb[iwt]);
			texel_datum = texel_datum - average;

			vfloat4 zero = vfloat4::zero();

			vmask4 tdm0 = texel_datum.swz<0,0,0,0>() > zero;
			sum_xp += select(zero, texel_datum, tdm0);

			vmask4 tdm1 = texel_datum.swz<1,1,1,1>() > zero;
			sum_yp += select(zero, texel_datum, tdm1);

			vmask4 tdm2 = texel_datum.swz<2,2,2,2>() > zero;
			sum_zp += select(zero, texel_datum, tdm2);
		}

		// Pick the candidate direction with the largest squared length
		vfloat4 prod_xp = dot(sum_xp, sum_xp);
		vfloat4 prod_yp = dot(sum_yp, sum_yp);
		vfloat4 prod_zp = dot(sum_zp, sum_zp);

		vfloat4 best_vector = sum_xp;
		vfloat4 best_sum = prod_xp;

		vmask4 mask = prod_yp > best_sum;
		best_vector = select(best_vector, sum_yp, mask);
		best_sum = select(best_sum, prod_yp, mask);

		mask = prod_zp > best_sum;
		best_vector = select(best_vector, sum_zp, mask);

		pm[partition].dir = best_vector;
	}
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void compute_avgs_and_dirs_3_comp_rgb(
|
||||
const partition_info& pi,
|
||||
const image_block& blk,
|
||||
partition_metrics pm[BLOCK_MAX_PARTITIONS]
|
||||
) {
|
||||
unsigned int partition_count = pi.partition_count;
|
||||
promise(partition_count > 0);
|
||||
|
||||
// Pre-compute partition_averages
|
||||
vfloat4 partition_averages[BLOCK_MAX_PARTITIONS];
|
||||
compute_partition_averages_rgb(pi, blk, partition_averages);
|
||||
|
||||
for (unsigned int partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *texel_indexes = pi.texels_of_partition[partition];
|
||||
unsigned int texel_count = pi.partition_texel_count[partition];
|
||||
promise(texel_count > 0);
|
||||
|
||||
vfloat4 average = partition_averages[partition];
|
||||
pm[partition].avg = average;
|
||||
|
||||
vfloat4 sum_xp = vfloat4::zero();
|
||||
vfloat4 sum_yp = vfloat4::zero();
|
||||
vfloat4 sum_zp = vfloat4::zero();
|
||||
|
||||
for (unsigned int i = 0; i < texel_count; i++)
|
||||
{
|
||||
unsigned int iwt = texel_indexes[i];
|
||||
|
||||
vfloat4 texel_datum = blk.texel3(iwt);
|
||||
texel_datum = texel_datum - average;
|
||||
|
||||
vfloat4 zero = vfloat4::zero();
|
||||
|
||||
vmask4 tdm0 = texel_datum.swz<0,0,0,0>() > zero;
|
||||
sum_xp += select(zero, texel_datum, tdm0);
|
||||
|
||||
vmask4 tdm1 = texel_datum.swz<1,1,1,1>() > zero;
|
||||
sum_yp += select(zero, texel_datum, tdm1);
|
||||
|
||||
vmask4 tdm2 = texel_datum.swz<2,2,2,2>() > zero;
|
||||
sum_zp += select(zero, texel_datum, tdm2);
|
||||
}
|
||||
|
||||
vfloat4 prod_xp = dot(sum_xp, sum_xp);
|
||||
vfloat4 prod_yp = dot(sum_yp, sum_yp);
|
||||
vfloat4 prod_zp = dot(sum_zp, sum_zp);
|
||||
|
||||
vfloat4 best_vector = sum_xp;
|
||||
vfloat4 best_sum = prod_xp;
|
||||
|
||||
vmask4 mask = prod_yp > best_sum;
|
||||
best_vector = select(best_vector, sum_yp, mask);
|
||||
best_sum = select(best_sum, prod_yp, mask);
|
||||
|
||||
mask = prod_zp > best_sum;
|
||||
best_vector = select(best_vector, sum_zp, mask);
|
||||
|
||||
pm[partition].dir = best_vector;
|
||||
}
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void compute_avgs_and_dirs_2_comp(
|
||||
const partition_info& pt,
|
||||
const image_block& blk,
|
||||
unsigned int component1,
|
||||
unsigned int component2,
|
||||
partition_metrics pm[BLOCK_MAX_PARTITIONS]
|
||||
) {
|
||||
vfloat4 average;
|
||||
|
||||
const float* data_vr = nullptr;
|
||||
const float* data_vg = nullptr;
|
||||
|
||||
if (component1 == 0 && component2 == 1)
|
||||
{
|
||||
average = blk.data_mean.swz<0, 1>();
|
||||
|
||||
data_vr = blk.data_r;
|
||||
data_vg = blk.data_g;
|
||||
}
|
||||
else if (component1 == 0 && component2 == 2)
|
||||
{
|
||||
average = blk.data_mean.swz<0, 2>();
|
||||
|
||||
data_vr = blk.data_r;
|
||||
data_vg = blk.data_b;
|
||||
}
|
||||
else // (component1 == 1 && component2 == 2)
|
||||
{
|
||||
assert(component1 == 1 && component2 == 2);
|
||||
|
||||
average = blk.data_mean.swz<1, 2>();
|
||||
|
||||
data_vr = blk.data_g;
|
||||
data_vg = blk.data_b;
|
||||
}
|
||||
|
||||
unsigned int partition_count = pt.partition_count;
|
||||
promise(partition_count > 0);
|
||||
|
||||
for (unsigned int partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *texel_indexes = pt.texels_of_partition[partition];
|
||||
unsigned int texel_count = pt.partition_texel_count[partition];
|
||||
promise(texel_count > 0);
|
||||
|
||||
// Only compute a partition mean if more than one partition
|
||||
if (partition_count > 1)
|
||||
{
|
||||
average = vfloat4::zero();
|
||||
for (unsigned int i = 0; i < texel_count; i++)
|
||||
{
|
||||
unsigned int iwt = texel_indexes[i];
|
||||
average += vfloat2(data_vr[iwt], data_vg[iwt]);
|
||||
}
|
||||
|
||||
average = average / static_cast<float>(texel_count);
|
||||
}
|
||||
|
||||
pm[partition].avg = average;
|
||||
|
||||
vfloat4 sum_xp = vfloat4::zero();
|
||||
vfloat4 sum_yp = vfloat4::zero();
|
||||
|
||||
for (unsigned int i = 0; i < texel_count; i++)
|
||||
{
|
||||
unsigned int iwt = texel_indexes[i];
|
||||
vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]);
|
||||
texel_datum = texel_datum - average;
|
||||
|
||||
vfloat4 zero = vfloat4::zero();
|
||||
|
||||
vmask4 tdm0 = texel_datum.swz<0,0,0,0>() > zero;
|
||||
sum_xp += select(zero, texel_datum, tdm0);
|
||||
|
||||
vmask4 tdm1 = texel_datum.swz<1,1,1,1>() > zero;
|
||||
sum_yp += select(zero, texel_datum, tdm1);
|
||||
}
|
||||
|
||||
vfloat4 prod_xp = dot(sum_xp, sum_xp);
|
||||
vfloat4 prod_yp = dot(sum_yp, sum_yp);
|
||||
|
||||
vfloat4 best_vector = sum_xp;
|
||||
vfloat4 best_sum = prod_xp;
|
||||
|
||||
vmask4 mask = prod_yp > best_sum;
|
||||
best_vector = select(best_vector, sum_yp, mask);
|
||||
|
||||
pm[partition].dir = best_vector;
|
||||
}
|
||||
}
|
||||
|
||||
/* See header for documentation. */
void compute_error_squared_rgba(
	const partition_info& pi,
	const image_block& blk,
	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
	float line_lengths[BLOCK_MAX_PARTITIONS],
	float& uncor_error,
	float& samec_error
) {
	unsigned int partition_count = pi.partition_count;
	promise(partition_count > 0);

	// Error accumulators shared across all partitions
	vfloatacc uncor_errorsumv = vfloatacc::zero();
	vfloatacc samec_errorsumv = vfloatacc::zero();

	for (unsigned int partition = 0; partition < partition_count; partition++)
	{
		const uint8_t *texel_indexes = pi.texels_of_partition[partition];

		processed_line4 l_uncor = uncor_plines[partition];
		processed_line4 l_samec = samec_plines[partition];

		unsigned int texel_count = pi.partition_texel_count[partition];
		promise(texel_count > 0);

		// Vectorize some useful scalar inputs
		vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
		vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
		vfloat l_uncor_bs2(l_uncor.bs.lane<2>());
		vfloat l_uncor_bs3(l_uncor.bs.lane<3>());

		vfloat l_uncor_amod0(l_uncor.amod.lane<0>());
		vfloat l_uncor_amod1(l_uncor.amod.lane<1>());
		vfloat l_uncor_amod2(l_uncor.amod.lane<2>());
		vfloat l_uncor_amod3(l_uncor.amod.lane<3>());

		vfloat l_samec_bs0(l_samec.bs.lane<0>());
		vfloat l_samec_bs1(l_samec.bs.lane<1>());
		vfloat l_samec_bs2(l_samec.bs.lane<2>());
		vfloat l_samec_bs3(l_samec.bs.lane<3>());

		// The samechroma line is assumed to pass through the origin,
		// which lets the distance computation below skip the amod term
		assert(all(l_samec.amod == vfloat4(0.0f)));

		// Running min/max of the projection parameter along the line
		vfloat uncor_loparamv(1e10f);
		vfloat uncor_hiparamv(-1e10f);

		vfloat ew_r(blk.channel_weight.lane<0>());
		vfloat ew_g(blk.channel_weight.lane<1>());
		vfloat ew_b(blk.channel_weight.lane<2>());
		vfloat ew_a(blk.channel_weight.lane<3>());

		// This implementation over-shoots, but this is safe as we initialize the texel_indexes
		// array to extend the last value. This means min/max are not impacted, but we need to mask
		// out the dummy values when we compute the line weighting.
		vint lane_ids = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vmask mask = lane_ids < vint(texel_count);
			vint texel_idxs(texel_indexes + i);

			vfloat data_r = gatherf(blk.data_r, texel_idxs);
			vfloat data_g = gatherf(blk.data_g, texel_idxs);
			vfloat data_b = gatherf(blk.data_b, texel_idxs);
			vfloat data_a = gatherf(blk.data_a, texel_idxs);

			// Project each texel onto the uncorrelated line
			vfloat uncor_param = (data_r * l_uncor_bs0)
			                   + (data_g * l_uncor_bs1)
			                   + (data_b * l_uncor_bs2)
			                   + (data_a * l_uncor_bs3);

			uncor_loparamv = min(uncor_param, uncor_loparamv);
			uncor_hiparamv = max(uncor_param, uncor_hiparamv);

			// Per-channel distance from the texel to its line projection
			vfloat uncor_dist0 = (l_uncor_amod0 - data_r)
			                   + (uncor_param * l_uncor_bs0);
			vfloat uncor_dist1 = (l_uncor_amod1 - data_g)
			                   + (uncor_param * l_uncor_bs1);
			vfloat uncor_dist2 = (l_uncor_amod2 - data_b)
			                   + (uncor_param * l_uncor_bs2);
			vfloat uncor_dist3 = (l_uncor_amod3 - data_a)
			                   + (uncor_param * l_uncor_bs3);

			// Channel-weighted squared error
			vfloat uncor_err = (ew_r * uncor_dist0 * uncor_dist0)
			                 + (ew_g * uncor_dist1 * uncor_dist1)
			                 + (ew_b * uncor_dist2 * uncor_dist2)
			                 + (ew_a * uncor_dist3 * uncor_dist3);

			haccumulate(uncor_errorsumv, uncor_err, mask);

			// Process samechroma data
			vfloat samec_param = (data_r * l_samec_bs0)
			                   + (data_g * l_samec_bs1)
			                   + (data_b * l_samec_bs2)
			                   + (data_a * l_samec_bs3);

			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
			vfloat samec_dist3 = samec_param * l_samec_bs3 - data_a;

			vfloat samec_err = (ew_r * samec_dist0 * samec_dist0)
			                 + (ew_g * samec_dist1 * samec_dist1)
			                 + (ew_b * samec_dist2 * samec_dist2)
			                 + (ew_a * samec_dist3 * samec_dist3);

			haccumulate(samec_errorsumv, samec_err, mask);

			lane_ids += vint(ASTCENC_SIMD_WIDTH);
		}

		// Turn very small numbers and NaNs into a small number
		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
		line_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
	}

	uncor_error = hadd_s(uncor_errorsumv);
	samec_error = hadd_s(samec_errorsumv);
}
|
||||
|
||||
/* See header for documentation. */
void compute_error_squared_rgb(
	const partition_info& pi,
	const image_block& blk,
	partition_lines3 plines[BLOCK_MAX_PARTITIONS],
	float& uncor_error,
	float& samec_error
) {
	unsigned int partition_count = pi.partition_count;
	promise(partition_count > 0);

	// Error accumulators shared across all partitions
	vfloatacc uncor_errorsumv = vfloatacc::zero();
	vfloatacc samec_errorsumv = vfloatacc::zero();

	for (unsigned int partition = 0; partition < partition_count; partition++)
	{
		partition_lines3& pl = plines[partition];
		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
		unsigned int texel_count = pi.partition_texel_count[partition];
		promise(texel_count > 0);

		processed_line3 l_uncor = pl.uncor_pline;
		processed_line3 l_samec = pl.samec_pline;

		// Vectorize some useful scalar inputs
		vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
		vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
		vfloat l_uncor_bs2(l_uncor.bs.lane<2>());

		vfloat l_uncor_amod0(l_uncor.amod.lane<0>());
		vfloat l_uncor_amod1(l_uncor.amod.lane<1>());
		vfloat l_uncor_amod2(l_uncor.amod.lane<2>());

		vfloat l_samec_bs0(l_samec.bs.lane<0>());
		vfloat l_samec_bs1(l_samec.bs.lane<1>());
		vfloat l_samec_bs2(l_samec.bs.lane<2>());

		// The samechroma line is assumed to pass through the origin,
		// which lets the distance computation below skip the amod term
		assert(all(l_samec.amod == vfloat4(0.0f)));

		// Running min/max of the projection parameter along the line
		vfloat uncor_loparamv(1e10f);
		vfloat uncor_hiparamv(-1e10f);

		vfloat ew_r(blk.channel_weight.lane<0>());
		vfloat ew_g(blk.channel_weight.lane<1>());
		vfloat ew_b(blk.channel_weight.lane<2>());

		// This implementation over-shoots, but this is safe as we initialize the weights array
		// to extend the last value. This means min/max are not impacted, but we need to mask
		// out the dummy values when we compute the line weighting.
		vint lane_ids = vint::lane_id();
		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			vmask mask = lane_ids < vint(texel_count);
			vint texel_idxs(texel_indexes + i);

			vfloat data_r = gatherf(blk.data_r, texel_idxs);
			vfloat data_g = gatherf(blk.data_g, texel_idxs);
			vfloat data_b = gatherf(blk.data_b, texel_idxs);

			// Project each texel onto the uncorrelated line
			vfloat uncor_param = (data_r * l_uncor_bs0)
			                   + (data_g * l_uncor_bs1)
			                   + (data_b * l_uncor_bs2);

			uncor_loparamv = min(uncor_param, uncor_loparamv);
			uncor_hiparamv = max(uncor_param, uncor_hiparamv);

			// Per-channel distance from the texel to its line projection
			vfloat uncor_dist0 = (l_uncor_amod0 - data_r)
			                   + (uncor_param * l_uncor_bs0);
			vfloat uncor_dist1 = (l_uncor_amod1 - data_g)
			                   + (uncor_param * l_uncor_bs1);
			vfloat uncor_dist2 = (l_uncor_amod2 - data_b)
			                   + (uncor_param * l_uncor_bs2);

			// Channel-weighted squared error
			vfloat uncor_err = (ew_r * uncor_dist0 * uncor_dist0)
			                 + (ew_g * uncor_dist1 * uncor_dist1)
			                 + (ew_b * uncor_dist2 * uncor_dist2);

			haccumulate(uncor_errorsumv, uncor_err, mask);

			// Process samechroma data
			vfloat samec_param = (data_r * l_samec_bs0)
			                   + (data_g * l_samec_bs1)
			                   + (data_b * l_samec_bs2);

			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;

			vfloat samec_err = (ew_r * samec_dist0 * samec_dist0)
			                 + (ew_g * samec_dist1 * samec_dist1)
			                 + (ew_b * samec_dist2 * samec_dist2);

			haccumulate(samec_errorsumv, samec_err, mask);

			lane_ids += vint(ASTCENC_SIMD_WIDTH);
		}

		// Turn very small numbers and NaNs into a small number
		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
		pl.line_length = astc::max(uncor_linelen, 1e-7f);
	}

	uncor_error = hadd_s(uncor_errorsumv);
	samec_error = hadd_s(samec_errorsumv);
}
|
||||
|
||||
#endif
|
||||
1184
engine/thirdparty/astcenc/astcenc_block_sizes.cpp
vendored
Normal file
1184
engine/thirdparty/astcenc/astcenc_block_sizes.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
2149
engine/thirdparty/astcenc/astcenc_color_quantize.cpp
vendored
Normal file
2149
engine/thirdparty/astcenc/astcenc_color_quantize.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
948
engine/thirdparty/astcenc/astcenc_color_unquantize.cpp
vendored
Normal file
948
engine/thirdparty/astcenc/astcenc_color_unquantize.cpp
vendored
Normal file
|
|
@ -0,0 +1,948 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#include <utility>
|
||||
|
||||
/**
|
||||
* @brief Functions for color unquantization.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
/**
|
||||
* @brief Un-blue-contract a color.
|
||||
*
|
||||
* This function reverses any applied blue contraction.
|
||||
*
|
||||
* @param input The input color that has been blue-contracted.
|
||||
*
|
||||
* @return The uncontracted color.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vint4 uncontract_color(
|
||||
vint4 input
|
||||
) {
|
||||
vmask4 mask(true, true, false, false);
|
||||
vint4 bc0 = asr<1>(input + input.lane<2>());
|
||||
return select(input, bc0, mask);
|
||||
}
|
||||
|
||||
/* See header for documentation. */
void rgba_delta_unpack(
	vint4 input0,
	vint4 input1,
	vint4& output0,
	vint4& output1
) {
	// Reconstruct the signed deltas split across the two endpoints
	bit_transfer_signed(input1, input0);

	// A negative delta sum signals that blue contraction was applied and
	// the endpoints were stored swapped
	bool swap_endpoints = hadd_rgb_s(input1) < 0;
	input1 = input1 + input0;

	if (swap_endpoints)
	{
		input0 = uncontract_color(input0);
		input1 = uncontract_color(input1);
		std::swap(input0, input1);
	}

	output0 = clamp(0, 255, input0);
	output1 = clamp(0, 255, input1);
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR RGB color that uses delta encoding.
|
||||
*
|
||||
* Output alpha set to 255.
|
||||
*
|
||||
* @param input0 The packed endpoint 0 color.
|
||||
* @param input1 The packed endpoint 1 color deltas.
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void rgb_delta_unpack(
|
||||
vint4 input0,
|
||||
vint4 input1,
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
rgba_delta_unpack(input0, input1, output0, output1);
|
||||
output0.set_lane<3>(255);
|
||||
output1.set_lane<3>(255);
|
||||
}
|
||||
|
||||
/* See header for documentation. */
void rgba_unpack(
	vint4 input0,
	vint4 input1,
	vint4& output0,
	vint4& output1
) {
	// Endpoint 0 having the larger RGB sum signals that blue contraction
	// was applied and the endpoints were stored swapped
	int sum0 = hadd_rgb_s(input0);
	int sum1 = hadd_rgb_s(input1);

	if (sum0 > sum1)
	{
		input0 = uncontract_color(input0);
		input1 = uncontract_color(input1);
		std::swap(input0, input1);
	}

	output0 = input0;
	output1 = input1;
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR RGB color that uses direct encoding.
|
||||
*
|
||||
* Output alpha set to 255.
|
||||
*
|
||||
* @param input0 The packed endpoint 0 color.
|
||||
* @param input1 The packed endpoint 1 color.
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void rgb_unpack(
|
||||
vint4 input0,
|
||||
vint4 input1,
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
rgba_unpack(input0, input1, output0, output1);
|
||||
output0.set_lane<3>(255);
|
||||
output1.set_lane<3>(255);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR RGBA color that uses scaled encoding.
|
||||
*
|
||||
* Note only the RGB channels use the scaled encoding, alpha uses direct.
|
||||
*
|
||||
* @param input0 The packed endpoint 0 color.
|
||||
* @param alpha1 The packed endpoint 1 alpha value.
|
||||
* @param scale The packed quantized scale.
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void rgb_scale_alpha_unpack(
|
||||
vint4 input0,
|
||||
uint8_t alpha1,
|
||||
uint8_t scale,
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
output1 = input0;
|
||||
output1.set_lane<3>(alpha1);
|
||||
|
||||
output0 = asr<8>(input0 * scale);
|
||||
output0.set_lane<3>(input0.lane<3>());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR RGB color that uses scaled encoding.
|
||||
*
|
||||
* Output alpha is 255.
|
||||
*
|
||||
* @param input0 The packed endpoint 0 color.
|
||||
* @param scale The packed scale.
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void rgb_scale_unpack(
|
||||
vint4 input0,
|
||||
int scale,
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
output1 = input0;
|
||||
output1.set_lane<3>(255);
|
||||
|
||||
output0 = asr<8>(input0 * scale);
|
||||
output0.set_lane<3>(255);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR L color that uses direct encoding.
|
||||
*
|
||||
* Output alpha is 255.
|
||||
*
|
||||
* @param input The packed endpoints.
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void luminance_unpack(
|
||||
const uint8_t input[2],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
int lum0 = input[0];
|
||||
int lum1 = input[1];
|
||||
output0 = vint4(lum0, lum0, lum0, 255);
|
||||
output1 = vint4(lum1, lum1, lum1, 255);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR L color that uses delta encoding.
|
||||
*
|
||||
* Output alpha is 255.
|
||||
*
|
||||
* @param input The packed endpoints (L0, L1).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void luminance_delta_unpack(
|
||||
const uint8_t input[2],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
int v0 = input[0];
|
||||
int v1 = input[1];
|
||||
int l0 = (v0 >> 2) | (v1 & 0xC0);
|
||||
int l1 = l0 + (v1 & 0x3F);
|
||||
|
||||
l1 = astc::min(l1, 255);
|
||||
|
||||
output0 = vint4(l0, l0, l0, 255);
|
||||
output1 = vint4(l1, l1, l1, 255);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an LDR LA color that uses direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (L0, L1, A0, A1).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void luminance_alpha_unpack(
|
||||
const uint8_t input[4],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
int lum0 = input[0];
|
||||
int lum1 = input[1];
|
||||
int alpha0 = input[2];
|
||||
int alpha1 = input[3];
|
||||
output0 = vint4(lum0, lum0, lum0, alpha0);
|
||||
output1 = vint4(lum1, lum1, lum1, alpha1);
|
||||
}
|
||||
|
||||
/**
 * @brief Unpack an LDR LA color that uses delta encoding.
 *
 * @param      input     The packed endpoints (L0, L1, A0, A1).
 * @param[out] output0   The unpacked endpoint 0 color.
 * @param[out] output1   The unpacked endpoint 1 color.
 */
static void luminance_alpha_delta_unpack(
	const uint8_t input[4],
	vint4& output0,
	vint4& output1
) {
	int lum0 = input[0];
	int lum1 = input[1];
	int alpha0 = input[2];
	int alpha1 = input[3];

	// The top bit of each delta byte is the 9th bit of the base value
	lum0 |= (lum1 & 0x80) << 1;
	alpha0 |= (alpha1 & 0x80) << 1;
	lum1 &= 0x7F;
	alpha1 &= 0x7F;

	// Sign-extend the remaining 7-bit deltas
	if (lum1 & 0x40)
	{
		lum1 -= 0x80;
	}

	if (alpha1 & 0x40)
	{
		alpha1 -= 0x80;
	}

	// Drop the low bit of the 9-bit base and 7-bit delta, then apply the
	// delta to form the second endpoint
	lum0 >>= 1;
	lum1 >>= 1;
	alpha0 >>= 1;
	alpha1 >>= 1;
	lum1 += lum0;
	alpha1 += alpha0;

	// The delta endpoint can land outside the LDR range, so saturate it
	lum1 = astc::clamp(lum1, 0, 255);
	alpha1 = astc::clamp(alpha1, 0, 255);

	output0 = vint4(lum0, lum0, lum0, alpha0);
	output1 = vint4(lum1, lum1, lum1, alpha1);
}
|
||||
|
||||
/**
 * @brief Unpack an HDR RGB + offset encoding.
 *
 * @param      input     The packed endpoints (packed and modal).
 * @param[out] output0   The unpacked endpoint 0 color.
 * @param[out] output1   The unpacked endpoint 1 color.
 */
static void hdr_rgbo_unpack(
	const uint8_t input[4],
	vint4& output0,
	vint4& output1
) {
	int v0 = input[0];
	int v1 = input[1];
	int v2 = input[2];
	int v3 = input[3];

	// Reassemble the 4-bit mode field scattered over the input bytes
	int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3);

	// Decode the major component index and the sub-mode
	int majcomp;
	int mode;
	if ((modeval & 0xC) != 0xC)
	{
		majcomp = modeval >> 2;
		mode = modeval & 3;
	}
	else if (modeval != 0xF)
	{
		majcomp = modeval & 3;
		mode = 4;
	}
	else
	{
		majcomp = 0;
		mode = 5;
	}

	// Fixed-placement low-order bits of each value
	int red = v0 & 0x3F;
	int green = v1 & 0x1F;
	int blue = v2 & 0x1F;
	int scale = v3 & 0x1F;

	// Variable-placement bits, assigned per-mode below
	int bit0 = (v1 >> 6) & 1;
	int bit1 = (v1 >> 5) & 1;
	int bit2 = (v2 >> 6) & 1;
	int bit3 = (v2 >> 5) & 1;
	int bit4 = (v3 >> 7) & 1;
	int bit5 = (v3 >> 6) & 1;
	int bit6 = (v3 >> 5) & 1;

	// One-hot encoding of the mode makes the per-mode tests cheap masks
	int ohcomp = 1 << mode;

	if (ohcomp & 0x30)
		green |= bit0 << 6;
	if (ohcomp & 0x3A)
		green |= bit1 << 5;
	if (ohcomp & 0x30)
		blue |= bit2 << 6;
	if (ohcomp & 0x3A)
		blue |= bit3 << 5;

	if (ohcomp & 0x3D)
		scale |= bit6 << 5;
	if (ohcomp & 0x2D)
		scale |= bit5 << 6;
	if (ohcomp & 0x04)
		scale |= bit4 << 7;

	if (ohcomp & 0x3B)
		red |= bit4 << 6;
	if (ohcomp & 0x04)
		red |= bit3 << 6;

	if (ohcomp & 0x10)
		red |= bit5 << 7;
	if (ohcomp & 0x0F)
		red |= bit2 << 7;

	if (ohcomp & 0x05)
		red |= bit1 << 8;
	if (ohcomp & 0x0A)
		red |= bit0 << 8;

	if (ohcomp & 0x05)
		red |= bit0 << 9;
	if (ohcomp & 0x02)
		red |= bit6 << 9;

	if (ohcomp & 0x01)
		red |= bit3 << 10;
	if (ohcomp & 0x02)
		red |= bit5 << 10;

	// expand to 12 bits.
	static const int shamts[6] { 1, 1, 2, 3, 4, 5 };
	int shamt = shamts[mode];
	red <<= shamt;
	green <<= shamt;
	blue <<= shamt;
	scale <<= shamt;

	// on modes 0 to 4, the values stored for "green" and "blue" are differentials,
	// not absolute values.
	if (mode != 5)
	{
		green = red - green;
		blue = red - blue;
	}

	// switch around components.
	int temp;
	switch (majcomp)
	{
	case 1:
		temp = red;
		red = green;
		green = temp;
		break;
	case 2:
		temp = red;
		red = blue;
		blue = temp;
		break;
	default:
		break;
	}

	// Endpoint 0 is the color minus the shared scale offset
	int red0 = red - scale;
	int green0 = green - scale;
	int blue0 = blue - scale;

	// clamp to [0,0xFFF].
	if (red < 0)
		red = 0;
	if (green < 0)
		green = 0;
	if (blue < 0)
		blue = 0;

	if (red0 < 0)
		red0 = 0;
	if (green0 < 0)
		green0 = 0;
	if (blue0 < 0)
		blue0 = 0;

	// Scale 12-bit values into the 16-bit output range; 0x7800 is the
	// alpha placeholder used by the HDR endpoint modes
	output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
	output1 = vint4(red << 4, green << 4, blue << 4, 0x7800);
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR RGB direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_rgb_unpack(
	const uint8_t input[6],
	vint4& output0,
	vint4& output1
) {
	// Local copies of the six packed endpoint bytes
	int v0 = input[0];
	int v1 = input[1];
	int v2 = input[2];
	int v3 = input[3];
	int v4 = input[4];
	int v5 = input[5];

	// extract all the fixed-placement bitfields
	// modeval selects one of 8 sub-modes; majcomp selects the dominant color channel
	int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2);

	int majcomp = ((v4 & 0x80) >> 7) | (((v5 & 0x80) >> 7) << 1);

	// majcomp == 3 is the direct mode: each channel stored as an absolute value,
	// so no base/offset reconstruction is needed
	if (majcomp == 3)
	{
		output0 = vint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800);
		output1 = vint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800);
		return;
	}

	// Base value 'a' plus per-channel offsets b0/b1, endpoint offset 'c',
	// and signed deltas d0/d1
	int a = v0 | ((v1 & 0x40) << 2);
	int b0 = v2 & 0x3f;
	int b1 = v3 & 0x3f;
	int c = v1 & 0x3f;
	int d0 = v4 & 0x7f;
	int d1 = v5 & 0x7f;

	// get hold of the number of bits in 'd0' and 'd1'
	static const int dbits_tab[8] { 7, 6, 7, 6, 5, 6, 5, 6 };
	int dbits = dbits_tab[modeval];

	// extract six variable-placement bits
	int bit0 = (v2 >> 6) & 1;
	int bit1 = (v3 >> 6) & 1;
	int bit2 = (v4 >> 6) & 1;
	int bit3 = (v5 >> 6) & 1;
	int bit4 = (v4 >> 5) & 1;
	int bit5 = (v5 >> 5) & 1;

	// and prepend the variable-placement bits depending on mode.
	// Each mask below is a set of modes (bit N set => applies in mode N)
	int ohmod = 1 << modeval; // one-hot-mode
	if (ohmod & 0xA4)
		a |= bit0 << 9;
	if (ohmod & 0x8)
		a |= bit2 << 9;
	if (ohmod & 0x50)
		a |= bit4 << 9;

	if (ohmod & 0x50)
		a |= bit5 << 10;
	if (ohmod & 0xA0)
		a |= bit1 << 10;

	if (ohmod & 0xC0)
		a |= bit2 << 11;

	if (ohmod & 0x4)
		c |= bit1 << 6;
	if (ohmod & 0xE8)
		c |= bit3 << 6;

	if (ohmod & 0x20)
		c |= bit2 << 7;

	if (ohmod & 0x5B)
	{
		b0 |= bit0 << 6;
		b1 |= bit1 << 6;
	}

	if (ohmod & 0x12)
	{
		b0 |= bit2 << 7;
		b1 |= bit3 << 7;
	}

	if (ohmod & 0xAF)
	{
		d0 |= bit4 << 5;
		d1 |= bit5 << 5;
	}

	if (ohmod & 0x5)
	{
		d0 |= bit2 << 6;
		d1 |= bit3 << 6;
	}

	// sign-extend 'd0' and 'd1' from their dbits-wide storage
	// note: this code assumes that signed right-shift actually sign-fills, not zero-fills.
	int32_t d0x = d0;
	int32_t d1x = d1;
	int sx_shamt = 32 - dbits;
	d0x <<= sx_shamt;
	d0x >>= sx_shamt;
	d1x <<= sx_shamt;
	d1x >>= sx_shamt;
	d0 = d0x;
	d1 = d1x;

	// expand all values to 12 bits, with left-shift as needed.
	int val_shamt = (modeval >> 1) ^ 3;
	a <<= val_shamt;
	b0 <<= val_shamt;
	b1 <<= val_shamt;
	c <<= val_shamt;
	d0 <<= val_shamt;
	d1 <<= val_shamt;

	// then compute the actual color values.
	// Endpoint 1 is base minus per-channel offsets; endpoint 0 additionally
	// subtracts the endpoint offset and the signed deltas
	int red1 = a;
	int green1 = a - b0;
	int blue1 = a - b1;
	int red0 = a - c;
	int green0 = a - b0 - c - d0;
	int blue0 = a - b1 - c - d1;

	// clamp the color components to [0,2^12 - 1]
	red0 = astc::clamp(red0, 0, 4095);
	green0 = astc::clamp(green0, 0, 4095);
	blue0 = astc::clamp(blue0, 0, 4095);

	red1 = astc::clamp(red1, 0, 4095);
	green1 = astc::clamp(green1, 0, 4095);
	blue1 = astc::clamp(blue1, 0, 4095);

	// switch around the color components so the major component lands in the
	// channel it was encoded for
	int temp0, temp1;
	switch (majcomp)
	{
	case 1: // switch around red and green
		temp0 = red0;
		temp1 = red1;
		red0 = green0;
		red1 = green1;
		green0 = temp0;
		green1 = temp1;
		break;
	case 2: // switch around red and blue
		temp0 = red0;
		temp1 = red1;
		red0 = blue0;
		red1 = blue1;
		blue0 = temp0;
		blue1 = temp1;
		break;
	case 0: // no switch
		break;
	}

	// Promote 12-bit results to the 16-bit range; alpha lane is the HDR
	// default payload 0x7800 (presumably "opaque" in the codec's 16-bit
	// representation — confirm against the decoder)
	output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
	output1 = vint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800);
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR RGB + LDR A direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_rgb_ldr_alpha_unpack(
|
||||
const uint8_t input[8],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
hdr_rgb_unpack(input, output0, output1);
|
||||
|
||||
int v6 = input[6];
|
||||
int v7 = input[7];
|
||||
output0.set_lane<3>(v6);
|
||||
output1.set_lane<3>(v7);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR L (small range) direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_luminance_small_range_unpack(
|
||||
const uint8_t input[2],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
int v0 = input[0];
|
||||
int v1 = input[1];
|
||||
|
||||
int y0, y1;
|
||||
if (v0 & 0x80)
|
||||
{
|
||||
y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
|
||||
y1 = (v1 & 0x1F) << 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
|
||||
y1 = (v1 & 0xF) << 1;
|
||||
}
|
||||
|
||||
y1 += y0;
|
||||
if (y1 > 0xFFF)
|
||||
{
|
||||
y1 = 0xFFF;
|
||||
}
|
||||
|
||||
output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
|
||||
output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR L (large range) direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_luminance_large_range_unpack(
|
||||
const uint8_t input[2],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
int v0 = input[0];
|
||||
int v1 = input[1];
|
||||
|
||||
int y0, y1;
|
||||
if (v1 >= v0)
|
||||
{
|
||||
y0 = v0 << 4;
|
||||
y1 = v1 << 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (v1 << 4) + 8;
|
||||
y1 = (v0 << 4) - 8;
|
||||
}
|
||||
|
||||
output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
|
||||
output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR A direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_alpha_unpack(
	const uint8_t input[2],
	int& output0,
	int& output1
) {
	int a0 = input[0];
	int a1 = input[1];

	// Two-bit mode selector assembled from the top bits of each byte
	int mode = ((a0 >> 7) & 1) | ((a1 >> 6) & 2);
	a0 &= 0x7F;
	a1 &= 0x7F;

	if (mode == 3)
	{
		// Direct mode: two independent 7-bit values, expanded to 12 bits
		// and then to the 16-bit output range
		output0 = (a0 << 5) << 4;
		output1 = (a1 << 5) << 4;
		return;
	}

	// Base/offset mode: move the high bits of a1 into the base a0, then
	// sign-extend the remaining offset bits of a1 (XOR/subtract trick)
	a0 |= (a1 << (mode + 1)) & 0x780;
	a1 &= 0x3f >> mode;
	a1 ^= 32 >> mode;
	a1 -= 32 >> mode;

	// Scale both values up to 12 bits depending on mode
	a0 <<= 4 - mode;
	a1 <<= 4 - mode;

	// The second endpoint is relative to the first; clamp into [0, 0xFFF]
	a1 += a0;
	if (a1 < 0)
	{
		a1 = 0;
	}
	else if (a1 > 0xFFF)
	{
		a1 = 0xFFF;
	}

	// Promote 12-bit results to the 16-bit output range
	output0 = a0 << 4;
	output1 = a1 << 4;
}
|
||||
|
||||
/**
|
||||
* @brief Unpack an HDR RGBA direct encoding.
|
||||
*
|
||||
* @param input The packed endpoints (packed and modal).
|
||||
* @param[out] output0 The unpacked endpoint 0 color.
|
||||
* @param[out] output1 The unpacked endpoint 1 color.
|
||||
*/
|
||||
static void hdr_rgb_hdr_alpha_unpack(
|
||||
const uint8_t input[8],
|
||||
vint4& output0,
|
||||
vint4& output1
|
||||
) {
|
||||
hdr_rgb_unpack(input, output0, output1);
|
||||
|
||||
int alpha0, alpha1;
|
||||
hdr_alpha_unpack(input + 6, alpha0, alpha1);
|
||||
|
||||
output0.set_lane<3>(alpha0);
|
||||
output1.set_lane<3>(alpha1);
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void unpack_color_endpoints(
	astcenc_profile decode_mode,
	int format,
	const uint8_t* input,
	bool& rgb_hdr,
	bool& alpha_hdr,
	vint4& output0,
	vint4& output1
) {
	// Assume no NaNs and LDR endpoints unless set later
	rgb_hdr = false;
	alpha_hdr = false;

	// Set by formats that carry no alpha of their own; resolved after the switch
	bool alpha_hdr_default = false;

	// Dispatch to the per-format unpacker; HDR formats also tag the HDR flags
	switch (format)
	{
	case FMT_LUMINANCE:
		luminance_unpack(input, output0, output1);
		break;

	case FMT_LUMINANCE_DELTA:
		luminance_delta_unpack(input, output0, output1);
		break;

	case FMT_HDR_LUMINANCE_SMALL_RANGE:
		rgb_hdr = true;
		alpha_hdr_default = true;
		hdr_luminance_small_range_unpack(input, output0, output1);
		break;

	case FMT_HDR_LUMINANCE_LARGE_RANGE:
		rgb_hdr = true;
		alpha_hdr_default = true;
		hdr_luminance_large_range_unpack(input, output0, output1);
		break;

	case FMT_LUMINANCE_ALPHA:
		luminance_alpha_unpack(input, output0, output1);
		break;

	case FMT_LUMINANCE_ALPHA_DELTA:
		luminance_alpha_delta_unpack(input, output0, output1);
		break;

	case FMT_RGB_SCALE:
		{
			vint4 input0q(input[0], input[1], input[2], 0);
			uint8_t scale = input[3];
			rgb_scale_unpack(input0q, scale, output0, output1);
		}
		break;

	case FMT_RGB_SCALE_ALPHA:
		{
			vint4 input0q(input[0], input[1], input[2], input[4]);
			uint8_t alpha1q = input[5];
			uint8_t scaleq = input[3];
			rgb_scale_alpha_unpack(input0q, alpha1q, scaleq, output0, output1);
		}
		break;

	case FMT_HDR_RGB_SCALE:
		rgb_hdr = true;
		alpha_hdr_default = true;
		hdr_rgbo_unpack(input, output0, output1);
		break;

	case FMT_RGB:
		{
			// Even bytes are endpoint 0, odd bytes are endpoint 1
			vint4 input0q(input[0], input[2], input[4], 0);
			vint4 input1q(input[1], input[3], input[5], 0);
			rgb_unpack(input0q, input1q, output0, output1);
		}
		break;

	case FMT_RGB_DELTA:
		{
			vint4 input0q(input[0], input[2], input[4], 0);
			vint4 input1q(input[1], input[3], input[5], 0);
			rgb_delta_unpack(input0q, input1q, output0, output1);
		}
		break;

	case FMT_HDR_RGB:
		rgb_hdr = true;
		alpha_hdr_default = true;
		hdr_rgb_unpack(input, output0, output1);
		break;

	case FMT_RGBA:
		{
			vint4 input0q(input[0], input[2], input[4], input[6]);
			vint4 input1q(input[1], input[3], input[5], input[7]);
			rgba_unpack(input0q, input1q, output0, output1);
		}
		break;

	case FMT_RGBA_DELTA:
		{
			vint4 input0q(input[0], input[2], input[4], input[6]);
			vint4 input1q(input[1], input[3], input[5], input[7]);
			rgba_delta_unpack(input0q, input1q, output0, output1);
		}
		break;

	case FMT_HDR_RGB_LDR_ALPHA:
		rgb_hdr = true;
		hdr_rgb_ldr_alpha_unpack(input, output0, output1);
		break;

	case FMT_HDR_RGBA:
		rgb_hdr = true;
		alpha_hdr = true;
		hdr_rgb_hdr_alpha_unpack(input, output0, output1);
		break;
	}

	// Assign a correct default alpha
	// (0x7800 for HDR decode, opaque 0xFF for LDR decode)
	if (alpha_hdr_default)
	{
		if (decode_mode == ASTCENC_PRF_HDR)
		{
			output0.set_lane<3>(0x7800);
			output1.set_lane<3>(0x7800);
			alpha_hdr = true;
		}
		else
		{
			output0.set_lane<3>(0x00FF);
			output1.set_lane<3>(0x00FF);
			alpha_hdr = false;
		}
	}

	// Handle endpoint errors and expansion

	// Linear LDR 8-bit endpoints are expanded to 16-bit by replication
	if (decode_mode == ASTCENC_PRF_LDR)
	{
		// Error color - HDR endpoint in an LDR encoding
		if (rgb_hdr || alpha_hdr)
		{
			output0 = vint4(0xFF, 0x00, 0xFF, 0xFF);
			output1 = vint4(0xFF, 0x00, 0xFF, 0xFF);
			rgb_hdr = false;
			alpha_hdr = false;
		}

		// x * 257 replicates an 8-bit value into both bytes of a 16-bit value
		output0 = output0 * 257;
		output1 = output1 * 257;
	}
	// sRGB LDR 8-bit endpoints are expanded to 16 bit by:
	//  - RGB = shift left by 8 bits and OR with 0x80
	//  - A = replication
	else if (decode_mode == ASTCENC_PRF_LDR_SRGB)
	{
		// Error color - HDR endpoint in an LDR encoding
		if (rgb_hdr || alpha_hdr)
		{
			output0 = vint4(0xFF, 0x00, 0xFF, 0xFF);
			output1 = vint4(0xFF, 0x00, 0xFF, 0xFF);
			rgb_hdr = false;
			alpha_hdr = false;
		}

		// RGB lanes take the sRGB expansion; the alpha lane takes replication
		vmask4 mask(true, true, true, false);

		vint4 output0rgb = lsl<8>(output0) | vint4(0x80);
		vint4 output0a = output0 * 257;
		output0 = select(output0a, output0rgb, mask);

		vint4 output1rgb = lsl<8>(output1) | vint4(0x80);
		vint4 output1a = output1 * 257;
		output1 = select(output1a, output1rgb, mask);
	}
	// An HDR profile decode, but may be using linear LDR endpoints
	// Linear LDR 8-bit endpoints are expanded to 16-bit by replication
	// HDR endpoints are already 16-bit
	else
	{
		vmask4 hdr_lanes(rgb_hdr, rgb_hdr, rgb_hdr, alpha_hdr);
		vint4 output_scale = select(vint4(257), vint4(1), hdr_lanes);
		output0 = output0 * output_scale;
		output1 = output1 * output_scale;
	}
}
|
||||
1456
engine/thirdparty/astcenc/astcenc_compress_symbolic.cpp
vendored
Normal file
1456
engine/thirdparty/astcenc/astcenc_compress_symbolic.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
472
engine/thirdparty/astcenc/astcenc_compute_variance.cpp
vendored
Normal file
472
engine/thirdparty/astcenc/astcenc_compute_variance.cpp
vendored
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2022 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
/**
|
||||
* @brief Functions to calculate variance per component in a NxN footprint.
|
||||
*
|
||||
* We need N to be parametric, so the routine below uses summed area tables in order to execute in
|
||||
* O(1) time independent of how big N is.
|
||||
*
|
||||
* The addition uses a Brent-Kung-based parallel prefix adder. This uses the prefix tree to first
|
||||
* perform a binary reduction, and then distributes the results. This method means that there is no
|
||||
* serial dependency between a given element and the next one, and also significantly improves
|
||||
* numerical stability allowing us to use floats rather than doubles.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/**
|
||||
* @brief Generate a prefix-sum array using the Brent-Kung algorithm.
|
||||
*
|
||||
* This will take an input array of the form:
|
||||
* v0, v1, v2, ...
|
||||
* ... and modify in-place to turn it into a prefix-sum array of the form:
|
||||
* v0, v0+v1, v0+v1+v2, ...
|
||||
*
|
||||
* @param d The array to prefix-sum.
|
||||
* @param items The number of items in the array.
|
||||
* @param stride The item spacing in the array; i.e. dense arrays should use 1.
|
||||
*/
|
||||
static void brent_kung_prefix_sum(
	vfloat4* d,
	size_t items,
	int stride
) {
	// Nothing to accumulate for 0 or 1 items
	if (items < 2)
		return;

	size_t lc_stride = 2;
	size_t log2_stride = 1;

	// The reduction-tree loop
	// Each pass doubles the span, adding element (i - span/2) into element i
	// for every i at the current span boundary
	do {
		size_t step = lc_stride >> 1;
		size_t start = lc_stride - 1;
		size_t iters = items >> log2_stride;

		// 'ofs' is negative: each target accumulates a partial sum behind it
		vfloat4 *da = d + (start * stride);
		ptrdiff_t ofs = -static_cast<ptrdiff_t>(step * stride);
		size_t ofs_stride = stride << log2_stride;

		while (iters)
		{
			*da = *da + da[ofs];
			da += ofs_stride;
			iters--;
		}

		log2_stride += 1;
		lc_stride <<= 1;
	} while (lc_stride <= items);

	// The expansion-tree loop
	// Walk the spans back down, distributing the partial sums so every
	// element ends up holding the full prefix sum
	do {
		log2_stride -= 1;
		lc_stride >>= 1;

		size_t step = lc_stride >> 1;
		size_t start = step + lc_stride - 1;
		size_t iters = (items - step) >> log2_stride;

		vfloat4 *da = d + (start * stride);
		ptrdiff_t ofs = -static_cast<ptrdiff_t>(step * stride);
		size_t ofs_stride = stride << log2_stride;

		while (iters)
		{
			*da = *da + da[ofs];
			da += ofs_stride;
			iters--;
		}
	} while (lc_stride > 2);
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void compute_pixel_region_variance(
	astcenc_contexti& ctx,
	const pixel_region_args& arg
) {
	// Unpack the memory structure into local variables
	const astcenc_image* img = arg.img;
	astcenc_swizzle swz = arg.swz;
	bool have_z = arg.have_z;

	int size_x = arg.size_x;
	int size_y = arg.size_y;
	int size_z = arg.size_z;

	int offset_x = arg.offset_x;
	int offset_y = arg.offset_y;
	int offset_z = arg.offset_z;

	int alpha_kernel_radius = arg.alpha_kernel_radius;

	float* input_alpha_averages = ctx.input_alpha_averages;
	vfloat4* work_memory = arg.work_memory;

	// Compute memory sizes and dimensions that we need
	// The working region is the block plus a kernel-sized apron on each side
	int kernel_radius = alpha_kernel_radius;
	int kerneldim = 2 * kernel_radius + 1;
	int kernel_radius_xy = kernel_radius;
	int kernel_radius_z = have_z ? kernel_radius : 0;

	int padsize_x = size_x + kerneldim;
	int padsize_y = size_y + kerneldim;
	int padsize_z = size_z + (have_z ? kerneldim : 0);
	int sizeprod = padsize_x * padsize_y * padsize_z;

	// 2D images skip the leading zero layer in Z
	int zd_start = have_z ? 1 : 0;

	// Two working buffers: values, and squared values
	vfloat4 *varbuf1 = work_memory;
	vfloat4 *varbuf2 = work_memory + sizeprod;

	// Scaling factors to apply to Y and Z for accesses into the work buffers
	int yst = padsize_x;
	int zst = padsize_x * padsize_y;

	// Scaling factors to apply to Y and Z for accesses into result buffers
	int ydt = img->dim_x;
	int zdt = img->dim_x * img->dim_y;

	// Macros to act as accessor functions for the work-memory
#define VARBUF1(z, y, x) varbuf1[z * zst + y * yst + x]
#define VARBUF2(z, y, x) varbuf2[z * zst + y * yst + x]

	// Load N and N^2 values into the work buffers
	// Source coordinates are clamped to the image, so the apron samples
	// replicate the image edge
	if (img->data_type == ASTCENC_TYPE_U8)
	{
		// Swizzle data structure 4 = ZERO, 5 = ONE
		uint8_t data[6];
		data[ASTCENC_SWZ_0] = 0;
		data[ASTCENC_SWZ_1] = 255;

		for (int z = zd_start; z < padsize_z; z++)
		{
			int z_src = (z - zd_start) + offset_z - kernel_radius_z;
			z_src = astc::clamp(z_src, 0, static_cast<int>(img->dim_z - 1));
			uint8_t* data8 = static_cast<uint8_t*>(img->data[z_src]);

			for (int y = 1; y < padsize_y; y++)
			{
				int y_src = (y - 1) + offset_y - kernel_radius_xy;
				y_src = astc::clamp(y_src, 0, static_cast<int>(img->dim_y - 1));

				for (int x = 1; x < padsize_x; x++)
				{
					int x_src = (x - 1) + offset_x - kernel_radius_xy;
					x_src = astc::clamp(x_src, 0, static_cast<int>(img->dim_x - 1));

					data[0] = data8[(4 * img->dim_x * y_src) + (4 * x_src    )];
					data[1] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 1)];
					data[2] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 2)];
					data[3] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 3)];

					uint8_t r = data[swz.r];
					uint8_t g = data[swz.g];
					uint8_t b = data[swz.b];
					uint8_t a = data[swz.a];

					vfloat4 d = vfloat4 (r * (1.0f / 255.0f),
					                     g * (1.0f / 255.0f),
					                     b * (1.0f / 255.0f),
					                     a * (1.0f / 255.0f));

					VARBUF1(z, y, x) = d;
					VARBUF2(z, y, x) = d * d;
				}
			}
		}
	}
	else if (img->data_type == ASTCENC_TYPE_F16)
	{
		// Swizzle data structure 4 = ZERO, 5 = ONE (in FP16)
		uint16_t data[6];
		data[ASTCENC_SWZ_0] = 0;
		data[ASTCENC_SWZ_1] = 0x3C00;

		for (int z = zd_start; z < padsize_z; z++)
		{
			int z_src = (z - zd_start) + offset_z - kernel_radius_z;
			z_src = astc::clamp(z_src, 0, static_cast<int>(img->dim_z - 1));
			uint16_t* data16 = static_cast<uint16_t*>(img->data[z_src]);

			for (int y = 1; y < padsize_y; y++)
			{
				int y_src = (y - 1) + offset_y - kernel_radius_xy;
				y_src = astc::clamp(y_src, 0, static_cast<int>(img->dim_y - 1));

				for (int x = 1; x < padsize_x; x++)
				{
					int x_src = (x - 1) + offset_x - kernel_radius_xy;
					x_src = astc::clamp(x_src, 0, static_cast<int>(img->dim_x - 1));

					data[0] = data16[(4 * img->dim_x * y_src) + (4 * x_src    )];
					data[1] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 1)];
					data[2] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 2)];
					data[3] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 3)];

					vint4 di(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
					vfloat4 d = float16_to_float(di);

					VARBUF1(z, y, x) = d;
					VARBUF2(z, y, x) = d * d;
				}
			}
		}
	}
	else // if (img->data_type == ASTCENC_TYPE_F32)
	{
		assert(img->data_type == ASTCENC_TYPE_F32);

		// Swizzle data structure 4 = ZERO, 5 = ONE (as FP32)
		float data[6];
		data[ASTCENC_SWZ_0] = 0.0f;
		data[ASTCENC_SWZ_1] = 1.0f;

		for (int z = zd_start; z < padsize_z; z++)
		{
			int z_src = (z - zd_start) + offset_z - kernel_radius_z;
			z_src = astc::clamp(z_src, 0, static_cast<int>(img->dim_z - 1));
			float* data32 = static_cast<float*>(img->data[z_src]);

			for (int y = 1; y < padsize_y; y++)
			{
				int y_src = (y - 1) + offset_y - kernel_radius_xy;
				y_src = astc::clamp(y_src, 0, static_cast<int>(img->dim_y - 1));

				for (int x = 1; x < padsize_x; x++)
				{
					int x_src = (x - 1) + offset_x - kernel_radius_xy;
					x_src = astc::clamp(x_src, 0, static_cast<int>(img->dim_x - 1));

					data[0] = data32[(4 * img->dim_x * y_src) + (4 * x_src    )];
					data[1] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 1)];
					data[2] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 2)];
					data[3] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 3)];

					float r = data[swz.r];
					float g = data[swz.g];
					float b = data[swz.b];
					float a = data[swz.a];

					vfloat4 d(r, g, b, a);

					VARBUF1(z, y, x) = d;
					VARBUF2(z, y, x) = d * d;
				}
			}
		}
	}

	// Pad with an extra layer of 0s; this forms the edge of the SAT tables
	vfloat4 vbz = vfloat4::zero();
	for (int z = 0; z < padsize_z; z++)
	{
		for (int y = 0; y < padsize_y; y++)
		{
			VARBUF1(z, y, 0) = vbz;
			VARBUF2(z, y, 0) = vbz;
		}

		for (int x = 0; x < padsize_x; x++)
		{
			VARBUF1(z, 0, x) = vbz;
			VARBUF2(z, 0, x) = vbz;
		}
	}

	if (have_z)
	{
		for (int y = 0; y < padsize_y; y++)
		{
			for (int x = 0; x < padsize_x; x++)
			{
				VARBUF1(0, y, x) = vbz;
				VARBUF2(0, y, x) = vbz;
			}
		}
	}

	// Generate summed-area tables for N and N^2; this is done in-place, using
	// a Brent-Kung parallel-prefix based algorithm to minimize precision loss
	for (int z = zd_start; z < padsize_z; z++)
	{
		for (int y = 1; y < padsize_y; y++)
		{
			brent_kung_prefix_sum(&(VARBUF1(z, y, 1)), padsize_x - 1, 1);
			brent_kung_prefix_sum(&(VARBUF2(z, y, 1)), padsize_x - 1, 1);
		}
	}

	for (int z = zd_start; z < padsize_z; z++)
	{
		for (int x = 1; x < padsize_x; x++)
		{
			brent_kung_prefix_sum(&(VARBUF1(z, 1, x)), padsize_y - 1, yst);
			brent_kung_prefix_sum(&(VARBUF2(z, 1, x)), padsize_y - 1, yst);
		}
	}

	if (have_z)
	{
		for (int y = 1; y < padsize_y; y++)
		{
			for (int x = 1; x < padsize_x; x++)
			{
				brent_kung_prefix_sum(&(VARBUF1(1, y, x)), padsize_z - 1, zst);
				brent_kung_prefix_sum(&(VARBUF2(1, y, x)), padsize_z - 1, zst);
			}
		}
	}

	// Compute a few constants used in the variance-calculation.
	// alpha_rsamples is the reciprocal of the kernel footprint sample count
	float alpha_kdim = static_cast<float>(2 * alpha_kernel_radius + 1);
	float alpha_rsamples;

	if (have_z)
	{
		alpha_rsamples = 1.0f / (alpha_kdim * alpha_kdim * alpha_kdim);
	}
	else
	{
		alpha_rsamples = 1.0f / (alpha_kdim * alpha_kdim);
	}

	// Use the summed-area tables to compute variance for each neighborhood
	if (have_z)
	{
		for (int z = 0; z < size_z; z++)
		{
			int z_src = z + kernel_radius_z;
			int z_dst = z + offset_z;
			int z_low = z_src - alpha_kernel_radius;
			int z_high = z_src + alpha_kernel_radius + 1;

			for (int y = 0; y < size_y; y++)
			{
				int y_src = y + kernel_radius_xy;
				int y_dst = y + offset_y;
				int y_low = y_src - alpha_kernel_radius;
				int y_high = y_src + alpha_kernel_radius + 1;

				for (int x = 0; x < size_x; x++)
				{
					int x_src = x + kernel_radius_xy;
					int x_dst = x + offset_x;
					int x_low = x_src - alpha_kernel_radius;
					int x_high = x_src + alpha_kernel_radius + 1;

					// Summed-area table lookups for alpha average
					// (8-corner inclusion-exclusion over the 3D footprint)
					float vasum = ( VARBUF1(z_high, y_low, x_low).lane<3>()
					              - VARBUF1(z_high, y_low, x_high).lane<3>()
					              - VARBUF1(z_high, y_high, x_low).lane<3>()
					              + VARBUF1(z_high, y_high, x_high).lane<3>()) -
					              ( VARBUF1(z_low, y_low, x_low).lane<3>()
					              - VARBUF1(z_low, y_low, x_high).lane<3>()
					              - VARBUF1(z_low, y_high, x_low).lane<3>()
					              + VARBUF1(z_low, y_high, x_high).lane<3>());

					int out_index = z_dst * zdt + y_dst * ydt + x_dst;
					input_alpha_averages[out_index] = (vasum * alpha_rsamples);
				}
			}
		}
	}
	else
	{
		for (int y = 0; y < size_y; y++)
		{
			int y_src = y + kernel_radius_xy;
			int y_dst = y + offset_y;
			int y_low = y_src - alpha_kernel_radius;
			int y_high = y_src + alpha_kernel_radius + 1;

			for (int x = 0; x < size_x; x++)
			{
				int x_src = x + kernel_radius_xy;
				int x_dst = x + offset_x;
				int x_low = x_src - alpha_kernel_radius;
				int x_high = x_src + alpha_kernel_radius + 1;

				// Summed-area table lookups for alpha average
				// (4-corner inclusion-exclusion over the 2D footprint)
				float vasum = VARBUF1(0, y_low, x_low).lane<3>()
				            - VARBUF1(0, y_low, x_high).lane<3>()
				            - VARBUF1(0, y_high, x_low).lane<3>()
				            + VARBUF1(0, y_high, x_high).lane<3>();

				int out_index = y_dst * ydt + x_dst;
				input_alpha_averages[out_index] = (vasum * alpha_rsamples);
			}
		}
	}
}
|
||||
|
||||
/* See header for documentation. */
|
||||
unsigned int init_compute_averages(
|
||||
const astcenc_image& img,
|
||||
unsigned int alpha_kernel_radius,
|
||||
const astcenc_swizzle& swz,
|
||||
avg_args& ag
|
||||
) {
|
||||
unsigned int size_x = img.dim_x;
|
||||
unsigned int size_y = img.dim_y;
|
||||
unsigned int size_z = img.dim_z;
|
||||
|
||||
// Compute maximum block size and from that the working memory buffer size
|
||||
unsigned int kernel_radius = alpha_kernel_radius;
|
||||
unsigned int kerneldim = 2 * kernel_radius + 1;
|
||||
|
||||
bool have_z = (size_z > 1);
|
||||
unsigned int max_blk_size_xy = have_z ? 16 : 32;
|
||||
unsigned int max_blk_size_z = astc::min(size_z, have_z ? 16u : 1u);
|
||||
|
||||
unsigned int max_padsize_xy = max_blk_size_xy + kerneldim;
|
||||
unsigned int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0);
|
||||
|
||||
// Perform block-wise averages calculations across the image
|
||||
// Initialize fields which are not populated until later
|
||||
ag.arg.size_x = 0;
|
||||
ag.arg.size_y = 0;
|
||||
ag.arg.size_z = 0;
|
||||
ag.arg.offset_x = 0;
|
||||
ag.arg.offset_y = 0;
|
||||
ag.arg.offset_z = 0;
|
||||
ag.arg.work_memory = nullptr;
|
||||
|
||||
ag.arg.img = &img;
|
||||
ag.arg.swz = swz;
|
||||
ag.arg.have_z = have_z;
|
||||
ag.arg.alpha_kernel_radius = alpha_kernel_radius;
|
||||
|
||||
ag.img_size_x = size_x;
|
||||
ag.img_size_y = size_y;
|
||||
ag.img_size_z = size_z;
|
||||
ag.blk_size_xy = max_blk_size_xy;
|
||||
ag.blk_size_z = max_blk_size_z;
|
||||
ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z;
|
||||
|
||||
// The parallel task count
|
||||
unsigned int z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z;
|
||||
unsigned int y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy;
|
||||
return z_tasks * y_tasks;
|
||||
}
|
||||
|
||||
#endif
|
||||
631
engine/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
vendored
Normal file
631
engine/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
vendored
Normal file
|
|
@ -0,0 +1,631 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions to decompress a symbolic block.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
/**
|
||||
* @brief Compute the integer linear interpolation of two color endpoints.
|
||||
*
|
||||
* @param u8_mask The mask for lanes using decode_unorm8 rather than decode_f16.
|
||||
* @param color0 The endpoint0 color.
|
||||
* @param color1 The endpoint1 color.
|
||||
* @param weights The interpolation weight (between 0 and 64).
|
||||
*
|
||||
* @return The interpolated color.
|
||||
*/
|
||||
static vint4 lerp_color_int(
	vmask4 u8_mask,
	vint4 color0,
	vint4 color1,
	vint4 weights
) {
	// Fixed-point blend: weights lie in [0, 64], with +32 rounding
	// applied before the divide-by-64 shift
	vint4 wgt0 = vint4(64) - weights;
	vint4 mixed = asr<6>((color0 * wgt0) + (color1 * weights) + vint4(32));

	// For decode_unorm8 lanes force the codec to bit replicate (x * 257
	// copies the top byte into the bottom byte). This allows the rest of
	// the codec to assume the full 0xFFFF range for everything and ignore
	// the decode_mode setting
	vint4 replicated = asr<8>(mixed) * vint4(257);
	return select(mixed, replicated, u8_mask);
}
|
||||
|
||||
/**
|
||||
* @brief Convert integer color value into a float value for the decoder.
|
||||
*
|
||||
* @param data The integer color value post-interpolation.
|
||||
* @param lns_mask If set treat lane as HDR (LNS) else LDR (unorm16).
|
||||
*
|
||||
* @return The float color value.
|
||||
*/
|
||||
static inline vfloat4 decode_texel(
	vint4 data,
	vmask4 lns_mask
) {
	vint4 color_lns = vint4::zero();
	vint4 color_unorm = vint4::zero();

	// Only run the LNS conversion if at least one lane needs it
	if (any(lns_mask))
	{
		color_lns = lns_to_sf16(data);
	}

	// Only run the UNORM16 conversion if at least one lane needs it
	if (!all(lns_mask))
	{
		color_unorm = unorm16_to_sf16(data);
	}

	// Pick components and then convert to FP16
	vint4 datai = select(color_unorm, color_lns, lns_mask);
	return float16_to_float(datai);
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void unpack_weights(
	const block_size_descriptor& bsd,
	const symbolic_compressed_block& scb,
	const decimation_info& di,
	bool is_dual_plane,
	int weights_plane1[BLOCK_MAX_TEXELS],
	int weights_plane2[BLOCK_MAX_TEXELS]
) {
	// Safe to overshoot as all arrays are allocated to full size
	if (!is_dual_plane)
	{
		// Build full 64-entry weight lookup table
		vint4 tab0 = vint4::load(scb.weights + 0);
		vint4 tab1 = vint4::load(scb.weights + 16);
		vint4 tab2 = vint4::load(scb.weights + 32);
		vint4 tab3 = vint4::load(scb.weights + 48);

		vint tab0p, tab1p, tab2p, tab3p;
		vtable_prepare(tab0, tab1, tab2, tab3, tab0p, tab1p, tab2p, tab3p);

		for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			// Seed with 8 so the later >> 4 performs round-to-nearest
			vint summed_value(8);
			vint weight_count(di.texel_weight_count + i);
			int max_weight_count = hmax(weight_count).lane<0>();

			promise(max_weight_count > 0);
			for (int j = 0; j < max_weight_count; j++)
			{
				vint texel_weights(di.texel_weights_tr[j] + i);
				vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);

				summed_value += vtable_8bt_32bi(tab0p, tab1p, tab2p, tab3p, texel_weights) * texel_weights_int;
			}

			store(lsr<4>(summed_value), weights_plane1 + i);
		}
	}
	else
	{
		// Build a 32-entry weight lookup table per plane
		// Plane 1
		vint4 tab0_plane1 = vint4::load(scb.weights + 0);
		vint4 tab1_plane1 = vint4::load(scb.weights + 16);
		vint tab0_plane1p, tab1_plane1p;
		vtable_prepare(tab0_plane1, tab1_plane1, tab0_plane1p, tab1_plane1p);

		// Plane 2
		vint4 tab0_plane2 = vint4::load(scb.weights + 32);
		vint4 tab1_plane2 = vint4::load(scb.weights + 48);
		vint tab0_plane2p, tab1_plane2p;
		vtable_prepare(tab0_plane2, tab1_plane2, tab0_plane2p, tab1_plane2p);

		for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
		{
			// Seed with 8 so the later >> 4 performs round-to-nearest
			vint sum_plane1(8);
			vint sum_plane2(8);

			vint weight_count(di.texel_weight_count + i);
			int max_weight_count = hmax(weight_count).lane<0>();

			promise(max_weight_count > 0);
			for (int j = 0; j < max_weight_count; j++)
			{
				vint texel_weights(di.texel_weights_tr[j] + i);
				vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);

				sum_plane1 += vtable_8bt_32bi(tab0_plane1p, tab1_plane1p, texel_weights) * texel_weights_int;
				sum_plane2 += vtable_8bt_32bi(tab0_plane2p, tab1_plane2p, texel_weights) * texel_weights_int;
			}

			store(lsr<4>(sum_plane1), weights_plane1 + i);
			store(lsr<4>(sum_plane2), weights_plane2 + i);
		}
	}
}
|
||||
|
||||
/**
|
||||
* @brief Return an FP32 NaN value for use in error colors.
|
||||
*
|
||||
* This NaN encoding will turn into 0xFFFF when converted to an FP16 NaN.
|
||||
*
|
||||
* @return The float color value.
|
||||
*/
|
||||
static float error_color_nan()
{
	// Type-pun through the if32 union so we control the exact NaN payload bits
	if32 v;
	v.u = 0xFFFFE000U;
	return v.f;
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void decompress_symbolic_block(
	astcenc_profile decode_mode,
	const block_size_descriptor& bsd,
	int xpos,
	int ypos,
	int zpos,
	const symbolic_compressed_block& scb,
	image_block& blk
) {
	blk.xpos = xpos;
	blk.ypos = ypos;
	blk.zpos = zpos;

	// Statistics are not recomputed during decompression; reset them
	blk.data_min = vfloat4::zero();
	blk.data_mean = vfloat4::zero();
	blk.data_max = vfloat4::zero();
	blk.grayscale = false;

	// If we detected an error-block, blow up immediately.
	if (scb.block_type == SYM_BTYPE_ERROR)
	{
		// Fill the whole block with the error color (NaN)
		for (unsigned int i = 0; i < bsd.texel_count; i++)
		{
			blk.data_r[i] = error_color_nan();
			blk.data_g[i] = error_color_nan();
			blk.data_b[i] = error_color_nan();
			blk.data_a[i] = error_color_nan();
			blk.rgb_lns[i] = 0;
			blk.alpha_lns[i] = 0;
		}

		return;
	}

	// Constant color blocks are handled without decoding weights/partitions
	if ((scb.block_type == SYM_BTYPE_CONST_F16) ||
	    (scb.block_type == SYM_BTYPE_CONST_U16))
	{
		vfloat4 color;
		uint8_t use_lns = 0;

		// UNORM16 constant color block
		if (scb.block_type == SYM_BTYPE_CONST_U16)
		{
			vint4 colori(scb.constant_color);

			// Determine the UNORM8 rounding on the decode
			vmask4 u8_mask = get_u8_component_mask(decode_mode, blk);

			// The real decoder would just use the top 8 bits, but we rescale
			// in to a 16-bit value that rounds correctly.
			vint4 colori_u8 = asr<8>(colori) * 257;
			colori = select(colori, colori_u8, u8_mask);

			vint4 colorf16 = unorm16_to_sf16(colori);
			color = float16_to_float(colorf16);
		}
		// FLOAT16 constant color block
		else
		{
			switch (decode_mode)
			{
			case ASTCENC_PRF_LDR_SRGB:
			case ASTCENC_PRF_LDR:
				// FP16 constant blocks are invalid in LDR profiles
				color = vfloat4(error_color_nan());
				break;
			case ASTCENC_PRF_HDR_RGB_LDR_A:
			case ASTCENC_PRF_HDR:
				// Constant-color block; unpack from FP16 to FP32.
				color = float16_to_float(vint4(scb.constant_color));
				use_lns = 1;
				break;
			}
		}

		for (unsigned int i = 0; i < bsd.texel_count; i++)
		{
			blk.data_r[i] = color.lane<0>();
			blk.data_g[i] = color.lane<1>();
			blk.data_b[i] = color.lane<2>();
			blk.data_a[i] = color.lane<3>();
			blk.rgb_lns[i] = use_lns;
			blk.alpha_lns[i] = use_lns;
		}

		return;
	}

	// Get the appropriate partition-table entry
	int partition_count = scb.partition_count;
	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);

	// Get the appropriate block descriptors
	const auto& bm = bsd.get_block_mode(scb.block_mode);
	const auto& di = bsd.get_decimation_info(bm.decimation_mode);

	bool is_dual_plane = static_cast<bool>(bm.is_dual_plane);

	// Unquantize and undecimate the weights
	int plane1_weights[BLOCK_MAX_TEXELS];
	int plane2_weights[BLOCK_MAX_TEXELS];
	unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);

	// Now that we have endpoint colors and weights, we can unpack texel colors
	// plane2_mask selects which single component uses the plane 2 weights
	int plane2_component = scb.plane2_component;
	vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);

	vmask4 u8_mask = get_u8_component_mask(decode_mode, blk);

	for (int i = 0; i < partition_count; i++)
	{
		// Decode the color endpoints for this partition
		vint4 ep0;
		vint4 ep1;
		bool rgb_lns;
		bool a_lns;

		unpack_color_endpoints(decode_mode,
		                       scb.color_formats[i],
		                       scb.color_values[i],
		                       rgb_lns, a_lns,
		                       ep0, ep1);

		vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns);

		// Interpolate and decode every texel assigned to this partition
		int texel_count = pi.partition_texel_count[i];
		for (int j = 0; j < texel_count; j++)
		{
			int tix = pi.texels_of_partition[i][j];
			vint4 weight = select(vint4(plane1_weights[tix]), vint4(plane2_weights[tix]), plane2_mask);
			vint4 color = lerp_color_int(u8_mask, ep0, ep1, weight);
			vfloat4 colorf = decode_texel(color, lns_mask);

			blk.data_r[tix] = colorf.lane<0>();
			blk.data_g[tix] = colorf.lane<1>();
			blk.data_b[tix] = colorf.lane<2>();
			blk.data_a[tix] = colorf.lane<3>();
		}
	}
}
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
/* See header for documentation. */
|
||||
float compute_symbolic_block_difference_2plane(
	const astcenc_config& config,
	const block_size_descriptor& bsd,
	const symbolic_compressed_block& scb,
	const image_block& blk
) {
	// If we detected an error-block, blow up immediately.
	if (scb.block_type == SYM_BTYPE_ERROR)
	{
		return ERROR_CALC_DEFAULT;
	}

	// This fast path only handles single-partition dual-plane encodings
	assert(scb.block_mode >= 0);
	assert(scb.partition_count == 1);
	assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 1);

	// Get the appropriate block descriptor
	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);

	// Unquantize and undecimate the weights
	int plane1_weights[BLOCK_MAX_TEXELS];
	int plane2_weights[BLOCK_MAX_TEXELS];
	unpack_weights(bsd, scb, di, true, plane1_weights, plane2_weights);

	// plane2_mask selects which single component uses the plane 2 weights
	vmask4 plane2_mask = vint4::lane_id() == vint4(scb.plane2_component);

	vfloat4 summa = vfloat4::zero();

	// Decode the color endpoints for this partition
	vint4 ep0;
	vint4 ep1;
	bool rgb_lns;
	bool a_lns;

	unpack_color_endpoints(config.profile,
	                       scb.color_formats[0],
	                       scb.color_values[0],
	                       rgb_lns, a_lns,
	                       ep0, ep1);

	vmask4 u8_mask = get_u8_component_mask(config.profile, blk);

	// Unpack and compute error for each texel in the partition
	unsigned int texel_count = bsd.texel_count;
	for (unsigned int i = 0; i < texel_count; i++)
	{
		vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask);
		vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight);

		vfloat4 color = int_to_float(colori);
		vfloat4 oldColor = blk.texel(i);

		// Compare error using a perceptual decode metric for RGBM textures
		if (config.flags & ASTCENC_FLG_MAP_RGBM)
		{
			// Fail encodings that result in zero weight M pixels. Note that this can cause
			// "interesting" artifacts if we reject all useful encodings - we typically get max
			// brightness encodings instead which look just as bad. We recommend users apply a
			// bias to their stored M value, limiting the lower value to 16 or 32 to avoid
			// getting small M values post-quantization, but we can't prove it would never
			// happen, especially at low bit rates ...
			if (color.lane<3>() == 0.0f)
			{
				return -ERROR_CALC_DEFAULT;
			}

			// Compute error based on decoded RGBM color
			color = vfloat4(
			    color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
			    color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
			    color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
			    1.0f
			);

			oldColor = vfloat4(
			    oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
			    oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
			    oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
			    1.0f
			);
		}

		// Clamp the per-channel error before squaring to avoid float overflow
		vfloat4 error = oldColor - color;
		error = min(abs(error), 1e15f);
		error = error * error;

		summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
	}

	return summa.lane<0>();
}
|
||||
|
||||
/* See header for documentation. */
|
||||
float compute_symbolic_block_difference_1plane(
	const astcenc_config& config,
	const block_size_descriptor& bsd,
	const symbolic_compressed_block& scb,
	const image_block& blk
) {
	// This path only handles single-plane encodings
	assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 0);

	// If we detected an error-block, blow up immediately.
	if (scb.block_type == SYM_BTYPE_ERROR)
	{
		return ERROR_CALC_DEFAULT;
	}

	assert(scb.block_mode >= 0);

	// Get the appropriate partition-table entry
	unsigned int partition_count = scb.partition_count;
	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);

	// Get the appropriate block descriptor
	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);

	// Unquantize and undecimate the weights
	int plane1_weights[BLOCK_MAX_TEXELS];
	unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);

	vmask4 u8_mask = get_u8_component_mask(config.profile, blk);

	vfloat4 summa = vfloat4::zero();
	for (unsigned int i = 0; i < partition_count; i++)
	{
		// Decode the color endpoints for this partition
		vint4 ep0;
		vint4 ep1;
		bool rgb_lns;
		bool a_lns;

		unpack_color_endpoints(config.profile,
		                       scb.color_formats[i],
		                       scb.color_values[i],
		                       rgb_lns, a_lns,
		                       ep0, ep1);

		// Unpack and compute error for each texel in the partition
		unsigned int texel_count = pi.partition_texel_count[i];
		for (unsigned int j = 0; j < texel_count; j++)
		{
			unsigned int tix = pi.texels_of_partition[i][j];
			vint4 colori = lerp_color_int(u8_mask, ep0, ep1,
			                              vint4(plane1_weights[tix]));

			vfloat4 color = int_to_float(colori);
			vfloat4 oldColor = blk.texel(tix);

			// Compare error using a perceptual decode metric for RGBM textures
			if (config.flags & ASTCENC_FLG_MAP_RGBM)
			{
				// Fail encodings that result in zero weight M pixels. Note that this can cause
				// "interesting" artifacts if we reject all useful encodings - we typically get max
				// brightness encodings instead which look just as bad. We recommend users apply a
				// bias to their stored M value, limiting the lower value to 16 or 32 to avoid
				// getting small M values post-quantization, but we can't prove it would never
				// happen, especially at low bit rates ...
				if (color.lane<3>() == 0.0f)
				{
					return -ERROR_CALC_DEFAULT;
				}

				// Compute error based on decoded RGBM color
				color = vfloat4(
				    color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
				    color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
				    color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
				    1.0f
				);

				oldColor = vfloat4(
				    oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
				    oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
				    oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
				    1.0f
				);
			}

			// Clamp the per-channel error before squaring to avoid float overflow
			vfloat4 error = oldColor - color;
			error = min(abs(error), 1e15f);
			error = error * error;

			summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
		}
	}

	return summa.lane<0>();
}
|
||||
|
||||
/* See header for documentation. */
|
||||
float compute_symbolic_block_difference_1plane_1partition(
	const astcenc_config& config,
	const block_size_descriptor& bsd,
	const symbolic_compressed_block& scb,
	const image_block& blk
) {
	// If we detected an error-block, blow up immediately.
	if (scb.block_type == SYM_BTYPE_ERROR)
	{
		return ERROR_CALC_DEFAULT;
	}

	// This fast path only handles single-plane single-partition encodings
	assert(scb.block_mode >= 0);
	assert(bsd.get_partition_info(scb.partition_count, scb.partition_index).partition_count == 1);

	// Get the appropriate block descriptor
	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);

	// Unquantize and undecimate the weights
	// Aligned storage is required for the vint::loada() loads below
	ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];
	unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);

	// Decode the color endpoints for this partition
	vint4 ep0;
	vint4 ep1;
	bool rgb_lns;
	bool a_lns;

	unpack_color_endpoints(config.profile,
	                       scb.color_formats[0],
	                       scb.color_values[0],
	                       rgb_lns, a_lns,
	                       ep0, ep1);

	vmask4 u8_mask = get_u8_component_mask(config.profile, blk);

	// Unpack and compute error for each texel in the partition
	vfloatacc summav = vfloatacc::zero();

	vint lane_id = vint::lane_id();

	unsigned int texel_count = bsd.texel_count;
	for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
	{
		// Compute EP1 contribution
		vint weight1 = vint::loada(plane1_weights + i);
		vint ep1_r = vint(ep1.lane<0>()) * weight1;
		vint ep1_g = vint(ep1.lane<1>()) * weight1;
		vint ep1_b = vint(ep1.lane<2>()) * weight1;
		vint ep1_a = vint(ep1.lane<3>()) * weight1;

		// Compute EP0 contribution
		vint weight0 = vint(64) - weight1;
		vint ep0_r = vint(ep0.lane<0>()) * weight0;
		vint ep0_g = vint(ep0.lane<1>()) * weight0;
		vint ep0_b = vint(ep0.lane<2>()) * weight0;
		vint ep0_a = vint(ep0.lane<3>()) * weight0;

		// Combine contributions; +32 then >> 6 rounds the /64 normalization
		vint colori_r = asr<6>(ep0_r + ep1_r + vint(32));
		vint colori_g = asr<6>(ep0_g + ep1_g + vint(32));
		vint colori_b = asr<6>(ep0_b + ep1_b + vint(32));
		vint colori_a = asr<6>(ep0_a + ep1_a + vint(32));

		// If using a U8 decode mode bit replicate top 8 bits
		// so rest of codec can assume 0xFFFF max range everywhere
		vint colori_r8 = asr<8>(colori_r) * vint(257);
		colori_r = select(colori_r, colori_r8, vmask(u8_mask.lane<0>()));

		vint colori_g8 = asr<8>(colori_g) * vint(257);
		colori_g = select(colori_g, colori_g8, vmask(u8_mask.lane<1>()));

		vint colori_b8 = asr<8>(colori_b) * vint(257);
		colori_b = select(colori_b, colori_b8, vmask(u8_mask.lane<2>()));

		vint colori_a8 = asr<8>(colori_a) * vint(257);
		colori_a = select(colori_a, colori_a8, vmask(u8_mask.lane<3>()));

		// Compute color diff
		vfloat color_r = int_to_float(colori_r);
		vfloat color_g = int_to_float(colori_g);
		vfloat color_b = int_to_float(colori_b);
		vfloat color_a = int_to_float(colori_a);

		vfloat color_orig_r = loada(blk.data_r + i);
		vfloat color_orig_g = loada(blk.data_g + i);
		vfloat color_orig_b = loada(blk.data_b + i);
		vfloat color_orig_a = loada(blk.data_a + i);

		// Clamp the per-channel error before squaring to avoid float overflow
		vfloat color_error_r = min(abs(color_orig_r - color_r), vfloat(1e15f));
		vfloat color_error_g = min(abs(color_orig_g - color_g), vfloat(1e15f));
		vfloat color_error_b = min(abs(color_orig_b - color_b), vfloat(1e15f));
		vfloat color_error_a = min(abs(color_orig_a - color_a), vfloat(1e15f));

		// Compute squared error metric
		color_error_r = color_error_r * color_error_r;
		color_error_g = color_error_g * color_error_g;
		color_error_b = color_error_b * color_error_b;
		color_error_a = color_error_a * color_error_a;

		vfloat metric = color_error_r * blk.channel_weight.lane<0>()
		              + color_error_g * blk.channel_weight.lane<1>()
		              + color_error_b * blk.channel_weight.lane<2>()
		              + color_error_a * blk.channel_weight.lane<3>();

		// Mask off bad lanes
		// (the final iteration may overshoot texel_count by up to SIMD width - 1)
		vmask mask = lane_id < vint(texel_count);
		lane_id += vint(ASTCENC_SIMD_WIDTH);
		haccumulate(summav, metric, mask);
	}

	return hadd_s(summav);
}
|
||||
|
||||
#endif
|
||||
245
engine/thirdparty/astcenc/astcenc_diagnostic_trace.cpp
vendored
Normal file
245
engine/thirdparty/astcenc/astcenc_diagnostic_trace.cpp
vendored
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2021-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for the library entrypoint.
|
||||
*/
|
||||
|
||||
#if defined(ASTCENC_DIAGNOSTICS)
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "astcenc_diagnostic_trace.h"
|
||||
|
||||
/** @brief The global trace logger. */
|
||||
static TraceLog* g_TraceLog = nullptr;
|
||||
|
||||
/** @brief The JSON indentation level. */
|
||||
static const size_t g_trace_indent = 2;
|
||||
|
||||
TraceLog::TraceLog(
	const char* file_name):
	m_file(file_name, std::ofstream::out | std::ofstream::binary)
{
	// Only one trace log may exist at a time; register as the global instance
	assert(!g_TraceLog);
	g_TraceLog = this;
	// The root node anchors the JSON tree and pushes itself onto m_stack
	m_root = new TraceNode("root");
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
TraceNode* TraceLog::get_current_leaf()
{
	// The newest node sits at the back of the stack; an empty stack
	// means no node is currently open
	return m_stack.empty() ? nullptr : m_stack.back();
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
size_t TraceLog::get_depth()
{
	// Depth equals the number of currently-open nodes
	return m_stack.size();
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
TraceLog::~TraceLog()
{
	// Destroying the root node closes out the JSON document
	assert(g_TraceLog == this);
	delete m_root;
	g_TraceLog = nullptr;
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
TraceNode::TraceNode(
	const char* format,
	...
) {
	// Format the name string
	constexpr size_t bufsz = 256;
	char buffer[bufsz];

	va_list args;
	va_start (args, format);
	vsnprintf (buffer, bufsz, format, args);
	va_end (args);

	// Guarantee there is a nul terminator
	buffer[bufsz - 1] = 0;

	// Generate the node
	// Capture parent and depth BEFORE pushing self onto the stack
	TraceNode* parent = g_TraceLog->get_current_leaf();
	size_t depth = g_TraceLog->get_depth();
	g_TraceLog->m_stack.push_back(this);

	// A comma is needed if the parent already emitted attribs or children
	bool comma = parent && parent->m_attrib_count;
	auto& out = g_TraceLog->m_file;

	if (parent)
	{
		parent->m_attrib_count++;
	}

	if (comma)
	{
		out << ',';
	}

	if (depth)
	{
		out << '\n';
	}

	// Each tree level uses two indent steps: one for the node wrapper,
	// one for its attribute array
	size_t out_indent = (depth * 2) * g_trace_indent;
	size_t in_indent = (depth * 2 + 1) * g_trace_indent;

	std::string out_indents("");
	if (out_indent)
	{
		out_indents = std::string(out_indent, ' ');
	}

	std::string in_indents(in_indent, ' ');

	// Emit the node header; the matching closers are written by ~TraceNode()
	out << out_indents << "[ \"node\", \"" << buffer << "\",\n";
	out << in_indents << "[";
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
void TraceNode::add_attrib(
	std::string type,
	std::string key,
	std::string value
) {
	// Type is currently unused in the serialized output
	(void)type;

	size_t depth = g_TraceLog->get_depth();
	size_t indent = (depth * 2) * g_trace_indent;
	auto& out = g_TraceLog->m_file;
	// A comma is needed between this attrib and any earlier siblings
	bool comma = m_attrib_count;
	m_attrib_count++;

	if (comma)
	{
		out << ',';
	}

	// Emit the attribute as a [ "key", value ] JSON pair; value is written
	// verbatim so the caller must pre-quote string values
	out << '\n';
	out << std::string(indent, ' ') << "[ "
	    << "\"" << key << "\", "
	    << value << " ]";
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
TraceNode::~TraceNode()
{
	// Pop self first so depth reflects the parent level for indentation
	g_TraceLog->m_stack.pop_back();

	auto& out = g_TraceLog->m_file;
	size_t depth = g_TraceLog->get_depth();
	size_t out_indent = (depth * 2) * g_trace_indent;
	size_t in_indent = (depth * 2 + 1) * g_trace_indent;

	std::string out_indents("");
	if (out_indent)
	{
		out_indents = std::string(out_indent, ' ');
	}

	std::string in_indents(in_indent, ' ');

	// Close the attribute array opened by the constructor
	if (m_attrib_count)
	{
		out << "\n" << in_indents;
	}
	out << "]\n";

	// Close the node wrapper opened by the constructor
	out << out_indents << "]";
}
|
||||
|
||||
/* See header for documentation. */
|
||||
/* See header for documentation. */
void trace_add_data(
	const char* key,
	const char* format,
	...
) {
	// Format the value string
	constexpr size_t bufsz = 256;
	char buffer[bufsz];

	va_list args;
	va_start (args, format);
	vsnprintf (buffer, bufsz, format, args);
	va_end (args);

	// Guarantee there is a nul terminator
	buffer[bufsz - 1] = 0;

	// String values must be quoted for the JSON output
	std::string value = "\"" + std::string(buffer) + "\"";

	TraceNode* node = g_TraceLog->get_current_leaf();
	node->add_attrib("str", key, value);
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void trace_add_data(
|
||||
const char* key,
|
||||
float value
|
||||
) {
|
||||
// Turn infinities into parseable values
|
||||
if (std::isinf(value))
|
||||
{
|
||||
if (value > 0.0f)
|
||||
{
|
||||
value = std::numeric_limits<float>::max();
|
||||
}
|
||||
else
|
||||
{
|
||||
value = -std::numeric_limits<float>::max();
|
||||
}
|
||||
}
|
||||
|
||||
char buffer[256];
|
||||
sprintf(buffer, "%.20g", (double)value);
|
||||
TraceNode* node = g_TraceLog->get_current_leaf();
|
||||
node->add_attrib("float", key, buffer);
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void trace_add_data(
|
||||
const char* key,
|
||||
int value
|
||||
) {
|
||||
TraceNode* node = g_TraceLog->get_current_leaf();
|
||||
node->add_attrib("int", key, std::to_string(value));
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void trace_add_data(
|
||||
const char* key,
|
||||
unsigned int value
|
||||
) {
|
||||
TraceNode* node = g_TraceLog->get_current_leaf();
|
||||
node->add_attrib("int", key, std::to_string(value));
|
||||
}
|
||||
|
||||
#endif
|
||||
219
engine/thirdparty/astcenc/astcenc_diagnostic_trace.h
vendored
Normal file
219
engine/thirdparty/astcenc/astcenc_diagnostic_trace.h
vendored
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2021-2022 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief This module provides a set of diagnostic tracing utilities.
|
||||
*
|
||||
* Overview
|
||||
* ========
|
||||
*
|
||||
* The built-in diagnostic trace tool generates a hierarchical JSON tree structure. The tree
|
||||
* hierarchy contains three levels:
|
||||
*
|
||||
* - block
|
||||
* - pass
|
||||
* - candidate
|
||||
*
|
||||
* One block node exists for each compressed block in the image. One pass node exists for each major
|
||||
* pass (N partition, M planes, O components) applied to a block. One candidate node exists for each
|
||||
* encoding candidate trialed for a pass.
|
||||
*
|
||||
* Each node contains both the hierarchy but also a number of attributes which explain the behavior.
|
||||
* For example, the block node contains the block coordinates in the image, the pass explains the
|
||||
* pass configuration, and the candidate will explain the candidate encoding such as weight
|
||||
* decimation, refinement error, etc.
|
||||
*
|
||||
* Trace Nodes are designed as scope-managed C++ objects with stack-like push/pop behavior.
|
||||
* Constructing a trace node on the stack will automatically add it to the current node as a child,
|
||||
* and then make it the current node. Destroying the current node will pop the stack and set the
|
||||
* parent to the current node. This provides a robust mechanism for ensuring reliable nesting in the
|
||||
* tree structure.
|
||||
*
|
||||
* A set of utility macros are provided to add attribute annotations to the current trace node.
|
||||
*
|
||||
* Usage
|
||||
* =====
|
||||
*
|
||||
* Create Trace Nodes on the stack using the @c TRACE_NODE() macro. This will compile-out completely
|
||||
* in builds with diagnostics disabled.
|
||||
*
|
||||
* Add annotations to the current trace node using the @c trace_add_data() macro. This will
|
||||
* similarly compile out completely in builds with diagnostics disabled.
|
||||
*
|
||||
* If you need to add additional code to support diagnostics-only behavior wrap
|
||||
* it in preprocessor guards:
|
||||
*
|
||||
* #if defined(ASTCENC_DIAGNOSTICS)
|
||||
* #endif
|
||||
*/
|
||||
|
||||
#ifndef ASTCENC_DIAGNOSTIC_TRACE_INCLUDED
|
||||
#define ASTCENC_DIAGNOSTIC_TRACE_INCLUDED
|
||||
|
||||
#if defined(ASTCENC_DIAGNOSTICS)
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* @brief Class representing a single node in the trace hierarchy.
|
||||
*/
|
||||
class TraceNode
{
public:
	/**
	 * @brief Construct a new node.
	 *
	 * Constructing a node will push to the the top of the stack, automatically making it a child of
	 * the current node, and then setting it to become the current node.
	 *
	 * @param format The format template for the node name.
	 * @param ...    The format parameters.
	 */
	TraceNode(const char* format, ...);

	/**
	 * @brief Add an attribute to this node.
	 *
	 * Note that no quoting is applied to the @c value, so if quoting is needed it must be done by
	 * the caller.
	 *
	 * @param type  The type of the attribute.
	 * @param key   The key of the attribute.
	 * @param value The value of the attribute.
	 */
	void add_attrib(std::string type, std::string key, std::string value);

	/**
	 * @brief Destroy this node.
	 *
	 * Destroying a node will pop it from the top of the stack, making its parent the current node.
	 * It is invalid behavior to destroy a node that is not the current node; usage must conform to
	 * stack push-pop semantics.
	 */
	~TraceNode();

	/**
	 * @brief The number of attributes and child nodes in this node.
	 *
	 * Public because child nodes increment their parent's count when they
	 * are constructed, in addition to increments from add_attrib().
	 */
	unsigned int m_attrib_count { 0 };
};
|
||||
|
||||
/**
 * @brief Class representing the trace log file being written.
 */
class TraceLog
{
public:
	/**
	 * @brief Create a new trace log.
	 *
	 * The trace log is global; there can be only one at a time.
	 *
	 * @param file_name   The name of the file to write.
	 */
	TraceLog(const char* file_name);

	/**
	 * @brief Destroy the trace log.
	 *
	 * Trace logs MUST be cleanly destroyed to ensure the file gets written.
	 */
	~TraceLog();

	/**
	 * @brief Get the current child node.
	 *
	 * @return The current leaf node.
	 */
	TraceNode* get_current_leaf();

	/**
	 * @brief Get the stack depth of the current child node.
	 *
	 * @return The current leaf node stack depth.
	 */
	size_t get_depth();

	/**
	 * @brief The file stream to write to.
	 */
	std::ofstream m_file;

	/**
	 * @brief The stack of nodes (newest at the back).
	 */
	std::vector<TraceNode*> m_stack;

private:
	/**
	 * @brief The root node in the JSON file.
	 */
	TraceNode* m_root;
};
|
||||
|
||||
/**
|
||||
* @brief Utility macro to create a trace node on the stack.
|
||||
*
|
||||
* @param name The variable name to use.
|
||||
* @param ... The name template and format parameters.
|
||||
*/
|
||||
#define TRACE_NODE(name, ...) TraceNode name(__VA_ARGS__);
|
||||
|
||||
/**
|
||||
* @brief Add a string annotation to the current node.
|
||||
*
|
||||
* @param key The name of the attribute.
|
||||
* @param format The format template for the attribute value.
|
||||
* @param ... The format parameters.
|
||||
*/
|
||||
void trace_add_data(const char* key, const char* format, ...);
|
||||
|
||||
/**
|
||||
* @brief Add a float annotation to the current node.
|
||||
*
|
||||
* @param key The name of the attribute.
|
||||
* @param value The value of the attribute.
|
||||
*/
|
||||
void trace_add_data(const char* key, float value);
|
||||
|
||||
/**
|
||||
* @brief Add an integer annotation to the current node.
|
||||
*
|
||||
* @param key The name of the attribute.
|
||||
* @param value The value of the attribute.
|
||||
*/
|
||||
void trace_add_data(const char* key, int value);
|
||||
|
||||
/**
|
||||
* @brief Add an unsigned integer annotation to the current node.
|
||||
*
|
||||
* @param key The name of the attribute.
|
||||
* @param value The value of the attribute.
|
||||
*/
|
||||
void trace_add_data(const char* key, unsigned int value);
|
||||
|
||||
#else
|
||||
|
||||
#define TRACE_NODE(name, ...)
|
||||
|
||||
#define trace_add_data(...)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1390
engine/thirdparty/astcenc/astcenc_entry.cpp
vendored
Normal file
1390
engine/thirdparty/astcenc/astcenc_entry.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
781
engine/thirdparty/astcenc/astcenc_find_best_partitioning.cpp
vendored
Normal file
781
engine/thirdparty/astcenc/astcenc_find_best_partitioning.cpp
vendored
Normal file
|
|
@ -0,0 +1,781 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
/**
|
||||
* @brief Functions for finding best partition for a block.
|
||||
*
|
||||
* The partition search operates in two stages. The first pass uses kmeans clustering to group
|
||||
* texels into an ideal partitioning for the requested partition count, and then compares that
|
||||
* against the 1024 partitionings generated by the ASTC partition hash function. The generated
|
||||
* partitions are then ranked by the number of texels in the wrong partition, compared to the ideal
|
||||
* clustering. All 1024 partitions are tested for similarity and ranked, apart from duplicates and
|
||||
* partitionings that actually generate fewer than the requested partition count, but only the top
|
||||
* N candidates are actually put through a more detailed search. N is determined by the compressor
|
||||
* quality preset.
|
||||
*
|
||||
* For the detailed search, each candidate is checked against two possible encoding methods:
|
||||
*
|
||||
* - The best partitioning assuming different chroma colors (RGB + RGB or RGB + delta endpoints).
|
||||
* - The best partitioning assuming same chroma colors (RGB + scale endpoints).
|
||||
*
|
||||
* This is implemented by computing the compute mean color and dominant direction for each
|
||||
* partition. This defines two lines, both of which go through the mean color value.
|
||||
*
|
||||
* - One line has a direction defined by the dominant direction; this is used to assess the error
|
||||
* from using an uncorrelated color representation.
|
||||
* - The other line goes through (0,0,0,1) and is used to assess the error from using a same chroma
|
||||
* (RGB + scale) color representation.
|
||||
*
|
||||
* The best candidate is selected by computing the squared-errors that result from using these
|
||||
* lines for endpoint selection.
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
/**
 * @brief Pick some initial kmeans cluster centers.
 *
 * Implements a kmeans++-style seeding: the first center is a fixed pseudo-random texel, and each
 * subsequent center is picked with probability proportional to its (channel-weighted) squared
 * distance from the nearest already-chosen center. The "random" draws come from a small table of
 * fixed constants so the result is fully deterministic.
 *
 * @param      blk               The image block color data to compress.
 * @param      texel_count       The number of texels in the block.
 * @param      partition_count   The number of partitions in the block.
 * @param[out] cluster_centers   The initial partition cluster center colors.
 */
static void kmeans_init(
	const image_block& blk,
	unsigned int texel_count,
	unsigned int partition_count,
	vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]
) {
	promise(texel_count > 0);
	promise(partition_count > 0);

	unsigned int clusters_selected = 0;
	float distances[BLOCK_MAX_TEXELS];

	// Pick a random sample as first cluster center; 145897 from random.org
	unsigned int sample = 145897 % texel_count;
	vfloat4 center_color = blk.texel(sample);
	cluster_centers[clusters_selected] = center_color;
	clusters_selected++;

	// Compute the distance to the first cluster center
	float distance_sum = 0.0f;
	for (unsigned int i = 0; i < texel_count; i++)
	{
		vfloat4 color = blk.texel(i);
		vfloat4 diff = color - center_color;
		float distance = dot_s(diff * diff, blk.channel_weight);
		distance_sum += distance;
		distances[i] = distance;
	}

	// More numbers from random.org for weighted-random center selection
	const float cluster_cutoffs[9] {
		0.626220f, 0.932770f, 0.275454f,
		0.318558f, 0.240113f, 0.009190f,
		0.347661f, 0.731960f, 0.156391f
	};

	// Index into the cutoff table; offset by partition count so different partition counts
	// consume distinct constants (at most 3 extra centers are ever selected)
	unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2);

	// Pick the remaining samples as needed
	while (true)
	{
		// Pick the next center in a weighted-random fashion.
		float summa = 0.0f;
		float distance_cutoff = distance_sum * cluster_cutoffs[cutoff++];
		for (sample = 0; sample < texel_count; sample++)
		{
			summa += distances[sample];
			if (summa >= distance_cutoff)
			{
				break;
			}
		}

		// Clamp to a valid range and store the selected cluster center
		sample = astc::min(sample, texel_count - 1);

		center_color = blk.texel(sample);
		cluster_centers[clusters_selected++] = center_color;
		if (clusters_selected >= partition_count)
		{
			break;
		}

		// Compute the distance to the new cluster center, keep the min dist
		distance_sum = 0.0f;
		for (unsigned int i = 0; i < texel_count; i++)
		{
			vfloat4 color = blk.texel(i);
			vfloat4 diff = color - center_color;
			float distance = dot_s(diff * diff, blk.channel_weight);
			distance = astc::min(distance, distances[i]);
			distance_sum += distance;
			distances[i] = distance;
		}
	}
}
|
||||
|
||||
/**
 * @brief Assign texels to clusters, based on a set of chosen center points.
 *
 * Each texel is assigned to the cluster center with the smallest channel-weighted squared
 * distance. A repair pass then guarantees that no partition ends up empty.
 *
 * @param      blk                  The image block color data to compress.
 * @param      texel_count          The number of texels in the block.
 * @param      partition_count      The number of partitions in the block.
 * @param      cluster_centers      The partition cluster center colors.
 * @param[out] partition_of_texel   The partition assigned for each texel.
 */
static void kmeans_assign(
	const image_block& blk,
	unsigned int texel_count,
	unsigned int partition_count,
	const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
	uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
) {
	promise(texel_count > 0);
	promise(partition_count > 0);

	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };

	// Find the best partition for every texel
	for (unsigned int i = 0; i < texel_count; i++)
	{
		float best_distance = std::numeric_limits<float>::max();
		unsigned int best_partition = 0;

		vfloat4 color = blk.texel(i);
		for (unsigned int j = 0; j < partition_count; j++)
		{
			vfloat4 diff = color - cluster_centers[j];
			float distance = dot_s(diff * diff, blk.channel_weight);
			if (distance < best_distance)
			{
				best_distance = distance;
				best_partition = j;
			}
		}

		partition_of_texel[i] = static_cast<uint8_t>(best_partition);
		partition_texel_count[best_partition]++;
	}

	// It is possible to get a situation where a partition ends up without any texels. In this case,
	// assign texel N to partition N. This is silly, but ensures that every partition retains at
	// least one texel. Reassigning a texel in this manner may cause another partition to go empty,
	// so if we actually did a reassignment, run the whole loop over again.
	// Note: this loop only inspects texels 0..partition_count-1, which is intentional — texel i is
	// the designated donor for partition i. Termination is guaranteed because each pass strictly
	// pins another low-index texel to its own partition.
	bool problem_case;
	do
	{
		problem_case = false;
		for (unsigned int i = 0; i < partition_count; i++)
		{
			if (partition_texel_count[i] == 0)
			{
				partition_texel_count[partition_of_texel[i]]--;
				partition_texel_count[i]++;
				partition_of_texel[i] = static_cast<uint8_t>(i);
				problem_case = true;
			}
		}
	} while (problem_case);
}
|
||||
|
||||
/**
|
||||
* @brief Compute new cluster centers based on their center of gravity.
|
||||
*
|
||||
* @param blk The image block color data to compress.
|
||||
* @param texel_count The number of texels in the block.
|
||||
* @param partition_count The number of partitions in the block.
|
||||
* @param[out] cluster_centers The new cluster center colors.
|
||||
* @param partition_of_texel The partition assigned for each texel.
|
||||
*/
|
||||
static void kmeans_update(
|
||||
const image_block& blk,
|
||||
unsigned int texel_count,
|
||||
unsigned int partition_count,
|
||||
vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS],
|
||||
const uint8_t partition_of_texel[BLOCK_MAX_TEXELS]
|
||||
) {
|
||||
promise(texel_count > 0);
|
||||
promise(partition_count > 0);
|
||||
|
||||
vfloat4 color_sum[BLOCK_MAX_PARTITIONS] {
|
||||
vfloat4::zero(),
|
||||
vfloat4::zero(),
|
||||
vfloat4::zero(),
|
||||
vfloat4::zero()
|
||||
};
|
||||
|
||||
uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };
|
||||
|
||||
// Find the center-of-gravity in each cluster
|
||||
for (unsigned int i = 0; i < texel_count; i++)
|
||||
{
|
||||
uint8_t partition = partition_of_texel[i];
|
||||
color_sum[partition] += blk.texel(i);
|
||||
partition_texel_count[partition]++;
|
||||
}
|
||||
|
||||
// Set the center of gravity to be the new cluster center
|
||||
for (unsigned int i = 0; i < partition_count; i++)
|
||||
{
|
||||
float scale = 1.0f / static_cast<float>(partition_texel_count[i]);
|
||||
cluster_centers[i] = color_sum[i] * scale;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Compute bit-mismatch for partitioning in 2-partition mode.
|
||||
*
|
||||
* @param a The texel assignment bitvector for the block.
|
||||
* @param b The texel assignment bitvector for the partition table.
|
||||
*
|
||||
* @return The number of bit mismatches.
|
||||
*/
|
||||
static inline uint8_t partition_mismatch2(
|
||||
const uint64_t a[2],
|
||||
const uint64_t b[2]
|
||||
) {
|
||||
int v1 = popcount(a[0] ^ b[0]) + popcount(a[1] ^ b[1]);
|
||||
int v2 = popcount(a[0] ^ b[1]) + popcount(a[1] ^ b[0]);
|
||||
|
||||
// Divide by 2 because XOR always counts errors twice, once when missing
|
||||
// in the expected position, and again when present in the wrong partition
|
||||
return static_cast<uint8_t>(astc::min(v1, v2) / 2);
|
||||
}
|
||||
|
||||
/**
 * @brief Compute bit-mismatch for partitioning in 3-partition mode.
 *
 * Partition labels are arbitrary, so all 6 permutations mapping block partitions onto table
 * partitions are evaluated and the cheapest taken.
 *
 * @param a   The texel assignment bitvector for the block.
 * @param b   The texel assignment bitvector for the partition table.
 *
 * @return The number of bit mismatches.
 */
static inline uint8_t partition_mismatch3(
	const uint64_t a[3],
	const uint64_t b[3]
) {
	// p<i><j> is the XOR population count between block partition i and table partition j
	int p00 = popcount(a[0] ^ b[0]);
	int p01 = popcount(a[0] ^ b[1]);
	int p02 = popcount(a[0] ^ b[2]);

	int p10 = popcount(a[1] ^ b[0]);
	int p11 = popcount(a[1] ^ b[1]);
	int p12 = popcount(a[1] ^ b[2]);

	int p20 = popcount(a[2] ^ b[0]);
	int p21 = popcount(a[2] ^ b[1]);
	int p22 = popcount(a[2] ^ b[2]);

	// v0: permutations that map a0 -> b0
	int s0 = p11 + p22;
	int s1 = p12 + p21;
	int v0 = astc::min(s0, s1) + p00;

	// v1: permutations that map a0 -> b1
	int s2 = p10 + p22;
	int s3 = p12 + p20;
	int v1 = astc::min(s2, s3) + p01;

	// v2: permutations that map a0 -> b2
	int s4 = p10 + p21;
	int s5 = p11 + p20;
	int v2 = astc::min(s4, s5) + p02;

	// Divide by 2 because XOR always counts errors twice, once when missing
	// in the expected position, and again when present in the wrong partition
	return static_cast<uint8_t>(astc::min(v0, v1, v2) / 2);
}
|
||||
|
||||
/**
 * @brief Compute bit-mismatch for partitioning in 4-partition mode.
 *
 * Partition labels are arbitrary, so all 24 permutations mapping block partitions onto table
 * partitions are evaluated (factored via shared 2-element sub-minima) and the cheapest taken.
 *
 * @param a   The texel assignment bitvector for the block.
 * @param b   The texel assignment bitvector for the partition table.
 *
 * @return The number of bit mismatches.
 */
static inline uint8_t partition_mismatch4(
	const uint64_t a[4],
	const uint64_t b[4]
) {
	// p<i><j> is the XOR population count between block partition i and table partition j
	int p00 = popcount(a[0] ^ b[0]);
	int p01 = popcount(a[0] ^ b[1]);
	int p02 = popcount(a[0] ^ b[2]);
	int p03 = popcount(a[0] ^ b[3]);

	int p10 = popcount(a[1] ^ b[0]);
	int p11 = popcount(a[1] ^ b[1]);
	int p12 = popcount(a[1] ^ b[2]);
	int p13 = popcount(a[1] ^ b[3]);

	int p20 = popcount(a[2] ^ b[0]);
	int p21 = popcount(a[2] ^ b[1]);
	int p22 = popcount(a[2] ^ b[2]);
	int p23 = popcount(a[2] ^ b[3]);

	int p30 = popcount(a[3] ^ b[0]);
	int p31 = popcount(a[3] ^ b[1]);
	int p32 = popcount(a[3] ^ b[2]);
	int p33 = popcount(a[3] ^ b[3]);

	// mx<i><j> is the best cost of assigning block partitions 2 and 3 to table partitions i and j
	int mx23 = astc::min(p22 + p33, p23 + p32);
	int mx13 = astc::min(p21 + p33, p23 + p31);
	int mx12 = astc::min(p21 + p32, p22 + p31);
	int mx03 = astc::min(p20 + p33, p23 + p30);
	int mx02 = astc::min(p20 + p32, p22 + p30);
	int mx01 = astc::min(p21 + p30, p20 + p31);

	// v<j>: best cost over permutations that map a0 -> b<j>
	int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12);
	int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02);
	int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01);
	int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12);

	// Divide by 2 because XOR always counts errors twice, once when missing
	// in the expected position, and again when present in the wrong partition
	return static_cast<uint8_t>(astc::min(v0, v1, v2, v3) / 2);
}
|
||||
|
||||
/**
 * @brief Function pointer type matching the @c partition_mismatch2/3/4 kernels.
 *
 * The kernels all return @c uint8_t, so the alias must too; with an
 * @c unsigned @c int return type none of them could bind to this alias.
 */
using mismatch_dispatch = uint8_t (*)(const uint64_t*, const uint64_t*);
|
||||
|
||||
/**
|
||||
* @brief Count the partition table mismatches vs the data clustering.
|
||||
*
|
||||
* @param bsd The block size information.
|
||||
* @param partition_count The number of partitions in the block.
|
||||
* @param bitmaps The block texel partition assignment patterns.
|
||||
* @param[out] mismatch_counts The array storing per partitioning mismatch counts.
|
||||
*/
|
||||
static void count_partition_mismatch_bits(
|
||||
const block_size_descriptor& bsd,
|
||||
unsigned int partition_count,
|
||||
const uint64_t bitmaps[BLOCK_MAX_PARTITIONS],
|
||||
uint8_t mismatch_counts[BLOCK_MAX_PARTITIONINGS]
|
||||
) {
|
||||
unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1];
|
||||
promise(active_count > 0);
|
||||
|
||||
if (partition_count == 2)
|
||||
{
|
||||
for (unsigned int i = 0; i < active_count; i++)
|
||||
{
|
||||
mismatch_counts[i] = partition_mismatch2(bitmaps, bsd.coverage_bitmaps_2[i]);
|
||||
assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS);
|
||||
assert(mismatch_counts[i] < bsd.texel_count);
|
||||
}
|
||||
}
|
||||
else if (partition_count == 3)
|
||||
{
|
||||
for (unsigned int i = 0; i < active_count; i++)
|
||||
{
|
||||
mismatch_counts[i] = partition_mismatch3(bitmaps, bsd.coverage_bitmaps_3[i]);
|
||||
assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS);
|
||||
assert(mismatch_counts[i] < bsd.texel_count);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned int i = 0; i < active_count; i++)
|
||||
{
|
||||
mismatch_counts[i] = partition_mismatch4(bitmaps, bsd.coverage_bitmaps_4[i]);
|
||||
assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS);
|
||||
assert(mismatch_counts[i] < bsd.texel_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Use counting sort on the mismatch array to sort partition candidates.
 *
 * @param      texel_count          The number of texels fed into the mismatch scoring; this
 *                                  bounds the histogram buckets, as every mismatch count is
 *                                  strictly less than it (asserted by the caller).
 * @param      partitioning_count   The number of packed partitionings.
 * @param      mismatch_count       Partitioning mismatch counts, in index order.
 * @param[out] partition_ordering   Partition index values, in mismatch order.
 *
 * @return The number of active partitions in this selection.
 */
static unsigned int get_partition_ordering_by_mismatch_bits(
	unsigned int texel_count,
	unsigned int partitioning_count,
	const uint8_t mismatch_count[BLOCK_MAX_PARTITIONINGS],
	uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS]
) {
	promise(partitioning_count > 0);
	uint16_t mscount[BLOCK_MAX_KMEANS_TEXELS] { 0 };

	// Create the histogram of mismatch counts
	for (unsigned int i = 0; i < partitioning_count; i++)
	{
		mscount[mismatch_count[i]]++;
	}

	// Create a running sum from the histogram array
	// Cells store previous values only; i.e. exclude self after sum
	unsigned int sum = 0;
	for (unsigned int i = 0; i < texel_count; i++)
	{
		uint16_t cnt = mscount[i];
		mscount[i] = sum;
		sum += cnt;
	}

	// Use the running sum as the index, incrementing after read to allow
	// sequential entries with the same count
	for (unsigned int i = 0; i < partitioning_count; i++)
	{
		unsigned int idx = mscount[mismatch_count[i]]++;
		partition_ordering[idx] = static_cast<uint16_t>(i);
	}

	return partitioning_count;
}
|
||||
|
||||
/**
 * @brief Use k-means clustering to compute a partition ordering for a block.
 *
 * @param      bsd                  The block size information.
 * @param      blk                  The image block color data to compress.
 * @param      partition_count      The desired number of partitions in the block.
 * @param[out] partition_ordering   The list of recommended partition indices, in priority order.
 *
 * @return The number of active partitionings in this selection.
 */
static unsigned int compute_kmeans_partition_ordering(
	const block_size_descriptor& bsd,
	const image_block& blk,
	unsigned int partition_count,
	uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS]
) {
	vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS];
	uint8_t texel_partitions[BLOCK_MAX_TEXELS];

	// Use three passes of k-means clustering to partition the block data
	for (unsigned int i = 0; i < 3; i++)
	{
		// First pass seeds the centers; later passes recompute them from the prior assignment
		if (i == 0)
		{
			kmeans_init(blk, bsd.texel_count, partition_count, cluster_centers);
		}
		else
		{
			kmeans_update(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
		}

		kmeans_assign(blk, bsd.texel_count, partition_count, cluster_centers, texel_partitions);
	}

	// Construct the block bitmaps of texel assignments to each partition
	// Only a capped subsample of texels is used; bit i corresponds to sample i
	uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 };
	unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
	promise(texels_to_process > 0);
	for (unsigned int i = 0; i < texels_to_process; i++)
	{
		unsigned int idx = bsd.kmeans_texels[i];
		bitmaps[texel_partitions[idx]] |= 1ULL << i;
	}

	// Count the mismatch between the block and the format's partition tables
	uint8_t mismatch_counts[BLOCK_MAX_PARTITIONINGS];
	count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts);

	// Sort the partitions based on the number of mismatched bits
	return get_partition_ordering_by_mismatch_bits(
	    texels_to_process,
	    bsd.partitioning_count_selected[partition_count - 1],
	    mismatch_counts, partition_ordering);
}
|
||||
|
||||
/**
|
||||
* @brief Insert a partitioning into an order list of results, sorted by error.
|
||||
*
|
||||
* @param max_values The max number of entries in the best result arrays.
|
||||
* @param this_error The error of the new entry.
|
||||
* @param this_partition The partition ID of the new entry.
|
||||
* @param[out] best_errors The array of best error values.
|
||||
* @param[out] best_partitions The array of best partition values.
|
||||
*/
|
||||
static void insert_result(
|
||||
unsigned int max_values,
|
||||
float this_error,
|
||||
unsigned int this_partition,
|
||||
float* best_errors,
|
||||
unsigned int* best_partitions)
|
||||
{
|
||||
promise(max_values > 0);
|
||||
|
||||
// Don't bother searching if the current worst error beats the new error
|
||||
if (this_error >= best_errors[max_values - 1])
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Else insert into the list in error-order
|
||||
for (unsigned int i = 0; i < max_values; i++)
|
||||
{
|
||||
// Existing result is better - move on ...
|
||||
if (this_error > best_errors[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Move existing results down one
|
||||
for (unsigned int j = max_values - 1; j > i; j--)
|
||||
{
|
||||
best_errors[j] = best_errors[j - 1];
|
||||
best_partitions[j] = best_partitions[j - 1];
|
||||
}
|
||||
|
||||
// Insert new result
|
||||
best_errors[i] = this_error;
|
||||
best_partitions[i] = this_partition;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* See header for documentation. */
unsigned int find_best_partition_candidates(
	const block_size_descriptor& bsd,
	const image_block& blk,
	unsigned int partition_count,
	unsigned int partition_search_limit,
	unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES],
	unsigned int requested_candidates
) {
	// Constant used to estimate quantization error for a given partitioning; the optimal value for
	// this depends on bitrate. These values have been determined empirically.
	unsigned int texels_per_block = bsd.texel_count;
	float weight_imprecision_estim = 0.055f;
	if (texels_per_block <= 20)
	{
		weight_imprecision_estim = 0.03f;
	}
	else if (texels_per_block <= 31)
	{
		weight_imprecision_estim = 0.04f;
	}
	else if (texels_per_block <= 41)
	{
		weight_imprecision_estim = 0.05f;
	}

	promise(partition_count > 0);
	promise(partition_search_limit > 0);

	// Errors are compared in squared space, so square the estimate once up front
	weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim;

	// Rank the hash-function partitionings by similarity to the k-means ideal clustering
	uint16_t partition_sequence[BLOCK_MAX_PARTITIONINGS];
	unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence);
	partition_search_limit = astc::min(partition_search_limit, sequence_len);
	requested_candidates = astc::min(partition_search_limit, requested_candidates);

	bool uses_alpha = !blk.is_constant_channel(3);

	// Partitioning errors assuming uncorrelated-chrominance endpoints
	float uncor_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES];
	unsigned int uncor_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES];

	// Partitioning errors assuming same-chrominance endpoints
	float samec_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES];
	unsigned int samec_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES];

	for (unsigned int i = 0; i < requested_candidates; i++)
	{
		uncor_best_errors[i] = ERROR_CALC_DEFAULT;
		samec_best_errors[i] = ERROR_CALC_DEFAULT;
	}

	if (uses_alpha)
	{
		// 4-component path: blocks with a non-constant alpha channel
		for (unsigned int i = 0; i < partition_search_limit; i++)
		{
			unsigned int partition = partition_sequence[i];
			const auto& pi = bsd.get_raw_partition_info(partition_count, partition);

			// Compute weighting to give to each component in each partition
			partition_metrics pms[BLOCK_MAX_PARTITIONS];

			compute_avgs_and_dirs_4_comp(pi, blk, pms);

			line4 uncor_lines[BLOCK_MAX_PARTITIONS];
			line4 samec_lines[BLOCK_MAX_PARTITIONS];

			processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS];
			processed_line4 samec_plines[BLOCK_MAX_PARTITIONS];

			float line_lengths[BLOCK_MAX_PARTITIONS];

			for (unsigned int j = 0; j < partition_count; j++)
			{
				partition_metrics& pm = pms[j];

				// Uncorrelated chroma line: through the mean, along the dominant direction
				uncor_lines[j].a = pm.avg;
				uncor_lines[j].b = normalize_safe(pm.dir, unit4());

				uncor_plines[j].amod = uncor_lines[j].a - uncor_lines[j].b * dot(uncor_lines[j].a, uncor_lines[j].b);
				uncor_plines[j].bs = uncor_lines[j].b;

				// Same chroma line: through the origin, towards the mean
				samec_lines[j].a = vfloat4::zero();
				samec_lines[j].b = normalize_safe(pm.avg, unit4());

				samec_plines[j].amod = vfloat4::zero();
				samec_plines[j].bs = samec_lines[j].b;
			}

			float uncor_error = 0.0f;
			float samec_error = 0.0f;

			compute_error_squared_rgba(pi,
			                           blk,
			                           uncor_plines,
			                           samec_plines,
			                           line_lengths,
			                           uncor_error,
			                           samec_error);

			// Compute an estimate of error introduced by weight quantization imprecision.
			// This error is computed as follows, for each partition
			//     1: compute the principal-axis vector (full length) in error-space
			//     2: convert the principal-axis vector to regular RGB-space
			//     3: scale the vector by a constant that estimates average quantization error
			//     4: for each texel, square the vector, then do a dot-product with the texel's
			//        error weight; sum up the results across all texels.
			//     4(optimized): square the vector once, then do a dot-product with the average
			//        texel error, then multiply by the number of texels.

			for (unsigned int j = 0; j < partition_count; j++)
			{
				float tpp = static_cast<float>(pi.partition_texel_count[j]);
				vfloat4 error_weights(tpp * weight_imprecision_estim);

				vfloat4 uncor_vector = uncor_lines[j].b * line_lengths[j];
				vfloat4 samec_vector = samec_lines[j].b * line_lengths[j];

				uncor_error += dot_s(uncor_vector * uncor_vector, error_weights);
				samec_error += dot_s(samec_vector * samec_vector, error_weights);
			}

			insert_result(requested_candidates, uncor_error, partition, uncor_best_errors, uncor_best_partitions);
			insert_result(requested_candidates, samec_error, partition, samec_best_errors, samec_best_partitions);
		}
	}
	else
	{
		// 3-component path: alpha is constant, so only RGB participates in the error metrics
		for (unsigned int i = 0; i < partition_search_limit; i++)
		{
			unsigned int partition = partition_sequence[i];
			const auto& pi = bsd.get_raw_partition_info(partition_count, partition);

			// Compute weighting to give to each component in each partition
			partition_metrics pms[BLOCK_MAX_PARTITIONS];
			compute_avgs_and_dirs_3_comp_rgb(pi, blk, pms);

			partition_lines3 plines[BLOCK_MAX_PARTITIONS];

			for (unsigned int j = 0; j < partition_count; j++)
			{
				partition_metrics& pm = pms[j];
				partition_lines3& pl = plines[j];

				// Uncorrelated chroma line: through the mean, along the dominant direction
				pl.uncor_line.a = pm.avg;
				pl.uncor_line.b = normalize_safe(pm.dir, unit3());

				// Same chroma line: through the origin, towards the mean
				pl.samec_line.a = vfloat4::zero();
				pl.samec_line.b = normalize_safe(pm.avg, unit3());

				pl.uncor_pline.amod = pl.uncor_line.a - pl.uncor_line.b * dot3(pl.uncor_line.a, pl.uncor_line.b);
				pl.uncor_pline.bs = pl.uncor_line.b;

				pl.samec_pline.amod = vfloat4::zero();
				pl.samec_pline.bs = pl.samec_line.b;
			}

			float uncor_error = 0.0f;
			float samec_error = 0.0f;

			compute_error_squared_rgb(pi,
			                          blk,
			                          plines,
			                          uncor_error,
			                          samec_error);

			// Compute an estimate of error introduced by weight quantization imprecision.
			// This error is computed as follows, for each partition
			//     1: compute the principal-axis vector (full length) in error-space
			//     2: convert the principal-axis vector to regular RGB-space
			//     3: scale the vector by a constant that estimates average quantization error
			//     4: for each texel, square the vector, then do a dot-product with the texel's
			//        error weight; sum up the results across all texels.
			//     4(optimized): square the vector once, then do a dot-product with the average
			//        texel error, then multiply by the number of texels.

			for (unsigned int j = 0; j < partition_count; j++)
			{
				partition_lines3& pl = plines[j];

				float tpp = static_cast<float>(pi.partition_texel_count[j]);
				vfloat4 error_weights(tpp * weight_imprecision_estim);

				vfloat4 uncor_vector = pl.uncor_line.b * pl.line_length;
				vfloat4 samec_vector = pl.samec_line.b * pl.line_length;

				uncor_error += dot3_s(uncor_vector * uncor_vector, error_weights);
				samec_error += dot3_s(samec_vector * samec_vector, error_weights);
			}

			insert_result(requested_candidates, uncor_error, partition, uncor_best_errors, uncor_best_partitions);
			insert_result(requested_candidates, samec_error, partition, samec_best_errors, samec_best_partitions);
		}
	}

	// Interleave the two candidate lists (best-first) so both encoding assumptions contribute
	unsigned int interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES];
	for (unsigned int i = 0; i < requested_candidates; i++)
	{
		interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
		interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
	}

	// Bit-per-partition-index mask covering all 1024 possible partition indices
	uint64_t bitmasks[1024/64] { 0 };
	unsigned int emitted = 0;

	// Deduplicate the first "requested" entries
	for (unsigned int i = 0; i < requested_candidates * 2; i++)
	{
		unsigned int partition = interleave[i];

		unsigned int word = partition / 64;
		unsigned int bit = partition % 64;

		bool written = bitmasks[word] & (1ull << bit);

		if (!written)
		{
			best_partitions[emitted] = partition;
			bitmasks[word] |= 1ull << bit;
			emitted++;

			if (emitted == requested_candidates)
			{
				break;
			}
		}
	}

	return emitted;
}
|
||||
|
||||
#endif
|
||||
1663
engine/thirdparty/astcenc/astcenc_ideal_endpoints_and_weights.cpp
vendored
Normal file
1663
engine/thirdparty/astcenc/astcenc_ideal_endpoints_and_weights.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
558
engine/thirdparty/astcenc/astcenc_image.cpp
vendored
Normal file
558
engine/thirdparty/astcenc/astcenc_image.cpp
vendored
Normal file
|
|
@ -0,0 +1,558 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for creating in-memory ASTC image structures.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
/**
 * @brief Loader pipeline function type for data fetch from memory.
 *
 * Takes a raw image plane pointer and a component index offset; returns one
 * RGBA texel as floats. Concrete implementations: @c load_texel_u8,
 * @c load_texel_f16, @c load_texel_f32.
 */
using pixel_loader = vfloat4(*)(const void*, int);

/**
 * @brief Loader pipeline function type for swizzling data in a vector.
 *
 * Concrete implementations: @c swz_texel (real reorder) and
 * @c swz_texel_skip (identity fast path).
 */
using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&);

/**
 * @brief Loader pipeline function type for converting data in a vector to LNS.
 *
 * Concrete implementations: @c encode_texel_unorm and @c encode_texel_lns;
 * the mask selects which lanes get LNS treatment.
 */
using pixel_converter = vfloat4(*)(vfloat4, vmask4);
|
||||
|
||||
/**
|
||||
* @brief Load a 8-bit UNORM texel from a data array.
|
||||
*
|
||||
* @param data The data pointer.
|
||||
* @param base_offset The index offset to the start of the pixel.
|
||||
*/
|
||||
static vfloat4 load_texel_u8(
|
||||
const void* data,
|
||||
int base_offset
|
||||
) {
|
||||
const uint8_t* data8 = static_cast<const uint8_t*>(data);
|
||||
return int_to_float(vint4(data8 + base_offset)) / 255.0f;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Load a 16-bit fp16 texel from a data array.
|
||||
*
|
||||
* @param data The data pointer.
|
||||
* @param base_offset The index offset to the start of the pixel.
|
||||
*/
|
||||
static vfloat4 load_texel_f16(
|
||||
const void* data,
|
||||
int base_offset
|
||||
) {
|
||||
const uint16_t* data16 = static_cast<const uint16_t*>(data);
|
||||
int r = data16[base_offset ];
|
||||
int g = data16[base_offset + 1];
|
||||
int b = data16[base_offset + 2];
|
||||
int a = data16[base_offset + 3];
|
||||
return float16_to_float(vint4(r, g, b, a));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Load a 32-bit float texel from a data array.
|
||||
*
|
||||
* @param data The data pointer.
|
||||
* @param base_offset The index offset to the start of the pixel.
|
||||
*/
|
||||
static vfloat4 load_texel_f32(
|
||||
const void* data,
|
||||
int base_offset
|
||||
) {
|
||||
const float* data32 = static_cast<const float*>(data);
|
||||
return vfloat4(data32 + base_offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Dummy no-op swizzle function.
|
||||
*
|
||||
* @param data The source RGBA vector to swizzle.
|
||||
* @param swz The swizzle to use.
|
||||
*/
|
||||
static vfloat4 swz_texel_skip(
|
||||
vfloat4 data,
|
||||
const astcenc_swizzle& swz
|
||||
) {
|
||||
(void)swz;
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Swizzle a texel into a new arrangement.
|
||||
*
|
||||
* @param data The source RGBA vector to swizzle.
|
||||
* @param swz The swizzle to use.
|
||||
*/
|
||||
static vfloat4 swz_texel(
|
||||
vfloat4 data,
|
||||
const astcenc_swizzle& swz
|
||||
) {
|
||||
ASTCENC_ALIGNAS float datas[6];
|
||||
|
||||
storea(data, datas);
|
||||
datas[ASTCENC_SWZ_0] = 0.0f;
|
||||
datas[ASTCENC_SWZ_1] = 1.0f;
|
||||
|
||||
return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Encode a texel that is entirely LDR linear.
|
||||
*
|
||||
* @param data The RGBA data to encode.
|
||||
* @param lns_mask The mask for the HDR channels than need LNS encoding.
|
||||
*/
|
||||
static vfloat4 encode_texel_unorm(
|
||||
vfloat4 data,
|
||||
vmask4 lns_mask
|
||||
) {
|
||||
(void)lns_mask;
|
||||
return data * 65535.0f;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Encode a texel that includes at least some HDR LNS texels.
|
||||
*
|
||||
* @param data The RGBA data to encode.
|
||||
* @param lns_mask The mask for the HDR channels than need LNS encoding.
|
||||
*/
|
||||
static vfloat4 encode_texel_lns(
|
||||
vfloat4 data,
|
||||
vmask4 lns_mask
|
||||
) {
|
||||
vfloat4 datav_unorm = data * 65535.0f;
|
||||
vfloat4 datav_lns = float_to_lns(data);
|
||||
return select(datav_unorm, datav_lns, lns_mask);
|
||||
}
|
||||
|
||||
/* See header for documentation. */
/**
 * Load one encode block of texels from @c img into @c blk, applying the
 * requested swizzle and the profile's UNORM/LNS encoding, and computing the
 * per-block metadata (min/mean/max, grayscale flag) as a side effect.
 *
 * Texels outside the image bounds are clamp-to-edge replicated (see the
 * astc::min() clamps on xi/yi/zi). The image data is assumed to be four
 * interleaved channels per texel - the offsets are all 4 * component.
 */
void load_image_block(
	astcenc_profile decode_mode,
	const astcenc_image& img,
	image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	unsigned int xsize = img.dim_x;
	unsigned int ysize = img.dim_y;
	unsigned int zsize = img.dim_z;

	blk.xpos = xpos;
	blk.ypos = ypos;
	blk.zpos = zpos;

	// True if any non-identity swizzle
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

	int idx = 0;

	vfloat4 data_min(1e38f);
	vfloat4 data_mean(0.0f);
	// Pre-divide so the mean can be accumulated with one multiply per texel
	vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
	vfloat4 data_max(-1e38f);
	vmask4 grayscalev(true);

	// This works because we impose the same choice everywhere during encode
	uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
	                  (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
	uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
	vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
	vmask4 lns_mask = use_lns != vint4::zero();

	// Set up the function pointers for loading pipeline as needed;
	// U8 is the default loader unless the image advertises F16/F32 data
	pixel_loader loader = load_texel_u8;
	if (img.data_type == ASTCENC_TYPE_F16)
	{
		loader = load_texel_f16;
	}
	else if (img.data_type == ASTCENC_TYPE_F32)
	{
		loader = load_texel_f32;
	}

	pixel_swizzler swizzler = swz_texel_skip;
	if (needs_swz)
	{
		swizzler = swz_texel;
	}

	pixel_converter converter = encode_texel_unorm;
	if (any(lns_mask))
	{
		converter = encode_texel_lns;
	}

	for (unsigned int z = 0; z < bsd.zdim; z++)
	{
		// Clamp to edge so partial blocks replicate the border texels
		unsigned int zi = astc::min(zpos + z, zsize - 1);
		void* plane = img.data[zi];

		for (unsigned int y = 0; y < bsd.ydim; y++)
		{
			unsigned int yi = astc::min(ypos + y, ysize - 1);

			for (unsigned int x = 0; x < bsd.xdim; x++)
			{
				unsigned int xi = astc::min(xpos + x, xsize - 1);

				// load -> swizzle -> UNORM/LNS encode pipeline
				vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
				datav = swizzler(datav, swz);
				datav = converter(datav, lns_mask);

				// Compute block metadata
				data_min = min(data_min, datav);
				data_mean += datav * data_mean_scale;
				data_max = max(data_max, datav);

				// Grayscale iff R == G and R == B for every texel
				grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

				blk.data_r[idx] = datav.lane<0>();
				blk.data_g[idx] = datav.lane<1>();
				blk.data_b[idx] = datav.lane<2>();
				blk.data_a[idx] = datav.lane<3>();

				blk.rgb_lns[idx] = rgb_lns;
				blk.alpha_lns[idx] = a_lns;

				idx++;
			}
		}
	}

	// Reverse the encoding so we store origin block in the original format
	vfloat4 data_enc = blk.texel(0);
	vfloat4 data_enc_unorm = data_enc / 65535.0f;
	vfloat4 data_enc_lns = vfloat4::zero();

	// Only spend time undoing the LNS transform if any channel used it
	if (rgb_lns || a_lns)
	{
		data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
	}

	blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);

	// Store block metadata
	blk.data_min = data_min;
	blk.data_mean = data_mean;
	blk.data_max = data_max;
	blk.grayscale = all(grayscalev);
}
|
||||
|
||||
/* See header for documentation. */
/**
 * Fast-path block load for 2D LDR U8 images with an identity swizzle - both
 * parameters that would select other behavior (decode_mode, swz) are ignored,
 * so callers must only use this when those preconditions hold.
 *
 * Reads plane 0 only (2D), clamps out-of-bounds texels to the image edge, and
 * rescales 8-bit data straight into the internal 0..65535 range.
 */
void load_image_block_fast_ldr(
	astcenc_profile decode_mode,
	const astcenc_image& img,
	image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	(void)swz;
	(void)decode_mode;

	unsigned int xsize = img.dim_x;
	unsigned int ysize = img.dim_y;

	blk.xpos = xpos;
	blk.ypos = ypos;
	blk.zpos = zpos;

	vfloat4 data_min(1e38f);
	vfloat4 data_mean = vfloat4::zero();
	vfloat4 data_max(-1e38f);
	vmask4 grayscalev(true);
	int idx = 0;

	const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
	for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
	{
		// Clamp to edge so partial blocks replicate the border texels
		unsigned int yi = astc::min(y, ysize - 1);

		for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
		{
			unsigned int xi = astc::min(x, xsize - 1);

			// Scale 0..255 U8 data into the internal 0..65535 UNORM range
			vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi));
			vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);

			// Compute block metadata
			data_min = min(data_min, datav);
			data_mean += datav;
			data_max = max(data_max, datav);

			// Grayscale iff R == G and R == B for every texel
			grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

			blk.data_r[idx] = datav.lane<0>();
			blk.data_g[idx] = datav.lane<1>();
			blk.data_b[idx] = datav.lane<2>();
			blk.data_a[idx] = datav.lane<3>();

			idx++;
		}
	}

	// Reverse the encoding so we store origin block in the original format
	blk.origin_texel = blk.texel(0) / 65535.0f;

	// Store block metadata; only lane 0 of the LNS flags is written here -
	// presumably downstream only reads entry 0 on this path (TODO confirm)
	blk.rgb_lns[0] = 0;
	blk.alpha_lns[0] = 0;
	blk.data_min = data_min;
	blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
	blk.data_max = data_max;
	blk.grayscale = all(grayscalev);
}
|
||||
|
||||
/* See header for documentation. */
/**
 * Write one decoded block from @c blk back into @c img, honoring the output
 * swizzle (including ASTCENC_SWZ_Z normal reconstruction) and clamping the
 * write region to the image bounds.
 *
 * The x_nudge/y_nudge values skip over block texels that fall outside the
 * image so @c idx stays aligned with the block's row-major texel layout.
 * One branch per output data type: U8 (SIMD path), F16, F32.
 */
void store_image_block(
	astcenc_image& img,
	const image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	unsigned int x_size = img.dim_x;
	unsigned int x_start = xpos;
	unsigned int x_end = astc::min(x_size, xpos + bsd.xdim);
	unsigned int x_count = x_end - x_start;
	unsigned int x_nudge = bsd.xdim - x_count;

	unsigned int y_size = img.dim_y;
	unsigned int y_start = ypos;
	unsigned int y_end = astc::min(y_size, ypos + bsd.ydim);
	unsigned int y_count = y_end - y_start;
	unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim;

	unsigned int z_size = img.dim_z;
	unsigned int z_start = zpos;
	unsigned int z_end = astc::min(z_size, zpos + bsd.zdim);

	// True if any non-identity swizzle
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

	// True if any swizzle uses Z reconstruct
	bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
	               (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);

	int idx = 0;
	if (img.data_type == ASTCENC_TYPE_U8)
	{
		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start);

				// Process a SIMD register's worth of texels per iteration
				for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH)
				{
					unsigned int max_texels = ASTCENC_SIMD_WIDTH;
					unsigned int used_texels = astc::min(x_count - x, max_texels);

					// Unaligned load as rows are not always SIMD_WIDTH long
					vfloat data_r(blk.data_r + idx);
					vfloat data_g(blk.data_g + idx);
					vfloat data_b(blk.data_b + idx);
					vfloat data_a(blk.data_a + idx);

					// Clamp to 1.0 then quantize to 8-bit with round-to-nearest
					vint data_ri = float_to_int_rtn(min(data_r, 1.0f) * 255.0f);
					vint data_gi = float_to_int_rtn(min(data_g, 1.0f) * 255.0f);
					vint data_bi = float_to_int_rtn(min(data_b, 1.0f) * 255.0f);
					vint data_ai = float_to_int_rtn(min(data_a, 1.0f) * 255.0f);

					if (needs_swz)
					{
						// Table indexed directly by the ASTCENC_SWZ_* selectors
						vint swizzle_table[7];
						swizzle_table[ASTCENC_SWZ_0] = vint(0);
						swizzle_table[ASTCENC_SWZ_1] = vint(255);
						swizzle_table[ASTCENC_SWZ_R] = data_ri;
						swizzle_table[ASTCENC_SWZ_G] = data_gi;
						swizzle_table[ASTCENC_SWZ_B] = data_bi;
						swizzle_table[ASTCENC_SWZ_A] = data_ai;

						if (needs_z)
						{
							// Reconstruct Z from the X (in R) and Y (in A)
							// components of a unit normal: z = sqrt(1-x^2-y^2)
							vfloat data_x = (data_r * vfloat(2.0f)) - vfloat(1.0f);
							vfloat data_y = (data_a * vfloat(2.0f)) - vfloat(1.0f);
							vfloat data_z = vfloat(1.0f) - (data_x * data_x) - (data_y * data_y);
							data_z = max(data_z, 0.0f);
							data_z = (sqrt(data_z) * vfloat(0.5f)) + vfloat(0.5f);

							swizzle_table[ASTCENC_SWZ_Z] = float_to_int_rtn(min(data_z, 1.0f) * 255.0f);
						}

						data_ri = swizzle_table[swz.r];
						data_gi = swizzle_table[swz.g];
						data_bi = swizzle_table[swz.b];
						data_ai = swizzle_table[swz.a];
					}

					// Errors are NaN encoded - convert to magenta error color
					// Branch is OK here - it is almost never true so predicts well
					vmask nan_mask = data_r != data_r;
					if (any(nan_mask))
					{
						data_ri = select(data_ri, vint(0xFF), nan_mask);
						data_gi = select(data_gi, vint(0x00), nan_mask);
						data_bi = select(data_bi, vint(0xFF), nan_mask);
						data_ai = select(data_ai, vint(0xFF), nan_mask);
					}

					// Masked store so the final partial vector does not write
					// past the end of the row
					vint data_rgbai = interleave_rgba8(data_ri, data_gi, data_bi, data_ai);
					vmask store_mask = vint::lane_id() < vint(used_texels);
					store_lanes_masked(data8_row, data_rgbai, store_mask);

					data8_row += ASTCENC_SIMD_WIDTH * 4;
					idx += used_texels;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
	else if (img.data_type == ASTCENC_TYPE_F16)
	{
		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start);

				for (unsigned int x = 0; x < x_count; x++)
				{
					vint4 color;

					// NaNs are handled inline - no need to special case
					if (needs_swz)
					{
						// Scatter channels into a table indexed by ASTCENC_SWZ_*
						float data[7];
						data[ASTCENC_SWZ_0] = 0.0f;
						data[ASTCENC_SWZ_1] = 1.0f;
						data[ASTCENC_SWZ_R] = blk.data_r[idx];
						data[ASTCENC_SWZ_G] = blk.data_g[idx];
						data[ASTCENC_SWZ_B] = blk.data_b[idx];
						data[ASTCENC_SWZ_A] = blk.data_a[idx];

						if (needs_z)
						{
							// Reconstruct Z from R and A (see U8 branch)
							float xN = (data[0] * 2.0f) - 1.0f;
							float yN = (data[3] * 2.0f) - 1.0f;
							float zN = 1.0f - xN * xN - yN * yN;
							if (zN < 0.0f)
							{
								zN = 0.0f;
							}
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
						}

						vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
						color = float_to_float16(colorf);
					}
					else
					{
						vfloat4 colorf = blk.texel(idx);
						color = float_to_float16(colorf);
					}

					// TODO: Vectorize with store N shorts?
					data16_row[0] = static_cast<uint16_t>(color.lane<0>());
					data16_row[1] = static_cast<uint16_t>(color.lane<1>());
					data16_row[2] = static_cast<uint16_t>(color.lane<2>());
					data16_row[3] = static_cast<uint16_t>(color.lane<3>());
					data16_row += 4;
					idx++;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
	else // if (img.data_type == ASTCENC_TYPE_F32)
	{
		assert(img.data_type == ASTCENC_TYPE_F32);

		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			float* data32 = static_cast<float*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				float* data32_row = data32 + (4 * x_size * y) + (4 * x_start);

				for (unsigned int x = 0; x < x_count; x++)
				{
					vfloat4 color = blk.texel(idx);

					// NaNs are handled inline - no need to special case
					if (needs_swz)
					{
						// Scatter channels into a table indexed by ASTCENC_SWZ_*
						float data[7];
						data[ASTCENC_SWZ_0] = 0.0f;
						data[ASTCENC_SWZ_1] = 1.0f;
						data[ASTCENC_SWZ_R] = color.lane<0>();
						data[ASTCENC_SWZ_G] = color.lane<1>();
						data[ASTCENC_SWZ_B] = color.lane<2>();
						data[ASTCENC_SWZ_A] = color.lane<3>();

						if (needs_z)
						{
							// Reconstruct Z from R and A (see U8 branch)
							float xN = (data[0] * 2.0f) - 1.0f;
							float yN = (data[3] * 2.0f) - 1.0f;
							float zN = 1.0f - xN * xN - yN * yN;
							if (zN < 0.0f)
							{
								zN = 0.0f;
							}
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
						}

						color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
					}

					store(color, data32_row);
					data32_row += 4;
					idx++;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
}
|
||||
739
engine/thirdparty/astcenc/astcenc_integer_sequence.cpp
vendored
Normal file
739
engine/thirdparty/astcenc/astcenc_integer_sequence.cpp
vendored
Normal file
|
|
@ -0,0 +1,739 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for encoding/decoding Bounded Integer Sequence Encoding.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
/**
 * @brief Unpacked quint triplets <low,middle,high> for each packed value.
 *
 * Indexed by the 7-bit packed quint code (0..127); each row gives the three
 * decoded quints, low digit first. This is the inverse of integer_of_quints.
 */
// TODO: Bitpack these into a uint16_t?
static const uint8_t quints_of_integer[128][3] {
	{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
	{4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
	{0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
	{4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
	{0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0},
	{4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
	{0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0},
	{4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
	{0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
	{4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
	{0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1},
	{4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
	{0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1},
	{4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
	{0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
	{4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
	{0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2},
	{4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
	{0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2},
	{4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
	{0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
	{4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
	{0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2},
	{4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
	{0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3},
	{4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
	{0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
	{4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
	{0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3},
	{4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
	{0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3},
	{4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4}
};
|
||||
|
||||
/**
 * @brief Packed quint values for each unpacked value, indexed [hi][mid][lo].
 *
 * Each quint digit is 0..4; the result is the 7-bit packed code. This is the
 * inverse of quints_of_integer.
 */
static const uint8_t integer_of_quints[5][5][5] {
	{
		{0, 1, 2, 3, 4},
		{8, 9, 10, 11, 12},
		{16, 17, 18, 19, 20},
		{24, 25, 26, 27, 28},
		{5, 13, 21, 29, 6}
	},
	{
		{32, 33, 34, 35, 36},
		{40, 41, 42, 43, 44},
		{48, 49, 50, 51, 52},
		{56, 57, 58, 59, 60},
		{37, 45, 53, 61, 14}
	},
	{
		{64, 65, 66, 67, 68},
		{72, 73, 74, 75, 76},
		{80, 81, 82, 83, 84},
		{88, 89, 90, 91, 92},
		{69, 77, 85, 93, 22}
	},
	{
		{96, 97, 98, 99, 100},
		{104, 105, 106, 107, 108},
		{112, 113, 114, 115, 116},
		{120, 121, 122, 123, 124},
		{101, 109, 117, 125, 30}
	},
	{
		{102, 103, 70, 71, 38},
		{110, 111, 78, 79, 46},
		{118, 119, 86, 87, 54},
		{126, 127, 94, 95, 62},
		{39, 47, 55, 63, 31}
	}
};
|
||||
|
||||
/**
 * @brief Unpacked trit quintuplets <low,...,high> for each packed value.
 *
 * Indexed by the 8-bit packed trit code (0..255); each row gives the five
 * decoded trits, low digit first. This is the inverse of integer_of_trits.
 */
// TODO: Bitpack these into a uint16_t?
static const uint8_t trits_of_integer[256][5] {
	{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
	{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
	{0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
	{0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
	{0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
	{0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
	{0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
	{0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
	{0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
	{0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
	{0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
	{0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
	{0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
	{0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
	{0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
	{0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
	{0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
	{0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
	{0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
	{0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
	{0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
	{0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
	{0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
	{0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
	{0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
	{0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
	{0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
	{0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
	{0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
	{0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
	{0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
	{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
	{0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
	{0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
	{0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
	{0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
	{0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
	{0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
	{0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
	{0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
	{0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
	{0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
	{0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
	{0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
	{0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
	{0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
	{0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
	{0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
	{0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
	{0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
	{0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
	{0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
	{0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
	{0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
	{0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
	{0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
	{0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
	{0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
	{0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
	{0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
	{0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
	{0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
	{0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
	{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2}
};
|
||||
|
||||
/**
 * @brief Packed trit values for each unpacked value, indexed [hi][][][][lo].
 *
 * Each trit digit is 0..2; indexing by the five digits (high digit first,
 * low digit last) yields the 8-bit packed code. This is the inverse of
 * trits_of_integer.
 */
static const uint8_t integer_of_trits[3][3][3][3][3] {
	{
		{
			{{0, 1, 2}, {4, 5, 6}, {8, 9, 10}},
			{{16, 17, 18}, {20, 21, 22}, {24, 25, 26}},
			{{3, 7, 15}, {19, 23, 27}, {12, 13, 14}}
		},
		{
			{{32, 33, 34}, {36, 37, 38}, {40, 41, 42}},
			{{48, 49, 50}, {52, 53, 54}, {56, 57, 58}},
			{{35, 39, 47}, {51, 55, 59}, {44, 45, 46}}
		},
		{
			{{64, 65, 66}, {68, 69, 70}, {72, 73, 74}},
			{{80, 81, 82}, {84, 85, 86}, {88, 89, 90}},
			{{67, 71, 79}, {83, 87, 91}, {76, 77, 78}}
		}
	},
	{
		{
			{{128, 129, 130}, {132, 133, 134}, {136, 137, 138}},
			{{144, 145, 146}, {148, 149, 150}, {152, 153, 154}},
			{{131, 135, 143}, {147, 151, 155}, {140, 141, 142}}
		},
		{
			{{160, 161, 162}, {164, 165, 166}, {168, 169, 170}},
			{{176, 177, 178}, {180, 181, 182}, {184, 185, 186}},
			{{163, 167, 175}, {179, 183, 187}, {172, 173, 174}}
		},
		{
			{{192, 193, 194}, {196, 197, 198}, {200, 201, 202}},
			{{208, 209, 210}, {212, 213, 214}, {216, 217, 218}},
			{{195, 199, 207}, {211, 215, 219}, {204, 205, 206}}
		}
	},
	{
		{
			{{96, 97, 98}, {100, 101, 102}, {104, 105, 106}},
			{{112, 113, 114}, {116, 117, 118}, {120, 121, 122}},
			{{99, 103, 111}, {115, 119, 123}, {108, 109, 110}}
		},
		{
			{{224, 225, 226}, {228, 229, 230}, {232, 233, 234}},
			{{240, 241, 242}, {244, 245, 246}, {248, 249, 250}},
			{{227, 231, 239}, {243, 247, 251}, {236, 237, 238}}
		},
		{
			{{28, 29, 30}, {60, 61, 62}, {92, 93, 94}},
			{{156, 157, 158}, {188, 189, 190}, {220, 221, 222}},
			{{31, 63, 127}, {159, 191, 255}, {252, 253, 254}}
		}
	}
};
|
||||
|
||||
/**
 * @brief The number of bits, trits, and quints needed for a quant level.
 *
 * Bitfields pack the whole record into a single byte; trits/quints are
 * one-bit flags since a quant level uses at most one of each.
 */
struct btq_count
{
	/** @brief The number of bits. */
	uint8_t bits:6;

	/** @brief The number of trits. */
	uint8_t trits:1;

	/** @brief The number of quints. */
	uint8_t quints:1;
};
|
||||
|
||||
/**
 * @brief The table of bits, trits, and quints needed for a quant encode.
 *
 * Indexed by the @c quant_method enumeration value; each row gives the
 * low-order bit count and whether a trit or quint carries the remaining range.
 */
static const std::array<btq_count, 21> btq_counts {{
	{ 1, 0, 0 }, // QUANT_2
	{ 0, 1, 0 }, // QUANT_3
	{ 2, 0, 0 }, // QUANT_4
	{ 0, 0, 1 }, // QUANT_5
	{ 1, 1, 0 }, // QUANT_6
	{ 3, 0, 0 }, // QUANT_8
	{ 1, 0, 1 }, // QUANT_10
	{ 2, 1, 0 }, // QUANT_12
	{ 4, 0, 0 }, // QUANT_16
	{ 2, 0, 1 }, // QUANT_20
	{ 3, 1, 0 }, // QUANT_24
	{ 5, 0, 0 }, // QUANT_32
	{ 3, 0, 1 }, // QUANT_40
	{ 4, 1, 0 }, // QUANT_48
	{ 6, 0, 0 }, // QUANT_64
	{ 4, 0, 1 }, // QUANT_80
	{ 5, 1, 0 }, // QUANT_96
	{ 7, 0, 0 }, // QUANT_128
	{ 5, 0, 1 }, // QUANT_160
	{ 6, 1, 0 }, // QUANT_192
	{ 8, 0, 0 }  // QUANT_256
}};
|
||||
|
||||
/**
 * @brief The sequence scale, round, and divisors needed to compute sizing.
 *
 * The length of a quantized sequence in bits is:
 *     (scale * <sequence_len> + round) / divisor
 *
 * Only @c scale and a 2-bit @c divisor code are stored; the real divisor
 * (1, 3, or 5) and the round-up constant (divisor - 1) are derived from the
 * code in get_ise_sequence_bitcount().
 */
struct ise_size
{
	/** @brief The scaling parameter. */
	uint8_t scale:6;

	/** @brief The divisor parameter, encoding divisor = 2 * N + 1. */
	uint8_t divisor:2;
};
|
||||
|
||||
/**
 * @brief The table of scale, round, and divisors needed for quant sizing.
 *
 * Indexed by the @c quant_method enumeration value.
 */
static const std::array<ise_size, 21> ise_sizes {{
	{  1, 0 }, // QUANT_2
	{  8, 2 }, // QUANT_3
	{  2, 0 }, // QUANT_4
	{  7, 1 }, // QUANT_5
	{ 13, 2 }, // QUANT_6
	{  3, 0 }, // QUANT_8
	{ 10, 1 }, // QUANT_10
	{ 18, 2 }, // QUANT_12
	{  4, 0 }, // QUANT_16
	{ 13, 1 }, // QUANT_20
	{ 23, 2 }, // QUANT_24
	{  5, 0 }, // QUANT_32
	{ 16, 1 }, // QUANT_40
	{ 28, 2 }, // QUANT_48
	{  6, 0 }, // QUANT_64
	{ 19, 1 }, // QUANT_80
	{ 33, 2 }, // QUANT_96
	{  7, 0 }, // QUANT_128
	{ 22, 1 }, // QUANT_160
	{ 38, 2 }, // QUANT_192
	{  8, 0 }  // QUANT_256
}};
|
||||
|
||||
/* See header for documentation. */
|
||||
unsigned int get_ise_sequence_bitcount(
|
||||
unsigned int character_count,
|
||||
quant_method quant_level
|
||||
) {
|
||||
// Cope with out-of bounds values - input might be invalid
|
||||
if (static_cast<size_t>(quant_level) >= ise_sizes.size())
|
||||
{
|
||||
// Arbitrary large number that's more than an ASTC block can hold
|
||||
return 1024;
|
||||
}
|
||||
|
||||
auto& entry = ise_sizes[quant_level];
|
||||
unsigned int divisor = (entry.divisor << 1) + 1;
|
||||
return (entry.scale * character_count + divisor - 1) / divisor;
|
||||
}
|
||||
|
||||
/**
 * @brief Write up to 8 bits at an arbitrary bit offset.
 *
 * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may
 * span two separate bytes in memory.
 *
 * @param value          The value to write.
 * @param bitcount       The number of bits to write, starting from LSB.
 * @param bitoffset      The bit offset to store at, between 0 and 7.
 * @param[in,out] ptr    The data pointer to write to.
 */
static inline void write_bits(
	unsigned int value,
	unsigned int bitcount,
	unsigned int bitoffset,
	uint8_t ptr[2]
) {
	unsigned int field_mask = (1u << bitcount) - 1u;

	// Step to the first byte touched; keep only the in-byte bit position
	ptr += bitoffset >> 3;
	bitoffset &= 7u;

	// Align the field and its keep-mask with the in-byte position
	unsigned int aligned_value = (value & field_mask) << bitoffset;
	unsigned int keep_mask = ~(field_mask << bitoffset);

	// Merge into the two bytes the field may straddle
	ptr[0] = static_cast<uint8_t>((ptr[0] & keep_mask) | aligned_value);
	ptr[1] = static_cast<uint8_t>((ptr[1] & (keep_mask >> 8)) | (aligned_value >> 8));
}
|
||||
|
||||
/**
 * @brief Read up to 16 bits from two bytes.
 *
 * This function reads a packed N-bit field from two bytes in memory. The stored value must exist
 * within the two bytes, but can start at an arbitrary bit offset and span the two bytes in memory.
 *
 * @param bitcount    The number of bits to read.
 * @param bitoffset   The bit offset to read from, between 0 and 7.
 * @param ptr         The data pointer to read from.
 *
 * @return The read value.
 */
static inline unsigned int read_bits(
	unsigned int bitcount,
	unsigned int bitoffset,
	const uint8_t* ptr
) {
	// Locate the first byte containing the field
	const uint8_t* src = ptr + (bitoffset >> 3);
	unsigned int shift = bitoffset & 7u;

	// Load the two bytes the field may straddle, then extract the field
	unsigned int packed = static_cast<unsigned int>(src[0]) |
	                      (static_cast<unsigned int>(src[1]) << 8);
	return (packed >> shift) & ((1u << bitcount) - 1u);
}
|
||||
|
||||
/* See header for documentation. */
void encode_ise(
	quant_method quant_level,
	unsigned int character_count,
	const uint8_t* input_data,
	uint8_t* output_data,
	unsigned int bit_offset
) {
	promise(character_count > 0);

	// Each input value is split into a low "bits"-bit field (kept via mask)
	// plus a high trit/quint digit (input >> bits) that is packed, 5 or 3
	// values at a time, into a shared block value T
	unsigned int bits = btq_counts[quant_level].bits;
	unsigned int trits = btq_counts[quant_level].trits;
	unsigned int quints = btq_counts[quant_level].quints;
	unsigned int mask = (1 << bits) - 1;

	// Write out trits and bits
	if (trits)
	{
		unsigned int i = 0;
		unsigned int full_trit_blocks = character_count / 5;

		for (unsigned int j = 0; j < full_trit_blocks; j++)
		{
			// Gather the trit digits of the next 5 values and look up the
			// 8-bit packed trit-block encoding T
			unsigned int i4 = input_data[i + 4] >> bits;
			unsigned int i3 = input_data[i + 3] >> bits;
			unsigned int i2 = input_data[i + 2] >> bits;
			unsigned int i1 = input_data[i + 1] >> bits;
			unsigned int i0 = input_data[i + 0] >> bits;

			uint8_t T = integer_of_trits[i4][i3][i2][i1][i0];

			// The max size of a trit bit count is 6, so we can always safely
			// pack a single MX value with the following 1 or 2 T bits.
			uint8_t pack;

			// Element 0 + T0 + T1
			pack = (input_data[i++] & mask) | (((T >> 0) & 0x3) << bits);
			write_bits(pack, bits + 2, bit_offset, output_data);
			bit_offset += bits + 2;

			// Element 1 + T2 + T3
			pack = (input_data[i++] & mask) | (((T >> 2) & 0x3) << bits);
			write_bits(pack, bits + 2, bit_offset, output_data);
			bit_offset += bits + 2;

			// Element 2 + T4
			pack = (input_data[i++] & mask) | (((T >> 4) & 0x1) << bits);
			write_bits(pack, bits + 1, bit_offset, output_data);
			bit_offset += bits + 1;

			// Element 3 + T5 + T6
			pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits);
			write_bits(pack, bits + 2, bit_offset, output_data);
			bit_offset += bits + 2;

			// Element 4 + T7
			pack = (input_data[i++] & mask) | (((T >> 7) & 0x1) << bits);
			write_bits(pack, bits + 1, bit_offset, output_data);
			bit_offset += bits + 1;
		}

		// Loop tail for a partial block
		if (i != character_count)
		{
			// i4 cannot be present - we know the block is partial
			// i0 must be present - we know the block isn't empty
			unsigned int i4 = 0;
			unsigned int i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits;
			unsigned int i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits;
			unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits;
			unsigned int i0 = input_data[i + 0] >> bits;

			uint8_t T = integer_of_trits[i4][i3][i2][i1][i0];

			for (unsigned int j = 0; i < character_count; i++, j++)
			{
				// Truncated table as this iteration is always partial
				static const uint8_t tbits[4] { 2, 2, 1, 2 };
				static const uint8_t tshift[4] { 0, 2, 4, 5 };

				uint8_t pack = (input_data[i] & mask) |
				               (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits);

				write_bits(pack, bits + tbits[j], bit_offset, output_data);
				bit_offset += bits + tbits[j];
			}
		}
	}
	// Write out quints and bits
	else if (quints)
	{
		unsigned int i = 0;
		unsigned int full_quint_blocks = character_count / 3;

		for (unsigned int j = 0; j < full_quint_blocks; j++)
		{
			// Gather the quint digits of the next 3 values and look up the
			// packed quint-block encoding T
			unsigned int i2 = input_data[i + 2] >> bits;
			unsigned int i1 = input_data[i + 1] >> bits;
			unsigned int i0 = input_data[i + 0] >> bits;

			uint8_t T = integer_of_quints[i2][i1][i0];

			// The max size of a quint bit count is 5, so we can always safely
			// pack a single M value with the following 2 or 3 T bits.
			uint8_t pack;

			// Element 0
			pack = (input_data[i++] & mask) | (((T >> 0) & 0x7) << bits);
			write_bits(pack, bits + 3, bit_offset, output_data);
			bit_offset += bits + 3;

			// Element 1
			pack = (input_data[i++] & mask) | (((T >> 3) & 0x3) << bits);
			write_bits(pack, bits + 2, bit_offset, output_data);
			bit_offset += bits + 2;

			// Element 2
			pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits);
			write_bits(pack, bits + 2, bit_offset, output_data);
			bit_offset += bits + 2;
		}

		// Loop tail for a partial block
		if (i != character_count)
		{
			// i2 cannot be present - we know the block is partial
			// i0 must be present - we know the block isn't empty
			unsigned int i2 = 0;
			unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits;
			unsigned int i0 = input_data[i + 0] >> bits;

			uint8_t T = integer_of_quints[i2][i1][i0];

			for (unsigned int j = 0; i < character_count; i++, j++)
			{
				// Truncated table as this iteration is always partial
				static const uint8_t tbits[2] { 3, 2 };
				static const uint8_t tshift[2] { 0, 3 };

				uint8_t pack = (input_data[i] & mask) |
				               (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits);

				write_bits(pack, bits + tbits[j], bit_offset, output_data);
				bit_offset += bits + tbits[j];
			}
		}
	}
	// Write out just bits
	else
	{
		for (unsigned int i = 0; i < character_count; i++)
		{
			write_bits(input_data[i], bits, bit_offset, output_data);
			bit_offset += bits;
		}
	}
}
|
||||
|
||||
/* See header for documentation. */
void decode_ise(
	quant_method quant_level,
	unsigned int character_count,
	const uint8_t* input_data,
	uint8_t* output_data,
	unsigned int bit_offset
) {
	promise(character_count > 0);

	// Note: due to how the trit/quint-block unpacking is done in this function, we may write more
	// temporary results than the number of outputs. The maximum actual number of results is 64 bit,
	// but we keep 4 additional character_count of padding.
	uint8_t results[68];
	uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed

	unsigned int bits = btq_counts[quant_level].bits;
	unsigned int trits = btq_counts[quant_level].trits;
	unsigned int quints = btq_counts[quant_level].quints;

	// lcounter tracks the position within the current trit/quint block;
	// hcounter indexes the block currently being assembled in tq_blocks
	unsigned int lcounter = 0;
	unsigned int hcounter = 0;

	// Collect bits for each element, as well as bits for any trit-blocks and quint-blocks.
	for (unsigned int i = 0; i < character_count; i++)
	{
		results[i] = static_cast<uint8_t>(read_bits(bits, bit_offset, input_data));
		bit_offset += bits;

		if (trits)
		{
			// A packed trit block spreads its 8 bits over 5 elements as
			// 2 + 2 + 1 + 2 + 1 interleaved T bits
			static const uint8_t bits_to_read[5] { 2, 2, 1, 2, 1 };
			static const uint8_t block_shift[5] { 0, 2, 4, 5, 7 };
			static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 };
			static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 };
			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
			bit_offset += bits_to_read[lcounter];
			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
			hcounter += hcounter_incr[lcounter];
			lcounter = next_lcounter[lcounter];
		}

		if (quints)
		{
			// A packed quint block spreads its 7 bits over 3 elements as
			// 3 + 2 + 2 interleaved T bits
			static const uint8_t bits_to_read[3] { 3, 2, 2 };
			static const uint8_t block_shift[3] { 0, 3, 5 };
			static const uint8_t next_lcounter[3] { 1, 2, 0 };
			static const uint8_t hcounter_incr[3] { 0, 0, 1 };
			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
			bit_offset += bits_to_read[lcounter];
			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
			hcounter += hcounter_incr[lcounter];
			lcounter = next_lcounter[lcounter];
		}
	}

	// Unpack trit-blocks or quint-blocks as needed; each block contributes
	// the high trit/quint digit above the low "bits" field of 5 (or 3) results
	if (trits)
	{
		unsigned int trit_blocks = (character_count + 4) / 5;
		promise(trit_blocks > 0);
		for (unsigned int i = 0; i < trit_blocks; i++)
		{
			const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
			results[5 * i    ] |= tritptr[0] << bits;
			results[5 * i + 1] |= tritptr[1] << bits;
			results[5 * i + 2] |= tritptr[2] << bits;
			results[5 * i + 3] |= tritptr[3] << bits;
			results[5 * i + 4] |= tritptr[4] << bits;
		}
	}

	if (quints)
	{
		unsigned int quint_blocks = (character_count + 2) / 3;
		promise(quint_blocks > 0);
		for (unsigned int i = 0; i < quint_blocks; i++)
		{
			const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
			results[3 * i    ] |= quintptr[0] << bits;
			results[3 * i + 1] |= quintptr[1] << bits;
			results[3 * i + 2] |= quintptr[2] << bits;
		}
	}

	// Copy only the requested number of results; the tail padding is discarded
	for (unsigned int i = 0; i < character_count; i++)
	{
		output_data[i] = results[i];
	}
}
|
||||
2227
engine/thirdparty/astcenc/astcenc_internal.h
vendored
Normal file
2227
engine/thirdparty/astcenc/astcenc_internal.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
331
engine/thirdparty/astcenc/astcenc_internal_entry.h
vendored
Normal file
331
engine/thirdparty/astcenc/astcenc_internal_entry.h
vendored
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions and data declarations for the outer context.
|
||||
*
|
||||
* The outer context includes thread-pool management, which is slower to
|
||||
* compile due to increased use of C++ stdlib. The inner context used in the
|
||||
* majority of the codec library does not include this.
|
||||
*/
|
||||
|
||||
#ifndef ASTCENC_INTERNAL_ENTRY_INCLUDED
|
||||
#define ASTCENC_INTERNAL_ENTRY_INCLUDED
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
/* ============================================================================
|
||||
Parallel execution control
|
||||
============================================================================ */
|
||||
|
||||
/**
|
||||
* @brief A simple counter-based manager for parallel task execution.
|
||||
*
|
||||
* The task processing execution consists of:
|
||||
*
|
||||
* * A single-threaded init stage.
|
||||
* * A multi-threaded processing stage.
|
||||
* * A condition variable so threads can wait for processing completion.
|
||||
*
|
||||
* The init stage will be executed by the first thread to arrive in the critical section, there is
|
||||
* no main thread in the thread pool.
|
||||
*
|
||||
* The processing stage uses dynamic dispatch to assign task tickets to threads on an on-demand
|
||||
* basis. Threads may each therefore executed different numbers of tasks, depending on their
|
||||
* processing complexity. The task queue and the task tickets are just counters; the caller must map
|
||||
* these integers to an actual processing partition in a specific problem domain.
|
||||
*
|
||||
* The exit wait condition is needed to ensure processing has finished before a worker thread can
|
||||
* progress to the next stage of the pipeline. Specifically a worker may exit the processing stage
|
||||
* because there are no new tasks to assign to it while other worker threads are still processing.
|
||||
* Calling @c wait() will ensure that all other worker have finished before the thread can proceed.
|
||||
*
|
||||
* The basic usage model:
|
||||
*
|
||||
* // --------- From single-threaded code ---------
|
||||
*
|
||||
* // Reset the tracker state
|
||||
* manager->reset()
|
||||
*
|
||||
* // --------- From multi-threaded code ---------
|
||||
*
|
||||
* // Run the stage init; only first thread actually runs the lambda
|
||||
* manager->init(<lambda>)
|
||||
*
|
||||
* do
|
||||
* {
|
||||
* // Request a task assignment
|
||||
* uint task_count;
|
||||
* uint base_index = manager->get_tasks(<granule>, task_count);
|
||||
*
|
||||
* // Process any tasks we were given (task_count <= granule size)
|
||||
* if (task_count)
|
||||
* {
|
||||
* // Run the user task processing code for N tasks here
|
||||
* ...
|
||||
*
|
||||
* // Flag these tasks as complete
|
||||
* manager->complete_tasks(task_count);
|
||||
* }
|
||||
* } while (task_count);
|
||||
*
|
||||
* // Wait for all threads to complete tasks before progressing
|
||||
* manager->wait()
|
||||
*
|
||||
* // Run the stage term; only first thread actually runs the lambda
|
||||
* manager->term(<lambda>)
|
||||
*/
|
||||
class ParallelManager
{
private:
	/** @brief Lock used for critical section and condition synchronization. */
	std::mutex m_lock;

	/** @brief True if the stage init() step has been executed. */
	bool m_init_done;

	/** @brief True if the stage term() step has been executed. */
	bool m_term_done;

	/** @brief Condition variable for tracking stage processing completion. */
	std::condition_variable m_complete;

	/** @brief Number of tasks started, but not necessarily finished. */
	std::atomic<unsigned int> m_start_count;

	/** @brief Number of tasks finished. Guarded by m_lock. */
	unsigned int m_done_count;

	/** @brief Number of tasks that need to be processed. */
	unsigned int m_task_count;

	/** @brief Progress callback (optional). */
	astcenc_progress_callback m_callback;

	/** @brief Lock used for callback synchronization. */
	std::mutex m_callback_lock;

	/** @brief Minimum progress before making a callback. */
	float m_callback_min_diff;

	/** @brief Last progress callback value. Guarded by m_callback_lock. */
	float m_callback_last_value;

public:
	/** @brief Create a new ParallelManager. */
	ParallelManager()
	{
		reset();
	}

	/**
	 * @brief Reset the tracker for a new processing batch.
	 *
	 * This must be called from single-threaded code before starting the multi-threaded processing
	 * operations.
	 */
	void reset()
	{
		m_init_done = false;
		m_term_done = false;
		m_start_count = 0;
		m_done_count = 0;
		m_task_count = 0;
		m_callback = nullptr;
		m_callback_last_value = 0.0f;
		m_callback_min_diff = 1.0f;
	}

	/**
	 * @brief Trigger the pipeline stage init step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * initialization. Other threads will block and wait for it to complete.
	 *
	 * @param init_func   Callable which executes the stage initialization. It must return the
	 *                    total number of tasks in the stage.
	 */
	void init(std::function<unsigned int(void)> init_func)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_init_done)
		{
			m_task_count = init_func();
			m_init_done = true;
		}
	}

	/**
	 * @brief Trigger the pipeline stage init step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * initialization. Other threads will block and wait for it to complete.
	 *
	 * @param task_count   Total number of tasks needing processing.
	 * @param callback     Function pointer for progress status callbacks.
	 */
	void init(unsigned int task_count, astcenc_progress_callback callback)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_init_done)
		{
			m_callback = callback;
			m_task_count = task_count;
			m_init_done = true;

			// Report every 1% or 4096 blocks, whichever is larger, to avoid callback overhead
			float min_diff = (4096.0f / static_cast<float>(task_count)) * 100.0f;
			m_callback_min_diff = astc::max(min_diff, 1.0f);
		}
	}

	/**
	 * @brief Request a task assignment.
	 *
	 * Assign up to @c granule tasks to the caller for processing.
	 *
	 * @param      granule   Maximum number of tasks that can be assigned.
	 * @param[out] count     Actual number of tasks assigned, or zero if no tasks were assigned.
	 *
	 * @return Task index of the first assigned task; assigned tasks increment from this.
	 */
	unsigned int get_task_assignment(unsigned int granule, unsigned int& count)
	{
		// m_start_count may overshoot m_task_count; the overshoot is harmless
		// because an out-of-range base always yields a zero-task assignment
		unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed);
		if (base >= m_task_count)
		{
			count = 0;
			return 0;
		}

		count = astc::min(m_task_count - base, granule);
		return base;
	}

	/**
	 * @brief Complete a task assignment.
	 *
	 * Mark @c count tasks as complete. This will notify all threads blocked on @c wait() if this
	 * completes the processing of the stage.
	 *
	 * @param count   The number of completed tasks.
	 */
	void complete_task_assignment(unsigned int count)
	{
		// Note: m_done_count cannot use an atomic without the mutex; this has a race between the
		// update here and the wait() for other threads
		unsigned int local_count;
		float local_last_value;
		{
			std::unique_lock<std::mutex> lck(m_lock);
			m_done_count += count;
			local_count = m_done_count;
			local_last_value = m_callback_last_value;

			if (m_done_count == m_task_count)
			{
				// Ensure the progress bar hits 100%
				if (m_callback)
				{
					std::unique_lock<std::mutex> cblck(m_callback_lock);
					m_callback(100.0f);
					m_callback_last_value = 100.0f;
				}

				// Unlock before notify to avoid waking waiters into a held lock
				lck.unlock();
				m_complete.notify_all();
			}
		}

		// Process progress callback if we have one
		if (m_callback)
		{
			// Initial lockless test - have we progressed enough to emit?
			float num = static_cast<float>(local_count);
			float den = static_cast<float>(m_task_count);
			float this_value = (num / den) * 100.0f;
			bool report_test = (this_value - local_last_value) > m_callback_min_diff;

			// Recheck under lock, because another thread might report first
			if (report_test)
			{
				std::unique_lock<std::mutex> cblck(m_callback_lock);
				bool report_retest = (this_value - m_callback_last_value) > m_callback_min_diff;
				if (report_retest)
				{
					m_callback(this_value);
					m_callback_last_value = this_value;
				}
			}
		}
	}

	/**
	 * @brief Wait for stage processing to complete.
	 */
	void wait()
	{
		std::unique_lock<std::mutex> lck(m_lock);
		m_complete.wait(lck, [this]{ return m_done_count == m_task_count; });
	}

	/**
	 * @brief Trigger the pipeline stage term step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * work pool termination. Caller must have called @c wait() prior to calling this function to
	 * ensure that processing is complete.
	 *
	 * @param term_func   Callable which executes the stage termination.
	 */
	void term(std::function<void(void)> term_func)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_term_done)
		{
			term_func();
			m_term_done = true;
		}
	}
};
|
||||
|
||||
/**
 * @brief The astcenc compression context.
 *
 * Wraps the inner codec state with the thread-pool managers used to
 * coordinate the multi-threaded pipeline stages.
 */
struct astcenc_context
{
	/** @brief The context internal state. */
	astcenc_contexti context;

#if !defined(ASTCENC_DECOMPRESS_ONLY)
	/** @brief The parallel manager for averages computation. */
	ParallelManager manage_avg;

	/** @brief The parallel manager for compression. */
	ParallelManager manage_compress;
#endif

	/** @brief The parallel manager for decompression. */
	ParallelManager manage_decompress;
};
|
||||
|
||||
#endif
|
||||
48
engine/thirdparty/astcenc/astcenc_mathlib.cpp
vendored
Normal file
48
engine/thirdparty/astcenc/astcenc_mathlib.cpp
vendored
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2021 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#include "astcenc_mathlib.h"
|
||||
|
||||
/**
 * @brief 64-bit rotate left.
 *
 * @param val     The value to rotate.
 * @param count   The rotation, in bits.
 *
 * @return The rotated value.
 */
static inline uint64_t rotl(uint64_t val, int count)
{
	// Mask both shift amounts so a rotation of 0 (or any multiple of 64) is
	// well-defined; the unmasked form shifts by 64, which is undefined
	// behavior in C++
	unsigned int lshift = static_cast<unsigned int>(count) & 63u;
	return (val << lshift) | (val >> ((64u - lshift) & 63u));
}
|
||||
|
||||
/* See header for documentation. */
void astc::rand_init(uint64_t state[2])
{
	// Fixed seed constants; the generator is deliberately deterministic so
	// repeated runs of the codec produce identical results
	state[0] = 0xfaf9e171cea1ec6bULL;
	state[1] = 0xf1b318cc06af5d71ULL;
}
|
||||
|
||||
/* See header for documentation. */
|
||||
uint64_t astc::rand(uint64_t state[2])
|
||||
{
|
||||
uint64_t s0 = state[0];
|
||||
uint64_t s1 = state[1];
|
||||
uint64_t res = s0 + s1;
|
||||
s1 ^= s0;
|
||||
state[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16);
|
||||
state[1] = rotl(s1, 37);
|
||||
return res;
|
||||
}
|
||||
488
engine/thirdparty/astcenc/astcenc_mathlib.h
vendored
Normal file
488
engine/thirdparty/astcenc/astcenc_mathlib.h
vendored
Normal file
|
|
@ -0,0 +1,488 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
* This module implements a variety of mathematical data types and library
|
||||
* functions used by the codec.
|
||||
*/
|
||||
|
||||
#ifndef ASTC_MATHLIB_H_INCLUDED
|
||||
#define ASTC_MATHLIB_H_INCLUDED
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
|
||||
#ifndef ASTCENC_POPCNT
|
||||
#if defined(__POPCNT__)
|
||||
#define ASTCENC_POPCNT 1
|
||||
#else
|
||||
#define ASTCENC_POPCNT 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASTCENC_F16C
|
||||
#if defined(__F16C__)
|
||||
#define ASTCENC_F16C 1
|
||||
#else
|
||||
#define ASTCENC_F16C 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASTCENC_SSE
|
||||
#if defined(__SSE4_2__)
|
||||
#define ASTCENC_SSE 42
|
||||
#elif defined(__SSE4_1__)
|
||||
#define ASTCENC_SSE 41
|
||||
#elif defined(__SSE2__)
|
||||
#define ASTCENC_SSE 20
|
||||
#else
|
||||
#define ASTCENC_SSE 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASTCENC_AVX
|
||||
#if defined(__AVX2__)
|
||||
#define ASTCENC_AVX 2
|
||||
#elif defined(__AVX__)
|
||||
#define ASTCENC_AVX 1
|
||||
#else
|
||||
#define ASTCENC_AVX 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASTCENC_NEON
|
||||
#if defined(__aarch64__)
|
||||
#define ASTCENC_NEON 1
|
||||
#else
|
||||
#define ASTCENC_NEON 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Force vector-sized SIMD alignment
|
||||
#if ASTCENC_AVX
|
||||
#define ASTCENC_VECALIGN 32
|
||||
#elif ASTCENC_SSE || ASTCENC_NEON
|
||||
#define ASTCENC_VECALIGN 16
|
||||
// Use default alignment for non-SIMD builds
|
||||
#else
|
||||
#define ASTCENC_VECALIGN 0
|
||||
#endif
|
||||
|
||||
// C++11 states that alignas(0) should be ignored but GCC doesn't do
|
||||
// this on some versions, so workaround and avoid emitting alignas(0)
|
||||
#if ASTCENC_VECALIGN > 0
|
||||
#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
|
||||
#else
|
||||
#define ASTCENC_ALIGNAS
|
||||
#endif
|
||||
|
||||
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
/* ============================================================================
|
||||
Fast math library; note that many of the higher-order functions in this set
|
||||
use approximations which are less accurate, but faster, than <cmath> standard
|
||||
library equivalents.
|
||||
|
||||
Note: Many of these are not necessarily faster than simple C versions when
|
||||
used on a single scalar value, but are included for testing purposes as most
|
||||
have an option based on SSE intrinsics and therefore provide an obvious route
|
||||
to future vectorization.
|
||||
============================================================================ */
|
||||
|
||||
// Union for manipulation of float bit patterns.
// NOTE(review): reading a member other than the one last written is type
// punning; strict ISO C++ does not sanction it, but the compilers this codec
// targets are presumed to support it — confirm against project toolchains.
typedef union
{
	uint32_t u;
	int32_t s;
	float f;
} if32;
|
||||
|
||||
// These are namespaced to avoid colliding with C standard library functions.
|
||||
namespace astc
|
||||
{
|
||||
|
||||
static const float PI = 3.14159265358979323846f;
|
||||
static const float PI_OVER_TWO = 1.57079632679489661923f;
|
||||
|
||||
/**
 * @brief Absolute value of a single-precision float.
 *
 * @param v The input value.
 *
 * @return @p v with the sign bit cleared.
 */
static inline float fabs(float v)
{
	float av = std::fabs(v);
	return av;
}
|
||||
|
||||
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on NaN being the only value that compares unequal to itself.
 *
 * @param v The value to test.
 *
 * @return @c true if @p v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	return !(v == v);
}
|
||||
|
||||
/**
 * @brief Return the minimum of two values.
 *
 * For floats, a NaN @p p is turned into @p q (NaN comparisons are false).
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}
|
||||
|
||||
/**
 * @brief Return the minimum of three values.
 *
 * For floats, NaNs propagate so that an all-NaN input yields @p r.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	T pq = (p < q) ? p : q;
	return (pq < r) ? pq : r;
}
|
||||
|
||||
/**
 * @brief Return the minimum of four values.
 *
 * For floats, NaNs propagate so that an all-NaN input yields @p s.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 * @param s The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	T pq = (p < q) ? p : q;
	T rs = (r < s) ? r : s;
	return (pq < rs) ? pq : rs;
}
|
||||
|
||||
/**
 * @brief Return the maximum of two values.
 *
 * For floats, a NaN @p p is turned into @p q (NaN comparisons are false).
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}
|
||||
|
||||
/**
 * @brief Return the maximum of three values.
 *
 * For floats, NaNs propagate so that an all-NaN input yields @p r.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	T pq = (p > q) ? p : q;
	return (pq > r) ? pq : r;
}
|
||||
|
||||
/**
 * @brief Return the maximum of four values.
 *
 * For floats, NaNs propagate so that an all-NaN input yields @p s.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 * @param s The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	T pq = (p > q) ? p : q;
	T rs = (r > s) ? r : s;
	return (pq > rs) ? pq : rs;
}
|
||||
|
||||
/**
 * @brief Clamp a value between @p mn and @p mx, inclusive.
 *
 * For floats, NaNs are turned into @p mn.
 *
 * @param v  The value to clamp.
 * @param mn The min value (inclusive).
 * @param mx The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// Comparison order is significant for NaN handling: any comparison with
	// NaN is false, so a NaN input fails both tests and falls through to mn.
	if (v > mx)
	{
		return mx;
	}
	else if (v > mn)
	{
		return v;
	}
	else
	{
		return mn;
	}
}
|
||||
|
||||
/**
 * @brief Clamp a float value between 0.0f and 1.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp1f(float v)
{
	// Inlined clamp(v, 0.0f, 1.0f); NaN fails both tests and yields 0.0f.
	if (v > 1.0f)
	{
		return 1.0f;
	}

	if (v > 0.0f)
	{
		return v;
	}

	return 0.0f;
}
|
||||
|
||||
/**
 * @brief Clamp a float value between 0.0f and 255.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp255f(float v)
{
	// Inlined clamp(v, 0.0f, 255.0f); NaN fails both tests and yields 0.0f.
	if (v > 255.0f)
	{
		return 255.0f;
	}

	if (v > 0.0f)
	{
		return v;
	}

	return 0.0f;
}
|
||||
|
||||
/**
 * @brief SP float round-down (towards negative infinity).
 *
 * @param v The value to round.
 *
 * @return The rounded value.
 */
static inline float flt_rd(float v)
{
	float rounded = std::floor(v);
	return rounded;
}
|
||||
|
||||
/**
 * @brief SP float round-to-nearest and convert to integer.
 *
 * Implemented as add-half then truncate, so ties round upwards.
 *
 * @param v The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	float biased = v + 0.5f;
	return static_cast<int>(biased);
}
|
||||
|
||||
/**
 * @brief SP float round-towards-zero and convert to integer.
 *
 * @param v The value to round.
 *
 * @return The truncated value.
 */
static inline int flt2int_rd(float v)
{
	int truncated = static_cast<int>(v);
	return truncated;
}
|
||||
|
||||
/**
 * @brief SP float bit pattern reinterpreted as an integer.
 *
 * @param v The value to bitcast.
 *
 * @return The raw bits of @p v, as a signed integer.
 */
static inline int float_as_int(float v)
{
	union { float fval; int ival; } bits;
	bits.fval = v;
	return bits.ival;
}
|
||||
|
||||
/**
 * @brief Integer bit pattern reinterpreted as an SP float.
 *
 * @param v The value to bitcast.
 *
 * @return The float whose raw bits are @p v.
 */
static inline float int_as_float(int v)
{
	union { float fval; int ival; } bits;
	bits.ival = v;
	return bits.fval;
}
|
||||
|
||||
/**
 * @brief Compute 1.0 / sqrt(v).
 *
 * Despite the historical "fast approximation" naming in this library, this
 * scalar version computes a full-precision reciprocal square root.
 *
 * @param v The input value.
 *
 * @return The result.
 */
static inline float rsqrt(float v)
{
	float root = std::sqrt(v);
	return 1.0f / root;
}
|
||||
|
||||
/**
 * @brief Compute sqrt(v).
 *
 * @param v The input value.
 *
 * @return The square root of @p v.
 */
static inline float sqrt(float v)
{
	float root = std::sqrt(v);
	return root;
}
|
||||
|
||||
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Operates directly on the IEEE-754 bit pattern: the exponent field is read
 * out with a bias of 126 so that the returned mantissa lies in [0.5, 1.0)
 * for normal inputs (matching std::frexp for that range).
 *
 * NOTE(review): no special-casing of zero, denormal, infinity or NaN inputs
 * is visible here, so behavior for those presumably diverges from
 * std::frexp — confirm against callers before relying on it.
 *
 * @param      v    The input value.
 * @param[out] expo The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	if32 p;
	p.f = v;
	// Exponent field is bits [30:23]; bias by 126 (not 127) because the
	// mantissa is normalized to [0.5, 1.0) rather than [1.0, 2.0).
	*expo = ((p.u >> 23) & 0xFF) - 126;
	// Keep sign and mantissa bits, force the exponent field to 126 (0.5..1.0).
	p.u = (p.u & 0x807fffff) | 0x3f000000;
	return p.f;
}
|
||||
|
||||
/**
|
||||
* @brief Initialize the seed structure for a random number generator.
|
||||
*
|
||||
* Important note: For the purposes of ASTC we want sets of random numbers to
|
||||
* use the codec, but we want the same seed value across instances and threads
|
||||
* to ensure that image output is stable across compressor runs and across
|
||||
* platforms. Every PRNG created by this call will therefore return the same
|
||||
* sequence of values ...
|
||||
*
|
||||
* @param state The state structure to initialize.
|
||||
*/
|
||||
void rand_init(uint64_t state[2]);
|
||||
|
||||
/**
|
||||
* @brief Return the next random number from the generator.
|
||||
*
|
||||
* This RNG is an implementation of the "xoroshoro-128+ 1.0" PRNG, based on the
|
||||
* public-domain implementation given by David Blackman & Sebastiano Vigna at
|
||||
* http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
|
||||
*
|
||||
* @param state The state structure to use/update.
|
||||
*/
|
||||
uint64_t rand(uint64_t state[2]);
|
||||
|
||||
}
|
||||
|
||||
/* ============================================================================
|
||||
Softfloat library with fp32 and fp16 conversion functionality.
|
||||
============================================================================ */
|
||||
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
|
||||
/* narrowing float->float conversions */
|
||||
uint16_t float_to_sf16(float val);
|
||||
float sf16_to_float(uint16_t val);
|
||||
#endif
|
||||
|
||||
/*********************************
|
||||
Vector library
|
||||
*********************************/
|
||||
#include "astcenc_vecmathlib.h"
|
||||
|
||||
/*********************************
|
||||
Declaration of line types
|
||||
*********************************/
|
||||
// parametric line, 2D: The line is given by line = a + b * t.
|
||||
|
||||
struct line2
|
||||
{
|
||||
vfloat4 a;
|
||||
vfloat4 b;
|
||||
};
|
||||
|
||||
// parametric line, 3D
|
||||
struct line3
|
||||
{
|
||||
vfloat4 a;
|
||||
vfloat4 b;
|
||||
};
|
||||
|
||||
struct line4
|
||||
{
|
||||
vfloat4 a;
|
||||
vfloat4 b;
|
||||
};
|
||||
|
||||
|
||||
struct processed_line2
|
||||
{
|
||||
vfloat4 amod;
|
||||
vfloat4 bs;
|
||||
};
|
||||
|
||||
struct processed_line3
|
||||
{
|
||||
vfloat4 amod;
|
||||
vfloat4 bs;
|
||||
};
|
||||
|
||||
struct processed_line4
|
||||
{
|
||||
vfloat4 amod;
|
||||
vfloat4 bs;
|
||||
};
|
||||
|
||||
#endif
|
||||
411
engine/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp
vendored
Normal file
411
engine/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp
vendored
Normal file
|
|
@ -0,0 +1,411 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2021 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Soft-float library for IEEE-754.
|
||||
*/
|
||||
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
|
||||
|
||||
#include "astcenc_mathlib.h"
|
||||
|
||||
/* sized soft-float types. These are mapped to the sized integer
|
||||
types of C99, instead of C's floating-point types; this is because
|
||||
the library needs to maintain exact, bit-level control on all
|
||||
operations on these data types. */
|
||||
typedef uint16_t sf16;
|
||||
typedef uint32_t sf32;
|
||||
|
||||
/******************************************
|
||||
helper functions and their lookup tables
|
||||
******************************************/
|
||||
/* count leading zeros functions. Only used when the input is nonzero. */
|
||||
|
||||
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
|
||||
#elif defined(__arm__) && defined(__ARMCC_VERSION)
|
||||
#elif defined(__arm__) && defined(__GNUC__)
|
||||
#else
|
||||
/* table used for the slow default versions. */
|
||||
static const uint8_t clz_table[256] =
|
||||
{
|
||||
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
   32-bit count-leading-zeros function: use the Assembly instruction whenever possible.
   Only used when the input is nonzero. */
static uint32_t clz32(uint32_t inp)
{
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
	/* x86 BSR returns the index of the highest set bit; '| 1' keeps the
	   operand nonzero so the instruction result is always defined. */
	uint32_t bsr;
	__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
	return 31 - bsr;
#else
#if defined(__arm__) && defined(__ARMCC_VERSION)
	return __clz(inp); /* armcc builtin */
#else
#if defined(__arm__) && defined(__GNUC__)
	uint32_t lz;
	__asm__("clz %0, %1": "=r"(lz):"r"(inp));
	return lz;
#else
	/* slow default version: narrow the value down to its top byte, then
	   finish with the 256-entry clz_table lookup defined above. */
	uint32_t summa = 24;
	if (inp >= UINT32_C(0x10000))
	{
		inp >>= 16;
		summa -= 16;
	}
	if (inp >= UINT32_C(0x100))
	{
		inp >>= 8;
		summa -= 8;
	}
	return summa + clz_table[inp];
#endif
#endif
#endif
}
|
||||
|
||||
/* the five rounding modes that IEEE-754r defines */
|
||||
typedef enum
|
||||
{
|
||||
SF_UP = 0, /* round towards positive infinity */
|
||||
SF_DOWN = 1, /* round towards negative infinity */
|
||||
SF_TOZERO = 2, /* round towards zero */
|
||||
SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */
|
||||
SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */
|
||||
} roundmode;
|
||||
|
||||
|
||||
/* Right-shift with round-to-nearest-even: shift @p inp right by @p shamt,
   rounding ties towards the result whose LSB is zero. */
static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
{
	uint32_t unit = UINT32_C(1) << shamt;
	uint32_t biased = inp + (unit >> 1); /* add 0.5 ULP up front */
	uint32_t oddmask = (inp | UINT32_C(1)) & unit; /* nonzero when the result LSB would be odd; '| 1' forces odd when shamt is 0 */
	oddmask--; /* MSB set if even, clear if odd */
	biased -= (oddmask >> 31); /* back off one epsilon before shifting in the even case */
	biased >>= shamt;
	return biased;
}
|
||||
|
||||
/* Right-shift with round-to-nearest, ties away from zero: add half a unit
   before shifting. */
static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
{
	uint32_t half = (UINT32_C(1) << shamt) >> 1;
	uint32_t biased = inp + half;
	return biased >> shamt;
}
|
||||
|
||||
/* Right-shift with round-up (towards positive infinity): add one unit minus
   an epsilon before shifting, i.e. a ceiling division by 2^shamt. */
static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
{
	uint32_t unit = UINT32_C(1) << shamt;
	uint32_t biased = (inp + unit) - 1;
	return biased >> shamt;
}
|
||||
|
||||
/**
 * @brief Convert a 16-bit soft-float value to its 32-bit bit pattern.
 *
 * @param inp The FP16 value, as raw bits.
 *
 * @return The equivalent FP32 value, as raw bits.
 */
static sf32 sf16_to_sf32(sf16 inp)
{
	uint32_t inpx = inp;

	/*
	This table contains, for every FP16 sign/exponent value combination,
	the difference between the input FP16 value and the value obtained
	by shifting the correct FP32 result right by 13 bits.
	This table allows us to handle every case except denormals and NaN
	with just 1 table lookup, 2 shifts and 1 add.
	The MSB marks the sign/exponent combinations (zero/denormal, infinity/NaN)
	that need special handling below.
	*/

	#define WITH_MSB(a) (UINT32_C(a) | (1u << 31))
	static const uint32_t tbl[64] =
	{
		WITH_MSB(0x00000), 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
		0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
		0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
		0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, WITH_MSB(0x38000),
		WITH_MSB(0x38000), 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
		0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
		0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
		0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, WITH_MSB(0x70000)
	};

	uint32_t res = tbl[inpx >> 10];
	res += inpx;

	/* Normal cases: MSB of 'res' not set. */
	if ((res & WITH_MSB(0)) == 0)
	{
		return res << 13;
	}

	/* Infinity and Zero: 10 LSB of 'res' not set. */
	if ((res & 0x3FF) == 0)
	{
		return res << 13;
	}

	/* NaN: the exponent field of 'inp' is non-zero. */
	if ((inpx & 0x7C00) != 0)
	{
		/* All NaNs are quietened by forcing the top mantissa bit. */
		return (res << 13) | 0x400000;
	}

	/* Denormal cases: renormalize the mantissa, compensating the exponent by
	   the number of leading zeroes consumed by the shift. */
	uint32_t sign = (inpx & 0x8000) << 16;
	uint32_t mskval = inpx & 0x7FFF;
	uint32_t leadingzeroes = clz32(mskval);
	mskval <<= leadingzeroes;
	return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
}
|
||||
|
||||
/**
 * @brief Convert a FP32 bit pattern to a 16-bit soft-float value.
 *
 * Supports denormals and all five IEEE-754r rounding modes. If a NaN is given
 * as input, it is quietened.
 *
 * @param inp   The FP32 value to convert, as raw bits.
 * @param rmode The rounding mode to apply.
 *
 * @return The converted FP16 value, as raw bits.
 */
static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
{
	/* for each possible sign/exponent combination, store a case index. This gives a 512-byte table.
	   Case indices are multiples of 5; adding the rounding mode (0..4) selects the final case. */
	static const uint8_t tab[512] {
		0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
		30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,

		5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
		35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
	};

	/* many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code
	   size. */
	static const uint32_t tabx[60] {
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
		UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
		UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
		UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
		UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
		UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
	};

	uint32_t p;
	uint32_t idx = rmode + tab[inp >> 23];
	uint32_t vlx = tabx[idx];
	switch (idx)
	{
		/*
		Positive number which may be Infinity or NaN.
		We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
		(If we don't do this quieting, then a NaN that is distinguished only by having
		its low-order bits set, would be turned into an INF. */
	case 50:
	case 51:
	case 52:
	case 53:
	case 54:
	case 55:
	case 56:
	case 57:
	case 58:
	case 59:
		/*
		the input value is 0x7F800000 or 0xFF800000 if it is INF.
		By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
		For NaNs, however, this operation will keep bit 23 with the value 1.
		We can then extract bit 23, and logical-OR bit 9 of the result with this
		bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
		of the mantissa is set.)
		*/
		p = (inp - 1) & UINT32_C(0x800000); /* zero if INF, nonzero if NaN. */
		return static_cast<sf16>(((inp + vlx) >> 13) | (p >> 14));
		/*
		positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
		If it is, then return 0, else return 1 (the smallest representable nonzero number)
		*/
	case 0:
		/*
		-inp will set the MSB if the input number is nonzero.
		Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
		*/
		return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> 31);

		/*
		negative, exponent = 0, round-mode == DOWN, need to check whether number is
		actually 0. If it is, return 0x8000 ( float -0.0 )
		Else return the smallest negative number ( 0x8001 ) */
	case 6:
		/*
		in this case 'vlx' is 0x80000000. By subtracting the input value from it,
		we obtain a value that is 0 if the input value is in fact zero and has
		the MSB set if it isn't. We then right-shift the value by 31 places to
		get a value that is 0 if the input is -0.0 and 1 otherwise.
		*/
		return static_cast<sf16>(((vlx - inp) >> 31) + UINT32_C(0x8000));

		/*
		for all other cases involving underflow/overflow, we don't need to
		do actual tests; we just return 'vlx'.
		*/
	case 1:
	case 2:
	case 3:
	case 4:
	case 5:
	case 7:
	case 8:
	case 9:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
	case 16:
	case 17:
	case 18:
	case 19:
	case 40:
	case 41:
	case 42:
	case 43:
	case 44:
	case 45:
	case 46:
	case 47:
	case 48:
	case 49:
		return static_cast<sf16>(vlx);

		/*
		for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
		FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
		baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
		from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
		for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
		except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */

		/* normal number, all rounding modes except round-to-nearest-even: */
	case 30:
	case 31:
	case 32:
	case 34:
	case 35:
	case 36:
	case 37:
	case 39:
		return static_cast<sf16>((inp + vlx) >> 13);

		/* normal number, round-to-nearest-even. */
	case 33:
	case 38:
		p = inp + vlx;
		p += (inp >> 13) & 1; /* odd inputs need one extra epsilon for correct ties-to-even */
		return static_cast<sf16>(p >> 13);

		/*
		the various denormal cases. These are not expected to be common, so their performance is a bit
		less important. For each of these cases, we need to extract an exponent and a mantissa
		(including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
		depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
		sign of the resulting denormal number.
		*/
	case 21:
	case 22:
	case 25:
	case 27:
		/* denormal, round towards zero. */
		p = 126 - ((inp >> 23) & 0xFF);
		return static_cast<sf16>((((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx);
	case 20:
	case 26:
		/* denormal, round away from zero. */
		p = 126 - ((inp >> 23) & 0xFF);
		return static_cast<sf16>(rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
	case 24:
	case 29:
		/* denormal, round to nearest-away */
		p = 126 - ((inp >> 23) & 0xFF);
		return static_cast<sf16>(rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
	case 23:
	case 28:
		/* denormal, round to nearest-even. */
		p = 126 - ((inp >> 23) & 0xFF);
		return static_cast<sf16>(rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
	}

	/* unreachable: every case index produced by 'tab' + rmode is handled above */
	return 0;
}
|
||||
|
||||
/* convert from soft-float to native-float */
|
||||
float sf16_to_float(uint16_t p)
|
||||
{
|
||||
if32 i;
|
||||
i.u = sf16_to_sf32(p);
|
||||
return i.f;
|
||||
}
|
||||
|
||||
/* convert from native-float to soft-float */
|
||||
uint16_t float_to_sf16(float p)
|
||||
{
|
||||
if32 i;
|
||||
i.f = p;
|
||||
return sf32_to_sf16(i.u, SF_NEARESTEVEN);
|
||||
}
|
||||
|
||||
#endif
|
||||
481
engine/thirdparty/astcenc/astcenc_partition_tables.cpp
vendored
Normal file
481
engine/thirdparty/astcenc/astcenc_partition_tables.cpp
vendored
Normal file
|
|
@ -0,0 +1,481 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for generating partition tables on demand.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
|
||||
#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
|
||||
|
||||
/**
|
||||
* @brief Generate a canonical representation of a partition pattern.
|
||||
*
|
||||
* The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
|
||||
* the remapped texel index. Remapping ensures that we only match on the partition pattern,
|
||||
* independent of the partition order generated by the hash.
|
||||
*
|
||||
* @param texel_count The number of texels in the block.
|
||||
* @param partition_of_texel The partition assignments, in hash order.
|
||||
* @param[out] bit_pattern The output bit pattern representation.
|
||||
*/
|
||||
static void generate_canonical_partitioning(
	unsigned int texel_count,
	const uint8_t* partition_of_texel,
	uint64_t bit_pattern[BIT_PATTERN_WORDS]
) {
	// Clear the pattern
	for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
	{
		bit_pattern[i] = 0;
	}

	// Store a mapping to reorder the raw partitions so that the partitions are ordered such
	// that the lowest texel index in partition N is smaller than the lowest texel index in
	// partition N + 1.
	int mapped_index[BLOCK_MAX_PARTITIONS];
	int map_weight_count = 0;

	// -1 marks a raw partition index that has not been assigned a canonical slot yet
	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
	{
		mapped_index[i] = -1;
	}

	for (unsigned int i = 0; i < texel_count; i++)
	{
		int index = partition_of_texel[i];
		// First texel seen for a partition assigns it the next canonical index
		if (mapped_index[index] < 0)
		{
			mapped_index[index] = map_weight_count++;
		}

		// Two bits per texel, so 32 texels pack into each 64-bit word (hence i >> 5)
		uint64_t xlat_index = mapped_index[index];
		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
	}
}
|
||||
|
||||
/**
|
||||
* @brief Compare two canonical patterns to see if they are the same.
|
||||
*
|
||||
* @param part1 The first canonical bit pattern to check.
|
||||
* @param part2 The second canonical bit pattern to check.
|
||||
*
|
||||
* @return @c true if the patterns are the same, @c false otherwise.
|
||||
*/
|
||||
static bool compare_canonical_partitionings(
	const uint64_t part1[BIT_PATTERN_WORDS],
	const uint64_t part2[BIT_PATTERN_WORDS]
) {
	// Word-by-word equality, manually unrolled; the preprocessor trims the
	// chain to exactly the BIT_PATTERN_WORDS needed for the configured
	// maximum block size.
	return (part1[0] == part2[0])
#if BIT_PATTERN_WORDS > 1
	    && (part1[1] == part2[1])
#endif
#if BIT_PATTERN_WORDS > 2
	    && (part1[2] == part2[2])
#endif
#if BIT_PATTERN_WORDS > 3
	    && (part1[3] == part2[3])
#endif
#if BIT_PATTERN_WORDS > 4
	    && (part1[4] == part2[4])
#endif
#if BIT_PATTERN_WORDS > 5
	    && (part1[5] == part2[5])
#endif
#if BIT_PATTERN_WORDS > 6
	    && (part1[6] == part2[6])
#endif
	;
}
|
||||
|
||||
/**
 * @brief Hash function used for procedural partition assignment.
 *
 * A fixed xor-shift / multiply mixing sequence; must match the ASTC
 * specification exactly so decode reproduces the same partitioning.
 *
 * @param inp The hash seed.
 *
 * @return The hashed value.
 */
static uint32_t hash52(
	uint32_t inp
) {
	uint32_t v = inp;
	v = v ^ (v >> 15);

	// Multiplier factors as (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
	v = v * 0xEEDE0891;
	v = v ^ (v >> 5);
	v = v + (v << 16);
	v = v ^ (v >> 7);
	v = v ^ (v >> 3);
	v = v ^ (v << 6);
	v = v ^ (v >> 17);
	return v;
}
|
||||
|
||||
/**
 * @brief Select texel assignment for a single coordinate.
 *
 * NOTE(review): this appears to mirror the ASTC specification's procedural
 * partition-selection function; the constants, shifts, and masks below look
 * format-mandated, so confirm against the spec before altering any of them.
 *
 * @param seed              The seed - the partition index from the block.
 * @param x                 The texel X coordinate in the block.
 * @param y                 The texel Y coordinate in the block.
 * @param z                 The texel Z coordinate in the block.
 * @param partition_count   The total partition count of this encoding.
 * @param small_block       @c true if the block has fewer than 32 texels.
 *
 * @return The assigned partition index for this texel.
 */
static uint8_t select_partition(
	int seed,
	int x,
	int y,
	int z,
	int partition_count,
	bool small_block
) {
	// For small blocks bias the coordinates to get better distribution
	if (small_block)
	{
		x <<= 1;
		y <<= 1;
		z <<= 1;
	}

	// Fold the partition count into the seed so each count gets its own
	// family of 1024 hash patterns
	seed += (partition_count - 1) * 1024;

	uint32_t rnum = hash52(seed);

	// Extract twelve 4-bit seed values from the hash; seeds 9-12 reuse
	// overlapping bit windows of the same 32-bit hash
	uint8_t seed1 = rnum & 0xF;
	uint8_t seed2 = (rnum >> 4) & 0xF;
	uint8_t seed3 = (rnum >> 8) & 0xF;
	uint8_t seed4 = (rnum >> 12) & 0xF;
	uint8_t seed5 = (rnum >> 16) & 0xF;
	uint8_t seed6 = (rnum >> 20) & 0xF;
	uint8_t seed7 = (rnum >> 24) & 0xF;
	uint8_t seed8 = (rnum >> 28) & 0xF;
	uint8_t seed9 = (rnum >> 18) & 0xF;
	uint8_t seed10 = (rnum >> 22) & 0xF;
	uint8_t seed11 = (rnum >> 26) & 0xF;
	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

	// Squaring all the seeds in order to bias their distribution towards
	// lower values (max value 15^2 = 225 still fits in uint8_t)
	seed1 *= seed1;
	seed2 *= seed2;
	seed3 *= seed3;
	seed4 *= seed4;
	seed5 *= seed5;
	seed6 *= seed6;
	seed7 *= seed7;
	seed8 *= seed8;
	seed9 *= seed9;
	seed10 *= seed10;
	seed11 *= seed11;
	seed12 *= seed12;

	// Pick per-seed downshift amounts from bits of the raw seed; this
	// scales the gradient magnitude of the plane functions below
	int sh1, sh2;
	if (seed & 1)
	{
		sh1 = (seed & 2 ? 4 : 5);
		sh2 = (partition_count == 3 ? 6 : 5);
	}
	else
	{
		sh1 = (partition_count == 3 ? 6 : 5);
		sh2 = (seed & 2 ? 4 : 5);
	}

	int sh3 = (seed & 0x10) ? sh1 : sh2;

	seed1 >>= sh1;
	seed2 >>= sh2;
	seed3 >>= sh1;
	seed4 >>= sh2;
	seed5 >>= sh1;
	seed6 >>= sh2;
	seed7 >>= sh1;
	seed8 >>= sh2;

	seed9 >>= sh3;
	seed10 >>= sh3;
	seed11 >>= sh3;
	seed12 >>= sh3;

	// Evaluate one linear function of (x, y, z) per candidate partition,
	// each with a hash-derived offset
	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);

	// Apply the saw: wrap each plane function to its low 6 bits, turning
	// it into a periodic sawtooth over the block
	a &= 0x3F;
	b &= 0x3F;
	c &= 0x3F;
	d &= 0x3F;

	// Remove some of the components if we are to output < 4 partitions.
	if (partition_count <= 3)
	{
		d = 0;
	}

	if (partition_count <= 2)
	{
		c = 0;
	}

	if (partition_count <= 1)
	{
		b = 0;
	}

	// The texel belongs to the partition with the largest value; ties
	// resolve in favor of the lower partition index
	uint8_t partition;
	if (a >= b && a >= c && a >= d)
	{
		partition = 0;
	}
	else if (b >= c && b >= d)
	{
		partition = 1;
	}
	else if (c >= d)
	{
		partition = 2;
	}
	else
	{
		partition = 3;
	}

	return partition;
}
||||
|
||||
/**
 * @brief Generate a single partition info structure.
 *
 * Assigns every texel in the block to a partition using the procedural
 * @c select_partition function, then fills in texel lists, per-partition
 * counts, and (for 2/3/4 partitions) the coverage bitmaps stored in @c bsd.
 *
 * @param[in,out] bsd                    The block size information; coverage
 *                                       bitmaps for this entry are written here.
 * @param         partition_count        The partition count of this partitioning.
 * @param         partition_index        The partition index / seed of this partitioning.
 * @param         partition_remap_index  The remapped partition index of this partitioning.
 * @param[out]    pi                     The partition info structure to populate.
 *
 * @return True if this is a useful partition index, False if we can skip it.
 */
static bool generate_one_partition_info_entry(
	block_size_descriptor& bsd,
	unsigned int partition_count,
	unsigned int partition_index,
	unsigned int partition_remap_index,
	partition_info& pi
) {
	int texels_per_block = bsd.texel_count;
	// Matches the small_block threshold baked into select_partition
	bool small_block = texels_per_block < 32;

	uint8_t *partition_of_texel = pi.partition_of_texel;

	// Assign texels to partitions, walking the block in x-fastest order.
	// Texel indices are stored as uint8_t — assumes block texel count fits
	// in a byte (bounded by the block size limits elsewhere).
	int texel_idx = 0;
	int counts[BLOCK_MAX_PARTITIONS] { 0 };
	for (unsigned int z = 0; z < bsd.zdim; z++)
	{
		for (unsigned int y = 0; y < bsd.ydim; y++)
		{
			for (unsigned int x = 0; x < bsd.xdim; x++)
			{
				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
				*partition_of_texel++ = part;
			}
		}
	}

	// Fill loop tail so we can overfetch later: pad each partition's texel
	// list up to a SIMD-width multiple by repeating its last real texel.
	// (Empty partitions get no padding since the rounded count is also 0.)
	for (unsigned int i = 0; i < partition_count; i++)
	{
		int ptex_count = counts[i];
		int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
		for (int j = ptex_count; j < ptex_count_simd; j++)
		{
			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
		}
	}

	// Populate the actual procedural partition count: the number of
	// partitions that actually received at least one texel
	if (counts[0] == 0)
	{
		pi.partition_count = 0;
	}
	else if (counts[1] == 0)
	{
		pi.partition_count = 1;
	}
	else if (counts[2] == 0)
	{
		pi.partition_count = 2;
	}
	else if (counts[3] == 0)
	{
		pi.partition_count = 3;
	}
	else
	{
		pi.partition_count = 4;
	}

	// Populate the partition index
	pi.partition_index = static_cast<uint16_t>(partition_index);

	// Populate the coverage bitmaps for 2/3/4 partitions; the 1-partition
	// case has no bitmap table so bitmaps stays null
	uint64_t* bitmaps { nullptr };
	if (partition_count == 2)
	{
		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
	}
	else if (partition_count == 3)
	{
		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
	}
	else if (partition_count == 4)
	{
		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
	}

	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
	{
		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
	}

	// Valid partitionings have texels in all of the requested partitions
	bool valid = pi.partition_count == partition_count;

	if (bitmaps)
	{
		// Populate the partition coverage bitmap: one bit per sampled
		// texel, set in the bitmap of the partition that owns it. Only the
		// kmeans-sampled subset of texels is covered (at most 64 bits).
		for (unsigned int i = 0; i < partition_count; i++)
		{
			bitmaps[i] = 0ULL;
		}

		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
		for (unsigned int i = 0; i < texels_to_process; i++)
		{
			unsigned int idx = bsd.kmeans_texels[i];
			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
		}
	}

	return valid;
}
||||
|
||||
/**
 * @brief Build the packed partition table for one partition count.
 *
 * Generates all candidate partitionings for the given count, packing into
 * @c ptab only those that are useful (all partitions populated) and
 * canonically unique (not a relabeling of an earlier entry). Useful entries
 * are packed first (pass 0); the remainder follow (pass 1) unless
 * partitionings may be omitted, in which case pass 1 is skipped entirely.
 *
 * @param[in,out] bsd                      The block size information; the
 *                                         packed index and count arrays are
 *                                         updated here.
 * @param         can_omit_partitionings   @c true if non-selected
 *                                         partitionings can be left out.
 * @param         partition_count_cutoff   Max partition count to build when
 *                                         omitting is allowed.
 * @param         partition_count          The partition count to build (2-4).
 * @param[out]    ptab                     The packed partition table to fill.
 * @param[out]    canonical_patterns       Scratch storage for one canonical
 *                                         bit pattern per packed entry.
 */
static void build_partition_table_for_one_partition_count(
	block_size_descriptor& bsd,
	bool can_omit_partitionings,
	unsigned int partition_count_cutoff,
	unsigned int partition_count,
	partition_info* ptab,
	uint64_t* canonical_patterns
) {
	unsigned int next_index = 0;
	bsd.partitioning_count_selected[partition_count - 1] = 0;
	bsd.partitioning_count_all[partition_count - 1] = 0;

	// Skip tables larger than config max partition count if we can omit modes
	if (can_omit_partitionings && (partition_count > partition_count_cutoff))
	{
		return;
	}

	// Iterate through twice
	// - Pass 0: Keep selected partitionings
	// - Pass 1: Keep non-selected partitionings (skip if in omit mode)
	unsigned int max_iter = can_omit_partitionings ? 1 : 2;

	// Tracker for things we built in the first iteration
	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
	for (unsigned int x = 0; x < max_iter; x++)
	{
		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
		{
			// Don't include things we built in the first pass
			if ((x == 1) && build[i])
			{
				continue;
			}

			// Generate candidate i into the next free packed slot; the slot
			// is only committed (next_index++) if the entry is kept
			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
			if ((x == 0) && !keep_useful)
			{
				continue;
			}

			// Reject candidates whose canonical pattern duplicates an
			// already-packed entry (same partitioning up to label swaps)
			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
			bool keep_canonical = true;
			for (unsigned int j = 0; j < next_index; j++)
			{
				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS);
				if (match)
				{
					keep_canonical = false;
					break;
				}
			}

			if (keep_useful && keep_canonical)
			{
				// Useful + unique entries are committed in pass 0 only
				if (x == 0)
				{
					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
					bsd.partitioning_count_selected[partition_count - 1]++;
					bsd.partitioning_count_all[partition_count - 1]++;
					build[i] = 1;
					next_index++;
				}
			}
			else
			{
				// Everything else is committed in pass 1 (counted in "all"
				// but not in "selected")
				if (x == 1)
				{
					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
					bsd.partitioning_count_all[partition_count - 1]++;
					next_index++;
				}
			}
		}
	}
}
||||
|
||||
/* See header for documentation. */
|
||||
void init_partition_tables(
|
||||
block_size_descriptor& bsd,
|
||||
bool can_omit_partitionings,
|
||||
unsigned int partition_count_cutoff
|
||||
) {
|
||||
partition_info* par_tab2 = bsd.partitionings;
|
||||
partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
|
||||
partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
|
||||
partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
|
||||
|
||||
generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
|
||||
bsd.partitioning_count_selected[0] = 1;
|
||||
bsd.partitioning_count_all[0] = 1;
|
||||
|
||||
uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
|
||||
|
||||
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
|
||||
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
|
||||
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
|
||||
|
||||
delete[] canonical_patterns;
|
||||
}
|
||||
1251
engine/thirdparty/astcenc/astcenc_percentile_tables.cpp
vendored
Normal file
1251
engine/thirdparty/astcenc/astcenc_percentile_tables.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
1350
engine/thirdparty/astcenc/astcenc_pick_best_endpoint_format.cpp
vendored
Normal file
1350
engine/thirdparty/astcenc/astcenc_pick_best_endpoint_format.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
903
engine/thirdparty/astcenc/astcenc_quantization.cpp
vendored
Normal file
903
engine/thirdparty/astcenc/astcenc_quantization.cpp
vendored
Normal file
|
|
@ -0,0 +1,903 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions and data tables for numeric quantization..
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
// Not scrambled, starts from QUANT_6
|
||||
const uint8_t color_unquant_to_uquant_tables[17][512] {
|
||||
{ // QUANT_6
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
|
||||
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
|
||||
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
|
||||
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 102, 102, 102, 102, 102, 102,
|
||||
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
|
||||
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
|
||||
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
|
||||
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
|
||||
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
|
||||
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
|
||||
153, 153, 153, 153, 153, 153, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
|
||||
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
|
||||
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
|
||||
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_8
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
|
||||
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
|
||||
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
|
||||
73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
|
||||
73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 109, 109, 109, 109, 109, 109, 109, 109, 109,
|
||||
109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
|
||||
109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
|
||||
146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146,
|
||||
146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146,
|
||||
146, 146, 146, 146, 146, 146, 146, 146, 146, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182,
|
||||
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182,
|
||||
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
|
||||
219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
|
||||
219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_10
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
||||
56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
||||
56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
|
||||
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
|
||||
142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142,
|
||||
142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 171, 171, 171, 171, 171, 171,
|
||||
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171,
|
||||
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199,
|
||||
199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199,
|
||||
199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227,
|
||||
227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227,
|
||||
227, 227, 227, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_12
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 23, 23, 23, 23, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
|
||||
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
|
||||
69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
|
||||
69, 69, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
|
||||
92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
|
||||
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
|
||||
139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
|
||||
139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||||
163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||||
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
|
||||
209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 232, 232, 232, 232, 232, 232,
|
||||
232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232,
|
||||
232, 232, 232, 232, 232, 232, 232, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_16
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||||
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
|
||||
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 68, 68, 68, 68, 68, 68, 68, 68,
|
||||
68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 85, 85, 85, 85, 85, 85,
|
||||
85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 102, 102, 102, 102,
|
||||
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 119, 119,
|
||||
119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||||
136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
|
||||
136, 136, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
|
||||
153, 153, 153, 153, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170,
|
||||
170, 170, 170, 170, 170, 170, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
|
||||
187, 187, 187, 187, 187, 187, 187, 187, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
|
||||
204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
|
||||
221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238,
|
||||
238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_20
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 54,
|
||||
54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 67, 67, 67, 67, 67, 67,
|
||||
67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
|
||||
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107,
|
||||
107, 107, 107, 107, 107, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
|
||||
134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 148, 148, 148, 148, 148,
|
||||
148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
|
||||
161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175,
|
||||
175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188,
|
||||
188, 188, 188, 188, 188, 188, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201,
|
||||
201, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 228, 228, 228, 228,
|
||||
228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 242, 242, 242, 242, 242, 242, 242, 242, 242,
|
||||
242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_24
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 33, 33,
|
||||
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
|
||||
44, 44, 44, 44, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 66, 66, 66, 66, 66, 66,
|
||||
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
|
||||
77, 77, 77, 77, 77, 77, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 99, 99, 99, 99,
|
||||
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
|
||||
110, 110, 110, 110, 110, 110, 110, 110, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
|
||||
134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 145, 145, 145, 145, 145, 145, 145, 145,
|
||||
145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
|
||||
156, 156, 156, 156, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 178, 178, 178, 178, 178, 178,
|
||||
178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
|
||||
189, 189, 189, 189, 189, 189, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 211, 211, 211, 211,
|
||||
211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222,
|
||||
222, 222, 222, 222, 222, 222, 222, 222, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 244, 244,
|
||||
244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_32
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 33, 33, 33, 33, 33, 33,
|
||||
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 49, 49, 49, 49, 49,
|
||||
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 66, 66, 66, 66,
|
||||
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 82, 82, 82,
|
||||
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 99, 99,
|
||||
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 115,
|
||||
115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
|
||||
132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
|
||||
140, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
|
||||
156, 156, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173,
|
||||
173, 173, 173, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
|
||||
189, 189, 189, 189, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
|
||||
206, 206, 206, 206, 206, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222,
|
||||
222, 222, 222, 222, 222, 222, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 239, 239, 239, 239, 239, 239, 239, 239, 239,
|
||||
239, 239, 239, 239, 239, 239, 239, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_40
|
||||
0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
|
||||
45, 45, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 65, 65, 65, 65,
|
||||
65, 65, 65, 65, 65, 65, 65, 65, 65, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
||||
78, 78, 78, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 97, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 110, 110, 110, 110, 110, 110, 110, 110, 110,
|
||||
110, 110, 110, 110, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
|
||||
132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 145, 145, 145, 145,
|
||||
145, 145, 145, 145, 145, 145, 145, 145, 145, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
|
||||
158, 158, 158, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 177, 177, 177,
|
||||
177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 190, 190, 190, 190, 190, 190, 190, 190, 190,
|
||||
190, 190, 190, 190, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 210, 210,
|
||||
210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 223, 223, 223, 223, 223, 223, 223, 223,
|
||||
223, 223, 223, 223, 223, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 242,
|
||||
242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 255, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_48
|
||||
0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 65, 65, 65,
|
||||
65, 65, 65, 65, 65, 65, 65, 65, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 81, 81,
|
||||
81, 81, 81, 81, 81, 81, 81, 81, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 113, 113,
|
||||
113, 113, 113, 113, 113, 113, 113, 113, 113, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
|
||||
131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 142, 142, 142, 142, 142, 142, 142, 142, 142,
|
||||
142, 142, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 158, 158, 158, 158, 158, 158, 158, 158, 158,
|
||||
158, 158, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 174, 174, 174, 174, 174, 174, 174, 174,
|
||||
174, 174, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 190, 190, 190, 190, 190, 190, 190, 190,
|
||||
190, 190, 190, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 207, 207, 207, 207, 207, 207, 207,
|
||||
207, 207, 207, 207, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 223, 223, 223, 223, 223, 223, 223,
|
||||
223, 223, 223, 223, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 239, 239, 239, 239, 239, 239,
|
||||
239, 239, 239, 239, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 255, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_64
|
||||
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 20, 20, 20, 20, 20, 20, 20, 20, 24, 24, 24, 24, 24, 24, 24, 24, 28, 28, 28, 28, 28, 28, 28, 28, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 36, 36, 36, 36, 36, 36, 36, 36, 40, 40, 40, 40, 40, 40, 40, 40, 44, 44, 44, 44, 44, 44, 44, 44, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 52, 52, 52, 52, 52, 52, 52, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65,
|
||||
65, 65, 65, 65, 65, 65, 65, 69, 69, 69, 69, 69, 69, 69, 69, 73, 73, 73, 73, 73, 73, 73, 73, 77, 77, 77, 77, 77, 77, 77, 77, 81,
|
||||
81, 81, 81, 81, 81, 81, 81, 85, 85, 85, 85, 85, 85, 85, 85, 89, 89, 89, 89, 89, 89, 89, 89, 93, 93, 93, 93, 93, 93, 93, 93, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 101, 101, 101, 101, 101, 101, 101, 101, 105, 105, 105, 105, 105, 105, 105, 105, 109, 109, 109, 109, 109, 109, 109, 109, 113,
|
||||
113, 113, 113, 113, 113, 113, 113, 117, 117, 117, 117, 117, 117, 117, 117, 121, 121, 121, 121, 121, 121, 121, 121, 125, 125, 125, 125, 125, 125, 125, 125, 125,
|
||||
130, 130, 130, 130, 130, 130, 130, 130, 130, 134, 134, 134, 134, 134, 134, 134, 134, 138, 138, 138, 138, 138, 138, 138, 138, 142, 142, 142, 142, 142, 142, 142,
|
||||
142, 146, 146, 146, 146, 146, 146, 146, 146, 150, 150, 150, 150, 150, 150, 150, 150, 154, 154, 154, 154, 154, 154, 154, 154, 158, 158, 158, 158, 158, 158, 158,
|
||||
158, 162, 162, 162, 162, 162, 162, 162, 162, 166, 166, 166, 166, 166, 166, 166, 166, 170, 170, 170, 170, 170, 170, 170, 170, 174, 174, 174, 174, 174, 174, 174,
|
||||
174, 178, 178, 178, 178, 178, 178, 178, 178, 182, 182, 182, 182, 182, 182, 182, 182, 186, 186, 186, 186, 186, 186, 186, 186, 190, 190, 190, 190, 190, 190, 190,
|
||||
190, 190, 195, 195, 195, 195, 195, 195, 195, 195, 195, 199, 199, 199, 199, 199, 199, 199, 199, 203, 203, 203, 203, 203, 203, 203, 203, 207, 207, 207, 207, 207,
|
||||
207, 207, 207, 211, 211, 211, 211, 211, 211, 211, 211, 215, 215, 215, 215, 215, 215, 215, 215, 219, 219, 219, 219, 219, 219, 219, 219, 223, 223, 223, 223, 223,
|
||||
223, 223, 223, 227, 227, 227, 227, 227, 227, 227, 227, 231, 231, 231, 231, 231, 231, 231, 231, 235, 235, 235, 235, 235, 235, 235, 235, 239, 239, 239, 239, 239,
|
||||
239, 239, 239, 243, 243, 243, 243, 243, 243, 243, 243, 247, 247, 247, 247, 247, 247, 247, 247, 251, 251, 251, 251, 251, 251, 251, 251, 255, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_80
|
||||
0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 9, 13, 13, 13, 13, 13, 13, 13, 16, 16,
|
||||
16, 16, 16, 16, 19, 19, 19, 19, 19, 19, 22, 22, 22, 22, 22, 22, 25, 25, 25, 25, 25, 25, 25, 29, 29, 29, 29, 29, 29, 29, 32, 32,
|
||||
32, 32, 32, 32, 35, 35, 35, 35, 35, 35, 38, 38, 38, 38, 38, 38, 38, 42, 42, 42, 42, 42, 42, 42, 45, 45, 45, 45, 45, 45, 48, 48,
|
||||
48, 48, 48, 48, 51, 51, 51, 51, 51, 51, 54, 54, 54, 54, 54, 54, 54, 58, 58, 58, 58, 58, 58, 58, 61, 61, 61, 61, 61, 61, 64, 64,
|
||||
64, 64, 64, 64, 67, 67, 67, 67, 67, 67, 67, 71, 71, 71, 71, 71, 71, 71, 74, 74, 74, 74, 74, 74, 77, 77, 77, 77, 77, 77, 80, 80,
|
||||
80, 80, 80, 80, 83, 83, 83, 83, 83, 83, 83, 87, 87, 87, 87, 87, 87, 87, 90, 90, 90, 90, 90, 90, 93, 93, 93, 93, 93, 93, 96, 96,
|
||||
96, 96, 96, 96, 96, 100, 100, 100, 100, 100, 100, 100, 103, 103, 103, 103, 103, 103, 106, 106, 106, 106, 106, 106, 109, 109, 109, 109, 109, 109, 112, 112,
|
||||
112, 112, 112, 112, 112, 116, 116, 116, 116, 116, 116, 116, 119, 119, 119, 119, 119, 119, 122, 122, 122, 122, 122, 122, 125, 125, 125, 125, 125, 125, 125, 125,
|
||||
130, 130, 130, 130, 130, 130, 130, 130, 133, 133, 133, 133, 133, 133, 136, 136, 136, 136, 136, 136, 139, 139, 139, 139, 139, 139, 139, 143, 143, 143, 143, 143,
|
||||
143, 143, 146, 146, 146, 146, 146, 146, 149, 149, 149, 149, 149, 149, 152, 152, 152, 152, 152, 152, 155, 155, 155, 155, 155, 155, 155, 159, 159, 159, 159, 159,
|
||||
159, 159, 162, 162, 162, 162, 162, 162, 165, 165, 165, 165, 165, 165, 168, 168, 168, 168, 168, 168, 168, 172, 172, 172, 172, 172, 172, 172, 175, 175, 175, 175,
|
||||
175, 175, 178, 178, 178, 178, 178, 178, 181, 181, 181, 181, 181, 181, 184, 184, 184, 184, 184, 184, 184, 188, 188, 188, 188, 188, 188, 188, 191, 191, 191, 191,
|
||||
191, 191, 194, 194, 194, 194, 194, 194, 197, 197, 197, 197, 197, 197, 197, 201, 201, 201, 201, 201, 201, 201, 204, 204, 204, 204, 204, 204, 207, 207, 207, 207,
|
||||
207, 207, 210, 210, 210, 210, 210, 210, 213, 213, 213, 213, 213, 213, 213, 217, 217, 217, 217, 217, 217, 217, 220, 220, 220, 220, 220, 220, 223, 223, 223, 223,
|
||||
223, 223, 226, 226, 226, 226, 226, 226, 226, 230, 230, 230, 230, 230, 230, 230, 233, 233, 233, 233, 233, 233, 236, 236, 236, 236, 236, 236, 239, 239, 239, 239,
|
||||
239, 239, 242, 242, 242, 242, 242, 242, 242, 246, 246, 246, 246, 246, 246, 246, 249, 249, 249, 249, 249, 249, 252, 252, 252, 252, 252, 252, 255, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_96
|
||||
0, 0, 0, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 13, 13, 13, 13, 13, 13, 16, 16,
|
||||
16, 16, 16, 18, 18, 18, 18, 18, 21, 21, 21, 21, 21, 21, 24, 24, 24, 24, 24, 26, 26, 26, 26, 26, 29, 29, 29, 29, 29, 29, 32, 32,
|
||||
32, 32, 32, 32, 35, 35, 35, 35, 35, 37, 37, 37, 37, 37, 40, 40, 40, 40, 40, 40, 43, 43, 43, 43, 43, 45, 45, 45, 45, 45, 48, 48,
|
||||
48, 48, 48, 48, 51, 51, 51, 51, 51, 53, 53, 53, 53, 53, 56, 56, 56, 56, 56, 56, 59, 59, 59, 59, 59, 61, 61, 61, 61, 61, 64, 64,
|
||||
64, 64, 64, 64, 67, 67, 67, 67, 67, 67, 70, 70, 70, 70, 70, 72, 72, 72, 72, 72, 75, 75, 75, 75, 75, 75, 78, 78, 78, 78, 78, 80,
|
||||
80, 80, 80, 80, 83, 83, 83, 83, 83, 83, 86, 86, 86, 86, 86, 88, 88, 88, 88, 88, 91, 91, 91, 91, 91, 91, 94, 94, 94, 94, 94, 96,
|
||||
96, 96, 96, 96, 99, 99, 99, 99, 99, 99, 102, 102, 102, 102, 102, 104, 104, 104, 104, 104, 107, 107, 107, 107, 107, 107, 110, 110, 110, 110, 110, 112,
|
||||
112, 112, 112, 112, 115, 115, 115, 115, 115, 115, 118, 118, 118, 118, 118, 120, 120, 120, 120, 120, 123, 123, 123, 123, 123, 123, 126, 126, 126, 126, 126, 126,
|
||||
129, 129, 129, 129, 129, 129, 132, 132, 132, 132, 132, 132, 135, 135, 135, 135, 135, 137, 137, 137, 137, 137, 140, 140, 140, 140, 140, 140, 143, 143, 143, 143,
|
||||
143, 145, 145, 145, 145, 145, 148, 148, 148, 148, 148, 148, 151, 151, 151, 151, 151, 153, 153, 153, 153, 153, 156, 156, 156, 156, 156, 156, 159, 159, 159, 159,
|
||||
159, 161, 161, 161, 161, 161, 164, 164, 164, 164, 164, 164, 167, 167, 167, 167, 167, 169, 169, 169, 169, 169, 172, 172, 172, 172, 172, 172, 175, 175, 175, 175,
|
||||
175, 177, 177, 177, 177, 177, 180, 180, 180, 180, 180, 180, 183, 183, 183, 183, 183, 185, 185, 185, 185, 185, 188, 188, 188, 188, 188, 188, 191, 191, 191, 191,
|
||||
191, 191, 194, 194, 194, 194, 194, 196, 196, 196, 196, 196, 199, 199, 199, 199, 199, 199, 202, 202, 202, 202, 202, 204, 204, 204, 204, 204, 207, 207, 207, 207,
|
||||
207, 207, 210, 210, 210, 210, 210, 212, 212, 212, 212, 212, 215, 215, 215, 215, 215, 215, 218, 218, 218, 218, 218, 220, 220, 220, 220, 220, 223, 223, 223, 223,
|
||||
223, 223, 226, 226, 226, 226, 226, 226, 229, 229, 229, 229, 229, 231, 231, 231, 231, 231, 234, 234, 234, 234, 234, 234, 237, 237, 237, 237, 237, 239, 239, 239,
|
||||
239, 239, 242, 242, 242, 242, 242, 242, 245, 245, 245, 245, 245, 247, 247, 247, 247, 247, 250, 250, 250, 250, 250, 250, 253, 253, 253, 253, 253, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_128
|
||||
0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8, 10, 10, 10, 10, 12, 12, 12, 12, 14, 14, 14, 14, 16,
|
||||
16, 16, 16, 18, 18, 18, 18, 20, 20, 20, 20, 22, 22, 22, 22, 24, 24, 24, 24, 26, 26, 26, 26, 28, 28, 28, 28, 30, 30, 30, 30, 32,
|
||||
32, 32, 32, 34, 34, 34, 34, 36, 36, 36, 36, 38, 38, 38, 38, 40, 40, 40, 40, 42, 42, 42, 42, 44, 44, 44, 44, 46, 46, 46, 46, 48,
|
||||
48, 48, 48, 50, 50, 50, 50, 52, 52, 52, 52, 54, 54, 54, 54, 56, 56, 56, 56, 58, 58, 58, 58, 60, 60, 60, 60, 62, 62, 62, 62, 64,
|
||||
64, 64, 64, 66, 66, 66, 66, 68, 68, 68, 68, 70, 70, 70, 70, 72, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 78, 80,
|
||||
80, 80, 80, 82, 82, 82, 82, 84, 84, 84, 84, 86, 86, 86, 86, 88, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 92, 94, 94, 94, 94, 96,
|
||||
96, 96, 96, 98, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 102, 104, 104, 104, 104, 106, 106, 106, 106, 108, 108, 108, 108, 110, 110, 110, 110, 112,
|
||||
112, 112, 112, 114, 114, 114, 114, 116, 116, 116, 116, 118, 118, 118, 118, 120, 120, 120, 120, 122, 122, 122, 122, 124, 124, 124, 124, 126, 126, 126, 126, 126,
|
||||
129, 129, 129, 129, 129, 131, 131, 131, 131, 133, 133, 133, 133, 135, 135, 135, 135, 137, 137, 137, 137, 139, 139, 139, 139, 141, 141, 141, 141, 143, 143, 143,
|
||||
143, 145, 145, 145, 145, 147, 147, 147, 147, 149, 149, 149, 149, 151, 151, 151, 151, 153, 153, 153, 153, 155, 155, 155, 155, 157, 157, 157, 157, 159, 159, 159,
|
||||
159, 161, 161, 161, 161, 163, 163, 163, 163, 165, 165, 165, 165, 167, 167, 167, 167, 169, 169, 169, 169, 171, 171, 171, 171, 173, 173, 173, 173, 175, 175, 175,
|
||||
175, 177, 177, 177, 177, 179, 179, 179, 179, 181, 181, 181, 181, 183, 183, 183, 183, 185, 185, 185, 185, 187, 187, 187, 187, 189, 189, 189, 189, 191, 191, 191,
|
||||
191, 193, 193, 193, 193, 195, 195, 195, 195, 197, 197, 197, 197, 199, 199, 199, 199, 201, 201, 201, 201, 203, 203, 203, 203, 205, 205, 205, 205, 207, 207, 207,
|
||||
207, 209, 209, 209, 209, 211, 211, 211, 211, 213, 213, 213, 213, 215, 215, 215, 215, 217, 217, 217, 217, 219, 219, 219, 219, 221, 221, 221, 221, 223, 223, 223,
|
||||
223, 225, 225, 225, 225, 227, 227, 227, 227, 229, 229, 229, 229, 231, 231, 231, 231, 233, 233, 233, 233, 235, 235, 235, 235, 237, 237, 237, 237, 239, 239, 239,
|
||||
239, 241, 241, 241, 241, 243, 243, 243, 243, 245, 245, 245, 245, 247, 247, 247, 247, 249, 249, 249, 249, 251, 251, 251, 251, 253, 253, 253, 253, 255, 255, 255
|
||||
},
|
||||
{ // QUANT_160
|
||||
0, 0, 1, 1, 1, 3, 3, 3, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 9, 9, 9, 11, 11, 11, 12, 12, 12, 14, 14, 14, 14, 16,
|
||||
16, 16, 17, 17, 17, 19, 19, 19, 20, 20, 20, 22, 22, 22, 22, 24, 24, 24, 25, 25, 25, 27, 27, 27, 28, 28, 28, 30, 30, 30, 30, 32,
|
||||
32, 32, 33, 33, 33, 35, 35, 35, 36, 36, 36, 38, 38, 38, 38, 40, 40, 40, 41, 41, 41, 43, 43, 43, 44, 44, 44, 46, 46, 46, 46, 48,
|
||||
48, 48, 49, 49, 49, 51, 51, 51, 52, 52, 52, 54, 54, 54, 54, 56, 56, 56, 57, 57, 57, 59, 59, 59, 60, 60, 60, 62, 62, 62, 62, 64,
|
||||
64, 64, 65, 65, 65, 67, 67, 67, 68, 68, 68, 70, 70, 70, 70, 72, 72, 72, 73, 73, 73, 75, 75, 75, 76, 76, 76, 78, 78, 78, 78, 80,
|
||||
80, 80, 81, 81, 81, 83, 83, 83, 84, 84, 84, 86, 86, 86, 86, 88, 88, 88, 89, 89, 89, 91, 91, 91, 92, 92, 92, 94, 94, 94, 94, 96,
|
||||
96, 96, 97, 97, 97, 99, 99, 99, 100, 100, 100, 102, 102, 102, 102, 104, 104, 104, 105, 105, 105, 107, 107, 107, 108, 108, 108, 110, 110, 110, 110, 112,
|
||||
112, 112, 113, 113, 113, 115, 115, 115, 116, 116, 116, 118, 118, 118, 118, 120, 120, 120, 121, 121, 121, 123, 123, 123, 124, 124, 124, 126, 126, 126, 126, 126,
|
||||
129, 129, 129, 129, 129, 131, 131, 131, 132, 132, 132, 134, 134, 134, 135, 135, 135, 137, 137, 137, 137, 139, 139, 139, 140, 140, 140, 142, 142, 142, 143, 143,
|
||||
143, 145, 145, 145, 145, 147, 147, 147, 148, 148, 148, 150, 150, 150, 151, 151, 151, 153, 153, 153, 153, 155, 155, 155, 156, 156, 156, 158, 158, 158, 159, 159,
|
||||
159, 161, 161, 161, 161, 163, 163, 163, 164, 164, 164, 166, 166, 166, 167, 167, 167, 169, 169, 169, 169, 171, 171, 171, 172, 172, 172, 174, 174, 174, 175, 175,
|
||||
175, 177, 177, 177, 177, 179, 179, 179, 180, 180, 180, 182, 182, 182, 183, 183, 183, 185, 185, 185, 185, 187, 187, 187, 188, 188, 188, 190, 190, 190, 191, 191,
|
||||
191, 193, 193, 193, 193, 195, 195, 195, 196, 196, 196, 198, 198, 198, 199, 199, 199, 201, 201, 201, 201, 203, 203, 203, 204, 204, 204, 206, 206, 206, 207, 207,
|
||||
207, 209, 209, 209, 209, 211, 211, 211, 212, 212, 212, 214, 214, 214, 215, 215, 215, 217, 217, 217, 217, 219, 219, 219, 220, 220, 220, 222, 222, 222, 223, 223,
|
||||
223, 225, 225, 225, 225, 227, 227, 227, 228, 228, 228, 230, 230, 230, 231, 231, 231, 233, 233, 233, 233, 235, 235, 235, 236, 236, 236, 238, 238, 238, 239, 239,
|
||||
239, 241, 241, 241, 241, 243, 243, 243, 244, 244, 244, 246, 246, 246, 247, 247, 247, 249, 249, 249, 249, 251, 251, 251, 252, 252, 252, 254, 254, 254, 255, 255
|
||||
},
|
||||
{ // QUANT_192
|
||||
0, 0, 1, 1, 2, 2, 2, 4, 4, 4, 5, 5, 6, 6, 6, 8, 8, 8, 9, 9, 10, 10, 10, 12, 12, 12, 13, 13, 14, 14, 14, 16,
|
||||
16, 16, 17, 17, 18, 18, 18, 20, 20, 20, 21, 21, 22, 22, 22, 24, 24, 24, 25, 25, 26, 26, 26, 28, 28, 28, 29, 29, 30, 30, 30, 32,
|
||||
32, 32, 33, 33, 34, 34, 34, 36, 36, 36, 37, 37, 38, 38, 38, 40, 40, 40, 41, 41, 42, 42, 42, 44, 44, 44, 45, 45, 46, 46, 46, 48,
|
||||
48, 48, 49, 49, 50, 50, 50, 52, 52, 52, 53, 53, 54, 54, 54, 56, 56, 56, 57, 57, 58, 58, 58, 60, 60, 60, 61, 61, 62, 62, 62, 64,
|
||||
64, 64, 65, 65, 66, 66, 66, 68, 68, 68, 69, 69, 70, 70, 70, 72, 72, 72, 73, 73, 74, 74, 74, 76, 76, 76, 77, 77, 78, 78, 78, 80,
|
||||
80, 80, 81, 81, 82, 82, 82, 84, 84, 84, 85, 85, 86, 86, 86, 88, 88, 88, 89, 89, 90, 90, 90, 92, 92, 92, 93, 93, 94, 94, 94, 96,
|
||||
96, 96, 97, 97, 98, 98, 98, 100, 100, 100, 101, 101, 102, 102, 102, 104, 104, 104, 105, 105, 106, 106, 106, 108, 108, 108, 109, 109, 110, 110, 110, 112,
|
||||
112, 112, 113, 113, 114, 114, 114, 116, 116, 116, 117, 117, 118, 118, 118, 120, 120, 120, 121, 121, 122, 122, 122, 124, 124, 124, 125, 125, 126, 126, 126, 126,
|
||||
129, 129, 129, 129, 130, 130, 131, 131, 131, 133, 133, 133, 134, 134, 135, 135, 135, 137, 137, 137, 138, 138, 139, 139, 139, 141, 141, 141, 142, 142, 143, 143,
|
||||
143, 145, 145, 145, 146, 146, 147, 147, 147, 149, 149, 149, 150, 150, 151, 151, 151, 153, 153, 153, 154, 154, 155, 155, 155, 157, 157, 157, 158, 158, 159, 159,
|
||||
159, 161, 161, 161, 162, 162, 163, 163, 163, 165, 165, 165, 166, 166, 167, 167, 167, 169, 169, 169, 170, 170, 171, 171, 171, 173, 173, 173, 174, 174, 175, 175,
|
||||
175, 177, 177, 177, 178, 178, 179, 179, 179, 181, 181, 181, 182, 182, 183, 183, 183, 185, 185, 185, 186, 186, 187, 187, 187, 189, 189, 189, 190, 190, 191, 191,
|
||||
191, 193, 193, 193, 194, 194, 195, 195, 195, 197, 197, 197, 198, 198, 199, 199, 199, 201, 201, 201, 202, 202, 203, 203, 203, 205, 205, 205, 206, 206, 207, 207,
|
||||
207, 209, 209, 209, 210, 210, 211, 211, 211, 213, 213, 213, 214, 214, 215, 215, 215, 217, 217, 217, 218, 218, 219, 219, 219, 221, 221, 221, 222, 222, 223, 223,
|
||||
223, 225, 225, 225, 226, 226, 227, 227, 227, 229, 229, 229, 230, 230, 231, 231, 231, 233, 233, 233, 234, 234, 235, 235, 235, 237, 237, 237, 238, 238, 239, 239,
|
||||
239, 241, 241, 241, 242, 242, 243, 243, 243, 245, 245, 245, 246, 246, 247, 247, 247, 249, 249, 249, 250, 250, 251, 251, 251, 253, 253, 253, 254, 254, 255, 255
|
||||
},
|
||||
{ // QUANT_256
|
||||
0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
|
||||
16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
|
||||
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
|
||||
48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
|
||||
64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
|
||||
80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
|
||||
96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103, 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
|
||||
112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119, 120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
|
||||
128, 128, 129, 129, 130, 130, 131, 131, 132, 132, 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 141, 141, 142, 142, 143, 143,
|
||||
144, 144, 145, 145, 146, 146, 147, 147, 148, 148, 149, 149, 150, 150, 151, 151, 152, 152, 153, 153, 154, 154, 155, 155, 156, 156, 157, 157, 158, 158, 159, 159,
|
||||
160, 160, 161, 161, 162, 162, 163, 163, 164, 164, 165, 165, 166, 166, 167, 167, 168, 168, 169, 169, 170, 170, 171, 171, 172, 172, 173, 173, 174, 174, 175, 175,
|
||||
176, 176, 177, 177, 178, 178, 179, 179, 180, 180, 181, 181, 182, 182, 183, 183, 184, 184, 185, 185, 186, 186, 187, 187, 188, 188, 189, 189, 190, 190, 191, 191,
|
||||
192, 192, 193, 193, 194, 194, 195, 195, 196, 196, 197, 197, 198, 198, 199, 199, 200, 200, 201, 201, 202, 202, 203, 203, 204, 204, 205, 205, 206, 206, 207, 207,
|
||||
208, 208, 209, 209, 210, 210, 211, 211, 212, 212, 213, 213, 214, 214, 215, 215, 216, 216, 217, 217, 218, 218, 219, 219, 220, 220, 221, 221, 222, 222, 223, 223,
|
||||
224, 224, 225, 225, 226, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 239,
|
||||
240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255
|
||||
},
|
||||
};
|
||||
|
||||
// Starts from QUANT_6
|
||||
// Scrambled
|
||||
const uint8_t color_uquant_to_scrambled_pquant_tables[17][256] {
|
||||
{ // QUANT_6
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_8
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
},
|
||||
{ // QUANT_10
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_12
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_16
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15
|
||||
},
|
||||
{ // QUANT_20
|
||||
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||
17, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_24
|
||||
0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
||||
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_32
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
|
||||
12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
|
||||
29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31
|
||||
},
|
||||
{ // QUANT_40
|
||||
0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
|
||||
24, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
|
||||
2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 18,
|
||||
18, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 34, 4, 4,
|
||||
4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
|
||||
20, 28, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
|
||||
6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22, 22,
|
||||
22, 22, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
39, 39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 23, 23,
|
||||
23, 23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
|
||||
7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 29, 21,
|
||||
21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5,
|
||||
5, 5, 35, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 19,
|
||||
19, 19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
|
||||
3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 25,
|
||||
17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_48
|
||||
0, 0, 0, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 2, 2,
|
||||
2, 2, 2, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 34, 4, 4,
|
||||
4, 4, 4, 20, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
|
||||
6, 6, 6, 22, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
|
||||
8, 8, 8, 8, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 40, 10,
|
||||
10, 10, 10, 10, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 42, 12,
|
||||
12, 12, 12, 12, 28, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
|
||||
14, 14, 14, 14, 30, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
|
||||
47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 15, 15, 15, 15,
|
||||
15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 29, 13, 13, 13, 13,
|
||||
13, 43, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 11, 11, 11, 11,
|
||||
11, 41, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 9, 9, 9, 9,
|
||||
9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 23, 7, 7, 7,
|
||||
7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 21, 5, 5, 5,
|
||||
5, 5, 35, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 3, 3, 3,
|
||||
3, 3, 33, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 1, 1, 1
|
||||
},
|
||||
{ // QUANT_64
|
||||
0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
|
||||
4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8,
|
||||
8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12,
|
||||
12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16,
|
||||
16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20,
|
||||
20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24,
|
||||
24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28,
|
||||
28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,
|
||||
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
||||
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
||||
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
||||
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
||||
47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
|
||||
51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
|
||||
55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
|
||||
59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63
|
||||
},
|
||||
{ // QUANT_80
|
||||
0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 64, 64, 64, 64, 2,
|
||||
2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 66, 66, 66, 66, 4,
|
||||
4, 4, 20, 20, 20, 36, 36, 36, 52, 52, 52, 52, 68, 68, 68, 6,
|
||||
6, 6, 22, 22, 22, 38, 38, 38, 54, 54, 54, 54, 70, 70, 70, 8,
|
||||
8, 8, 24, 24, 24, 40, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
|
||||
10, 10, 26, 26, 26, 42, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
|
||||
12, 12, 28, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
|
||||
14, 14, 30, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
|
||||
79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 31, 15, 15,
|
||||
15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 29, 13, 13,
|
||||
13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 43, 27, 27, 27, 11, 11,
|
||||
11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 41, 25, 25, 25, 9, 9,
|
||||
9, 71, 71, 71, 55, 55, 55, 55, 39, 39, 39, 23, 23, 23, 7, 7,
|
||||
7, 69, 69, 69, 53, 53, 53, 53, 37, 37, 37, 21, 21, 21, 5, 5,
|
||||
5, 67, 67, 67, 67, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
|
||||
3, 65, 65, 65, 65, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1
|
||||
},
|
||||
{ // QUANT_96
|
||||
0, 32, 32, 32, 64, 64, 64, 2, 2, 34, 34, 34, 66, 66, 66, 4,
|
||||
4, 36, 36, 36, 68, 68, 68, 6, 6, 38, 38, 38, 70, 70, 70, 8,
|
||||
8, 8, 40, 40, 72, 72, 72, 10, 10, 10, 42, 42, 74, 74, 74, 12,
|
||||
12, 12, 44, 44, 76, 76, 76, 14, 14, 14, 46, 46, 78, 78, 78, 16,
|
||||
16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
|
||||
20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
|
||||
24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
|
||||
28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
|
||||
95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
|
||||
91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
|
||||
87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
|
||||
83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
|
||||
17, 79, 79, 79, 47, 47, 15, 15, 15, 77, 77, 77, 45, 45, 13, 13,
|
||||
13, 75, 75, 75, 43, 43, 11, 11, 11, 73, 73, 73, 41, 41, 9, 9,
|
||||
9, 71, 71, 71, 39, 39, 39, 7, 7, 69, 69, 69, 37, 37, 37, 5,
|
||||
5, 67, 67, 67, 35, 35, 35, 3, 3, 65, 65, 65, 33, 33, 33, 1
|
||||
},
|
||||
{ // QUANT_128
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16,
|
||||
16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24,
|
||||
24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32,
|
||||
32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40,
|
||||
40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48,
|
||||
48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56,
|
||||
56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 63,
|
||||
64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
|
||||
72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
|
||||
80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
|
||||
88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
|
||||
96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
|
||||
104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
|
||||
112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
|
||||
120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127
|
||||
},
|
||||
{ // QUANT_160
|
||||
0, 32, 64, 64, 96, 128, 128, 128, 2, 34, 66, 66, 98, 130, 130, 130,
|
||||
4, 36, 68, 68, 100, 132, 132, 132, 6, 38, 70, 70, 102, 134, 134, 134,
|
||||
8, 40, 72, 72, 104, 136, 136, 136, 10, 42, 74, 74, 106, 138, 138, 138,
|
||||
12, 44, 76, 76, 108, 140, 140, 140, 14, 46, 78, 78, 110, 142, 142, 142,
|
||||
16, 48, 80, 80, 112, 144, 144, 144, 18, 50, 82, 82, 114, 146, 146, 146,
|
||||
20, 52, 84, 84, 116, 148, 148, 148, 22, 54, 86, 86, 118, 150, 150, 150,
|
||||
24, 56, 88, 88, 120, 152, 152, 152, 26, 58, 90, 90, 122, 154, 154, 154,
|
||||
28, 60, 92, 92, 124, 156, 156, 156, 30, 62, 94, 94, 126, 158, 158, 158,
|
||||
159, 159, 159, 127, 95, 95, 63, 31, 157, 157, 157, 125, 93, 93, 61, 29,
|
||||
155, 155, 155, 123, 91, 91, 59, 27, 153, 153, 153, 121, 89, 89, 57, 25,
|
||||
151, 151, 151, 119, 87, 87, 55, 23, 149, 149, 149, 117, 85, 85, 53, 21,
|
||||
147, 147, 147, 115, 83, 83, 51, 19, 145, 145, 145, 113, 81, 81, 49, 17,
|
||||
143, 143, 143, 111, 79, 79, 47, 15, 141, 141, 141, 109, 77, 77, 45, 13,
|
||||
139, 139, 139, 107, 75, 75, 43, 11, 137, 137, 137, 105, 73, 73, 41, 9,
|
||||
135, 135, 135, 103, 71, 71, 39, 7, 133, 133, 133, 101, 69, 69, 37, 5,
|
||||
131, 131, 131, 99, 67, 67, 35, 3, 129, 129, 129, 97, 65, 65, 33, 1
|
||||
},
|
||||
{ // QUANT_192
|
||||
0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
|
||||
8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
|
||||
16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
|
||||
24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
|
||||
32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
|
||||
40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
|
||||
48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
|
||||
56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
|
||||
191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
|
||||
183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
|
||||
175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
|
||||
167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
|
||||
159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
|
||||
151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
|
||||
143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
|
||||
135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1
|
||||
},
|
||||
{ // QUANT_256
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
|
||||
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
|
||||
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
|
||||
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
|
||||
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
|
||||
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
// Starts from QUANT_6
|
||||
// Scrambled
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q6[6] {
|
||||
0, 255, 51, 204, 102, 153
|
||||
};
|
||||
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q8[8] {
|
||||
0, 36, 73, 109, 146, 182, 219, 255
|
||||
};
|
||||
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q10[10] {
|
||||
0, 255, 28, 227, 56, 199, 84, 171, 113, 142
|
||||
};
|
||||
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q12[12] {
|
||||
0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
|
||||
};
|
||||
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q16[16] {
|
||||
0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
|
||||
};
|
||||
|
||||
static const uint8_t color_scrambled_pquant_to_uquant_q20[20] {
|
||||
0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
|
||||
54, 201, 121, 134
|
||||
};
|
||||
|
||||
// Per-quant-level lookup tables mapping a scrambled packed-quant color value
// back to its unquantized 0..255 value (the inverse of the
// color_uquant_to_scrambled_pquant tables used by the encoder). One table per
// quantization level; indexed by the scrambled packed value decoded from the
// ISE color stream.

static const uint8_t color_scrambled_pquant_to_uquant_q24[24] {
	0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
	22, 233, 55, 200, 88, 167, 121, 134
};

static const uint8_t color_scrambled_pquant_to_uquant_q32[32] {
	0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
	132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
};

static const uint8_t color_scrambled_pquant_to_uquant_q40[40] {
	0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
	13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
	26, 229, 58, 197, 91, 164, 123, 132
};

static const uint8_t color_scrambled_pquant_to_uquant_q48[48] {
	0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
	5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
	11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
};

static const uint8_t color_scrambled_pquant_to_uquant_q64[64] {
	0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
	65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
	130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
	195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255,
};

static const uint8_t color_scrambled_pquant_to_uquant_q80[80] {
	0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
	3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
	6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
	9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
	13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
};

static const uint8_t color_scrambled_pquant_to_uquant_q96[96] {
	0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
	64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
	2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
	67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
	5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
	70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
};

static const uint8_t color_scrambled_pquant_to_uquant_q128[128] {
	0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
	32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
	64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
	96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
	129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
	161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
	193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
	225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
};

static const uint8_t color_scrambled_pquant_to_uquant_q160[160] {
	0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
	64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
	1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
	65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
	3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
	67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
	4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
	68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
	6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
	70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
};

static const uint8_t color_scrambled_pquant_to_uquant_q192[192] {
	0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
	32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
	64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
	96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
	1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
	33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
	65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
	97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
	2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
	34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
	66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
	98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
};

// QUANT_256 is a full 8-bit range, so the mapping is the identity.
static const uint8_t color_scrambled_pquant_to_uquant_q256[256] {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
	96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};

// One entry per color quantization level, indexed by (quant_mode - QUANT_6);
// the 17 levels span QUANT_6 .. QUANT_256.
const uint8_t* color_scrambled_pquant_to_uquant_tables[17] {
	color_scrambled_pquant_to_uquant_q6,
	color_scrambled_pquant_to_uquant_q8,
	color_scrambled_pquant_to_uquant_q10,
	color_scrambled_pquant_to_uquant_q12,
	color_scrambled_pquant_to_uquant_q16,
	color_scrambled_pquant_to_uquant_q20,
	color_scrambled_pquant_to_uquant_q24,
	color_scrambled_pquant_to_uquant_q32,
	color_scrambled_pquant_to_uquant_q40,
	color_scrambled_pquant_to_uquant_q48,
	color_scrambled_pquant_to_uquant_q64,
	color_scrambled_pquant_to_uquant_q80,
	color_scrambled_pquant_to_uquant_q96,
	color_scrambled_pquant_to_uquant_q128,
	color_scrambled_pquant_to_uquant_q160,
	color_scrambled_pquant_to_uquant_q192,
	color_scrambled_pquant_to_uquant_q256
};
|
||||
|
||||
// The quant_mode_table[integer_count/2][bits] gives us the quantization level for a given integer
// count and number of bits that the integer may fit into.
//
// Rows are indexed by integer_count / 2, so row k covers integer counts 2k
// and 2k+1. An entry of -1 means "no quantization level fits in that many
// bits"; entries saturate at 20, the largest level index in the table.
const int8_t quant_mode_table[10][128] {
	{ // integer counts 0-1 (unused: no valid encoding)
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
	},
	{ // integer counts 2-3
		-1, -1, 0, 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 4-5
		-1, -1, -1, -1, 0, 0, 0, 1, 2, 2, 3, 4, 5, 5, 6, 7,
		8, 8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 6-7
		-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
		4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
		12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 8-9
		-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 1, 1, 1,
		2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7,
		8, 8, 8, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13,
		14, 14, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 19, 19, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 10-11
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0,
		1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5,
		5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10,
		10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14,
		15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 19, 19, 19, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 12-13
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0,
		0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
		4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
		8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
		12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
		16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 14-15
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2,
		2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6,
		6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9,
		9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13,
		13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16,
		16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19,
		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
	},
	{ // integer counts 16-17
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
		2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
		5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7,
		8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
		11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13,
		14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
		17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19
	},
	{ // integer counts 18-19
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
		1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4,
		4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,
		6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9,
		9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
		12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14,
		14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 17, 17
	}
};
|
||||
544
engine/thirdparty/astcenc/astcenc_symbolic_physical.cpp
vendored
Normal file
544
engine/thirdparty/astcenc/astcenc_symbolic_physical.cpp
vendored
Normal file
|
|
@ -0,0 +1,544 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2023 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Functions for converting between symbolic and physical encodings.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/**
 * @brief Reverse bits in a byte.
 *
 * @param p The value to reverse.
 *
 * @return The reversed result.
 */
static inline int bitrev8(int p)
{
	// Butterfly swaps: exchange nibbles, then bit-pairs, then adjacent bits.
	int v = p;
	v = ((v >> 4) & 0x0F) | ((v & 0x0F) << 4);
	v = ((v >> 2) & 0x33) | ((v & 0x33) << 2);
	v = ((v >> 1) & 0x55) | ((v & 0x55) << 1);
	return v;
}
|
||||
|
||||
|
||||
/**
 * @brief Read up to 8 bits at an arbitrary bit offset.
 *
 * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may
 * span two separate bytes in memory.
 *
 * @param bitcount     The number of bits to read.
 * @param bitoffset    The bit offset to read from, between 0 and 7.
 * @param[in,out] ptr  The data pointer to read from.
 *
 * @return The read value.
 */
static inline int read_bits(
	int bitcount,
	int bitoffset,
	const uint8_t* ptr
) {
	// Jump to the byte containing the first bit of interest
	const uint8_t* src = ptr + (bitoffset >> 3);
	int shift = bitoffset & 7;

	// Load two bytes so a value straddling a byte boundary is captured,
	// then align to bit zero and mask down to the requested width
	int packed = src[0] | (src[1] << 8);
	return (packed >> shift) & ((1 << bitcount) - 1);
}
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
/**
 * @brief Write up to 8 bits at an arbitrary bit offset.
 *
 * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so
 * may span two separate bytes in memory.
 *
 * @param value        The value to write.
 * @param bitcount     The number of bits to write, starting from LSB.
 * @param bitoffset    The bit offset to store at, between 0 and 7.
 * @param[in,out] ptr  The data pointer to write to.
 */
static inline void write_bits(
	int value,
	int bitcount,
	int bitoffset,
	uint8_t* ptr
) {
	// Jump to the byte containing the first destination bit
	uint8_t* dst = ptr + (bitoffset >> 3);
	int shift = bitoffset & 7;

	// Align the payload and a keep-mask with the destination bit position
	int field = (1 << bitcount) - 1;
	int bits = (value & field) << shift;
	int keep = ~(field << shift);

	// Merge into the two bytes the shifted payload may straddle
	dst[0] = static_cast<uint8_t>((dst[0] & keep) | bits);
	dst[1] = static_cast<uint8_t>((dst[1] & (keep >> 8)) | (bits >> 8));
}
|
||||
|
||||
/* See header for documentation. */
// Packs a symbolic (decoded) block description into the 128-bit ASTC
// physical block layout. The inverse operation is physical_to_symbolic().
void symbolic_to_physical(
	const block_size_descriptor& bsd,
	const symbolic_compressed_block& scb,
	uint8_t pcb[16]
) {
	assert(scb.block_type != SYM_BTYPE_ERROR);

	// Constant color block using UNORM16 colors
	if (scb.block_type == SYM_BTYPE_CONST_U16)
	{
		// There is currently no attempt to coalesce larger void-extents
		static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
		for (unsigned int i = 0; i < 8; i++)
		{
			pcb[i] = cbytes[i];
		}

		// Store each 16-bit channel little-endian in the upper 8 bytes
		for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++)
		{
			pcb[2 * i + 8] = scb.constant_color[i] & 0xFF;
			pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF;
		}

		return;
	}

	// Constant color block using FP16 colors
	if (scb.block_type == SYM_BTYPE_CONST_F16)
	{
		// There is currently no attempt to coalesce larger void-extents
		static const uint8_t cbytes[8]  { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
		for (unsigned int i = 0; i < 8; i++)
		{
			pcb[i] = cbytes[i];
		}

		// Store each 16-bit channel little-endian in the upper 8 bytes
		for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++)
		{
			pcb[2 * i + 8] = scb.constant_color[i] & 0xFF;
			pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF;
		}

		return;
	}

	unsigned int partition_count = scb.partition_count;

	// Compress the weights.
	// They are encoded as an ordinary integer-sequence, then bit-reversed
	uint8_t weightbuf[16] { 0 };

	const auto& bm = bsd.get_block_mode(scb.block_mode);
	const auto& di = bsd.get_decimation_info(bm.decimation_mode);
	int weight_count = di.weight_count;
	quant_method weight_quant_method = bm.get_weight_quant_mode();
	float weight_quant_levels = static_cast<float>(get_quant_level(weight_quant_method));
	int is_dual_plane = bm.is_dual_plane;

	const auto& qat = quant_and_xfer_tables[weight_quant_method];

	// Dual-plane blocks store two interleaved weights per texel sample
	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;

	int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method);

	uint8_t weights[64];
	if (is_dual_plane)
	{
		// Quantize each weight from its 0..64 symbolic range to the target
		// quant level, then scramble into the packed-quant ordering;
		// plane 1 and plane 2 weights interleave in the output
		for (int i = 0; i < weight_count; i++)
		{
			float uqw = static_cast<float>(scb.weights[i]);
			float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f);
			int qwi = static_cast<int>(qw + 0.5f);
			weights[2 * i] = qat.scramble_map[qwi];

			uqw = static_cast<float>(scb.weights[i + WEIGHTS_PLANE2_OFFSET]);
			qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f);
			qwi = static_cast<int>(qw + 0.5f);
			weights[2 * i + 1] = qat.scramble_map[qwi];
		}
	}
	else
	{
		// Single plane: quantize and scramble each weight in order
		for (int i = 0; i < weight_count; i++)
		{
			float uqw = static_cast<float>(scb.weights[i]);
			float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f);
			int qwi = static_cast<int>(qw + 0.5f);
			weights[i] = qat.scramble_map[qwi];
		}
	}

	encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0);

	// Weights occupy the top of the block bit-reversed, so mirror the
	// ISE-encoded buffer byte-by-byte and bit-by-bit into the output
	for (int i = 0; i < 16; i++)
	{
		pcb[i] = static_cast<uint8_t>(bitrev8(weightbuf[15 - i]));
	}

	// Header: 11-bit block mode, then 2-bit (partition_count - 1)
	write_bits(scb.block_mode, 11, 0, pcb);
	write_bits(partition_count - 1, 2, 11, pcb);

	// First bit position below the bit-reversed weight data
	int below_weights_pos = 128 - bits_for_weights;

	// Encode partition index and color endpoint types for blocks with 2+ partitions
	if (partition_count > 1)
	{
		// Partition index is wider than one write_bits call can handle
		write_bits(scb.partition_index, 6, 13, pcb);
		write_bits(scb.partition_index >> 6, PARTITION_INDEX_BITS - 6, 19, pcb);

		if (scb.color_formats_matched)
		{
			// All partitions share one format: class bits 00 + the format
			write_bits(scb.color_formats[0] << 2, 6, 13 + PARTITION_INDEX_BITS, pcb);
		}
		else
		{
			// Check endpoint types for each partition to determine the lowest class present
			int low_class = 4;

			for (unsigned int i = 0; i < partition_count; i++)
			{
				int class_of_format = scb.color_formats[i] >> 2;
				low_class = astc::min(class_of_format, low_class);
			}

			// Per-partition class bits can only encode low_class or
			// low_class + 1, so a base of 3 must be lowered to 2
			if (low_class == 3)
			{
				low_class = 2;
			}

			int encoded_type = low_class + 1;
			int bitpos = 2;

			// One class-delta bit per partition
			for (unsigned int i = 0; i < partition_count; i++)
			{
				int classbit_of_format = (scb.color_formats[i] >> 2) - low_class;
				encoded_type |= classbit_of_format << bitpos;
				bitpos++;
			}

			// Two low format bits per partition
			for (unsigned int i = 0; i < partition_count; i++)
			{
				int lowbits_of_format = scb.color_formats[i] & 3;
				encoded_type |= lowbits_of_format << bitpos;
				bitpos += 2;
			}

			// Low 6 bits sit after the partition index; the remainder is
			// stored immediately below the weight data
			int encoded_type_lowpart = encoded_type & 0x3F;
			int encoded_type_highpart = encoded_type >> 6;
			int encoded_type_highpart_size = (3 * partition_count) - 4;
			int encoded_type_highpart_pos = 128 - bits_for_weights - encoded_type_highpart_size;
			write_bits(encoded_type_lowpart, 6, 13 + PARTITION_INDEX_BITS, pcb);
			write_bits(encoded_type_highpart, encoded_type_highpart_size, encoded_type_highpart_pos, pcb);
			below_weights_pos -= encoded_type_highpart_size;
		}
	}
	else
	{
		// Single partition: 4-bit endpoint format directly after the header
		write_bits(scb.color_formats[0], 4, 13, pcb);
	}

	// In dual-plane mode, encode the color component of the second plane of weights
	if (is_dual_plane)
	{
		write_bits(scb.plane2_component, 2, below_weights_pos - 2, pcb);
	}

	// Encode the color components
	uint8_t values_to_encode[32];
	int valuecount_to_encode = 0;

	// Pack/scramble each partition's endpoint values, then ISE-encode the
	// whole sequence starting after the header fields
	const uint8_t* pack_table = color_uquant_to_scrambled_pquant_tables[scb.quant_mode - QUANT_6];
	for (unsigned int i = 0; i < scb.partition_count; i++)
	{
		// Each endpoint class k uses 2k + 2 integers
		int vals = 2 * (scb.color_formats[i] >> 2) + 2;
		assert(vals <= 8);
		for (int j = 0; j < vals; j++)
		{
			values_to_encode[j + valuecount_to_encode] = pack_table[scb.color_values[i][j]];
		}
		valuecount_to_encode += vals;
	}

	encode_ise(scb.get_color_quant_mode(), valuecount_to_encode, values_to_encode, pcb,
	           scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS);
}
|
||||
|
||||
#endif
|
||||
|
||||
/* See header for documentation. */
// Unpacks a 128-bit physical ASTC block into its symbolic description,
// setting scb.block_type to SYM_BTYPE_ERROR for any malformed encoding.
void physical_to_symbolic(
	const block_size_descriptor& bsd,
	const uint8_t pcb[16],
	symbolic_compressed_block& scb
) {
	uint8_t bswapped[16];

	scb.block_type = SYM_BTYPE_NONCONST;

	// Extract header fields
	int block_mode = read_bits(11, 0, pcb);
	if ((block_mode & 0x1FF) == 0x1FC)
	{
		// Constant color block

		// Check what format the data has
		if (block_mode & 0x200)
		{
			scb.block_type = SYM_BTYPE_CONST_F16;
		}
		else
		{
			scb.block_type = SYM_BTYPE_CONST_U16;
		}

		// Constant color is stored little-endian, 16 bits per channel
		scb.partition_count = 0;
		for (int i = 0; i < 4; i++)
		{
			scb.constant_color[i] = pcb[2 * i + 8] | (pcb[2 * i + 9] << 8);
		}

		// Additionally, check that the void-extent coordinates are valid
		if (bsd.zdim == 1)
		{
			// 2D void-extent
			int rsvbits = read_bits(2, 10, pcb);
			if (rsvbits != 3)
			{
				// Reserved bits must both be set
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}

			// Low values span 3 bytes so need two read_bits calls
			int vx_low_s = read_bits(8, 12, pcb) | (read_bits(5, 12 + 8, pcb) << 8);
			int vx_high_s = read_bits(13, 25, pcb);
			int vx_low_t = read_bits(8, 38, pcb) | (read_bits(5, 38 + 8, pcb) << 8);
			int vx_high_t = read_bits(13, 51, pcb);

			// All-ones coordinates mean "no extent given" and are allowed
			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF &&
			               vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;

			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
			{
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}
		}
		else
		{
			// 3D void-extent
			int vx_low_s = read_bits(9, 10, pcb);
			int vx_high_s = read_bits(9, 19, pcb);
			int vx_low_t = read_bits(9, 28, pcb);
			int vx_high_t = read_bits(9, 37, pcb);
			int vx_low_r = read_bits(9, 46, pcb);
			int vx_high_r = read_bits(9, 55, pcb);

			// All-ones coordinates mean "no extent given" and are allowed
			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF &&
			               vx_low_t == 0x1FF && vx_high_t == 0x1FF &&
			               vx_low_r == 0x1FF && vx_high_r == 0x1FF;

			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_r >= vx_high_r) && !all_ones)
			{
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}
		}

		return;
	}

	// Reject block modes this block size cannot use
	unsigned int packed_index = bsd.block_mode_packed_index[block_mode];
	if (packed_index == BLOCK_BAD_BLOCK_MODE)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	const auto& bm = bsd.get_block_mode(block_mode);
	const auto& di = bsd.get_decimation_info(bm.decimation_mode);

	int weight_count = di.weight_count;
	promise(weight_count > 0);

	quant_method weight_quant_method = static_cast<quant_method>(bm.quant_mode);
	int is_dual_plane = bm.is_dual_plane;

	// Dual-plane blocks store two interleaved weights per texel sample
	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;

	int partition_count = read_bits(2, 11, pcb) + 1;
	promise(partition_count > 0);

	scb.block_mode = static_cast<uint16_t>(block_mode);
	scb.partition_count = static_cast<uint8_t>(partition_count);

	// Weight data is stored bit-reversed from the top of the block, so
	// mirror the whole block byte-by-byte and bit-by-bit before decoding
	for (int i = 0; i < 16; i++)
	{
		bswapped[i] = static_cast<uint8_t>(bitrev8(pcb[15 - i]));
	}

	int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method);

	// First bit position below the bit-reversed weight data
	int below_weights_pos = 128 - bits_for_weights;

	uint8_t indices[64];
	const auto& qat = quant_and_xfer_tables[weight_quant_method];

	decode_ise(weight_quant_method, real_weight_count, bswapped, indices, 0);

	if (is_dual_plane)
	{
		// De-interleave the two weight planes while unscrambling/unquantizing
		for (int i = 0; i < weight_count; i++)
		{
			scb.weights[i] = qat.unscramble_and_unquant_map[indices[2 * i]];
			scb.weights[i + WEIGHTS_PLANE2_OFFSET] = qat.unscramble_and_unquant_map[indices[2 * i + 1]];
		}
	}
	else
	{
		for (int i = 0; i < weight_count; i++)
		{
			scb.weights[i] = qat.unscramble_and_unquant_map[indices[i]];
		}
	}

	// Dual-plane with 4 partitions is not a legal combination
	if (is_dual_plane && partition_count == 4)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	scb.color_formats_matched = 0;

	// Determine the format of each endpoint pair
	int color_formats[BLOCK_MAX_PARTITIONS];
	int encoded_type_highpart_size = 0;
	if (partition_count == 1)
	{
		// Single partition: 4-bit format directly after the header
		color_formats[0] = read_bits(4, 13, pcb);
		scb.partition_index = 0;
	}
	else
	{
		// Multi-partition: the format field is split, with 6 bits after the
		// partition index and the remainder just below the weight data
		encoded_type_highpart_size = (3 * partition_count) - 4;
		below_weights_pos -= encoded_type_highpart_size;
		int encoded_type = read_bits(6, 13 + PARTITION_INDEX_BITS, pcb) |
		                   (read_bits(encoded_type_highpart_size, below_weights_pos, pcb) << 6);
		int baseclass = encoded_type & 0x3;
		if (baseclass == 0)
		{
			// Class bits 00: all partitions share one explicit 4-bit format
			for (int i = 0; i < partition_count; i++)
			{
				color_formats[i] = (encoded_type >> 2) & 0xF;
			}

			// The high part was not actually consumed in this mode
			below_weights_pos += encoded_type_highpart_size;
			scb.color_formats_matched = 1;
			encoded_type_highpart_size = 0;
		}
		else
		{
			// Otherwise each partition's class is baseclass - 1 plus a
			// per-partition delta bit, followed by 2 low format bits each
			int bitpos = 2;
			baseclass--;

			for (int i = 0; i < partition_count; i++)
			{
				color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
				bitpos++;
			}

			for (int i = 0; i < partition_count; i++)
			{
				color_formats[i] |= (encoded_type >> bitpos) & 3;
				bitpos += 2;
			}
		}
		scb.partition_index = static_cast<uint16_t>(read_bits(10, 13, pcb));
	}

	for (int i = 0; i < partition_count; i++)
	{
		scb.color_formats[i] = static_cast<uint8_t>(color_formats[i]);
	}

	// Determine number of color endpoint integers
	int color_integer_count = 0;
	for (int i = 0; i < partition_count; i++)
	{
		// Each endpoint class k uses 2k + 2 integers
		int endpoint_class = color_formats[i] >> 2;
		color_integer_count += (endpoint_class + 1) * 2;
	}

	if (color_integer_count > 18)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	// Determine the color endpoint format to use, based on the bits left
	// over after the header, partition index and weight data
	static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS };
	int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
	if (is_dual_plane)
	{
		// The plane 2 component selector consumes two more bits
		color_bits -= 2;
	}

	if (color_bits < 0)
	{
		color_bits = 0;
	}

	int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits];
	if (color_quant_level < QUANT_6)
	{
		// No quantization level fits in the available bits
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	// Unpack the integer color values and assign to endpoints
	scb.quant_mode = static_cast<quant_method>(color_quant_level);

	uint8_t values_to_decode[32];
	decode_ise(static_cast<quant_method>(color_quant_level), color_integer_count, pcb,
	           values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS));

	int valuecount_to_decode = 0;
	const uint8_t* unpack_table = color_scrambled_pquant_to_uquant_tables[scb.quant_mode - QUANT_6];
	for (int i = 0; i < partition_count; i++)
	{
		int vals = 2 * (color_formats[i] >> 2) + 2;
		for (int j = 0; j < vals; j++)
		{
			scb.color_values[i][j] = unpack_table[values_to_decode[j + valuecount_to_decode]];
		}
		valuecount_to_decode += vals;
	}

	// Fetch component for second-plane in the case of dual plane of weights.
	scb.plane2_component = -1;
	if (is_dual_plane)
	{
		scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb));
	}
}
|
||||
570
engine/thirdparty/astcenc/astcenc_vecmathlib.h
vendored
Normal file
570
engine/thirdparty/astcenc/astcenc_vecmathlib.h
vendored
Normal file
|
|
@ -0,0 +1,570 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2019-2022 Arm Limited
|
||||
// Copyright 2008 Jose Fonseca
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
* This module implements vector support for floats, ints, and vector lane
|
||||
* control masks. It provides access to both explicit vector width types, and
|
||||
* flexible N-wide types where N can be determined at compile time.
|
||||
*
|
||||
* The design of this module encourages use of vector length agnostic code, via
|
||||
* the vint, vfloat, and vmask types. These will take on the widest SIMD vector
|
||||
* with that is available at compile time. The current vector width is
|
||||
* accessible for e.g. loop strides via the ASTCENC_SIMD_WIDTH constant.
|
||||
*
|
||||
* Explicit scalar types are accessible via the vint1, vfloat1, vmask1 types.
|
||||
* These are provided primarily for prototyping and algorithm debug of VLA
|
||||
* implementations.
|
||||
*
|
||||
* Explicit 4-wide types are accessible via the vint4, vfloat4, and vmask4
|
||||
* types. These are provided for use by VLA code, but are also expected to be
|
||||
* used as a fixed-width type and will supported a reference C++ fallback for
|
||||
* use on platforms without SIMD intrinsics.
|
||||
*
|
||||
* Explicit 8-wide types are accessible via the vint8, vfloat8, and vmask8
|
||||
* types. These are provide for use by VLA code, and are not expected to be
|
||||
* used as a fixed-width type in normal code. No reference C implementation is
|
||||
* provided on platforms without underlying SIMD intrinsics.
|
||||
*
|
||||
* With the current implementation ISA support is provided for:
|
||||
*
|
||||
* * 1-wide for scalar reference.
|
||||
* * 4-wide for Armv8-A NEON.
|
||||
* * 4-wide for x86-64 SSE2.
|
||||
* * 4-wide for x86-64 SSE4.1.
|
||||
* * 8-wide for x86-64 AVX2.
|
||||
*/
|
||||
|
||||
#ifndef ASTC_VECMATHLIB_H_INCLUDED
|
||||
#define ASTC_VECMATHLIB_H_INCLUDED
|
||||
|
||||
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0
|
||||
#include <immintrin.h>
|
||||
#elif ASTCENC_NEON != 0
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__clang__) && defined(_MSC_VER)
|
||||
#define ASTCENC_SIMD_INLINE __forceinline
|
||||
#define ASTCENC_NO_INLINE
|
||||
#elif defined(__GNUC__) && !defined(__clang__)
|
||||
#define ASTCENC_SIMD_INLINE __attribute__((always_inline)) inline
|
||||
#define ASTCENC_NO_INLINE __attribute__ ((noinline))
|
||||
#else
|
||||
#define ASTCENC_SIMD_INLINE __attribute__((always_inline, nodebug)) inline
|
||||
#define ASTCENC_NO_INLINE __attribute__ ((noinline))
|
||||
#endif
|
||||
|
||||
#if ASTCENC_AVX >= 2
|
||||
/* If we have AVX2 expose 8-wide VLA. */
|
||||
#include "astcenc_vecmathlib_sse_4.h"
|
||||
#include "astcenc_vecmathlib_common_4.h"
|
||||
#include "astcenc_vecmathlib_avx2_8.h"
|
||||
|
||||
#define ASTCENC_SIMD_WIDTH 8
|
||||
|
||||
using vfloat = vfloat8;
|
||||
|
||||
#if defined(ASTCENC_NO_INVARIANCE)
|
||||
using vfloatacc = vfloat8;
|
||||
#else
|
||||
using vfloatacc = vfloat4;
|
||||
#endif
|
||||
|
||||
using vint = vint8;
|
||||
using vmask = vmask8;
|
||||
|
||||
constexpr auto loada = vfloat8::loada;
|
||||
constexpr auto load1 = vfloat8::load1;
|
||||
|
||||
#elif ASTCENC_SSE >= 20
|
||||
/* If we have SSE expose 4-wide VLA, and 4-wide fixed width. */
|
||||
#include "astcenc_vecmathlib_sse_4.h"
|
||||
#include "astcenc_vecmathlib_common_4.h"
|
||||
|
||||
#define ASTCENC_SIMD_WIDTH 4
|
||||
|
||||
using vfloat = vfloat4;
|
||||
using vfloatacc = vfloat4;
|
||||
using vint = vint4;
|
||||
using vmask = vmask4;
|
||||
|
||||
constexpr auto loada = vfloat4::loada;
|
||||
constexpr auto load1 = vfloat4::load1;
|
||||
|
||||
#elif ASTCENC_NEON > 0
|
||||
/* If we have NEON expose 4-wide VLA. */
|
||||
#include "astcenc_vecmathlib_neon_4.h"
|
||||
#include "astcenc_vecmathlib_common_4.h"
|
||||
|
||||
#define ASTCENC_SIMD_WIDTH 4
|
||||
|
||||
using vfloat = vfloat4;
|
||||
using vfloatacc = vfloat4;
|
||||
using vint = vint4;
|
||||
using vmask = vmask4;
|
||||
|
||||
constexpr auto loada = vfloat4::loada;
|
||||
constexpr auto load1 = vfloat4::load1;
|
||||
|
||||
#else
|
||||
// If we have nothing expose 4-wide VLA, and 4-wide fixed width.
|
||||
|
||||
// Note: We no longer expose the 1-wide scalar fallback because it is not
|
||||
// invariant with the 4-wide path due to algorithms that use horizontal
|
||||
// operations that accumulate a local vector sum before accumulating into
|
||||
// a running sum.
|
||||
//
|
||||
// For 4 items adding into an accumulator using 1-wide vectors the sum is:
|
||||
//
|
||||
// result = ((((sum + l0) + l1) + l2) + l3)
|
||||
//
|
||||
// ... whereas the accumulator for a 4-wide vector sum is:
|
||||
//
|
||||
// result = sum + ((l0 + l2) + (l1 + l3))
|
||||
//
|
||||
// In "normal maths" this is the same, but the floating point reassociation
|
||||
// differences mean that these will not produce the same result.
|
||||
|
||||
#include "astcenc_vecmathlib_none_4.h"
|
||||
#include "astcenc_vecmathlib_common_4.h"
|
||||
|
||||
#define ASTCENC_SIMD_WIDTH 4
|
||||
|
||||
using vfloat = vfloat4;
|
||||
using vfloatacc = vfloat4;
|
||||
using vint = vint4;
|
||||
using vmask = vmask4;
|
||||
|
||||
constexpr auto loada = vfloat4::loada;
|
||||
constexpr auto load1 = vfloat4::load1;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Round a count down to the largest multiple of 8.
|
||||
*
|
||||
* @param count The unrounded value.
|
||||
*
|
||||
* @return The rounded value.
|
||||
*/
|
||||
ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int count)
|
||||
{
|
||||
return count & static_cast<unsigned int>(~(8 - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Round a count down to the largest multiple of 4.
|
||||
*
|
||||
* @param count The unrounded value.
|
||||
*
|
||||
* @return The rounded value.
|
||||
*/
|
||||
ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int count)
|
||||
{
|
||||
return count & static_cast<unsigned int>(~(4 - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Round a count down to the largest multiple of the SIMD width.
|
||||
*
|
||||
* Assumption that the vector width is a power of two ...
|
||||
*
|
||||
* @param count The unrounded value.
|
||||
*
|
||||
* @return The rounded value.
|
||||
*/
|
||||
ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int count)
|
||||
{
|
||||
return count & static_cast<unsigned int>(~(ASTCENC_SIMD_WIDTH - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Round a count up to the largest multiple of the SIMD width.
|
||||
*
|
||||
* Assumption that the vector width is a power of two ...
|
||||
*
|
||||
* @param count The unrounded value.
|
||||
*
|
||||
* @return The rounded value.
|
||||
*/
|
||||
ASTCENC_SIMD_INLINE unsigned int round_up_to_simd_multiple_vla(unsigned int count)
|
||||
{
|
||||
unsigned int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH;
|
||||
return multiples * ASTCENC_SIMD_WIDTH;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Return @c a with lanes negated if the @c b lane is negative.
|
||||
*/
|
||||
ASTCENC_SIMD_INLINE vfloat change_sign(vfloat a, vfloat b)
|
||||
{
|
||||
vint ia = float_as_int(a);
|
||||
vint ib = float_as_int(b);
|
||||
vint sign_mask(static_cast<int>(0x80000000));
|
||||
vint r = ia ^ (ib & sign_mask);
|
||||
return int_as_float(r);
|
||||
}
|
||||
|
||||
/**
 * @brief Return fast, but approximate, vector atan(x).
 *
 * Max error of this implementation is 0.004883.
 */
ASTCENC_SIMD_INLINE vfloat atan(vfloat x)
{
    // Lanes with |x| > 1 are evaluated via the reciprocal, using the identity
    // atan(x) = +/-pi/2 - atan(1/x), keeping the rational fit in range
    vmask c = abs(x) > vfloat(1.0f);

    // +/- pi/2 matching the sign of x, used to fold reciprocal lanes back
    vfloat z = change_sign(vfloat(astc::PI_OVER_TWO), x);
    vfloat y = select(x, vfloat(1.0f) / x, c);

    // Rational approximation y / (0.28 y^2 + 1) of atan(y) for |y| <= 1
    y = y / (y * y * vfloat(0.28f) + vfloat(1.0f));
    return select(y, z - y, c);
}
|
||||
|
||||
/**
 * @brief Return fast, but approximate, vector atan2(x, y).
 */
ASTCENC_SIMD_INLINE vfloat atan2(vfloat y, vfloat x)
{
    // Single-quadrant angle from the ratio magnitude
    vfloat z = atan(abs(y / x));

    // Use the sign bit of x to pick z (x >= 0) or pi - z (x < 0), then copy
    // the sign of y onto the result to land in the correct quadrant
    vmask xmask = vmask(float_as_int(x).m);
    return change_sign(select_msb(z, vfloat(astc::PI) - z, xmask), y);
}
|
||||
|
||||
/**
 * @brief Factory that returns a unit length 4 component vfloat4.
 */
|
||||
static ASTCENC_SIMD_INLINE vfloat4 unit4()
{
    // (0.5, 0.5, 0.5, 0.5) has length sqrt(4 * 0.25) = 1
    return vfloat4(0.5f, 0.5f, 0.5f, 0.5f);
}
|
||||
|
||||
/**
|
||||
* @brief Factory that returns a unit length 3 component vfloat4.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 unit3()
|
||||
{
|
||||
float val = 0.577350258827209473f;
|
||||
return vfloat4(val, val, val, 0.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Factory that returns a unit length 2 component vfloat4.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 unit2()
|
||||
{
|
||||
float val = 0.707106769084930420f;
|
||||
return vfloat4(val, val, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Factory that returns a 3 component vfloat4.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 vfloat3(float a, float b, float c)
|
||||
{
|
||||
return vfloat4(a, b, c, 0.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Factory that returns a 2 component vfloat4.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 vfloat2(float a, float b)
|
||||
{
|
||||
return vfloat4(a, b, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Normalize a non-zero length vector to unit length.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 normalize(vfloat4 a)
|
||||
{
|
||||
vfloat4 length = dot(a, a);
|
||||
return a / sqrt(length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Normalize a vector, returning @c safe if len is zero.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 normalize_safe(vfloat4 a, vfloat4 safe)
|
||||
{
|
||||
vfloat4 length = dot(a, a);
|
||||
if (length.lane<0>() != 0.0f)
|
||||
{
|
||||
return a / sqrt(length);
|
||||
}
|
||||
|
||||
return safe;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Polynomial evaluation helpers using Horner's scheme: POLYn(x, c0, ..., cn)
// evaluates c0 + c1*x + c2*x^2 + ... + cn*x^n with n multiply-adds.
#define POLY0(x, c0) ( c0)
#define POLY1(x, c0, c1) ((POLY0(x, c1) * x) + c0)
#define POLY2(x, c0, c1, c2) ((POLY1(x, c1, c2) * x) + c0)
#define POLY3(x, c0, c1, c2, c3) ((POLY2(x, c1, c2, c3) * x) + c0)
#define POLY4(x, c0, c1, c2, c3, c4) ((POLY3(x, c1, c2, c3, c4) * x) + c0)
#define POLY5(x, c0, c1, c2, c3, c4, c5) ((POLY4(x, c1, c2, c3, c4, c5) * x) + c0)
|
||||
|
||||
/**
 * @brief Compute an approximate exp2(x) for each lane in the vector.
 *
 * Based on 5th degree minimax polynomials, ported from this blog
 * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
 */
static ASTCENC_SIMD_INLINE vfloat4 exp2(vfloat4 x)
{
    // Clamp to the exponent range representable in a finite float
    x = clamp(-126.99999f, 129.0f, x);

    // Split into integer and fractional parts; subtracting 0.5 first keeps
    // the fractional remainder centered in [-0.5, 0.5)
    vint4 ipart = float_to_int(x - 0.5f);
    vfloat4 fpart = x - int_to_float(ipart);

    // Integer contrib, using 1 << ipart
    // (the biased exponent is placed straight into the float exponent field)
    vfloat4 iexp = int_as_float(lsl<23>(ipart + 127));

    // Fractional contrib, using polynomial fit of 2^x in range [-0.5, 0.5)
    vfloat4 fexp = POLY5(fpart,
                         9.9999994e-1f,
                         6.9315308e-1f,
                         2.4015361e-1f,
                         5.5826318e-2f,
                         8.9893397e-3f,
                         1.8775767e-3f);

    // 2^x = 2^ipart * 2^fpart
    return iexp * fexp;
}
|
||||
|
||||
/**
 * @brief Compute an approximate log2(x) for each lane in the vector.
 *
 * Based on 5th degree minimax polynomials, ported from this blog
 * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
 */
static ASTCENC_SIMD_INLINE vfloat4 log2(vfloat4 x)
{
    // Bit masks for the IEEE 754 exponent field, mantissa field, and 1.0f
    vint4 exp(0x7F800000);
    vint4 mant(0x007FFFFF);
    vint4 one(0x3F800000);

    vint4 i = float_as_int(x);

    // Unbiased exponent, converted to a float
    vfloat4 e = int_to_float(lsr<23>(i & exp) - 127);

    // Mantissa rescaled into [1, 2) by pasting in the exponent bits of 1.0f
    vfloat4 m = int_as_float((i & mant) | one);

    // Polynomial fit of log2(x)/(x - 1), for x in range [1, 2)
    vfloat4 p = POLY4(m,
                      2.8882704548164776201f,
                      -2.52074962577807006663f,
                      1.48116647521213171641f,
                      -0.465725644288844778798f,
                      0.0596515482674574969533f);

    // Increases the polynomial degree, but ensures that log2(1) == 0
    p = p * (m - 1.0f);

    // log2(x) = log2(m) + e
    return p + e;
}
|
||||
|
||||
/**
|
||||
* @brief Compute an approximate pow(x, y) for each lane in the vector.
|
||||
*
|
||||
* Power function based on the exp2(log2(x) * y) transform.
|
||||
*/
|
||||
static ASTCENC_SIMD_INLINE vfloat4 pow(vfloat4 x, vfloat4 y)
|
||||
{
|
||||
vmask4 zero_mask = y == vfloat4(0.0f);
|
||||
vfloat4 estimate = exp2(log2(x) * y);
|
||||
|
||||
// Guarantee that y == 0 returns exactly 1.0f
|
||||
return select(estimate, vfloat4(1.0f), zero_mask);
|
||||
}
|
||||
|
||||
/**
 * @brief Count the leading zeros for each lane in @c a.
 *
 * Valid for all data values of @c a; will return a per-lane value [0, 32].
 */
static ASTCENC_SIMD_INLINE vint4 clz(vint4 a)
{
    // This function is a horrible abuse of floating point exponents to convert
    // the original integer value into a 2^N encoding we can recover easily.

    // Convert to float without risk of rounding up by keeping only top 8 bits.
    // This trick is guaranteed to keep top 8 bits and clear the 9th.
    a = (~lsr<8>(a)) & a;
    a = float_as_int(int_to_float(a));

    // Extract and unbias exponent; the converted float has exponent
    // (31 - clz), so subtracting from 127 + 31 recovers the count
    a = vint4(127 + 31) - lsr<23>(a);

    // Clamp result to a valid 32-bit range (covers the all-zero input, whose
    // converted float has a zero exponent field)
    return clamp(0, 32, a);
}
|
||||
|
||||
/**
 * @brief Return lanewise 2^a for each lane in @c a.
 *
 * Use of signed int means that this is only valid for values in range [0, 31].
 */
static ASTCENC_SIMD_INLINE vint4 two_to_the_n(vint4 a)
{
    // 2^30 is the largest signed number that can be represented
    assert(all(a < vint4(31)));

    // This function is a horrible abuse of floating point to use the exponent
    // and float conversion to generate a 2^N multiple.

    // Bias the exponent and shift it into the IEEE 754 exponent field
    vint4 exp = a + 127;
    exp = lsl<23>(exp);

    // Reinterpret the bits as a float, and then convert to an int
    vfloat4 f = int_as_float(exp);
    return float_to_int(f);
}
|
||||
|
||||
/**
 * @brief Convert unorm16 [0, 65535] to float16 in range [0, 1].
 */
static ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16(vint4 p)
{
    // FP16 bit pattern for an exact 1.0
    vint4 fp16_one = vint4(0x3C00);

    // Direct encoding used for very small inputs (p < 4)
    vint4 fp16_small = lsl<8>(p);

    vmask4 is_one = p == vint4(0xFFFF);
    vmask4 is_small = p < vint4(4);

    // Manually inline clz() on Visual Studio to avoid release build codegen bug
    // see https://github.com/ARM-software/astc-encoder/issues/259
#if !defined(__clang__) && defined(_MSC_VER)
    vint4 a = (~lsr<8>(p)) & p;
    a = float_as_int(int_to_float(a));
    a = vint4(127 + 31) - lsr<23>(a);
    vint4 lz = clamp(0, 32, a) - 16;
#else
    vint4 lz = clz(p) - 16;
#endif

    // Normalize so the leading one bit moves out of the 16-bit field
    p = p * two_to_the_n(lz + 1);
    p = p & vint4(0xFFFF);

    // Keep the top 10 bits as the FP16 mantissa
    p = lsr<6>(p);

    // Merge in the exponent derived from the leading-zero count
    p = p | lsl<10>(vint4(14) - lz);

    // Patch up the exact-one and small-value special cases
    vint4 r = select(p, fp16_one, is_one);
    r = select(r, fp16_small, is_small);
    return r;
}
|
||||
|
||||
/**
 * @brief Convert 16-bit LNS to float16.
 */
static ASTCENC_SIMD_INLINE vint4 lns_to_sf16(vint4 p)
{
    // Split the LNS value into mantissa (low 11 bits) and exponent (the rest)
    vint4 mc = p & 0x7FF;
    vint4 ec = lsr<11>(p);

    // Piecewise-linear remap of the mantissa, with breaks at 512 and 1536
    vint4 mc_512 = mc * 3;
    vmask4 mask_512 = mc < vint4(512);

    vint4 mc_1536 = mc * 4 - 512;
    vmask4 mask_1536 = mc < vint4(1536);

    vint4 mc_else = mc * 5 - 2048;

    // Select the applicable segment; later selects override earlier ones
    vint4 mt = mc_else;
    mt = select(mt, mc_1536, mask_1536);
    mt = select(mt, mc_512, mask_512);

    // Reassemble as FP16 bits, clamping below the FP16 infinity encoding
    vint4 res = lsl<10>(ec) | lsr<3>(mt);
    return min(res, vint4(0x7BFF));
}
|
||||
|
||||
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * @param      a     The input value.
 * @param[out] exp   The output exponent.
 *
 * @return The mantissa.
 */
static ASTCENC_SIMD_INLINE vfloat4 frexp(vfloat4 a, vint4& exp)
{
    // Interpret the bits as an integer
    vint4 ai = float_as_int(a);

    // Extract and unbias the exponent
    exp = (lsr<23>(ai) & 0xFF) - 126;

    // Extract the mantissa (keeping the sign bit) and re-bias it into the
    // [0.5, 1.0) range by forcing the exponent field to that of 0.5
    vint4 manti = (ai & static_cast<int>(0x807FFFFF)) | 0x3F000000;
    return int_as_float(manti);
}
|
||||
|
||||
/**
 * @brief Convert float to 16-bit LNS.
 */
static ASTCENC_SIMD_INLINE vfloat4 float_to_lns(vfloat4 a)
{
    vint4 exp;
    vfloat4 mant = frexp(a, exp);

    // Do these early before we start messing about ...
    // NaN lanes fail the > comparison, so they also fall into this mask
    vmask4 mask_underflow_nan = ~(a > vfloat4(1.0f / 67108864.0f));
    vmask4 mask_infinity = a >= vfloat4(65536.0f);

    // If input is smaller than 2^-14, multiply by 2^25 and don't bias.
    vmask4 exp_lt_m13 = exp < vint4(-13);

    vfloat4 a1a = a * 33554432.0f;
    vint4 expa = vint4::zero();

    vfloat4 a1b = (mant - 0.5f) * 4096;
    vint4 expb = exp + 14;

    a = select(a1b, a1a, exp_lt_m13);
    exp = select(expb, expa, exp_lt_m13);

    // Piecewise-linear remap of the mantissa contribution, with breaks at
    // 384 and 1408; later selects override earlier ones
    vmask4 a_lt_384 = a < vfloat4(384.0f);
    vmask4 a_lt_1408 = a <= vfloat4(1408.0f);

    vfloat4 a2a = a * (4.0f / 3.0f);
    vfloat4 a2b = a + 128.0f;
    vfloat4 a2c = (a + 512.0f) * (4.0f / 5.0f);

    a = a2c;
    a = select(a, a2b, a_lt_1408);
    a = select(a, a2a, a_lt_384);

    // Merge in the exponent contribution (2048 LNS units per exponent step)
    a = a + (int_to_float(exp) * 2048.0f) + 1.0f;

    // Apply the special-case clamps computed up front
    a = select(a, vfloat4(65535.0f), mask_infinity);
    a = select(a, vfloat4::zero(), mask_underflow_nan);

    return a;
}
|
||||
|
||||
namespace astc
|
||||
{
|
||||
|
||||
static ASTCENC_SIMD_INLINE float pow(float x, float y)
|
||||
{
|
||||
return pow(vfloat4(x), vfloat4(y)).lane<0>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef ASTC_VECMATHLIB_H_INCLUDED
|
||||
1212
engine/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h
vendored
Normal file
1212
engine/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
423
engine/thirdparty/astcenc/astcenc_vecmathlib_common_4.h
vendored
Normal file
423
engine/thirdparty/astcenc/astcenc_vecmathlib_common_4.h
vendored
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2020-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Generic 4x32-bit vector functions.
|
||||
*
|
||||
* This module implements generic 4-wide vector functions that are valid for
|
||||
* all instruction sets, typically implemented using lower level 4-wide
|
||||
* operations that are ISA-specific.
|
||||
*/
|
||||
|
||||
#ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
|
||||
#define ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
|
||||
|
||||
#ifndef ASTCENC_SIMD_INLINE
|
||||
#error "Include astcenc_vecmathlib.h, do not include directly"
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
// ============================================================================
|
||||
// vmask4 operators and functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * @brief True if any lanes are enabled, false otherwise.
 */
ASTCENC_SIMD_INLINE bool any(vmask4 a)
{
    // mask() returns a 4-bit lane bitfield; non-zero means >= 1 lane enabled
    return mask(a) != 0;
}

/**
 * @brief True if all lanes are enabled, false otherwise.
 */
ASTCENC_SIMD_INLINE bool all(vmask4 a)
{
    // All four lane bits must be set
    return mask(a) == 0xF;
}
|
||||
|
||||
// ============================================================================
|
||||
// vint4 operators and functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * @brief Overload: vector by scalar addition.
 */
ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, int b)
{
    // Splat the scalar and reuse the vector-by-vector operator
    return a + vint4(b);
}

/**
 * @brief Overload: vector by vector incremental addition.
 */
ASTCENC_SIMD_INLINE vint4& operator+=(vint4& a, const vint4& b)
{
    a = a + b;
    return a;
}

/**
 * @brief Overload: vector by scalar subtraction.
 */
ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, int b)
{
    return a - vint4(b);
}

/**
 * @brief Overload: vector by scalar multiplication.
 */
ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, int b)
{
    return a * vint4(b);
}

/**
 * @brief Overload: vector by scalar bitwise or.
 */
ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, int b)
{
    return a | vint4(b);
}

/**
 * @brief Overload: vector by scalar bitwise and.
 */
ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, int b)
{
    return a & vint4(b);
}

/**
 * @brief Overload: vector by scalar bitwise xor.
 */
ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, int b)
{
    return a ^ vint4(b);
}
|
||||
|
||||
/**
 * @brief Return the clamped value between min and max.
 */
ASTCENC_SIMD_INLINE vint4 clamp(int minv, int maxv, vint4 a)
{
    // Apply the lower bound first, then the upper bound
    return min(max(a, vint4(minv)), vint4(maxv));
}

/**
 * @brief Return the horizontal sum of RGB vector lanes as a scalar.
 *
 * The alpha lane (lane 3) is ignored.
 */
ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a)
{
    return a.lane<0>() + a.lane<1>() + a.lane<2>();
}
|
||||
|
||||
// ============================================================================
|
||||
// vfloat4 operators and functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * @brief Overload: vector by vector incremental addition.
 */
ASTCENC_SIMD_INLINE vfloat4& operator+=(vfloat4& a, const vfloat4& b)
{
    a = a + b;
    return a;
}

/**
 * @brief Overload: vector by scalar addition.
 */
ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, float b)
{
    // Splat the scalar and reuse the vector-by-vector operator
    return a + vfloat4(b);
}

/**
 * @brief Overload: vector by scalar subtraction.
 */
ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, float b)
{
    return a - vfloat4(b);
}

/**
 * @brief Overload: vector by scalar multiplication.
 */
ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, float b)
{
    return a * vfloat4(b);
}

/**
 * @brief Overload: scalar by vector multiplication.
 */
ASTCENC_SIMD_INLINE vfloat4 operator*(float a, vfloat4 b)
{
    return vfloat4(a) * b;
}

/**
 * @brief Overload: vector by scalar division.
 */
ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, float b)
{
    return a / vfloat4(b);
}

/**
 * @brief Overload: scalar by vector division.
 */
ASTCENC_SIMD_INLINE vfloat4 operator/(float a, vfloat4 b)
{
    return vfloat4(a) / b;
}
|
||||
|
||||
/**
 * @brief Return the min vector of a vector and a scalar.
 *
 * If either lane value is NaN, @c b will be returned for that lane.
 */
ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, float b)
{
    // Splat the scalar and reuse the vector-by-vector min
    return min(a, vfloat4(b));
}

/**
 * @brief Return the max vector of a vector and a scalar.
 *
 * If either lane value is NaN, @c b will be returned for that lane.
 */
ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, float b)
{
    return max(a, vfloat4(b));
}
|
||||
|
||||
/**
 * @brief Return the clamped value between min and max.
 *
 * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN
 * then @c min will be returned for that lane.
 */
ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a)
{
    // Do not reorder - second operand will return if either is NaN
    return min(max(a, minv), maxv);
}

/**
 * @brief Return the clamped value between 0.0f and max.
 *
 * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will
 * be returned for that lane.
 */
ASTCENC_SIMD_INLINE vfloat4 clampz(float maxv, vfloat4 a)
{
    // Do not reorder - second operand will return if either is NaN
    return min(max(a, vfloat4::zero()), maxv);
}

/**
 * @brief Return the clamped value between 0.0f and 1.0f.
 *
 * If @c a is NaN then zero will be returned for that lane.
 */
ASTCENC_SIMD_INLINE vfloat4 clampzo(vfloat4 a)
{
    // Do not reorder - second operand will return if either is NaN
    return min(max(a, vfloat4::zero()), 1.0f);
}
|
||||
|
||||
/**
 * @brief Return the horizontal minimum of a vector.
 */
ASTCENC_SIMD_INLINE float hmin_s(vfloat4 a)
{
    return hmin(a).lane<0>();
}

/**
 * @brief Return the horizontal min of RGB vector lanes as a scalar.
 */
ASTCENC_SIMD_INLINE float hmin_rgb_s(vfloat4 a)
{
    // Overwrite the alpha lane with a copy of red so it cannot win the min
    a.set_lane<3>(a.lane<0>());
    return hmin_s(a);
}

/**
 * @brief Return the horizontal maximum of a vector.
 */
ASTCENC_SIMD_INLINE float hmax_s(vfloat4 a)
{
    return hmax(a).lane<0>();
}

/**
 * @brief Accumulate lane-wise sums for a vector.
 */
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a)
{
    accum = accum + a;
}

/**
 * @brief Accumulate lane-wise sums for a masked vector.
 */
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a, vmask4 m)
{
    // Zero out the disabled lanes before accumulating
    a = select(vfloat4::zero(), a, m);
    haccumulate(accum, a);
}

/**
 * @brief Return the horizontal sum of RGB vector lanes as a scalar.
 *
 * The alpha lane (lane 3) is ignored.
 */
ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a)
{
    return a.lane<0>() + a.lane<1>() + a.lane<2>();
}
|
||||
|
||||
#if !defined(ASTCENC_USE_NATIVE_DOT_PRODUCT)
|
||||
|
||||
/**
 * @brief Return the dot product for the full 4 lanes, returning scalar.
 */
ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b)
{
    // Multiply-then-horizontal-add fallback, used when no native dot
    // product implementation is provided by the ISA backend
    vfloat4 m = a * b;
    return hadd_s(m);
}

/**
 * @brief Return the dot product for the full 4 lanes, returning vector.
 */
ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b)
{
    vfloat4 m = a * b;
    return vfloat4(hadd_s(m));
}

/**
 * @brief Return the dot product for the bottom 3 lanes, returning scalar.
 */
ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b)
{
    vfloat4 m = a * b;
    return hadd_rgb_s(m);
}

/**
 * @brief Return the dot product for the bottom 3 lanes, returning vector.
 *
 * The alpha lane of the result is zero.
 */
ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b)
{
    vfloat4 m = a * b;
    float d3 = hadd_rgb_s(m);
    return vfloat4(d3, d3, d3, 0.0f);
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(ASTCENC_USE_NATIVE_POPCOUNT)
|
||||
|
||||
/**
 * @brief Population bit count.
 *
 * @param v   The value to population count.
 *
 * @return The number of 1 bits.
 */
static inline int popcount(uint64_t v)
{
    // Classic SWAR bit count: sum bits within 2-bit, then 4-bit groups, then
    // use a multiply to accumulate every byte's count into the top byte
    constexpr uint64_t two_bit_mask = 0x5555555555555555ULL;
    constexpr uint64_t four_bit_mask = 0x3333333333333333ULL;
    constexpr uint64_t byte_mask = 0x0F0F0F0F0F0F0F0FULL;

    v = v - ((v >> 1) & two_bit_mask);
    v = (v & four_bit_mask) + ((v >> 2) & four_bit_mask);
    v = (v + (v >> 4)) & byte_mask;
    v = (v * 0x0101010101010101ULL) >> 56;
    return static_cast<int>(v);
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
 * @brief Apply signed bit transfer.
 *
 * @param input0   The first encoded endpoint.
 * @param input1   The second encoded endpoint.
 */
static ASTCENC_SIMD_INLINE void bit_transfer_signed(
    vint4& input0,
    vint4& input1
) {
    // Shift input1 right one bit and insert bit 7 of input0 as its new top bit
    input1 = lsr<1>(input1) | (input0 & 0x80);
    input0 = lsr<1>(input0) & 0x3F;

    // Sign-extend input0 from 6 bits: lanes with bit 5 set become negative
    vmask4 mask = (input0 & 0x20) != vint4::zero();
    input0 = select(input0, input0 - 0x40, mask);
}
|
||||
|
||||
/**
 * @brief Debug function to print a vector of ints.
 */
ASTCENC_SIMD_INLINE void print(vint4 a)
{
    ASTCENC_ALIGNAS int v[4];
    storea(a, v);
    printf("v4_i32:\n %8d %8d %8d %8d\n",
           v[0], v[1], v[2], v[3]);
}

/**
 * @brief Debug function to print a vector of ints in hexadecimal.
 */
ASTCENC_SIMD_INLINE void printx(vint4 a)
{
    ASTCENC_ALIGNAS int v[4];
    storea(a, v);
    printf("v4_i32:\n %08x %08x %08x %08x\n",
           v[0], v[1], v[2], v[3]);
}

/**
 * @brief Debug function to print a vector of floats.
 */
ASTCENC_SIMD_INLINE void print(vfloat4 a)
{
    ASTCENC_ALIGNAS float v[4];
    storea(a, v);
    // Promote to double explicitly to keep the printf varargs well-defined
    printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n",
           static_cast<double>(v[0]), static_cast<double>(v[1]),
           static_cast<double>(v[2]), static_cast<double>(v[3]));
}

/**
 * @brief Debug function to print a vector of masks.
 */
ASTCENC_SIMD_INLINE void print(vmask4 a)
{
    // Render each mask lane as 0 or 1
    print(select(vint4(0), vint4(1), a));
}
|
||||
|
||||
#endif // #ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
|
||||
1099
engine/thirdparty/astcenc/astcenc_vecmathlib_neon_4.h
vendored
Normal file
1099
engine/thirdparty/astcenc/astcenc_vecmathlib_neon_4.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1213
engine/thirdparty/astcenc/astcenc_vecmathlib_none_4.h
vendored
Normal file
1213
engine/thirdparty/astcenc/astcenc_vecmathlib_none_4.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1315
engine/thirdparty/astcenc/astcenc_vecmathlib_sse_4.h
vendored
Normal file
1315
engine/thirdparty/astcenc/astcenc_vecmathlib_sse_4.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
479
engine/thirdparty/astcenc/astcenc_weight_align.cpp
vendored
Normal file
479
engine/thirdparty/astcenc/astcenc_weight_align.cpp
vendored
Normal file
|
|
@ -0,0 +1,479 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2024 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(ASTCENC_DECOMPRESS_ONLY)
|
||||
|
||||
/**
|
||||
* @brief Functions for angular-sum algorithm for weight alignment.
|
||||
*
|
||||
* This algorithm works as follows:
|
||||
* - we compute a complex number P as (cos s*i, sin s*i) for each weight,
|
||||
* where i is the input value and s is a scaling factor based on the spacing between the weights.
|
||||
* - we then add together complex numbers for all the weights.
|
||||
* - we then compute the length and angle of the resulting sum.
|
||||
*
|
||||
* This should produce the following results:
|
||||
* - perfect alignment results in a vector whose length is equal to the sum of lengths of all inputs
|
||||
* - even distribution results in a vector of length 0.
|
||||
* - all samples identical results in perfect alignment for every scaling.
|
||||
*
|
||||
* For each scaling factor within a given set, we compute an alignment factor from 0 to 1. This
|
||||
* should then result in some scalings standing out as having particularly good alignment factors;
|
||||
* we can use this to produce a set of candidate scale/shift values for various quantization levels;
|
||||
* we should then actually try them and see what happens.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
#include "astcenc_vecmathlib.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
static constexpr unsigned int ANGULAR_STEPS { 32 };
|
||||
|
||||
static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0,
|
||||
"ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH");
|
||||
|
||||
static_assert(ANGULAR_STEPS >= 32,
|
||||
"ANGULAR_STEPS must be at least max(steps_for_quant_level)");
|
||||
|
||||
// Store a reduced sin/cos table for 64 possible weight values; this causes
|
||||
// slight quality loss compared to using sin() and cos() directly. Must be 2^N.
|
||||
static constexpr unsigned int SINCOS_STEPS { 64 };
|
||||
|
||||
static const uint8_t steps_for_quant_level[12] {
|
||||
2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32
|
||||
};
|
||||
|
||||
ASTCENC_ALIGNAS static float sin_table[SINCOS_STEPS][ANGULAR_STEPS];
|
||||
ASTCENC_ALIGNAS static float cos_table[SINCOS_STEPS][ANGULAR_STEPS];
|
||||
|
||||
#if defined(ASTCENC_DIAGNOSTICS)
|
||||
static bool print_once { true };
|
||||
#endif
|
||||
|
||||
/* See header for documentation. */
|
||||
void prepare_angular_tables()
|
||||
{
|
||||
for (unsigned int i = 0; i < ANGULAR_STEPS; i++)
|
||||
{
|
||||
float angle_step = static_cast<float>(i + 1);
|
||||
|
||||
for (unsigned int j = 0; j < SINCOS_STEPS; j++)
|
||||
{
|
||||
sin_table[j][i] = static_cast<float>(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j)));
|
||||
cos_table[j][i] = static_cast<float>(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Compute the angular alignment factors and offsets.
|
||||
*
|
||||
* @param weight_count The number of (decimated) weights.
|
||||
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
|
||||
* @param max_angular_steps The maximum number of steps to be tested.
|
||||
* @param[out] offsets The output angular offsets array.
|
||||
*/
|
||||
static void compute_angular_offsets(
|
||||
unsigned int weight_count,
|
||||
const float* dec_weight_ideal_value,
|
||||
unsigned int max_angular_steps,
|
||||
float* offsets
|
||||
) {
|
||||
promise(weight_count > 0);
|
||||
promise(max_angular_steps > 0);
|
||||
|
||||
ASTCENC_ALIGNAS int isamplev[BLOCK_MAX_WEIGHTS];
|
||||
|
||||
// Precompute isample; arrays are always allocated 64 elements long
|
||||
for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
|
||||
{
|
||||
// Add 2^23 and interpreting bits extracts round-to-nearest int
|
||||
vfloat sample = loada(dec_weight_ideal_value + i) * (SINCOS_STEPS - 1.0f) + vfloat(12582912.0f);
|
||||
vint isample = float_as_int(sample) & vint((SINCOS_STEPS - 1));
|
||||
storea(isample, isamplev + i);
|
||||
}
|
||||
|
||||
// Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max
|
||||
vfloat mult = vfloat(1.0f / (2.0f * astc::PI));
|
||||
|
||||
for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH)
|
||||
{
|
||||
vfloat anglesum_x = vfloat::zero();
|
||||
vfloat anglesum_y = vfloat::zero();
|
||||
|
||||
for (unsigned int j = 0; j < weight_count; j++)
|
||||
{
|
||||
int isample = isamplev[j];
|
||||
anglesum_x += loada(cos_table[isample] + i);
|
||||
anglesum_y += loada(sin_table[isample] + i);
|
||||
}
|
||||
|
||||
vfloat angle = atan2(anglesum_y, anglesum_x);
|
||||
vfloat ofs = angle * mult;
|
||||
storea(ofs, offsets + i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief For a given step size compute the lowest and highest weight.
|
||||
*
|
||||
* Compute the lowest and highest weight that results from quantizing using the given stepsize and
|
||||
* offset, and then compute the resulting error. The cut errors indicate the error that results from
|
||||
* forcing samples that should have had one weight value one step up or down.
|
||||
*
|
||||
* @param weight_count The number of (decimated) weights.
|
||||
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
|
||||
* @param max_angular_steps The maximum number of steps to be tested.
|
||||
* @param max_quant_steps The maximum quantization level to be tested.
|
||||
* @param offsets The angular offsets array.
|
||||
* @param[out] lowest_weight Per angular step, the lowest weight.
|
||||
* @param[out] weight_span Per angular step, the span between lowest and highest weight.
|
||||
* @param[out] error Per angular step, the error.
|
||||
* @param[out] cut_low_weight_error Per angular step, the low weight cut error.
|
||||
* @param[out] cut_high_weight_error Per angular step, the high weight cut error.
|
||||
*/
|
||||
static void compute_lowest_and_highest_weight(
|
||||
unsigned int weight_count,
|
||||
const float* dec_weight_ideal_value,
|
||||
unsigned int max_angular_steps,
|
||||
unsigned int max_quant_steps,
|
||||
const float* offsets,
|
||||
float* lowest_weight,
|
||||
int* weight_span,
|
||||
float* error,
|
||||
float* cut_low_weight_error,
|
||||
float* cut_high_weight_error
|
||||
) {
|
||||
promise(weight_count > 0);
|
||||
promise(max_angular_steps > 0);
|
||||
|
||||
vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f);
|
||||
|
||||
// Arrays are ANGULAR_STEPS long, so always safe to run full vectors
|
||||
for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH)
|
||||
{
|
||||
vfloat minidx(128.0f);
|
||||
vfloat maxidx(-128.0f);
|
||||
vfloat errval = vfloat::zero();
|
||||
vfloat cut_low_weight_err = vfloat::zero();
|
||||
vfloat cut_high_weight_err = vfloat::zero();
|
||||
vfloat offset = loada(offsets + sp);
|
||||
|
||||
for (unsigned int j = 0; j < weight_count; j++)
|
||||
{
|
||||
vfloat sval = load1(dec_weight_ideal_value + j) * rcp_stepsize - offset;
|
||||
vfloat svalrte = round(sval);
|
||||
vfloat diff = sval - svalrte;
|
||||
errval += diff * diff;
|
||||
|
||||
// Reset tracker on min hit
|
||||
vmask mask = svalrte < minidx;
|
||||
minidx = select(minidx, svalrte, mask);
|
||||
cut_low_weight_err = select(cut_low_weight_err, vfloat::zero(), mask);
|
||||
|
||||
// Accumulate on min hit
|
||||
mask = svalrte == minidx;
|
||||
vfloat accum = cut_low_weight_err + vfloat(1.0f) - vfloat(2.0f) * diff;
|
||||
cut_low_weight_err = select(cut_low_weight_err, accum, mask);
|
||||
|
||||
// Reset tracker on max hit
|
||||
mask = svalrte > maxidx;
|
||||
maxidx = select(maxidx, svalrte, mask);
|
||||
cut_high_weight_err = select(cut_high_weight_err, vfloat::zero(), mask);
|
||||
|
||||
// Accumulate on max hit
|
||||
mask = svalrte == maxidx;
|
||||
accum = cut_high_weight_err + vfloat(1.0f) + vfloat(2.0f) * diff;
|
||||
cut_high_weight_err = select(cut_high_weight_err, accum, mask);
|
||||
}
|
||||
|
||||
// Write out min weight and weight span; clamp span to a usable range
|
||||
vint span = float_to_int(maxidx - minidx + vfloat(1));
|
||||
span = min(span, vint(max_quant_steps + 3));
|
||||
span = max(span, vint(2));
|
||||
storea(minidx, lowest_weight + sp);
|
||||
storea(span, weight_span + sp);
|
||||
|
||||
// The cut_(lowest/highest)_weight_error indicate the error that results from forcing
|
||||
// samples that should have had the weight value one step (up/down).
|
||||
vfloat ssize = 1.0f / rcp_stepsize;
|
||||
vfloat errscale = ssize * ssize;
|
||||
storea(errval * errscale, error + sp);
|
||||
storea(cut_low_weight_err * errscale, cut_low_weight_error + sp);
|
||||
storea(cut_high_weight_err * errscale, cut_high_weight_error + sp);
|
||||
|
||||
rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief The main function for the angular algorithm.
|
||||
*
|
||||
* @param weight_count The number of (decimated) weights.
|
||||
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
|
||||
* @param max_quant_level The maximum quantization level to be tested.
|
||||
* @param[out] low_value Per angular step, the lowest weight value.
|
||||
* @param[out] high_value Per angular step, the highest weight value.
|
||||
*/
|
||||
static void compute_angular_endpoints_for_quant_levels(
|
||||
unsigned int weight_count,
|
||||
const float* dec_weight_ideal_value,
|
||||
unsigned int max_quant_level,
|
||||
float low_value[TUNE_MAX_ANGULAR_QUANT + 1],
|
||||
float high_value[TUNE_MAX_ANGULAR_QUANT + 1]
|
||||
) {
|
||||
unsigned int max_quant_steps = steps_for_quant_level[max_quant_level];
|
||||
unsigned int max_angular_steps = steps_for_quant_level[max_quant_level];
|
||||
|
||||
ASTCENC_ALIGNAS float angular_offsets[ANGULAR_STEPS];
|
||||
|
||||
compute_angular_offsets(weight_count, dec_weight_ideal_value,
|
||||
max_angular_steps, angular_offsets);
|
||||
|
||||
ASTCENC_ALIGNAS float lowest_weight[ANGULAR_STEPS];
|
||||
ASTCENC_ALIGNAS int32_t weight_span[ANGULAR_STEPS];
|
||||
ASTCENC_ALIGNAS float error[ANGULAR_STEPS];
|
||||
ASTCENC_ALIGNAS float cut_low_weight_error[ANGULAR_STEPS];
|
||||
ASTCENC_ALIGNAS float cut_high_weight_error[ANGULAR_STEPS];
|
||||
|
||||
compute_lowest_and_highest_weight(weight_count, dec_weight_ideal_value,
|
||||
max_angular_steps, max_quant_steps,
|
||||
angular_offsets, lowest_weight, weight_span, error,
|
||||
cut_low_weight_error, cut_high_weight_error);
|
||||
|
||||
// For each quantization level, find the best error terms. Use packed vectors so data-dependent
|
||||
// branches can become selects. This involves some integer to float casts, but the values are
|
||||
// small enough so they never round the wrong way.
|
||||
vfloat4 best_results[36];
|
||||
|
||||
// Initialize the array to some safe defaults
|
||||
promise(max_quant_steps > 0);
|
||||
for (unsigned int i = 0; i < (max_quant_steps + 4); i++)
|
||||
{
|
||||
// Lane<0> = Best error
|
||||
// Lane<1> = Best scale; -1 indicates no solution found
|
||||
// Lane<2> = Cut low weight
|
||||
best_results[i] = vfloat4(ERROR_CALC_DEFAULT, -1.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
promise(max_angular_steps > 0);
|
||||
for (unsigned int i = 0; i < max_angular_steps; i++)
|
||||
{
|
||||
float i_flt = static_cast<float>(i);
|
||||
|
||||
int idx_span = weight_span[i];
|
||||
|
||||
float error_cut_low = error[i] + cut_low_weight_error[i];
|
||||
float error_cut_high = error[i] + cut_high_weight_error[i];
|
||||
float error_cut_low_high = error[i] + cut_low_weight_error[i] + cut_high_weight_error[i];
|
||||
|
||||
// Check best error against record N
|
||||
vfloat4 best_result = best_results[idx_span];
|
||||
vfloat4 new_result = vfloat4(error[i], i_flt, 0.0f, 0.0f);
|
||||
vmask4 mask = vfloat4(best_result.lane<0>()) > vfloat4(error[i]);
|
||||
best_results[idx_span] = select(best_result, new_result, mask);
|
||||
|
||||
// Check best error against record N-1 with either cut low or cut high
|
||||
best_result = best_results[idx_span - 1];
|
||||
|
||||
new_result = vfloat4(error_cut_low, i_flt, 1.0f, 0.0f);
|
||||
mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low);
|
||||
best_result = select(best_result, new_result, mask);
|
||||
|
||||
new_result = vfloat4(error_cut_high, i_flt, 0.0f, 0.0f);
|
||||
mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_high);
|
||||
best_results[idx_span - 1] = select(best_result, new_result, mask);
|
||||
|
||||
// Check best error against record N-2 with both cut low and high
|
||||
best_result = best_results[idx_span - 2];
|
||||
new_result = vfloat4(error_cut_low_high, i_flt, 1.0f, 0.0f);
|
||||
mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low_high);
|
||||
best_results[idx_span - 2] = select(best_result, new_result, mask);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i <= max_quant_level; i++)
|
||||
{
|
||||
unsigned int q = steps_for_quant_level[i];
|
||||
int bsi = static_cast<int>(best_results[q].lane<1>());
|
||||
|
||||
// Did we find anything?
|
||||
#if defined(ASTCENC_DIAGNOSTICS)
|
||||
if ((bsi < 0) && print_once)
|
||||
{
|
||||
print_once = false;
|
||||
printf("INFO: Unable to find full encoding within search error limit.\n\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
bsi = astc::max(0, bsi);
|
||||
|
||||
float lwi = lowest_weight[bsi] + best_results[q].lane<2>();
|
||||
float hwi = lwi + static_cast<float>(q) - 1.0f;
|
||||
|
||||
float stepsize = 1.0f / (1.0f + static_cast<float>(bsi));
|
||||
low_value[i] = (angular_offsets[bsi] + lwi) * stepsize;
|
||||
high_value[i] = (angular_offsets[bsi] + hwi) * stepsize;
|
||||
}
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void compute_angular_endpoints_1plane(
|
||||
bool only_always,
|
||||
const block_size_descriptor& bsd,
|
||||
const float* dec_weight_ideal_value,
|
||||
unsigned int max_weight_quant,
|
||||
compression_working_buffers& tmpbuf
|
||||
) {
|
||||
float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1;
|
||||
float (&high_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1;
|
||||
|
||||
float (&low_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1;
|
||||
float (&high_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1;
|
||||
|
||||
unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always
|
||||
: bsd.decimation_mode_count_selected;
|
||||
promise(max_decimation_modes > 0);
|
||||
for (unsigned int i = 0; i < max_decimation_modes; i++)
|
||||
{
|
||||
const decimation_mode& dm = bsd.decimation_modes[i];
|
||||
if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned int weight_count = bsd.get_decimation_info(i).weight_count;
|
||||
|
||||
unsigned int max_precision = dm.maxprec_1plane;
|
||||
if (max_precision > TUNE_MAX_ANGULAR_QUANT)
|
||||
{
|
||||
max_precision = TUNE_MAX_ANGULAR_QUANT;
|
||||
}
|
||||
|
||||
if (max_precision > max_weight_quant)
|
||||
{
|
||||
max_precision = max_weight_quant;
|
||||
}
|
||||
|
||||
compute_angular_endpoints_for_quant_levels(
|
||||
weight_count,
|
||||
dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
|
||||
max_precision, low_values[i], high_values[i]);
|
||||
}
|
||||
|
||||
unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always
|
||||
: bsd.block_mode_count_1plane_selected;
|
||||
promise(max_block_modes > 0);
|
||||
for (unsigned int i = 0; i < max_block_modes; i++)
|
||||
{
|
||||
const block_mode& bm = bsd.block_modes[i];
|
||||
assert(!bm.is_dual_plane);
|
||||
|
||||
unsigned int quant_mode = bm.quant_mode;
|
||||
unsigned int decim_mode = bm.decimation_mode;
|
||||
|
||||
if (quant_mode <= TUNE_MAX_ANGULAR_QUANT)
|
||||
{
|
||||
low_value[i] = low_values[decim_mode][quant_mode];
|
||||
high_value[i] = high_values[decim_mode][quant_mode];
|
||||
}
|
||||
else
|
||||
{
|
||||
low_value[i] = 0.0f;
|
||||
high_value[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* See header for documentation. */
|
||||
void compute_angular_endpoints_2planes(
|
||||
const block_size_descriptor& bsd,
|
||||
const float* dec_weight_ideal_value,
|
||||
unsigned int max_weight_quant,
|
||||
compression_working_buffers& tmpbuf
|
||||
) {
|
||||
float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1;
|
||||
float (&high_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1;
|
||||
float (&low_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value2;
|
||||
float (&high_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value2;
|
||||
|
||||
float (&low_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1;
|
||||
float (&high_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1;
|
||||
float (&low_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values2;
|
||||
float (&high_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values2;
|
||||
|
||||
promise(bsd.decimation_mode_count_selected > 0);
|
||||
for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
|
||||
{
|
||||
const decimation_mode& dm = bsd.decimation_modes[i];
|
||||
if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned int weight_count = bsd.get_decimation_info(i).weight_count;
|
||||
|
||||
unsigned int max_precision = dm.maxprec_2planes;
|
||||
if (max_precision > TUNE_MAX_ANGULAR_QUANT)
|
||||
{
|
||||
max_precision = TUNE_MAX_ANGULAR_QUANT;
|
||||
}
|
||||
|
||||
if (max_precision > max_weight_quant)
|
||||
{
|
||||
max_precision = max_weight_quant;
|
||||
}
|
||||
|
||||
compute_angular_endpoints_for_quant_levels(
|
||||
weight_count,
|
||||
dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
|
||||
max_precision, low_values1[i], high_values1[i]);
|
||||
|
||||
compute_angular_endpoints_for_quant_levels(
|
||||
weight_count,
|
||||
dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET,
|
||||
max_precision, low_values2[i], high_values2[i]);
|
||||
}
|
||||
|
||||
unsigned int start = bsd.block_mode_count_1plane_selected;
|
||||
unsigned int end = bsd.block_mode_count_1plane_2plane_selected;
|
||||
for (unsigned int i = start; i < end; i++)
|
||||
{
|
||||
const block_mode& bm = bsd.block_modes[i];
|
||||
unsigned int quant_mode = bm.quant_mode;
|
||||
unsigned int decim_mode = bm.decimation_mode;
|
||||
|
||||
if (quant_mode <= TUNE_MAX_ANGULAR_QUANT)
|
||||
{
|
||||
low_value1[i] = low_values1[decim_mode][quant_mode];
|
||||
high_value1[i] = high_values1[decim_mode][quant_mode];
|
||||
low_value2[i] = low_values2[decim_mode][quant_mode];
|
||||
high_value2[i] = high_values2[decim_mode][quant_mode];
|
||||
}
|
||||
else
|
||||
{
|
||||
low_value1[i] = 0.0f;
|
||||
high_value1[i] = 1.0f;
|
||||
low_value2[i] = 0.0f;
|
||||
high_value2[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
147
engine/thirdparty/astcenc/astcenc_weight_quant_xfer_tables.cpp
vendored
Normal file
147
engine/thirdparty/astcenc/astcenc_weight_quant_xfer_tables.cpp
vendored
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2011-2021 Arm Limited
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at:
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Data tables for quantization transfer.
|
||||
*/
|
||||
|
||||
#include "astcenc_internal.h"
|
||||
|
||||
#define _ 0 // Using _ to indicate an entry that will not be used.
|
||||
|
||||
const quant_and_transfer_table quant_and_xfer_tables[12] {
|
||||
// QUANT2, range 0..1
|
||||
{
|
||||
{0, 64},
|
||||
{0, 1},
|
||||
{0, 64},
|
||||
{0x4000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
0x4000}
|
||||
},
|
||||
// QUANT_3, range 0..2
|
||||
{
|
||||
{0, 32, 64},
|
||||
{0, 1, 2},
|
||||
{0, 32, 64},
|
||||
{0x2000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
_,_,0x4000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
_,_,_,_,0x4020}
|
||||
},
|
||||
// QUANT_4, range 0..3
|
||||
{
|
||||
{0, 21, 43, 64},
|
||||
{0, 1, 2, 3},
|
||||
{0, 21, 43, 64},
|
||||
{0x1500,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x2b00,_,_,_,_,
|
||||
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4015,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
_,_,_,_,_,_,_,_,0x402b}
|
||||
},
|
||||
//QUANT_5, range 0..4
|
||||
{
|
||||
{0, 16, 32, 48, 64},
|
||||
{0, 1, 2, 3, 4},
|
||||
{0, 16, 32, 48, 64},
|
||||
{0x1000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x2000,_,_,_,_,_,_,_,_,_,
|
||||
_,_,_,_,_,_,0x3010,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4020,_,_,_,
|
||||
_,_,_,_,_,_,_,_,_,_,_,_,0x4030}
|
||||
},
|
||||
// QUANT_6, range 0..5
|
||||
{
|
||||
{0, 12, 25, 39, 52, 64},
|
||||
{0, 2, 4, 5, 3, 1},
|
||||
{0, 64, 12, 52, 25, 39},
|
||||
{0x0c00,_,_,_,_,_,_,_,_,_,_,_,0x1900,_,_,_,_,_,_,_,_,_,_,_,_,
|
||||
0x270c,_,_,_,_,_,_,_,_,_,_,_,_,_,0x3419,_,_,_,_,_,_,_,_,_,_,
|
||||
_,_,0x4027,_,_,_,_,_,_,_,_,_,_,_,0x4034}
|
||||
},
|
||||
// QUANT_8, range 0..7
|
||||
{
|
||||
{0, 9, 18, 27, 37, 46, 55, 64},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7},
|
||||
{0, 9, 18, 27, 37, 46, 55, 64},
|
||||
{0x0900,_,_,_,_,_,_,_,_,0x1200,_,_,_,_,_,_,_,_,0x1b09,_,_,
|
||||
_,_,_,_,_,_,0x2512,_,_,_,_,_,_,_,_,_,0x2e1b,_,_,_,_,_,_,_,_,
|
||||
0x3725,_,_,_,_,_,_,_,_,0x402e,_,_,_,_,_,_,_,_,0x4037}
|
||||
},
|
||||
// QUANT_10, range 0..9
|
||||
{
|
||||
{0, 7, 14, 21, 28, 36, 43, 50, 57, 64},
|
||||
{0, 2, 4, 6, 8, 9, 7, 5, 3, 1},
|
||||
{0, 64, 7, 57, 14, 50, 21, 43, 28, 36},
|
||||
{0x0700,_,_,_,_,_,_,0x0e00,_,_,_,_,_,_,0x1507,_,_,_,_,_,_,
|
||||
0x1c0e,_,_,_,_,_,_,0x2415,_,_,_,_,_,_,_,0x2b1c,_,_,_,_,_,
|
||||
_,0x3224,_,_,_,_,_,_,0x392b,_,_,_,_,_,_,0x4032,_,_,_,_,_,
|
||||
_,0x4039}
|
||||
},
|
||||
// QUANT_12, range 0..11
|
||||
{
|
||||
{0, 5, 11, 17, 23, 28, 36, 41, 47, 53, 59, 64},
|
||||
{0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1},
|
||||
{0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36},
|
||||
{0x0500,_,_,_,_,0x0b00,_,_,_,_,_,0x1105,_,_,_,_,_,
|
||||
0x170b,_,_,_,_,_,0x1c11,_,_,_,_,0x2417,_,_,_,_,_,_,_,
|
||||
0x291c,_,_,_,_,0x2f24,_,_,_,_,_,0x3529,_,_,_,_,_,
|
||||
0x3b2f,_,_,_,_,_,0x4035,_,_,_,_,0x403b}
|
||||
},
|
||||
// QUANT_16, range 0..15
|
||||
{
|
||||
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
|
||||
{0x0400,_,_,_,0x0800,_,_,_,0x0c04,_,_,_,0x1108,_,_,_,_,
|
||||
0x150c,_,_,_,0x1911,_,_,_,0x1d15,_,_,_,0x2319,_,_,_,_,
|
||||
_,0x271d,_,_,_,0x2b23,_,_,_,0x2f27,_,_,_,0x342b,_,_,_,
|
||||
_,0x382f,_,_,_,0x3c34,_,_,_,0x4038,_,_,_,0x403c}
|
||||
},
|
||||
// QUANT_20, range 0..19
|
||||
{
|
||||
{0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 35, 38, 41, 45, 48, 51, 55, 58, 61, 64},
|
||||
{0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1},
|
||||
{0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35},
|
||||
{0x0300,_,_,0x0600,_,_,0x0903,_,_,0x0d06,_,_,_,
|
||||
0x1009,_,_,0x130d,_,_,0x1710,_,_,_,0x1a13,_,_,
|
||||
0x1d17,_,_,0x231a,_,_,_,_,_,0x261d,_,_,0x2923,_,_,
|
||||
0x2d26,_,_,_,0x3029,_,_,0x332d,_,_,0x3730,_,_,_,
|
||||
0x3a33,_,_,0x3d37,_,_,0x403a,_,_,0x403d}
|
||||
},
|
||||
// QUANT_24, range 0..23
|
||||
{
|
||||
{0, 2, 5, 8, 11, 13, 16, 19, 22, 24, 27, 30, 34, 37, 40, 42, 45, 48, 51, 53, 56, 59, 62, 64},
|
||||
{0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19, 11, 3, 17, 9, 1},
|
||||
{0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34},
|
||||
{0x0200,_,0x0500,_,_,0x0802,_,_,0x0b05,_,_,0x0d08,
|
||||
_,0x100b,_,_,0x130d,_,_,0x1610,_,_,0x1813,_,
|
||||
0x1b16,_,_,0x1e18,_,_,0x221b,_,_,_,0x251e,_,_,
|
||||
0x2822,_,_,0x2a25,_,0x2d28,_,_,0x302a,_,_,0x332d,
|
||||
_,_,0x3530,_,0x3833,_,_,0x3b35,_,_,0x3e38,_,_,
|
||||
0x403b,_,0x403e}
|
||||
},
|
||||
// QUANT_32, range 0..31
|
||||
{
|
||||
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
|
||||
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},
|
||||
{0x0200,_,0x0400,_,0x0602,_,0x0804,_,0x0a06,_,
|
||||
0x0c08,_,0x0e0a,_,0x100c,_,0x120e,_,0x1410,_,
|
||||
0x1612,_,0x1814,_,0x1a16,_,0x1c18,_,0x1e1a,_,
|
||||
0x221c,_,_,_,0x241e,_,0x2622,_,0x2824,_,0x2a26,_,
|
||||
0x2c28,_,0x2e2a,_,0x302c,_,0x322e,_,0x3430,_,
|
||||
0x3632,_,0x3834,_,0x3a36,_,0x3c38,_,0x3e3a,_,
|
||||
0x403c,_,0x403e}
|
||||
}
|
||||
};
|
||||
201
engine/thirdparty/basis_universal/LICENSE
vendored
Normal file
201
engine/thirdparty/basis_universal/LICENSE
vendored
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
2052
engine/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
vendored
Normal file
2052
engine/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
45
engine/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
vendored
Normal file
45
engine/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
vendored
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
// File: android_astc_decomp.h
|
||||
#ifndef _TCUASTCUTIL_HPP
|
||||
#define _TCUASTCUTIL_HPP
|
||||
/*-------------------------------------------------------------------------
|
||||
* drawElements Quality Program Tester Core
|
||||
* ----------------------------------------
|
||||
*
|
||||
* Copyright 2016 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*//*!
|
||||
* \file
|
||||
* \brief ASTC Utilities.
|
||||
*//*--------------------------------------------------------------------*/
|
||||
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace basisu_astc
|
||||
{
|
||||
namespace astc
|
||||
{
|
||||
|
||||
// Unpacks a single ASTC block to pDst
|
||||
// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently,
|
||||
// which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did).
|
||||
bool decompress_ldr(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight);
|
||||
bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight);
|
||||
bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool& is_hdr);
|
||||
|
||||
} // astc
|
||||
} // basisu
|
||||
|
||||
#endif
|
||||
3310
engine/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
vendored
Normal file
3310
engine/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
224
engine/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
vendored
Normal file
224
engine/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
vendored
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
// basisu_astc_hdr_enc.h
|
||||
#pragma once
|
||||
#include "basisu_enc.h"
|
||||
#include "basisu_gpu_texture.h"
|
||||
#include "../transcoder/basisu_astc_helpers.h"
|
||||
#include "../transcoder/basisu_astc_hdr_core.h"
|
||||
|
||||
namespace basisu
|
||||
{
|
||||
// This MUST be called before encoding any blocks.
|
||||
void astc_hdr_enc_init();
|
||||
|
||||
const uint32_t MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
|
||||
const uint32_t MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
|
||||
const uint32_t MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS;
|
||||
const uint32_t MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS;
|
||||
const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec)
|
||||
const uint32_t MODE7_TOTAL_SUBMODES = 6;
|
||||
|
||||
struct astc_hdr_codec_options
|
||||
{
|
||||
float m_bc6h_err_weight;
|
||||
|
||||
bool m_use_solid;
|
||||
|
||||
bool m_use_mode11;
|
||||
bool m_mode11_uber_mode;
|
||||
uint32_t m_first_mode11_weight_ise_range;
|
||||
uint32_t m_last_mode11_weight_ise_range;
|
||||
bool m_mode11_direct_only;
|
||||
int32_t m_first_mode11_submode;
|
||||
int32_t m_last_mode11_submode;
|
||||
|
||||
bool m_use_mode7_part1;
|
||||
uint32_t m_first_mode7_part1_weight_ise_range;
|
||||
uint32_t m_last_mode7_part1_weight_ise_range;
|
||||
|
||||
bool m_use_mode7_part2;
|
||||
uint32_t m_mode7_part2_part_masks;
|
||||
uint32_t m_first_mode7_part2_weight_ise_range;
|
||||
uint32_t m_last_mode7_part2_weight_ise_range;
|
||||
|
||||
bool m_use_mode11_part2;
|
||||
uint32_t m_mode11_part2_part_masks;
|
||||
uint32_t m_first_mode11_part2_weight_ise_range;
|
||||
uint32_t m_last_mode11_part2_weight_ise_range;
|
||||
|
||||
float m_r_err_scale, m_g_err_scale;
|
||||
|
||||
bool m_refine_weights;
|
||||
|
||||
uint32_t m_level;
|
||||
|
||||
bool m_use_estimated_partitions;
|
||||
uint32_t m_max_estimated_partitions;
|
||||
|
||||
// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
|
||||
bool m_allow_uber_mode;
|
||||
|
||||
astc_hdr_codec_options();
|
||||
|
||||
void init();
|
||||
|
||||
// TODO: set_quality_level() is preferred to configure the codec for transcoding purposes.
|
||||
static const int cMinLevel = 0;
|
||||
static const int cMaxLevel = 4;
|
||||
static const int cDefaultLevel = 1;
|
||||
void set_quality_level(int level);
|
||||
|
||||
private:
|
||||
void set_quality_best();
|
||||
void set_quality_normal();
|
||||
void set_quality_fastest();
|
||||
};
|
||||
|
||||
struct astc_hdr_pack_results
|
||||
{
|
||||
double m_best_block_error;
|
||||
double m_bc6h_block_error; // note this is not used/set by the encoder, here for convienance
|
||||
|
||||
// Encoder results (logical ASTC block)
|
||||
astc_helpers::log_astc_block m_best_blk;
|
||||
|
||||
// For statistical use
|
||||
uint32_t m_best_submodes[2];
|
||||
uint32_t m_best_pat_index;
|
||||
bool m_constrained_weights;
|
||||
|
||||
bool m_improved_via_refinement_flag;
|
||||
|
||||
// Only valid if the block is solid
|
||||
basist::astc_blk m_solid_blk;
|
||||
|
||||
// The BC6H transcoded block
|
||||
basist::bc6h_block m_bc6h_block;
|
||||
|
||||
// Solid color/void extent flag
|
||||
bool m_is_solid;
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_best_block_error = 1e+30f;
|
||||
m_bc6h_block_error = 1e+30f;
|
||||
|
||||
m_best_blk.clear();
|
||||
m_best_blk.m_grid_width = 4;
|
||||
m_best_blk.m_grid_height = 4;
|
||||
m_best_blk.m_endpoint_ise_range = 20; // 0-255
|
||||
|
||||
clear_obj(m_best_submodes);
|
||||
|
||||
m_best_pat_index = 0;
|
||||
m_constrained_weights = false;
|
||||
|
||||
clear_obj(m_bc6h_block);
|
||||
|
||||
m_is_solid = false;
|
||||
m_improved_via_refinement_flag = false;
|
||||
}
|
||||
};
|
||||
|
||||
void interpolate_qlog12_colors(
|
||||
const int e[2][3],
|
||||
basist::half_float* pDecoded_half,
|
||||
vec3F* pDecoded_float,
|
||||
uint32_t n, uint32_t ise_weight_range);
|
||||
|
||||
bool get_astc_hdr_mode_11_block_colors(
|
||||
const uint8_t* pEndpoints,
|
||||
basist::half_float* pDecoded_half,
|
||||
vec3F* pDecoded_float,
|
||||
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
|
||||
|
||||
bool get_astc_hdr_mode_7_block_colors(
|
||||
const uint8_t* pEndpoints,
|
||||
basist::half_float* pDecoded_half,
|
||||
vec3F* pDecoded_float,
|
||||
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
|
||||
|
||||
double eval_selectors(
|
||||
uint32_t num_pixels,
|
||||
uint8_t* pWeights,
|
||||
const basist::half_float* pBlock_pixels_half,
|
||||
uint32_t num_weight_levels,
|
||||
const basist::half_float* pDecoded_half,
|
||||
const astc_hdr_codec_options& coptions,
|
||||
uint32_t usable_selector_bitmask = UINT32_MAX);
|
||||
|
||||
double compute_block_error(const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_options& coptions);
|
||||
|
||||
// Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels.
|
||||
// Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes),
|
||||
// and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format.
|
||||
// The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future.
|
||||
// This function is thread safe, i.e. it may be called from multiple encoding threads simultanously with different blocks.
|
||||
//
|
||||
// Parameters:
|
||||
// pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack
|
||||
// pPacked_block - A pointer to the packed ASTC HDR block
|
||||
// coptions - Codec options
|
||||
// pInternal_results - An optional pointer to details about how the block was packed, for statistics/debugging purposes. May be nullptr.
|
||||
//
|
||||
// Requirements:
|
||||
// astc_hdr_enc_init() MUST have been called first to initialized the codec.
|
||||
// Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504).
|
||||
// Normal values and denormals are okay.
|
||||
bool astc_hdr_enc_block(
|
||||
const float* pRGBPixels,
|
||||
const astc_hdr_codec_options& coptions,
|
||||
basisu::vector<astc_hdr_pack_results> &all_results);
|
||||
|
||||
bool astc_hdr_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_pack_results& results);
|
||||
|
||||
bool astc_hdr_refine_weights(const basist::half_float* pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool* pImproved_flag);
|
||||
|
||||
struct astc_hdr_block_stats
|
||||
{
|
||||
std::mutex m_mutex;
|
||||
|
||||
uint32_t m_total_blocks;
|
||||
uint32_t m_total_2part, m_total_solid;
|
||||
uint32_t m_total_mode7_1part, m_total_mode7_2part;
|
||||
uint32_t m_total_mode11_1part, m_total_mode11_2part;
|
||||
uint32_t m_total_mode11_1part_constrained_weights;
|
||||
|
||||
uint32_t m_weight_range_hist_7[11];
|
||||
uint32_t m_weight_range_hist_7_2part[11];
|
||||
uint32_t m_mode7_submode_hist[6];
|
||||
|
||||
uint32_t m_weight_range_hist_11[11];
|
||||
uint32_t m_weight_range_hist_11_2part[11];
|
||||
uint32_t m_mode11_submode_hist[9];
|
||||
|
||||
uint32_t m_part_hist[32];
|
||||
|
||||
uint32_t m_total_refined;
|
||||
|
||||
astc_hdr_block_stats() { clear(); }
|
||||
|
||||
void clear()
|
||||
{
|
||||
std::lock_guard<std::mutex> lck(m_mutex);
|
||||
|
||||
m_total_blocks = 0;
|
||||
m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0;
|
||||
m_total_mode11_1part_constrained_weights = 0;
|
||||
m_total_refined = 0;
|
||||
|
||||
clear_obj(m_weight_range_hist_11);
|
||||
clear_obj(m_weight_range_hist_11_2part);
|
||||
clear_obj(m_weight_range_hist_7);
|
||||
clear_obj(m_weight_range_hist_7_2part);
|
||||
clear_obj(m_mode7_submode_hist);
|
||||
clear_obj(m_mode11_submode_hist);
|
||||
clear_obj(m_part_hist);
|
||||
}
|
||||
|
||||
void update(const astc_hdr_pack_results& log_blk);
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
} // namespace basisu
|
||||
|
||||
1778
engine/thirdparty/basis_universal/encoder/basisu_backend.cpp
vendored
Normal file
1778
engine/thirdparty/basis_universal/encoder/basisu_backend.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
409
engine/thirdparty/basis_universal/encoder/basisu_backend.h
vendored
Normal file
409
engine/thirdparty/basis_universal/encoder/basisu_backend.h
vendored
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
// basisu_backend.h
|
||||
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
|
||||
#include "../transcoder/basisu.h"
|
||||
#include "basisu_enc.h"
|
||||
#include "../transcoder/basisu_transcoder_internal.h"
|
||||
#include "basisu_frontend.h"
|
||||
|
||||
namespace basisu
|
||||
{
|
||||
struct etc1_selector_palette_entry
|
||||
{
|
||||
etc1_selector_palette_entry()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
basisu::clear_obj(*this);
|
||||
}
|
||||
|
||||
uint8_t operator[] (uint32_t i) const { assert(i < 16); return m_selectors[i]; }
|
||||
uint8_t& operator[] (uint32_t i) { assert(i < 16); return m_selectors[i]; }
|
||||
|
||||
void set_uint32(uint32_t v)
|
||||
{
|
||||
for (uint32_t byte_index = 0; byte_index < 4; byte_index++)
|
||||
{
|
||||
uint32_t b = (v >> (byte_index * 8)) & 0xFF;
|
||||
|
||||
m_selectors[byte_index * 4 + 0] = b & 3;
|
||||
m_selectors[byte_index * 4 + 1] = (b >> 2) & 3;
|
||||
m_selectors[byte_index * 4 + 2] = (b >> 4) & 3;
|
||||
m_selectors[byte_index * 4 + 3] = (b >> 6) & 3;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t get_uint32() const
|
||||
{
|
||||
return get_byte(0) | (get_byte(1) << 8) | (get_byte(2) << 16) | (get_byte(3) << 24);
|
||||
}
|
||||
|
||||
uint32_t get_byte(uint32_t byte_index) const
|
||||
{
|
||||
assert(byte_index < 4);
|
||||
|
||||
return m_selectors[byte_index * 4 + 0] |
|
||||
(m_selectors[byte_index * 4 + 1] << 2) |
|
||||
(m_selectors[byte_index * 4 + 2] << 4) |
|
||||
(m_selectors[byte_index * 4 + 3] << 6);
|
||||
}
|
||||
|
||||
uint8_t operator()(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; }
|
||||
uint8_t& operator()(uint32_t x, uint32_t y) { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; }
|
||||
|
||||
bool operator< (const etc1_selector_palette_entry& other) const
|
||||
{
|
||||
for (uint32_t i = 0; i < 16; i++)
|
||||
{
|
||||
if (m_selectors[i] < other.m_selectors[i])
|
||||
return true;
|
||||
else if (m_selectors[i] != other.m_selectors[i])
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator== (const etc1_selector_palette_entry& other) const
|
||||
{
|
||||
for (uint32_t i = 0; i < 16; i++)
|
||||
{
|
||||
if (m_selectors[i] != other.m_selectors[i])
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t m_selectors[16];
|
||||
};
|
||||
|
||||
typedef basisu::vector<etc1_selector_palette_entry> etc1_selector_palette_entry_vec;
|
||||
|
||||
struct encoder_block
|
||||
{
|
||||
encoder_block()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
uint32_t m_endpoint_predictor;
|
||||
|
||||
int m_endpoint_index;
|
||||
int m_selector_index;
|
||||
|
||||
int m_selector_history_buf_index;
|
||||
|
||||
bool m_is_cr_target;
|
||||
void clear()
|
||||
{
|
||||
m_endpoint_predictor = 0;
|
||||
|
||||
m_endpoint_index = 0;
|
||||
m_selector_index = 0;
|
||||
|
||||
m_selector_history_buf_index = 0;
|
||||
m_is_cr_target = false;
|
||||
}
|
||||
};
|
||||
|
||||
typedef basisu::vector<encoder_block> encoder_block_vec;
|
||||
typedef vector2D<encoder_block> encoder_block_vec2D;
|
||||
|
||||
struct etc1_endpoint_palette_entry
|
||||
{
|
||||
etc1_endpoint_palette_entry()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
color_rgba m_color5;
|
||||
uint32_t m_inten5;
|
||||
bool m_color5_valid;
|
||||
|
||||
void clear()
|
||||
{
|
||||
clear_obj(*this);
|
||||
}
|
||||
};
|
||||
|
||||
typedef basisu::vector<etc1_endpoint_palette_entry> etc1_endpoint_palette_entry_vec;
|
||||
|
||||
struct basisu_backend_params
|
||||
{
|
||||
bool m_etc1s;
|
||||
bool m_debug, m_debug_images;
|
||||
float m_endpoint_rdo_quality_thresh;
|
||||
float m_selector_rdo_quality_thresh;
|
||||
uint32_t m_compression_level;
|
||||
|
||||
bool m_used_global_codebooks;
|
||||
|
||||
bool m_validate;
|
||||
|
||||
basisu_backend_params()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_etc1s = false;
|
||||
m_debug = false;
|
||||
m_debug_images = false;
|
||||
m_endpoint_rdo_quality_thresh = 0.0f;
|
||||
m_selector_rdo_quality_thresh = 0.0f;
|
||||
m_compression_level = 0;
|
||||
m_used_global_codebooks = false;
|
||||
m_validate = true;
|
||||
}
|
||||
};
|
||||
|
||||
struct basisu_backend_slice_desc
|
||||
{
|
||||
basisu_backend_slice_desc()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
clear_obj(*this);
|
||||
}
|
||||
|
||||
uint32_t m_first_block_index;
|
||||
|
||||
uint32_t m_orig_width;
|
||||
uint32_t m_orig_height;
|
||||
|
||||
uint32_t m_width;
|
||||
uint32_t m_height;
|
||||
|
||||
uint32_t m_num_blocks_x;
|
||||
uint32_t m_num_blocks_y;
|
||||
|
||||
uint32_t m_num_macroblocks_x;
|
||||
uint32_t m_num_macroblocks_y;
|
||||
|
||||
uint32_t m_source_file_index; // also the basis image index
|
||||
uint32_t m_mip_index;
|
||||
bool m_alpha;
|
||||
bool m_iframe;
|
||||
};
|
||||
|
||||
typedef basisu::vector<basisu_backend_slice_desc> basisu_backend_slice_desc_vec;
|
||||
|
||||
struct basisu_backend_output
|
||||
{
|
||||
basist::basis_tex_format m_tex_format;
|
||||
|
||||
bool m_etc1s;
|
||||
bool m_uses_global_codebooks;
|
||||
bool m_srgb;
|
||||
|
||||
uint32_t m_num_endpoints;
|
||||
uint32_t m_num_selectors;
|
||||
|
||||
uint8_vec m_endpoint_palette;
|
||||
uint8_vec m_selector_palette;
|
||||
|
||||
basisu_backend_slice_desc_vec m_slice_desc;
|
||||
|
||||
uint8_vec m_slice_image_tables;
|
||||
basisu::vector<uint8_vec> m_slice_image_data;
|
||||
uint16_vec m_slice_image_crcs;
|
||||
|
||||
basisu_backend_output()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_tex_format = basist::basis_tex_format::cETC1S;
|
||||
m_etc1s = false;
|
||||
m_uses_global_codebooks = false;
|
||||
m_srgb = true;
|
||||
|
||||
m_num_endpoints = 0;
|
||||
m_num_selectors = 0;
|
||||
|
||||
m_endpoint_palette.clear();
|
||||
m_selector_palette.clear();
|
||||
m_slice_desc.clear();
|
||||
m_slice_image_tables.clear();
|
||||
m_slice_image_data.clear();
|
||||
m_slice_image_crcs.clear();
|
||||
}
|
||||
|
||||
uint32_t get_output_size_estimate() const
|
||||
{
|
||||
uint32_t total_compressed_bytes = (uint32_t)(m_slice_image_tables.size() + m_endpoint_palette.size() + m_selector_palette.size());
|
||||
for (uint32_t i = 0; i < m_slice_image_data.size(); i++)
|
||||
total_compressed_bytes += (uint32_t)m_slice_image_data[i].size();
|
||||
|
||||
return total_compressed_bytes;
|
||||
}
|
||||
};
|
||||
|
||||
class basisu_backend
|
||||
{
|
||||
BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_backend);
|
||||
|
||||
public:
|
||||
|
||||
basisu_backend();
|
||||
|
||||
void clear();
|
||||
|
||||
void init(basisu_frontend *pFront_end, basisu_backend_params ¶ms, const basisu_backend_slice_desc_vec &slice_desc);
|
||||
|
||||
uint32_t encode();
|
||||
|
||||
const basisu_backend_output &get_output() const { return m_output; }
|
||||
const basisu_backend_params& get_params() const { return m_params; }
|
||||
|
||||
private:
|
||||
basisu_frontend *m_pFront_end;
|
||||
basisu_backend_params m_params;
|
||||
basisu_backend_slice_desc_vec m_slices;
|
||||
basisu_backend_output m_output;
|
||||
|
||||
etc1_endpoint_palette_entry_vec m_endpoint_palette;
|
||||
etc1_selector_palette_entry_vec m_selector_palette;
|
||||
|
||||
struct etc1_global_selector_cb_entry_desc
|
||||
{
|
||||
uint32_t m_pal_index;
|
||||
uint32_t m_mod_index;
|
||||
bool m_was_used;
|
||||
};
|
||||
|
||||
typedef basisu::vector<etc1_global_selector_cb_entry_desc> etc1_global_selector_cb_entry_desc_vec;
|
||||
|
||||
etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc;
|
||||
|
||||
basisu::vector<encoder_block_vec2D> m_slice_encoder_blocks;
|
||||
|
||||
// Maps OLD to NEW endpoint/selector indices
|
||||
uint_vec m_endpoint_remap_table_old_to_new;
|
||||
uint_vec m_endpoint_remap_table_new_to_old;
|
||||
bool_vec m_old_endpoint_was_used;
|
||||
bool_vec m_new_endpoint_was_used;
|
||||
|
||||
uint_vec m_selector_remap_table_old_to_new;
|
||||
|
||||
// Maps NEW to OLD endpoint/selector indices
|
||||
uint_vec m_selector_remap_table_new_to_old;
|
||||
|
||||
uint32_t get_total_slices() const
|
||||
{
|
||||
return (uint32_t)m_slices.size();
|
||||
}
|
||||
|
||||
uint32_t get_total_slice_blocks() const
|
||||
{
|
||||
return m_pFront_end->get_total_output_blocks();
|
||||
}
|
||||
|
||||
uint32_t get_block_index(uint32_t slice_index, uint32_t block_x, uint32_t block_y) const
|
||||
{
|
||||
const basisu_backend_slice_desc &slice = m_slices[slice_index];
|
||||
|
||||
assert((block_x < slice.m_num_blocks_x) && (block_y < slice.m_num_blocks_y));
|
||||
|
||||
return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x;
|
||||
}
|
||||
|
||||
uint32_t get_total_blocks(uint32_t slice_index) const
|
||||
{
|
||||
return m_slices[slice_index].m_num_blocks_x * m_slices[slice_index].m_num_blocks_y;
|
||||
}
|
||||
|
||||
uint32_t get_total_blocks() const
|
||||
{
|
||||
uint32_t total_blocks = 0;
|
||||
for (uint32_t i = 0; i < m_slices.size(); i++)
|
||||
total_blocks += get_total_blocks(i);
|
||||
return total_blocks;
|
||||
}
|
||||
|
||||
// Returns the total number of input texels, not counting padding up to blocks/macroblocks.
|
||||
uint32_t get_total_input_texels(uint32_t slice_index) const
|
||||
{
|
||||
return m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height;
|
||||
}
|
||||
|
||||
uint32_t get_total_input_texels() const
|
||||
{
|
||||
uint32_t total_texels = 0;
|
||||
for (uint32_t i = 0; i < m_slices.size(); i++)
|
||||
total_texels += get_total_input_texels(i);
|
||||
return total_texels;
|
||||
}
|
||||
|
||||
int find_slice(uint32_t block_index, uint32_t *pBlock_x, uint32_t *pBlock_y) const
|
||||
{
|
||||
for (uint32_t i = 0; i < m_slices.size(); i++)
|
||||
{
|
||||
if ((block_index >= m_slices[i].m_first_block_index) && (block_index < (m_slices[i].m_first_block_index + m_slices[i].m_num_blocks_x * m_slices[i].m_num_blocks_y)))
|
||||
{
|
||||
const uint32_t ofs = block_index - m_slices[i].m_first_block_index;
|
||||
const uint32_t x = ofs % m_slices[i].m_num_blocks_x;
|
||||
const uint32_t y = ofs / m_slices[i].m_num_blocks_x;
|
||||
|
||||
if (pBlock_x) *pBlock_x = x;
|
||||
if (pBlock_y) *pBlock_y = y;
|
||||
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void create_endpoint_palette();
|
||||
|
||||
void create_selector_palette();
|
||||
|
||||
// endpoint palette
|
||||
// 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices
|
||||
// selector palette
|
||||
// 4x4 2-bit selectors
|
||||
|
||||
// per-macroblock:
|
||||
// 4 diff bits
|
||||
// 4 flip bits
|
||||
// Endpoint template index, 1-8 endpoint indices
|
||||
// Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices
|
||||
// 4 selector indices
|
||||
|
||||
void reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec &all_endpoint_indices);
|
||||
void sort_selector_codebook();
|
||||
void create_encoder_blocks();
|
||||
void compute_slice_crcs();
|
||||
bool encode_image();
|
||||
bool encode_endpoint_palette();
|
||||
bool encode_selector_palette();
|
||||
int find_video_frame(int slice_index, int delta);
|
||||
void check_for_valid_cr_blocks();
|
||||
};
|
||||
|
||||
} // namespace basisu
|
||||
|
||||
269
engine/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
vendored
Normal file
269
engine/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
vendored
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
// basisu_basis_file.cpp
|
||||
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#include "basisu_basis_file.h"
|
||||
#include "../transcoder/basisu_transcoder.h"
|
||||
|
||||
// The output file version. Keep in sync with BASISD_SUPPORTED_BASIS_VERSION.
|
||||
#define BASIS_FILE_VERSION (0x13)
|
||||
|
||||
namespace basisu
|
||||
{
|
||||
void basisu_file::create_header(const basisu_backend_output &encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame)
|
||||
{
|
||||
m_header.m_header_size = sizeof(basist::basis_file_header);
|
||||
|
||||
m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header);
|
||||
|
||||
m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size();
|
||||
|
||||
m_header.m_total_images = 0;
|
||||
for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++)
|
||||
m_header.m_total_images = maximum<uint32_t>(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1);
|
||||
|
||||
m_header.m_tex_format = (int)encoder_output.m_tex_format;
|
||||
m_header.m_flags = 0;
|
||||
|
||||
if (encoder_output.m_etc1s)
|
||||
{
|
||||
assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S);
|
||||
m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(encoder_output.m_tex_format != basist::basis_tex_format::cETC1S);
|
||||
}
|
||||
|
||||
if (y_flipped)
|
||||
m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagYFlipped;
|
||||
if (encoder_output.m_uses_global_codebooks)
|
||||
m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook;
|
||||
if (encoder_output.m_srgb)
|
||||
m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB;
|
||||
|
||||
for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++)
|
||||
{
|
||||
if (encoder_output.m_slice_desc[i].m_alpha)
|
||||
{
|
||||
m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagHasAlphaSlices;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_header.m_tex_type = static_cast<uint8_t>(tex_type);
|
||||
m_header.m_us_per_frame = clamp<uint32_t>(us_per_frame, 0, basist::cBASISMaxUSPerFrame);
|
||||
|
||||
m_header.m_userdata0 = userdata0;
|
||||
m_header.m_userdata1 = userdata1;
|
||||
|
||||
m_header.m_total_endpoints = encoder_output.m_num_endpoints;
|
||||
if (!encoder_output.m_uses_global_codebooks)
|
||||
{
|
||||
m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs;
|
||||
m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(!m_endpoint_cb_file_ofs);
|
||||
}
|
||||
|
||||
m_header.m_total_selectors = encoder_output.m_num_selectors;
|
||||
if (!encoder_output.m_uses_global_codebooks)
|
||||
{
|
||||
m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs;
|
||||
m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(!m_selector_cb_file_ofs);
|
||||
}
|
||||
|
||||
m_header.m_tables_file_ofs = m_tables_file_ofs;
|
||||
m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size();
|
||||
|
||||
m_header.m_slice_desc_file_ofs = m_slice_descs_file_ofs;
|
||||
}
|
||||
|
||||
bool basisu_file::create_image_descs(const basisu_backend_output &encoder_output)
|
||||
{
|
||||
const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
|
||||
|
||||
m_images_descs.resize(slice_descs.size());
|
||||
|
||||
uint64_t cur_slice_file_ofs = m_first_image_file_ofs;
|
||||
for (uint32_t i = 0; i < slice_descs.size(); i++)
|
||||
{
|
||||
clear_obj(m_images_descs[i]);
|
||||
|
||||
m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index;
|
||||
m_images_descs[i].m_level_index = slice_descs[i].m_mip_index;
|
||||
|
||||
if (slice_descs[i].m_alpha)
|
||||
m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha;
|
||||
if (slice_descs[i].m_iframe)
|
||||
m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsFrameIsIFrame;
|
||||
|
||||
m_images_descs[i].m_orig_width = slice_descs[i].m_orig_width;
|
||||
m_images_descs[i].m_orig_height = slice_descs[i].m_orig_height;
|
||||
m_images_descs[i].m_num_blocks_x = slice_descs[i].m_num_blocks_x;
|
||||
m_images_descs[i].m_num_blocks_y = slice_descs[i].m_num_blocks_y;
|
||||
m_images_descs[i].m_slice_data_crc16 = encoder_output.m_slice_image_crcs[i];
|
||||
|
||||
if (encoder_output.m_slice_image_data[i].size() > UINT32_MAX)
|
||||
{
|
||||
error_printf("basisu_file::create_image_descs: Basis file too large\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint32_t image_size = (uint32_t)encoder_output.m_slice_image_data[i].size();
|
||||
|
||||
m_images_descs[i].m_file_ofs = (uint32_t)cur_slice_file_ofs;
|
||||
m_images_descs[i].m_file_size = image_size;
|
||||
|
||||
cur_slice_file_ofs += image_size;
|
||||
if (cur_slice_file_ofs > UINT32_MAX)
|
||||
{
|
||||
error_printf("basisu_file::create_image_descs: Basis file too large\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
assert(cur_slice_file_ofs == m_total_file_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void basisu_file::create_comp_data(const basisu_backend_output &encoder_output)
|
||||
{
|
||||
const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
|
||||
|
||||
append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&m_header), sizeof(m_header));
|
||||
|
||||
assert(m_comp_data.size() == m_slice_descs_file_ofs);
|
||||
append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0]));
|
||||
|
||||
if (!encoder_output.m_uses_global_codebooks)
|
||||
{
|
||||
if (encoder_output.m_endpoint_palette.size())
|
||||
{
|
||||
assert(m_comp_data.size() == m_endpoint_cb_file_ofs);
|
||||
append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size());
|
||||
}
|
||||
|
||||
if (encoder_output.m_selector_palette.size())
|
||||
{
|
||||
assert(m_comp_data.size() == m_selector_cb_file_ofs);
|
||||
append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size());
|
||||
}
|
||||
}
|
||||
|
||||
if (encoder_output.m_slice_image_tables.size())
|
||||
{
|
||||
assert(m_comp_data.size() == m_tables_file_ofs);
|
||||
append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size());
|
||||
}
|
||||
|
||||
assert(m_comp_data.size() == m_first_image_file_ofs);
|
||||
for (uint32_t i = 0; i < slice_descs.size(); i++)
|
||||
append_vector(m_comp_data, &encoder_output.m_slice_image_data[i][0], encoder_output.m_slice_image_data[i].size());
|
||||
|
||||
assert(m_comp_data.size() == m_total_file_size);
|
||||
}
|
||||
|
||||
void basisu_file::fixup_crcs()
|
||||
{
|
||||
basist::basis_file_header *pHeader = reinterpret_cast<basist::basis_file_header *>(&m_comp_data[0]);
|
||||
|
||||
pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header);
|
||||
pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0);
|
||||
|
||||
pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0);
|
||||
|
||||
pHeader->m_sig = basist::basis_file_header::cBASISSigValue;
|
||||
pHeader->m_ver = BASIS_FILE_VERSION;// basist::basis_file_header::cBASISFirstVersion;
|
||||
}
|
||||
|
||||
bool basisu_file::init(const basisu_backend_output &encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame)
|
||||
{
|
||||
clear();
|
||||
|
||||
const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
|
||||
|
||||
// The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large.
|
||||
uint64_t check_size = 0;
|
||||
if (!encoder_output.m_uses_global_codebooks)
|
||||
{
|
||||
check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() +
|
||||
(uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() +
|
||||
(uint64_t)encoder_output.m_slice_image_tables.size();
|
||||
}
|
||||
if (check_size >= 0xFFFF0000ULL)
|
||||
{
|
||||
error_printf("basisu_file::init: File is too large!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_header_file_ofs = 0;
|
||||
m_slice_descs_file_ofs = sizeof(basist::basis_file_header);
|
||||
if (encoder_output.m_tex_format == basist::basis_tex_format::cETC1S)
|
||||
{
|
||||
if (encoder_output.m_uses_global_codebooks)
|
||||
{
|
||||
m_endpoint_cb_file_ofs = 0;
|
||||
m_selector_cb_file_ofs = 0;
|
||||
m_tables_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size();
|
||||
m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size();
|
||||
m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size();
|
||||
}
|
||||
m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_endpoint_cb_file_ofs = 0;
|
||||
m_selector_cb_file_ofs = 0;
|
||||
m_tables_file_ofs = 0;
|
||||
m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size();
|
||||
}
|
||||
|
||||
uint64_t total_file_size = m_first_image_file_ofs;
|
||||
for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++)
|
||||
total_file_size += encoder_output.m_slice_image_data[i].size();
|
||||
if (total_file_size >= 0xFFFF0000ULL)
|
||||
{
|
||||
error_printf("basisu_file::init: File is too large!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_total_file_size = (uint32_t)total_file_size;
|
||||
|
||||
create_header(encoder_output, tex_type, userdata0, userdata1, y_flipped, us_per_frame);
|
||||
|
||||
if (!create_image_descs(encoder_output))
|
||||
return false;
|
||||
|
||||
create_comp_data(encoder_output);
|
||||
|
||||
fixup_crcs();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace basisu
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue