feat: godot-engine-source-4.3-stable

This commit is contained in:
Jan van der Weide 2025-01-17 16:36:38 +01:00
parent c59a7dcade
commit 7125d019b5
11149 changed files with 5070401 additions and 0 deletions

5
engine/thirdparty/etcpak/AUTHORS.txt vendored Normal file
View file

@ -0,0 +1,5 @@
Bartosz Taudul <wolf@nereid.pl>
Daniel Jungmann <el.3d.source@gmail.com>
Florian Penzkofer <fp@nullptr.de>
Jae-Ho Nah <nahjaeho@gmail.com>
Marcin Ławicki <marcin.lawicki@gmail.com>

120
engine/thirdparty/etcpak/Dither.cpp vendored Normal file
View file

@ -0,0 +1,120 @@
#include <algorithm>
#include <string.h>
#include "Dither.hpp"
#include "Math.hpp"
#ifdef __SSE4_1__
# ifdef _MSC_VER
# include <intrin.h>
# include <Windows.h>
# else
# include <x86intrin.h>
# endif
#endif
#ifdef __AVX2__
// AVX2 variant of Dither() for a block whose pixels are already held in four
// 128-bit registers. px0/px1 are packed into one 256-bit vector (px0 in the low
// lane, px1 in the high lane), px2/px3 into a second one; the dither offsets
// are applied and the resulting 64 bytes are written to `data`.
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
{
// Per-pixel offsets split into a non-negative "add" part (a*) and a
// non-negative "subtract" part (s*), because the saturating byte ops below
// work on unsigned values. a31/s31 are applied to bytes 0 and 2 of every
// 4-byte pixel, a63/s63 to byte 1.
static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };
// Expand the tables to one entry per byte. The 4th byte of every pixel gets
// 0, so it passes through both saturating operations unchanged.
// *0 covers pixels 0..7, *1 covers pixels 8..15.
const __m256i BayerAdd0 = _mm256_setr_epi8(
a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
);
const __m256i BayerAdd1 = _mm256_setr_epi8(
a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
);
const __m256i BayerSub0 = _mm256_setr_epi8(
s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
);
const __m256i BayerSub1 = _mm256_setr_epi8(
s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
);
// Combine the four 128-bit inputs into two 256-bit vectors (low lane first).
__m256i l0 = _mm256_inserti128_si256( _mm256_castsi128_si256( px0 ), px1, 1 );
__m256i l1 = _mm256_inserti128_si256( _mm256_castsi128_si256( px2 ), px3, 1 );
// Unsigned saturating add, then unsigned saturating subtract, per byte.
__m256i a0 = _mm256_adds_epu8( l0, BayerAdd0 );
__m256i a1 = _mm256_adds_epu8( l1, BayerAdd1 );
__m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
__m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
// Unaligned stores: `data` needs room for 64 bytes but no particular alignment.
_mm256_storeu_si256( (__m256i*)(data ), s0 );
_mm256_storeu_si256( (__m256i*)(data+32), s1 );
}
#endif
// Applies a fixed 4x4 ordered-dither pattern, in place, to one block of
// 16 pixels stored as 4 bytes each (64 bytes starting at `data`).
//
// Bytes 0 and 2 of every pixel receive the "31" offsets, byte 1 the "63"
// offsets; each result is clamped to 0..255. Byte 3 is never modified, in
// either code path, so both builds produce the same 64 output bytes.
void Dither( uint8_t* data )
{
#ifdef __AVX2__
    // Offsets split into a non-negative add part (a*) and a non-negative
    // subtract part (s*) so they can be applied with unsigned saturating ops.
    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };

    // One entry per byte; the 4th byte of each pixel gets 0 and passes through.
    const __m256i BayerAdd0 = _mm256_setr_epi8(
        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
    );
    const __m256i BayerAdd1 = _mm256_setr_epi8(
        a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
    );
    const __m256i BayerSub0 = _mm256_setr_epi8(
        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
    );
    const __m256i BayerSub1 = _mm256_setr_epi8(
        s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
    );

    // Pixels 0..7 and 8..15, loaded without any alignment requirement.
    __m256i px0 = _mm256_loadu_si256( (__m256i*)(data   ) );
    __m256i px1 = _mm256_loadu_si256( (__m256i*)(data+32) );

    // Unsigned saturating add followed by unsigned saturating subtract.
    __m256i a0 = _mm256_adds_epu8( px0, BayerAdd0 );
    __m256i a1 = _mm256_adds_epu8( px1, BayerAdd1 );
    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );

    _mm256_storeu_si256( (__m256i*)(data   ), s0 );
    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
#else
    // Signed per-pixel offsets derived from the 4x4 Bayer threshold matrix
    // (entries 0..15, re-centred by -8). Integer division truncates toward
    // zero, giving -5..4 for the "31" table and -2..2 for the "63" table.
    // NOTE(review): the 31/63 suffixes presumably refer to 5-bit and 6-bit
    // channel precision - confirm against the callers.
    static constexpr int8_t Bayer31[16] = {
        ( 0-8)*2/3, ( 8-8)*2/3, ( 2-8)*2/3, (10-8)*2/3,
        (12-8)*2/3, ( 4-8)*2/3, (14-8)*2/3, ( 6-8)*2/3,
        ( 3-8)*2/3, (11-8)*2/3, ( 1-8)*2/3, ( 9-8)*2/3,
        (15-8)*2/3, ( 7-8)*2/3, (13-8)*2/3, ( 5-8)*2/3
    };
    static constexpr int8_t Bayer63[16] = {
        ( 0-8)*2/6, ( 8-8)*2/6, ( 2-8)*2/6, (10-8)*2/6,
        (12-8)*2/6, ( 4-8)*2/6, (14-8)*2/6, ( 6-8)*2/6,
        ( 3-8)*2/6, (11-8)*2/6, ( 1-8)*2/6, ( 9-8)*2/6,
        (15-8)*2/6, ( 7-8)*2/6, (13-8)*2/6, ( 5-8)*2/6
    };

    // Work on the bytes directly instead of round-tripping through a
    // uint32_t: this keeps the channel selection independent of host byte
    // order and leaves byte 3 untouched, exactly like the AVX2 path above.
    for( int i=0; i<16; i++ )
    {
        uint8_t* px = data + i*4;
        px[0] = clampu8( px[0] + Bayer31[i] );
        px[1] = clampu8( px[1] + Bayer63[i] );
        px[2] = clampu8( px[2] + Bayer31[i] );
    }
#endif
}

21
engine/thirdparty/etcpak/Dither.hpp vendored Normal file
View file

@ -0,0 +1,21 @@
#ifndef __DITHER_HPP__
#define __DITHER_HPP__
#include <stddef.h>
#include <stdint.h>
#ifdef __AVX2__
# ifdef _MSC_VER
# include <intrin.h>
# else
# include <x86intrin.h>
# endif
#endif
// Dithers, in place, the 16 four-byte pixels (64 bytes) starting at `data`.
void Dither( uint8_t* data );
#ifdef __AVX2__
// AVX2-only variant: takes the block as four 128-bit registers (16 bytes each)
// and writes the 64 dithered bytes to `data`.
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 );
#endif
#endif

View file

@ -0,0 +1,20 @@
#ifndef __FORCEINLINE_HPP__
#define __FORCEINLINE_HPP__
// etcpak_force_inline: request that the annotated function is always inlined.
// Compilers defining __GNUC__ get the always_inline attribute plus `inline`,
// MSVC gets __forceinline, anything else falls back to a plain `inline`.
#if defined(__GNUC__)
# define etcpak_force_inline __attribute__((always_inline)) inline
#elif defined(_MSC_VER)
# define etcpak_force_inline __forceinline
#else
# define etcpak_force_inline inline
#endif
// etcpak_no_inline: request that the annotated function is never inlined.
// Expands to nothing on compilers that are neither __GNUC__-compatible nor MSVC.
#if defined(__GNUC__)
# define etcpak_no_inline __attribute__((noinline))
#elif defined(_MSC_VER)
# define etcpak_no_inline __declspec(noinline)
#else
# define etcpak_no_inline
#endif
#endif

26
engine/thirdparty/etcpak/LICENSE.txt vendored Normal file
View file

@ -0,0 +1,26 @@
etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
Copyright (c) 2013-2022, Bartosz Taudul <wolf@nereid.pl>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

92
engine/thirdparty/etcpak/Math.hpp vendored Normal file
View file

@ -0,0 +1,92 @@
#ifndef __DARKRL__MATH_HPP__
#define __DARKRL__MATH_HPP__
#include <algorithm>
#include <cmath>
#include <stdint.h>
#include "ForceInline.hpp"
// Rounds `val` up to a power of two. 0 maps to 1 and a value that already is
// a power of two is returned unchanged (e.g. 5 -> 8, 8 -> 8).
// NOTE(review): the bit smearing assumes an unsigned integer T - confirm callers.
template<typename T>
static etcpak_force_inline T AlignPOT( T val )
{
    if( val == 0 ) return 1;

    // Step back by one so exact powers of two are not bumped to the next one.
    val--;

    // Copy the highest set bit into every lower position (shifts 1, 2, 4, ...).
    const unsigned int bitCount = sizeof( T ) * 8;
    unsigned int shift = 1;
    while( shift < bitCount )
    {
        val |= val >> shift;
        shift <<= 1;
    }

    // All-ones below the top bit, plus one, is the next power of two.
    return val + 1;
}
// Returns the number of 1 bits in `val`, using parallel partial sums:
// 2-bit counts, then 4-bit, then per-byte, then folded into the low byte.
static etcpak_force_inline int CountSetBits( uint32_t val )
{
    const uint32_t pairs   = val - ( ( val >> 1 ) & 0x55555555 );
    const uint32_t nibbles = ( ( pairs >> 2 ) & 0x33333333 ) + ( pairs & 0x33333333 );
    const uint32_t bytes   = ( ( nibbles >> 4 ) + nibbles ) & 0x0f0f0f0f;
    const uint32_t halves  = bytes + ( bytes >> 8 );
    const uint32_t total   = halves + ( halves >> 16 );
    // The count is at most 32, so six bits are enough.
    return total & 0x0000003f;
}
// Returns the number of zero bits above the highest set bit of `val`
// (32 when `val` is 0).
static etcpak_force_inline int CountLeadingZeros( uint32_t val )
{
    // Smear the highest set bit downwards so every bit below it is set too.
    for( unsigned int shift = 1; shift <= 16; shift <<= 1 )
    {
        val |= val >> shift;
    }
    // The remaining zero bits are exactly the leading zeros.
    return 32 - CountSetBits( val );
}
// Converts one sRGB-encoded component to linear: a straight division by 12.92
// up to 0.04045, and the 2.4-exponent power curve above that.
static etcpak_force_inline float sRGB2linear( float v )
{
    if( v <= 0.04045f ) return v / 12.92f;

    const float offset = 0.055f;
    return pow( ( v + offset ) / ( 1 + offset ), 2.4f );
}
// Converts one linear component to sRGB encoding: a straight multiplication by
// 12.92 up to 0.0031308, and the 1/2.4-exponent power curve above that.
static etcpak_force_inline float linear2sRGB( float v )
{
    if( v <= 0.0031308f ) return 12.92f * v;

    const float offset = 0.055f;
    // `auto` keeps whatever type the selected pow overload returns, so the
    // arithmetic below is carried out exactly as in a single expression.
    const auto curve = pow( v, 1/2.4f );
    return ( 1 + offset ) * curve - offset;
}
// Evaluates the cubic 3x^2 - 2x^3 at `x`. No clamping of `x` is performed.
template<class T>
static etcpak_force_inline T SmoothStep( T x )
{
    const auto squared = x*x;
    return squared*(3-2*x);
}
// Clamps `val` to the range 0..255.
static etcpak_force_inline uint8_t clampu8( int32_t val )
{
    // No bits outside the low byte set: the value is already in range.
    const bool inRange = ( val & ~0xFF ) == 0;
    // Out of range: ~val is negative exactly when val is positive (too large),
    // so shifting its sign bit across the word yields all ones -> 0xFF; for a
    // negative val the shift yields 0.
    return inRange ? val : ( ( ~val ) >> 31 ) & 0xFF;
}
// Returns `val` multiplied by itself.
template<class T>
static etcpak_force_inline T sq( T val )
{
    const auto product = val * val;
    return product;
}
// Multiplies two values and rescales the product by roughly 1/255 with
// rounding, using only shifts and adds (e.g. 255 * 255 -> 255).
// NOTE(review): presumably both inputs are in 0..255 - confirm against callers.
static etcpak_force_inline int mul8bit( int a, int b )
{
    const int biased = a*b + 128;
    const int folded = biased + ( biased >> 8 );
    return folded >> 8;
}
#endif

View file

@ -0,0 +1,50 @@
#ifndef __PROCESSCOMMON_HPP__
#define __PROCESSCOMMON_HPP__
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
// Returns the index of the smallest of the first `num` entries of `err`.
// Ties resolve to the lowest index; 0 is returned when `num` is 0 or 1
// without reading past the first element.
template<class T>
static size_t GetLeastError( const T* err, size_t num )
{
    size_t best = 0;
    size_t candidate = 1;
    while( candidate < num )
    {
        // Strict comparison keeps the earliest entry on ties.
        if( err[candidate] < err[best] ) best = candidate;
        ++candidate;
    }
    return best;
}
// Reverses the byte order of the upper 32 bits of `d`; the lower 32 bits are
// passed through unchanged.
static uint64_t FixByteOrder( uint64_t d )
{
    const uint64_t low = d & 0x00000000FFFFFFFF;
    const uint32_t high = uint32_t( d >> 32 );

    // Plain 32-bit byte swap of the upper word.
    const uint32_t swapped =
        ( high >> 24 ) |
        ( ( high >> 8 ) & 0x0000FF00 ) |
        ( ( high << 8 ) & 0x00FF0000 ) |
        ( high << 24 );

    return ( uint64_t( swapped ) << 32 ) | low;
}
// Adds table indices and per-pixel selector bits to the block word `d`.
//
// terr : two rows of 8 error values; the index of the smallest entry of each
//        row is written at bit 26 (row 0) and bit 29 (row 1).
// tsel : for each of the 16 pixels, 8 candidate selector values.
// id   : 16 entries; only the parity of id[i] is used, to choose which of the
//        two winning indices applies to pixel i.
// Returns `d` with those bits OR-ed in.
template<class T, class S>
static uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id )
{
    const size_t best[2] = { GetLeastError( terr[0], 8 ), GetLeastError( terr[1], 8 ) };

    d |= best[0] << 26;
    d |= best[1] << 29;

    for( int px=0; px<16; px++ )
    {
        const size_t half = id[px] % 2;
        const uint64_t sel = tsel[px][best[half]];
        const uint64_t lowBit = sel & 0x1;
        const uint64_t highBit = sel & 0x2;
        // Bit 0 of the selector lands on bit (px + 32). Bit 1 is already at
        // position 1, so shifting by (px + 47) puts it on bit (px + 48).
        d |= lowBit << ( px + 32 );
        d |= highBit << ( px + 47 );
    }
    return d;
}
#endif

1086
engine/thirdparty/etcpak/ProcessDxtc.cpp vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,14 @@
#ifndef __PROCESSDXT1_HPP__
#define __PROCESSDXT1_HPP__
#include <stddef.h>
#include <stdint.h>
// Block-compression entry points; the definitions are not part of this header.
// NOTE(review): judging by the names, `src` is the source pixel data, `dst`
// receives the compressed 64-bit words, `blocks` is the number of blocks to
// process and `width` the source image width - confirm against the
// implementation in ProcessDxtc.cpp.
void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
#endif

4331
engine/thirdparty/etcpak/ProcessRGB.cpp vendored Normal file

File diff suppressed because it is too large Load diff

16
engine/thirdparty/etcpak/ProcessRGB.hpp vendored Normal file
View file

@ -0,0 +1,16 @@
#ifndef __PROCESSRGB_HPP__
#define __PROCESSRGB_HPP__
// <stddef.h> is required for size_t, which every declaration below uses;
// <stdint.h> alone does not guarantee it.
#include <stddef.h>
#include <stdint.h>
// Block-compression entry points; the definitions are not part of this header.
// NOTE(review): judging by the names, `src` is the source pixel data, `dst`
// receives the compressed 64-bit words, `blocks` is the number of blocks to
// process and `width` the source image width - confirm against the
// implementation in ProcessRGB.cpp.
void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEacR( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEacRg( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
#endif

223
engine/thirdparty/etcpak/Tables.cpp vendored Normal file
View file

@ -0,0 +1,223 @@
#include "Tables.hpp"
// Eight rows of four signed modifiers, each laid out as
// { small, large, -small, -large }.
// NOTE(review): the magnitudes match the ETC1 intensity-modifier table -
// confirm the intended column order against the users in ProcessRGB.cpp.
const int32_t g_table[8][4] = {
{ 2, 8, -2, -8 },
{ 5, 17, -5, -17 },
{ 9, 29, -9, -29 },
{ 13, 42, -13, -42 },
{ 18, 60, -18, -60 },
{ 24, 80, -24, -80 },
{ 33, 106, -33, -106 },
{ 47, 183, -47, -183 }
};
// Same entries as g_table, each multiplied by 256 and stored as 64-bit values.
const int64_t g_table256[8][4] = {
{ 2*256, 8*256, -2*256, -8*256 },
{ 5*256, 17*256, -5*256, -17*256 },
{ 9*256, 29*256, -9*256, -29*256 },
{ 13*256, 42*256, -13*256, -42*256 },
{ 18*256, 60*256, -18*256, -60*256 },
{ 24*256, 80*256, -24*256, -80*256 },
{ 33*256, 106*256, -33*256, -106*256 },
{ 47*256, 183*256, -47*256, -183*256 }
};
// Four rows of 16 per-pixel ids. Row 0 splits the 16 pixels into two runs of
// eight, row 1 alternates in pairs; rows 2 and 3 repeat those patterns with 4
// added to every id. EncodeSelectors() only looks at the parity of an id.
// NOTE(review): presumably one row per block layout/mode - confirm in ProcessRGB.cpp.
const uint32_t g_id[4][16] = {
{ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
{ 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
{ 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
};
// Entry i is i * 0x11, i.e. the 4-bit value i replicated into both nibbles of
// a byte (0x0 -> 0x00, 0xF -> 0xFF).
const uint32_t g_avg2[16] = {
0x00,
0x11,
0x22,
0x33,
0x44,
0x55,
0x66,
0x77,
0x88,
0x99,
0xAA,
0xBB,
0xCC,
0xDD,
0xEE,
0xFF
};
// 64 packed 32-bit words in four groups of 16. Within a group the upper half
// is constant (0x8080, 0x8000, 0x0080, 0x0000 respectively) and the lower half
// is either 0x0402 or 0xE002, following the same 16-entry pattern every time.
// NOTE(review): the meaning of the individual fields is not visible here -
// see the users in ProcessRGB.cpp.
const uint32_t g_flags[64] = {
0x80800402, 0x80800402, 0x80800402, 0x80800402,
0x80800402, 0x80800402, 0x80800402, 0x8080E002,
0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
0x80000402, 0x80000402, 0x80000402, 0x80000402,
0x80000402, 0x80000402, 0x80000402, 0x8000E002,
0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
0x00800402, 0x00800402, 0x00800402, 0x00800402,
0x00800402, 0x00800402, 0x00800402, 0x0080E002,
0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
0x00000402, 0x00000402, 0x00000402, 0x00000402,
0x00000402, 0x00000402, 0x00000402, 0x0000E002,
0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
};
// Sixteen rows of eight signed modifiers. In every row, column 3 holds the
// smallest value and column 7 the largest; g_alphaRange relies on that.
// NOTE(review): the values match the ETC2/EAC alpha modifier table - confirm
// against the specification if they are ever edited.
const int32_t g_alpha[16][8] = {
{ -3, -6, -9, -15, 2, 5, 8, 14 },
{ -3, -7, -10, -13, 2, 6, 9, 12 },
{ -2, -5, -8, -13, 1, 4, 7, 12 },
{ -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 },
{ -3, -7, -9, -11, 2, 6, 8, 10 },
{ -4, -7, -8, -11, 3, 6, 7, 10 },
{ -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 },
{ -2, -5, -8, -10, 1, 4, 7, 9 },
{ -2, -4, -8, -10, 1, 3, 7, 9 },
{ -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 },
{ -1, -2, -3, -10, 0, 1, 2, 9 },
{ -4, -6, -8, -9, 3, 5, 7, 8 },
{ -3, -5, -7, -9, 2, 4, 6, 8 }
};
// Entry i is 8 * i, except entry 0, which is 1 rather than 0.
// NOTE(review): presumably the 11-bit EAC multiplier lookup - confirm in ProcessRGB.cpp.
const int32_t g_alpha11Mul[16] = { 1, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120 };
// One value per g_alpha row: 0x100FF divided by (1 + largest - smallest
// modifier of that row), using columns 7 and 3 of g_alpha. Only six distinct
// results occur (spans 30, 26, 24, 22, 20 and 18).
const int32_t g_alphaRange[16] = {
0x100FF / ( 1 + g_alpha[0][7] - g_alpha[0][3] ),
0x100FF / ( 1 + g_alpha[1][7] - g_alpha[1][3] ),
0x100FF / ( 1 + g_alpha[2][7] - g_alpha[2][3] ),
0x100FF / ( 1 + g_alpha[3][7] - g_alpha[3][3] ),
0x100FF / ( 1 + g_alpha[4][7] - g_alpha[4][3] ),
0x100FF / ( 1 + g_alpha[5][7] - g_alpha[5][3] ),
0x100FF / ( 1 + g_alpha[6][7] - g_alpha[6][3] ),
0x100FF / ( 1 + g_alpha[7][7] - g_alpha[7][3] ),
0x100FF / ( 1 + g_alpha[8][7] - g_alpha[8][3] ),
0x100FF / ( 1 + g_alpha[9][7] - g_alpha[9][3] ),
0x100FF / ( 1 + g_alpha[10][7] - g_alpha[10][3] ),
0x100FF / ( 1 + g_alpha[11][7] - g_alpha[11][3] ),
0x100FF / ( 1 + g_alpha[12][7] - g_alpha[12][3] ),
0x100FF / ( 1 + g_alpha[13][7] - g_alpha[13][3] ),
0x100FF / ( 1 + g_alpha[14][7] - g_alpha[14][3] ),
0x100FF / ( 1 + g_alpha[15][7] - g_alpha[15][3] ),
};
#ifdef __SSE4_1__
// Columns 0 and 1 of g_table (the positive modifiers), one 16-bit lane per row.
const __m128i g_table_SIMD[2] =
{
_mm_setr_epi16( 2, 5, 9, 13, 18, 24, 33, 47),
_mm_setr_epi16( 8, 17, 29, 42, 60, 80, 106, 183)
};
// Same two columns, each entry multiplied by 128.
const __m128i g_table128_SIMD[2] =
{
_mm_setr_epi16( 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128),
_mm_setr_epi16( 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128)
};
// Same two columns multiplied by 256 in 32-bit lanes, four rows per vector:
// [0] = column 0 of rows 0..3, [1] = column 1 of rows 0..3,
// [2] = column 0 of rows 4..7, [3] = column 1 of rows 4..7.
const __m128i g_table256_SIMD[4] =
{
_mm_setr_epi32( 2*256, 5*256, 9*256, 13*256),
_mm_setr_epi32( 8*256, 17*256, 29*256, 42*256),
_mm_setr_epi32( 18*256, 24*256, 33*256, 47*256),
_mm_setr_epi32( 60*256, 80*256, 106*256, 183*256)
};
// Row r of g_alpha as eight 16-bit lanes in vector r.
const __m128i g_alpha_SIMD[16] = {
_mm_setr_epi16( g_alpha[ 0][0], g_alpha[ 0][1], g_alpha[ 0][2], g_alpha[ 0][3], g_alpha[ 0][4], g_alpha[ 0][5], g_alpha[ 0][6], g_alpha[ 0][7] ),
_mm_setr_epi16( g_alpha[ 1][0], g_alpha[ 1][1], g_alpha[ 1][2], g_alpha[ 1][3], g_alpha[ 1][4], g_alpha[ 1][5], g_alpha[ 1][6], g_alpha[ 1][7] ),
_mm_setr_epi16( g_alpha[ 2][0], g_alpha[ 2][1], g_alpha[ 2][2], g_alpha[ 2][3], g_alpha[ 2][4], g_alpha[ 2][5], g_alpha[ 2][6], g_alpha[ 2][7] ),
_mm_setr_epi16( g_alpha[ 3][0], g_alpha[ 3][1], g_alpha[ 3][2], g_alpha[ 3][3], g_alpha[ 3][4], g_alpha[ 3][5], g_alpha[ 3][6], g_alpha[ 3][7] ),
_mm_setr_epi16( g_alpha[ 4][0], g_alpha[ 4][1], g_alpha[ 4][2], g_alpha[ 4][3], g_alpha[ 4][4], g_alpha[ 4][5], g_alpha[ 4][6], g_alpha[ 4][7] ),
_mm_setr_epi16( g_alpha[ 5][0], g_alpha[ 5][1], g_alpha[ 5][2], g_alpha[ 5][3], g_alpha[ 5][4], g_alpha[ 5][5], g_alpha[ 5][6], g_alpha[ 5][7] ),
_mm_setr_epi16( g_alpha[ 6][0], g_alpha[ 6][1], g_alpha[ 6][2], g_alpha[ 6][3], g_alpha[ 6][4], g_alpha[ 6][5], g_alpha[ 6][6], g_alpha[ 6][7] ),
_mm_setr_epi16( g_alpha[ 7][0], g_alpha[ 7][1], g_alpha[ 7][2], g_alpha[ 7][3], g_alpha[ 7][4], g_alpha[ 7][5], g_alpha[ 7][6], g_alpha[ 7][7] ),
_mm_setr_epi16( g_alpha[ 8][0], g_alpha[ 8][1], g_alpha[ 8][2], g_alpha[ 8][3], g_alpha[ 8][4], g_alpha[ 8][5], g_alpha[ 8][6], g_alpha[ 8][7] ),
_mm_setr_epi16( g_alpha[ 9][0], g_alpha[ 9][1], g_alpha[ 9][2], g_alpha[ 9][3], g_alpha[ 9][4], g_alpha[ 9][5], g_alpha[ 9][6], g_alpha[ 9][7] ),
_mm_setr_epi16( g_alpha[10][0], g_alpha[10][1], g_alpha[10][2], g_alpha[10][3], g_alpha[10][4], g_alpha[10][5], g_alpha[10][6], g_alpha[10][7] ),
_mm_setr_epi16( g_alpha[11][0], g_alpha[11][1], g_alpha[11][2], g_alpha[11][3], g_alpha[11][4], g_alpha[11][5], g_alpha[11][6], g_alpha[11][7] ),
_mm_setr_epi16( g_alpha[12][0], g_alpha[12][1], g_alpha[12][2], g_alpha[12][3], g_alpha[12][4], g_alpha[12][5], g_alpha[12][6], g_alpha[12][7] ),
_mm_setr_epi16( g_alpha[13][0], g_alpha[13][1], g_alpha[13][2], g_alpha[13][3], g_alpha[13][4], g_alpha[13][5], g_alpha[13][6], g_alpha[13][7] ),
_mm_setr_epi16( g_alpha[14][0], g_alpha[14][1], g_alpha[14][2], g_alpha[14][3], g_alpha[14][4], g_alpha[14][5], g_alpha[14][6], g_alpha[14][7] ),
_mm_setr_epi16( g_alpha[15][0], g_alpha[15][1], g_alpha[15][2], g_alpha[15][3], g_alpha[15][4], g_alpha[15][5], g_alpha[15][6], g_alpha[15][7] ),
};
// g_alphaRange entries 0, 1, 4, 5, 8 and 14 - one for each of its six distinct
// values - followed by two zero lanes to fill the vector.
const __m128i g_alphaRange_SIMD = _mm_setr_epi16(
g_alphaRange[0],
g_alphaRange[1],
g_alphaRange[4],
g_alphaRange[5],
g_alphaRange[8],
g_alphaRange[14],
0,
0 );
#endif
#ifdef __AVX2__
// g_alpha transposed: vector c holds column c of all 16 rows, one 16-bit lane
// per row.
const __m256i g_alpha_AVX[8] = {
_mm256_setr_epi16( g_alpha[ 0][0], g_alpha[ 1][0], g_alpha[ 2][0], g_alpha[ 3][0], g_alpha[ 4][0], g_alpha[ 5][0], g_alpha[ 6][0], g_alpha[ 7][0], g_alpha[ 8][0], g_alpha[ 9][0], g_alpha[10][0], g_alpha[11][0], g_alpha[12][0], g_alpha[13][0], g_alpha[14][0], g_alpha[15][0] ),
_mm256_setr_epi16( g_alpha[ 0][1], g_alpha[ 1][1], g_alpha[ 2][1], g_alpha[ 3][1], g_alpha[ 4][1], g_alpha[ 5][1], g_alpha[ 6][1], g_alpha[ 7][1], g_alpha[ 8][1], g_alpha[ 9][1], g_alpha[10][1], g_alpha[11][1], g_alpha[12][1], g_alpha[13][1], g_alpha[14][1], g_alpha[15][1] ),
_mm256_setr_epi16( g_alpha[ 0][2], g_alpha[ 1][2], g_alpha[ 2][2], g_alpha[ 3][2], g_alpha[ 4][2], g_alpha[ 5][2], g_alpha[ 6][2], g_alpha[ 7][2], g_alpha[ 8][2], g_alpha[ 9][2], g_alpha[10][2], g_alpha[11][2], g_alpha[12][2], g_alpha[13][2], g_alpha[14][2], g_alpha[15][2] ),
_mm256_setr_epi16( g_alpha[ 0][3], g_alpha[ 1][3], g_alpha[ 2][3], g_alpha[ 3][3], g_alpha[ 4][3], g_alpha[ 5][3], g_alpha[ 6][3], g_alpha[ 7][3], g_alpha[ 8][3], g_alpha[ 9][3], g_alpha[10][3], g_alpha[11][3], g_alpha[12][3], g_alpha[13][3], g_alpha[14][3], g_alpha[15][3] ),
_mm256_setr_epi16( g_alpha[ 0][4], g_alpha[ 1][4], g_alpha[ 2][4], g_alpha[ 3][4], g_alpha[ 4][4], g_alpha[ 5][4], g_alpha[ 6][4], g_alpha[ 7][4], g_alpha[ 8][4], g_alpha[ 9][4], g_alpha[10][4], g_alpha[11][4], g_alpha[12][4], g_alpha[13][4], g_alpha[14][4], g_alpha[15][4] ),
_mm256_setr_epi16( g_alpha[ 0][5], g_alpha[ 1][5], g_alpha[ 2][5], g_alpha[ 3][5], g_alpha[ 4][5], g_alpha[ 5][5], g_alpha[ 6][5], g_alpha[ 7][5], g_alpha[ 8][5], g_alpha[ 9][5], g_alpha[10][5], g_alpha[11][5], g_alpha[12][5], g_alpha[13][5], g_alpha[14][5], g_alpha[15][5] ),
_mm256_setr_epi16( g_alpha[ 0][6], g_alpha[ 1][6], g_alpha[ 2][6], g_alpha[ 3][6], g_alpha[ 4][6], g_alpha[ 5][6], g_alpha[ 6][6], g_alpha[ 7][6], g_alpha[ 8][6], g_alpha[ 9][6], g_alpha[10][6], g_alpha[11][6], g_alpha[12][6], g_alpha[13][6], g_alpha[14][6], g_alpha[15][6] ),
_mm256_setr_epi16( g_alpha[ 0][7], g_alpha[ 1][7], g_alpha[ 2][7], g_alpha[ 3][7], g_alpha[ 4][7], g_alpha[ 5][7], g_alpha[ 6][7], g_alpha[ 7][7], g_alpha[ 8][7], g_alpha[ 9][7], g_alpha[10][7], g_alpha[11][7], g_alpha[12][7], g_alpha[13][7], g_alpha[14][7], g_alpha[15][7] ),
};
// All 16 g_alphaRange entries in index order, one 16-bit lane each.
const __m256i g_alphaRange_AVX = _mm256_setr_epi16(
g_alphaRange[ 0], g_alphaRange[ 1], g_alphaRange[ 2], g_alphaRange[ 3], g_alphaRange[ 4], g_alphaRange[ 5], g_alphaRange[ 6], g_alphaRange[ 7],
g_alphaRange[ 8], g_alphaRange[ 9], g_alphaRange[10], g_alphaRange[11], g_alphaRange[12], g_alphaRange[13], g_alphaRange[14], g_alphaRange[15]
);
#endif
#ifdef __ARM_NEON
// Columns 0 and 1 of g_table multiplied by 128, one 16-bit lane per row.
const int16x8_t g_table128_NEON[2] =
{
{ 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128 },
{ 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128 }
};
// Columns 0 and 1 of g_table multiplied by 256 in 32-bit lanes, four rows per
// vector: [0]/[1] cover rows 0..3, [2]/[3] cover rows 4..7.
const int32x4_t g_table256_NEON[4] =
{
{ 2*256, 5*256, 9*256, 13*256 },
{ 8*256, 17*256, 29*256, 42*256 },
{ 18*256, 24*256, 33*256, 47*256 },
{ 60*256, 80*256, 106*256, 183*256 }
};
// Literal copy of the g_alpha rows as 16-bit lanes; keep in sync with g_alpha.
const int16x8_t g_alpha_NEON[16] =
{
{ -3, -6, -9, -15, 2, 5, 8, 14 },
{ -3, -7, -10, -13, 2, 6, 9, 12 },
{ -2, -5, -8, -13, 1, 4, 7, 12 },
{ -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 },
{ -3, -7, -9, -11, 2, 6, 8, 10 },
{ -4, -7, -8, -11, 3, 6, 7, 10 },
{ -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 },
{ -2, -5, -8, -10, 1, 4, 7, 9 },
{ -2, -4, -8, -10, 1, 3, 7, 9 },
{ -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 },
{ -1, -2, -3, -10, 0, 1, 2, 9 },
{ -4, -6, -8, -9, 3, 5, 7, 8 },
{ -3, -5, -7, -9, 2, 4, 6, 8 }
};
// g_alphaRange entries 0, 1, 4, 5, 8 and 14 - one for each of its six distinct
// values - narrowed to 16 bits and followed by two zero lanes.
const int16x8_t g_alphaRange_NEON =
{
(int16_t)g_alphaRange[0],
(int16_t)g_alphaRange[1],
(int16_t)g_alphaRange[4],
(int16_t)g_alphaRange[5],
(int16_t)g_alphaRange[8],
(int16_t)g_alphaRange[14],
0,
0
};
#endif

50
engine/thirdparty/etcpak/Tables.hpp vendored Normal file
View file

@ -0,0 +1,50 @@
#ifndef __TABLES_HPP__
#define __TABLES_HPP__
#include <stdint.h>
#ifdef __AVX2__
# include <immintrin.h>
#endif
#ifdef __SSE4_1__
# include <smmintrin.h>
#endif
#ifdef __ARM_NEON
# include <arm_neon.h>
#endif
// Scalar lookup tables, defined in Tables.cpp and available on every target.
extern const int32_t g_table[8][4];
extern const int64_t g_table256[8][4];
extern const uint32_t g_id[4][16];
extern const uint32_t g_avg2[16];
extern const uint32_t g_flags[64];
extern const int32_t g_alpha[16][8];
extern const int32_t g_alpha11Mul[16];
extern const int32_t g_alphaRange[16];
// 128-bit vector versions of the tables; only declared when __SSE4_1__ is defined.
#ifdef __SSE4_1__
extern const __m128i g_table_SIMD[2];
extern const __m128i g_table128_SIMD[2];
extern const __m128i g_table256_SIMD[4];
extern const __m128i g_alpha_SIMD[16];
extern const __m128i g_alphaRange_SIMD;
#endif
// 256-bit vector versions; only declared when __AVX2__ is defined.
#ifdef __AVX2__
extern const __m256i g_alpha_AVX[8];
extern const __m256i g_alphaRange_AVX;
#endif
// NEON vector versions; only declared when __ARM_NEON is defined.
#ifdef __ARM_NEON
extern const int16x8_t g_table128_NEON[2];
extern const int32x4_t g_table256_NEON[4];
extern const int16x8_t g_alpha_NEON[16];
extern const int16x8_t g_alphaRange_NEON;
#endif
#endif

222
engine/thirdparty/etcpak/Vector.hpp vendored Normal file
View file

@ -0,0 +1,222 @@
#ifndef __DARKRL__VECTOR_HPP__
#define __DARKRL__VECTOR_HPP__
#include <assert.h>
#include <algorithm>
#include <math.h>
#include <stdint.h>
#include "Math.hpp"
// Two-component vector with component-wise comparison and compound arithmetic.
template<class T>
struct Vector2
{
    // Zero vector.
    Vector2() : x( 0 ), y( 0 ) {}
    // Broadcasts `v` to both components (implicit conversion from T is allowed).
    Vector2( T v ) : x( v ), y( v ) {}
    // Builds the vector from explicit components.
    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}

    // Equal only when both components compare equal.
    bool operator==( const Vector2<T>& rhs ) const
    {
        if( !( x == rhs.x ) ) return false;
        return y == rhs.y;
    }

    bool operator!=( const Vector2<T>& rhs ) const
    {
        return !operator==( rhs );
    }

    // Component-wise compound operators; each returns *this for chaining.
    Vector2<T>& operator+=( const Vector2<T>& rhs )
    {
        x += rhs.x;
        y += rhs.y;
        return *this;
    }

    Vector2<T>& operator-=( const Vector2<T>& rhs )
    {
        x -= rhs.x;
        y -= rhs.y;
        return *this;
    }

    Vector2<T>& operator*=( const Vector2<T>& rhs )
    {
        x *= rhs.x;
        y *= rhs.y;
        return *this;
    }

    T x, y;
};
// Component-wise sum.
template<class T>
Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
    const T sx = lhs.x + rhs.x;
    const T sy = lhs.y + rhs.y;
    return Vector2<T>( sx, sy );
}

// Component-wise difference.
template<class T>
Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
    const T dx = lhs.x - rhs.x;
    const T dy = lhs.y - rhs.y;
    return Vector2<T>( dx, dy );
}

// Scales both components by a float factor; the products are converted back to T.
template<class T>
Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
{
    return Vector2<T>( T( lhs.x * rhs ), T( lhs.y * rhs ) );
}

// Divides both components by a scalar of the component type.
template<class T>
Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
{
    const T qx = lhs.x / rhs;
    const T qy = lhs.y / rhs;
    return Vector2<T>( qx, qy );
}

// Shorthand names for the instantiations used by the library.
typedef Vector2<int32_t> v2i;
typedef Vector2<float> v2f;
// Three-component vector padded to four elements of T.
// Comparison and compound arithmetic are component-wise over x, y and z;
// `padding` only reserves space and is never initialised or read by this class.
template<class T>
struct Vector3
{
    // Zero vector.
    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
    // Broadcasts `v` to all three components (implicit conversion from T is allowed).
    Vector3( T v ) : x( v ), y( v ), z( v ) {}
    // Builds the vector from explicit components.
    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
    // Converts from a vector of another component type, casting each component to T.
    template<class Y>
    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}

    // Weighted sum 0.3*x + 0.59*y + 0.11*z, converted back to T.
    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }

    // Clamps every component to the range [0, 1].
    void Clamp()
    {
        x = std::min( T(1), std::max( T(0), x ) );
        y = std::min( T(1), std::max( T(0), y ) );
        z = std::min( T(1), std::max( T(0), z ) );
    }

    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
    // Takes a Vector3 so that `a != b` on two Vector3 values compiles.
    bool operator!=( const Vector3<T>& rhs ) const { return !( *this == rhs ); }

    // Indexed access: 0 -> x, 1 -> y, 2 -> z. Relies on x, y, z being laid out
    // contiguously at the start of the object; idx is checked by assert only.
    T& operator[]( unsigned int idx ) { assert( idx < 3 ); return reinterpret_cast<T*>( this )[idx]; }
    const T& operator[]( unsigned int idx ) const { assert( idx < 3 ); return reinterpret_cast<const T*>( this )[idx]; }

    // Compound operators modify *this and return a copy of the result
    // (by value, unlike Vector2; kept as-is for existing callers).
    Vector3<T> operator+=( const Vector3<T>& rhs )
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        return *this;
    }
    Vector3<T> operator*=( const Vector3<T>& rhs )
    {
        x *= rhs.x;
        y *= rhs.y;
        z *= rhs.z;
        return *this;
    }
    Vector3<T> operator*=( const float& rhs )
    {
        x *= rhs;
        y *= rhs;
        z *= rhs;
        return *this;
    }

    T x, y, z;
    T padding;
};
// Component-wise sum.
template<class T>
Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T sx = lhs.x + rhs.x;
    const T sy = lhs.y + rhs.y;
    const T sz = lhs.z + rhs.z;
    return Vector3<T>( sx, sy, sz );
}

// Component-wise difference.
template<class T>
Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T dx = lhs.x - rhs.x;
    const T dy = lhs.y - rhs.y;
    const T dz = lhs.z - rhs.z;
    return Vector3<T>( dx, dy, dz );
}

// Component-wise product.
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T px = lhs.x * rhs.x;
    const T py = lhs.y * rhs.y;
    const T pz = lhs.z * rhs.z;
    return Vector3<T>( px, py, pz );
}

// Scales every component by a float factor; each product is cast back to T.
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
{
    const T px = T( lhs.x * rhs );
    const T py = T( lhs.y * rhs );
    const T pz = T( lhs.z * rhs );
    return Vector3<T>( px, py, pz );
}

// Divides every component by a scalar of the component type.
template<class T>
Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
{
    const T qx = lhs.x / rhs;
    const T qy = lhs.y / rhs;
    const T qz = lhs.z / rhs;
    return Vector3<T>( qx, qy, qz );
}

// Orders vectors by their Luminance() value.
template<class T>
bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T left = lhs.Luminance();
    const T right = rhs.Luminance();
    return left < right;
}

// Shorthand names for the instantiations used by the library.
typedef Vector3<int32_t> v3i;
typedef Vector3<float> v3f;
typedef Vector3<uint8_t> v3b;
// Converts a float vector to bytes by scaling each component by 255 and
// truncating. Components are clamped to [0, 1] first: without the lower bound
// a negative input would be converted to uint8_t out of range, which is
// undefined behaviour. Inputs already within [0, 1] are unaffected.
static inline v3b v3f_to_v3b( const v3f& v )
{
    const float r = std::max( 0.f, std::min( 1.f, v.x ) );
    const float g = std::max( 0.f, std::min( 1.f, v.y ) );
    const float b = std::max( 0.f, std::min( 1.f, v.z ) );
    return v3b( uint8_t( r * 255 ), uint8_t( g * 255 ), uint8_t( b * 255 ) );
}
// Linear interpolation from v1 (amount 0) to v2 (amount 1), carried out in T.
template<class T>
Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
{
    const Vector3<T> delta = v2 - v1;
    return v1 + delta * amount;
}

// Byte vectors are interpolated in float and converted back afterwards, so the
// difference v2 - v1 is not computed in uint8_t.
template<>
inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
{
    const v3f from( v1 );
    const v3f to( v2 );
    return v3b( from + ( to - from ) * amount );
}
// Returns a grey vector whose three components all equal v.Luminance().
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v )
{
    // The single-value constructor broadcasts to x, y and z.
    return Vector3<T>( v.Luminance() );
}

// Same, with the luminance multiplied by `mul` and converted back to T first.
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v, float mul )
{
    return Vector3<T>( T( v.Luminance() * mul ) );
}
// Raises each component of `base` to `exponent` using the scalar pow.
template<class T>
Vector3<T> pow( const Vector3<T>& base, float exponent )
{
    const T px = pow( base.x, exponent );
    const T py = pow( base.y, exponent );
    const T pz = pow( base.z, exponent );
    return Vector3<T>( px, py, pz );
}
// Applies the scalar sRGB2linear() to each component.
template<class T>
Vector3<T> sRGB2linear( const Vector3<T>& v )
{
    const T lx = sRGB2linear( v.x );
    const T ly = sRGB2linear( v.y );
    const T lz = sRGB2linear( v.z );
    return Vector3<T>( lx, ly, lz );
}
// Applies the scalar linear2sRGB() to each component.
template<class T>
Vector3<T> linear2sRGB( const Vector3<T>& v )
{
    const T sx = linear2sRGB( v.x );
    const T sy = linear2sRGB( v.y );
    const T sz = linear2sRGB( v.z );
    return Vector3<T>( sx, sy, sz );
}
#endif