feat: updated engine version to 4.4-rc1

This commit is contained in:
Sara 2025-02-23 14:38:14 +01:00
parent ee00efde1f
commit 21ba8e33af
5459 changed files with 1128836 additions and 198305 deletions

View file

@ -3858,127 +3858,6 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
#endif
}
// Encodes the most significant byte of every 32-bit source pixel (bits 24..31,
// the alpha channel going by the function name) as a stream of 64-bit blocks
// produced by ProcessRGB().
//
//   src    - first pixel of the image region; each block reads a 4x4 pixel tile,
//            i.e. 4 consecutive pixels from each of 4 rows spaced `width` apart.
//   dst    - receives exactly `blocks` 64-bit values.
//   blocks - number of 4x4 tiles to encode; 0 is a no-op.
//   width  - row stride of `src` in pixels. Tiles are walked left to right, and
//            after width/4 tiles the source pointer moves down to the next
//            band of four rows.
//
// NOTE(review): assumes width is a multiple of 4 and that `src` holds enough
// rows for `blocks` tiles - neither is checked here; confirm with callers.
void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
    // The loop below is a do/while: entering it with blocks == 0 would wrap the
    // counter around and run far past the end of both buffers.
    if( blocks == 0 ) return;

    const size_t tilesPerRow = width / 4;
    size_t tileInRow = 0;

    // Scratch tile in column-major order (buf[x*4 + y]). It must be 16-byte
    // aligned because the SSE path writes it with _mm_store_si128.
    alignas( 16 ) uint32_t buf[4*4];

    do
    {
#ifdef __SSE4_1__
        // Load one 4-pixel row segment from each of the four tile rows.
        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 0 ) ) );
        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 1 ) ) );
        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 2 ) ) );
        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 3 ) ) );

        // Rows -> columns, so that each register holds one tile column.
        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );

        const __m128i c0 = _mm_castps_si128( px0 );
        const __m128i c1 = _mm_castps_si128( px1 );
        const __m128i c2 = _mm_castps_si128( px2 );
        const __m128i c3 = _mm_castps_si128( px3 );

        // Broadcast byte 3 of every 32-bit lane to all four bytes of that lane.
        // (The scalar path below leaves the top byte zero instead.)
        const __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );

        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  0 ), _mm_shuffle_epi8( c0, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  4 ), _mm_shuffle_epi8( c1, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  8 ), _mm_shuffle_epi8( c2, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf + 12 ), _mm_shuffle_epi8( c3, mask ) );
#else
        // Walk the tile column by column, replicating the top byte of each
        // pixel into the three low bytes of the scratch entry.
        for( int x=0; x<4; x++ )
        {
            for( int y=0; y<4; y++ )
            {
                const uint32_t a = src[width * y + x] >> 24;
                buf[x*4 + y] = a | ( a << 8 ) | ( a << 16 );
            }
        }
#endif
        // Step to the next tile on the same band of rows.
        src += 4;

        if( ++tileInRow == tilesPerRow )
        {
            // End of the band: src already sits at the start of its second
            // row, so skip the remaining three to reach the next band.
            src += width * 3;
            tileInRow = 0;
        }

        *dst++ = ProcessRGB( reinterpret_cast<uint8_t*>( buf ) );
    }
    while( --blocks );
}
// Encodes the most significant byte of every 32-bit source pixel (bits 24..31,
// the alpha channel going by the function name) as a stream of 64-bit blocks
// produced by ProcessRGB_ETC2().
//
//   src           - first pixel of the image region; each block reads a 4x4
//                   pixel tile, i.e. 4 consecutive pixels from each of 4 rows
//                   spaced `width` apart.
//   dst           - receives exactly `blocks` 64-bit values.
//   blocks        - number of 4x4 tiles to encode; 0 is a no-op.
//   width         - row stride of `src` in pixels. Tiles are walked left to
//                   right, and after width/4 tiles the source pointer moves
//                   down to the next band of four rows.
//   useHeuristics - forwarded unchanged to ProcessRGB_ETC2().
//
// NOTE(review): assumes width is a multiple of 4 and that `src` holds enough
// rows for `blocks` tiles - neither is checked here; confirm with callers.
void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics )
{
    // The loop below is a do/while: entering it with blocks == 0 would wrap the
    // counter around and run far past the end of both buffers.
    if( blocks == 0 ) return;

    const size_t tilesPerRow = width / 4;
    size_t tileInRow = 0;

    // Scratch tile in column-major order (buf[x*4 + y]). It must be 16-byte
    // aligned because the SSE path writes it with _mm_store_si128.
    alignas( 16 ) uint32_t buf[4*4];

    do
    {
#ifdef __SSE4_1__
        // Load one 4-pixel row segment from each of the four tile rows.
        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 0 ) ) );
        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 1 ) ) );
        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 2 ) ) );
        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( reinterpret_cast<const __m128i*>( src + width * 3 ) ) );

        // Rows -> columns, so that each register holds one tile column.
        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );

        const __m128i c0 = _mm_castps_si128( px0 );
        const __m128i c1 = _mm_castps_si128( px1 );
        const __m128i c2 = _mm_castps_si128( px2 );
        const __m128i c3 = _mm_castps_si128( px3 );

        // Broadcast byte 3 of every 32-bit lane to all four bytes of that lane.
        // (The scalar path below leaves the top byte zero instead.)
        const __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );

        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  0 ), _mm_shuffle_epi8( c0, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  4 ), _mm_shuffle_epi8( c1, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf +  8 ), _mm_shuffle_epi8( c2, mask ) );
        _mm_store_si128( reinterpret_cast<__m128i*>( buf + 12 ), _mm_shuffle_epi8( c3, mask ) );
#else
        // Walk the tile column by column, replicating the top byte of each
        // pixel into the three low bytes of the scratch entry.
        for( int x=0; x<4; x++ )
        {
            for( int y=0; y<4; y++ )
            {
                const uint32_t a = src[width * y + x] >> 24;
                buf[x*4 + y] = a | ( a << 8 ) | ( a << 16 );
            }
        }
#endif
        // Step to the next tile on the same band of rows.
        src += 4;

        if( ++tileInRow == tilesPerRow )
        {
            // End of the band: src already sits at the start of its second
            // row, so skip the remaining three to reach the next band.
            src += width * 3;
            tileInRow = 0;
        }

        *dst++ = ProcessRGB_ETC2( reinterpret_cast<uint8_t*>( buf ), useHeuristics );
    }
    while( --blocks );
}
#include <chrono>
#include <thread>
void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
{
int w = 0;