feat: updated engine version to 4.4-rc1
This commit is contained in:
parent
ee00efde1f
commit
21ba8e33af
5459 changed files with 1128836 additions and 198305 deletions
121
engine/thirdparty/etcpak/ProcessRGB.cpp
vendored
121
engine/thirdparty/etcpak/ProcessRGB.cpp
vendored
|
|
@ -3858,127 +3858,6 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
|
|||
#endif
|
||||
}
|
||||
|
||||
void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
|
||||
{
|
||||
int w = 0;
|
||||
uint32_t buf[4*4];
|
||||
do
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
__m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
|
||||
__m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
|
||||
__m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
|
||||
__m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
|
||||
|
||||
_MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
|
||||
|
||||
__m128i c0 = _mm_castps_si128( px0 );
|
||||
__m128i c1 = _mm_castps_si128( px1 );
|
||||
__m128i c2 = _mm_castps_si128( px2 );
|
||||
__m128i c3 = _mm_castps_si128( px3 );
|
||||
|
||||
__m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
|
||||
__m128i p0 = _mm_shuffle_epi8( c0, mask );
|
||||
__m128i p1 = _mm_shuffle_epi8( c1, mask );
|
||||
__m128i p2 = _mm_shuffle_epi8( c2, mask );
|
||||
__m128i p3 = _mm_shuffle_epi8( c3, mask );
|
||||
|
||||
_mm_store_si128( (__m128i*)(buf + 0), p0 );
|
||||
_mm_store_si128( (__m128i*)(buf + 4), p1 );
|
||||
_mm_store_si128( (__m128i*)(buf + 8), p2 );
|
||||
_mm_store_si128( (__m128i*)(buf + 12), p3 );
|
||||
|
||||
src += 4;
|
||||
#else
|
||||
auto ptr = buf;
|
||||
for( int x=0; x<4; x++ )
|
||||
{
|
||||
unsigned int a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src -= width * 3 - 1;
|
||||
}
|
||||
#endif
|
||||
if( ++w == width/4 )
|
||||
{
|
||||
src += width * 3;
|
||||
w = 0;
|
||||
}
|
||||
*dst++ = ProcessRGB( (uint8_t*)buf );
|
||||
}
|
||||
while( --blocks );
|
||||
}
|
||||
|
||||
void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics )
|
||||
{
|
||||
int w = 0;
|
||||
uint32_t buf[4*4];
|
||||
do
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
__m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
|
||||
__m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
|
||||
__m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
|
||||
__m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
|
||||
|
||||
_MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
|
||||
|
||||
__m128i c0 = _mm_castps_si128( px0 );
|
||||
__m128i c1 = _mm_castps_si128( px1 );
|
||||
__m128i c2 = _mm_castps_si128( px2 );
|
||||
__m128i c3 = _mm_castps_si128( px3 );
|
||||
|
||||
__m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
|
||||
__m128i p0 = _mm_shuffle_epi8( c0, mask );
|
||||
__m128i p1 = _mm_shuffle_epi8( c1, mask );
|
||||
__m128i p2 = _mm_shuffle_epi8( c2, mask );
|
||||
__m128i p3 = _mm_shuffle_epi8( c3, mask );
|
||||
|
||||
_mm_store_si128( (__m128i*)(buf + 0), p0 );
|
||||
_mm_store_si128( (__m128i*)(buf + 4), p1 );
|
||||
_mm_store_si128( (__m128i*)(buf + 8), p2 );
|
||||
_mm_store_si128( (__m128i*)(buf + 12), p3 );
|
||||
|
||||
src += 4;
|
||||
#else
|
||||
auto ptr = buf;
|
||||
for( int x=0; x<4; x++ )
|
||||
{
|
||||
unsigned int a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src += width;
|
||||
a = *src >> 24;
|
||||
*ptr++ = a | ( a << 8 ) | ( a << 16 );
|
||||
src -= width * 3 - 1;
|
||||
}
|
||||
#endif
|
||||
if( ++w == width/4 )
|
||||
{
|
||||
src += width * 3;
|
||||
w = 0;
|
||||
}
|
||||
*dst++ = ProcessRGB_ETC2( (uint8_t*)buf, useHeuristics );
|
||||
}
|
||||
while( --blocks );
|
||||
}
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
|
||||
{
|
||||
int w = 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue