1
0
mirror of https://github.com/FunkyFr3sh/cnc-ddraw.git synced 2025-03-15 06:04:49 +01:00

add AVX memset

This commit is contained in:
FunkyFr3sh 2022-09-20 02:27:01 +02:00
parent fee61f7b4a
commit 6372dc6884
3 changed files with 31 additions and 16 deletions

View File

@@ -74,6 +74,7 @@ void blt_colorkey_mirror_stretch(
/*
 * Fill a block of memory with a single byte value.
 *
 * dst   - start of the destination buffer
 * color - byte value written to every position
 * size  - number of bytes to fill
 */
void blt_clear(
unsigned char* dst,
char color,
size_t size);
void blt_colorfill(

View File

@@ -461,16 +461,39 @@ void blt_colorkey_mirror_stretch(
}
/*
 * Fill `size` bytes starting at `dst` with the byte value `color`.
 *
 * Three strategies are used, selected by size and build:
 *   - MSVC builds, size below 100 KiB, g_blt_use_avx set and `dst` aligned
 *     to 32 bytes: whole 128-byte chunks are written with aligned AVX
 *     stores; the tail (fewer than 128 bytes) is left to memset.
 *   - size of 100 KiB or more: __stosb, where the toolchain provides it.
 *   - everything else: memset.
 *
 * A size of 0 is valid and writes nothing.
 */
void blt_clear(
    unsigned char* dst,
    char color,
    size_t size)
{
#ifdef _MSC_VER
    /*
     * _mm256_store_si256 needs a 32-byte aligned address, hence the
     * alignment test. The cast goes through size_t rather than DWORD so the
     * pointer is not truncated on 64-bit targets.
     */
    if (size < 1024 * 100 && g_blt_use_avx && !((size_t)dst % 32))
    {
        /* The fill pattern does not change per iteration: build it once. */
        const __m256i c0 = _mm256_set1_epi8(color);

        while (size >= 128)
        {
            _mm256_store_si256((((__m256i*)dst) + 0), c0);
            _mm256_store_si256((((__m256i*)dst) + 1), c0);
            _mm256_store_si256((((__m256i*)dst) + 2), c0);
            _mm256_store_si256((((__m256i*)dst) + 3), c0);

            dst += 128;
            size -= 128;
        }

        _mm256_zeroupper();

        /* dst/size now describe the remainder; memset below handles it. */
    }
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
    /*
     * __stosb is a compiler intrinsic (MSVC, mingw-w64). Toolchains that do
     * not provide it fall through to memset, which produces the same result.
     */
    if (size >= 1024 * 100)
    {
        __stosb(dst, color, size);
        return;
    }
#endif

    memset(dst, color, size);
}
@@ -500,22 +523,13 @@ void blt_colorfill(
{
if (size == dst_p)
{
size_t s = dst_p * dst_h;
if (s >= 1024 * 200)
{
__stosb(dst, color, s);
}
else
{
memset(dst, color, s);
}
blt_clear(dst, color, dst_p * dst_h);
}
else
{
for (int i = 0; i < dst_h; i++)
{
memset(dst, color, size);
blt_clear(dst, color, size);
dst += dst_p;
}
}

View File

@@ -551,7 +551,7 @@ HRESULT dds_Flip(IDirectDrawSurfaceImpl* This, IDirectDrawSurfaceImpl* lpDDSurfa
if (g_ddraw->flipclear)
{
blt_clear(buf, backbuffer->size);
blt_clear(buf, 0, backbuffer->size);
}
LeaveCriticalSection(&g_ddraw->cs);