diff --git a/inc/blt.h b/inc/blt.h
index 641c4d3..1cd2a3f 100644
--- a/inc/blt.h
+++ b/inc/blt.h
@@ -5,6 +5,8 @@
 #include <windows.h>
 
 
+extern BOOL g_blt_use_avx;
+
 void blt_copy(
     unsigned char* dst,
     unsigned char* src,
diff --git a/inc/utils.h b/inc/utils.h
index 1f8c54a..98b51fc 100644
--- a/inc/utils.h
+++ b/inc/utils.h
@@ -5,6 +5,7 @@
 #include <windows.h>
 
 
+BOOL util_is_avx_supported();
 void util_limit_game_ticks();
 void util_update_bnet_pos(int newX, int newY);
 BOOL util_get_lowest_resolution(float ratio, SIZE* outRes, DWORD minWidth, DWORD minHeight, DWORD maxWidth, DWORD maxHeight);
diff --git a/src/blt.c b/src/blt.c
index dfd3912..846f034 100644
--- a/src/blt.c
+++ b/src/blt.c
@@ -4,13 +4,15 @@
 #include "blt.h"
 
 
+BOOL g_blt_use_avx;
+
 void blt_copy(
     unsigned char* dst,
     unsigned char* src,
     size_t size)
 {
 #ifdef __AVX__
-    if (size >= 1024 * 1536 && !((DWORD)dst % 32) && !((DWORD)src % 32))
+    if (size >= 1024 * 4096 && g_blt_use_avx && !((DWORD)dst % 32) && !((DWORD)src % 32))
     {
         while (size >= 256)
         {
@@ -45,7 +47,7 @@ void blt_copy(
         }
         return;
     }
-#endif
+#endif // __AVX__
 
     if (size >= 1024 * 100)
     {
diff --git a/src/dd.c b/src/dd.c
index d8fe35e..ff43bf5 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -1096,6 +1096,7 @@ HRESULT dd_CreateEx(GUID* lpGuid, LPVOID* lplpDD, REFIID iid, IUnknown* pUnkOute
 
         g_ddraw->render.sem = CreateSemaphore(NULL, 0, 1, NULL);
         g_ddraw->wine = GetProcAddress(GetModuleHandleA("ntdll.dll"), "wine_get_version") != 0;
+        g_blt_use_avx = util_is_avx_supported();
 
         cfg_load();
         g_ddraw->ref--;
diff --git a/src/utils.c b/src/utils.c
index 76fc56b..ed078c7 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,4 +1,5 @@
 #include <windows.h>
+#include <intrin.h>
 #include "ddraw.h"
 #include "debug.h"
 #include "dd.h"
@@ -10,6 +11,38 @@
 #include "config.h"
 
 
+// Reports whether AVX code paths may be executed on this machine.
+//
+// Requires CPUID.1:ECX to report both OSXSAVE (bit 27) and AVX (bit 28), and
+// XCR0 to have both the XMM (bit 1) and YMM (bit 2) state bits set, meaning
+// the OS saves and restores the full AVX register state.
+//
+// XGETBV raises #UD when the OS has not enabled XSAVE (which is what the
+// OSXSAVE bit reports), so XCR0 is only read after the CPUID checks pass.
+BOOL util_is_avx_supported()
+{
+    const int cpu_bits = (1 << 27) | (1 << 28);
+    const unsigned int os_bits = (1u << 1) | (1u << 2);
+
+    int info[4] = { 0 };
+    __cpuid(info, 1);
+
+    if ((info[2] & cpu_bits) != cpu_bits)
+    {
+        return FALSE;
+    }
+
+    unsigned int xcr0 = 0;
+
+#if defined(_MSC_VER)
+    xcr0 = (unsigned int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+    __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
+#endif
+
+    return (xcr0 & os_bits) == os_bits;
+}
+
 void util_limit_game_ticks()
 {
     if (g_ddraw->ticks_limiter.htimer)