/*
 * cnc-ddraw/src/blt.c
 *
 * Mirror of https://github.com/FunkyFr3sh/cnc-ddraw.git
 * (synced 2025-03-20 16:09:12 +01:00).
 */

#include <windows.h>
#include <intrin.h>
#include <stdint.h>
#include "debug.h"
#include "blt.h"
2022-09-19 13:13:34 +02:00
BOOL g_blt_use_avx;
2022-09-18 19:02:15 +02:00
/*
 * Copies `size` bytes from `src` to `dst`. Every path ends in memcpy-style
 * copying, so the two regions must not overlap.
 *
 * The copy strategy depends on the size:
 *   - size >= 4 MiB, both pointers 32-byte aligned, g_blt_use_avx set and the
 *     file built with __AVX__: 256-byte chunks using aligned AVX loads and
 *     non-temporal (streaming) stores, remainder via memcpy
 *   - size >= 100 KiB: the __movsb intrinsic
 *   - anything smaller: memcpy
 */
void blt_copy(
    unsigned char* dst,
    unsigned char* src,
    size_t size)
{
#ifdef __AVX__
    /*
     * Alignment is tested on the full pointer value (uintptr_t); casting to
     * DWORD would truncate the pointer on 64-bit builds.
     */
    if (size >= 1024 * 4096 &&
        g_blt_use_avx &&
        ((uintptr_t)dst % 32) == 0 &&
        ((uintptr_t)src % 32) == 0)
    {
        _mm_prefetch((const char*)src, _MM_HINT_NTA);

        while (size >= 256)
        {
            const __m256i* in = (const __m256i*)src;
            __m256i* out = (__m256i*)dst;

            /* load all eight 32-byte lanes before storing any of them */
            __m256i c0 = _mm256_load_si256(in + 0);
            __m256i c1 = _mm256_load_si256(in + 1);
            __m256i c2 = _mm256_load_si256(in + 2);
            __m256i c3 = _mm256_load_si256(in + 3);
            __m256i c4 = _mm256_load_si256(in + 4);
            __m256i c5 = _mm256_load_si256(in + 5);
            __m256i c6 = _mm256_load_si256(in + 6);
            __m256i c7 = _mm256_load_si256(in + 7);

            /* hint the data two chunks ahead of the current one */
            _mm_prefetch((const char*)(src + 512), _MM_HINT_NTA);

            _mm256_stream_si256(out + 0, c0);
            _mm256_stream_si256(out + 1, c1);
            _mm256_stream_si256(out + 2, c2);
            _mm256_stream_si256(out + 3, c3);
            _mm256_stream_si256(out + 4, c4);
            _mm256_stream_si256(out + 5, c5);
            _mm256_stream_si256(out + 6, c6);
            _mm256_stream_si256(out + 7, c7);

            src += 256;
            dst += 256;
            size -= 256;
        }

        /* order the streaming stores before anything that follows */
        _mm_sfence();
        _mm256_zeroupper();

        /* tail that did not fill a whole 256-byte chunk */
        if (size > 0)
        {
            memcpy(dst, src, size);
        }

        return;
    }
#endif // __AVX__

    if (size >= 1024 * 100)
    {
        __movsb(dst, src, size);
    }
    else
    {
        memcpy(dst, src, size);
    }
}
/*
 * Copies a dst_w x dst_h pixel rectangle from `src` (top-left at src_x,src_y)
 * to `dst` (top-left at dst_x,dst_y).
 *
 * dst_p / src_p are the row pitches in bytes and bpp is the pixel depth in
 * bits (bpp / 8 bytes are copied per pixel). Rows are copied with
 * memcpy / blt_copy, so the two rectangles must not overlap; use
 * blt_overlap() for that case.
 */
void blt_clean(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned char* src,
    int src_x,
    int src_y,
    int src_p,
    int bpp)
{
    int bytes_pp = bpp / 8;
    size_t size = dst_w * bytes_pp;

    src += (src_x * bytes_pp) + (src_p * src_y);
    dst += (dst_x * bytes_pp) + (dst_p * dst_y);

    if (size == (size_t)dst_p && dst_p == src_p)
    {
        /* rows span the whole pitch on both sides: one contiguous copy */
        blt_copy(dst, src, dst_p * dst_h);
        return;
    }

    for (int i = 0; i < dst_h; i++)
    {
        memcpy(dst, src, size);

        src += src_p;
        dst += dst_p;
    }
}
/*
 * Same rectangle copy as blt_clean(), but safe when the source and the
 * destination rectangles overlap (e.g. both live in the same surface).
 *
 * Each row is moved with memmove. When the destination lies below the source
 * (dst_y > src_y) the rows are processed bottom-up so that a source row is
 * never overwritten before it has been read; otherwise they are processed
 * top-down.
 *
 * dst_p / src_p are row pitches in bytes, bpp is the pixel depth in bits.
 */
void blt_overlap(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned char* src,
    int src_x,
    int src_y,
    int src_p,
    int bpp)
{
    int bytes_pp = bpp / 8;
    size_t size = dst_w * bytes_pp;

    src += (src_x * bytes_pp) + (src_p * src_y);
    dst += (dst_x * bytes_pp) + (dst_p * dst_y);

    if (dst_y > src_y)
    {
        /* start one row past the end and walk backwards */
        src += src_p * dst_h;
        dst += dst_p * dst_h;

        for (int i = dst_h; i-- > 0;)
        {
            src -= src_p;
            dst -= dst_p;

            memmove(dst, src, size);
        }

        return;
    }

    if (size == (size_t)dst_p && dst_p == src_p)
    {
        /* rows span the whole pitch on both sides: one contiguous move */
        memmove(dst, src, dst_p * dst_h);
        return;
    }

    for (int i = 0; i < dst_h; i++)
    {
        memmove(dst, src, size);

        src += src_p;
        dst += dst_p;
    }
}
/*
 * 8bpp worker for blt_colorkey(): copies every source pixel whose value lies
 * outside [key_l, key_h]. Strides are given in pixels.
 */
static void blt_colorkey_rows8(
    unsigned char* d,
    int d_stride,
    const unsigned char* s,
    int s_stride,
    int w,
    int h,
    unsigned char key_l,
    unsigned char key_h)
{
    if (key_l == key_h)
    {
        /* single key colour: one comparison per pixel is enough */
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned char c = s[x];

                if (c != key_l)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
    else
    {
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned char c = s[x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
}

/* 16bpp worker for blt_colorkey(); same contract as blt_colorkey_rows8(). */
static void blt_colorkey_rows16(
    unsigned short* d,
    int d_stride,
    const unsigned short* s,
    int s_stride,
    int w,
    int h,
    unsigned short key_l,
    unsigned short key_h)
{
    if (key_l == key_h)
    {
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned short c = s[x];

                if (c != key_l)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
    else
    {
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned short c = s[x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
}

/* 32bpp worker for blt_colorkey(); same contract as blt_colorkey_rows8(). */
static void blt_colorkey_rows32(
    unsigned int* d,
    int d_stride,
    const unsigned int* s,
    int s_stride,
    int w,
    int h,
    unsigned int key_l,
    unsigned int key_h)
{
    if (key_l == key_h)
    {
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned int c = s[x];

                if (c != key_l)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
    else
    {
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                unsigned int c = s[x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }

            s += s_stride;
            d += d_stride;
        }
    }
}

/*
 * Colour-keyed rectangle copy: copies a dst_w x dst_h pixel rectangle from
 * `src` (at src_x,src_y) to `dst` (at dst_x,dst_y), skipping every source
 * pixel whose value lies in the inclusive range [key_low, key_high]. The
 * keys are truncated to the pixel width before comparing.
 *
 * dst_p / src_p are row pitches in bytes. Only bpp values of 8, 16 and 32 are
 * handled; any other depth leaves the destination untouched.
 */
void blt_colorkey(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned char* src,
    int src_x,
    int src_y,
    int src_p,
    unsigned int key_low,
    unsigned int key_high,
    int bpp)
{
    /*
     * Reject unsupported depths up front: for bpp < 8 the bytes-per-pixel
     * value is 0 and the pitch divisions below would divide by zero.
     */
    if (bpp != 8 && bpp != 16 && bpp != 32)
    {
        return;
    }

    int bytes_pp = bpp / 8;

    /* row pitches expressed in pixels */
    int src_stride = src_p / bytes_pp;
    int dst_stride = dst_p / bytes_pp;

    src += (src_x * bytes_pp) + (src_p * src_y);
    dst += (dst_x * bytes_pp) + (dst_p * dst_y);

    if (bpp == 8)
    {
        blt_colorkey_rows8(
            dst,
            dst_stride,
            src,
            src_stride,
            dst_w,
            dst_h,
            (unsigned char)key_low,
            (unsigned char)key_high);
    }
    else if (bpp == 16)
    {
        blt_colorkey_rows16(
            (unsigned short*)dst,
            dst_stride,
            (const unsigned short*)src,
            src_stride,
            dst_w,
            dst_h,
            (unsigned short)key_low,
            (unsigned short)key_high);
    }
    else
    {
        blt_colorkey_rows32(
            (unsigned int*)dst,
            dst_stride,
            (const unsigned int*)src,
            src_stride,
            dst_w,
            dst_h,
            key_low,
            key_high);
    }
}
/*
 * Colour-keyed copy with optional stretching and mirroring.
 *
 * The src_w x src_h source rectangle at (src_x,src_y) is scaled to the
 * dst_w x dst_h destination rectangle at (dst_x,dst_y). Every destination
 * pixel (x,y) is taken from the source pixel at
 * ((int)(x * src_w / dst_w), (int)(y * src_h / dst_h)), i.e. point-sampled
 * without filtering. With mirror_left_right / mirror_up_down set, the source
 * column / row index is flipped inside the source rectangle.
 *
 * Source pixels whose value lies in [key_low, key_high] (truncated to the
 * pixel width) are not written. dst_p / src_p are row pitches in bytes. Only
 * bpp values of 8, 16 and 32 are handled; other depths are a no-op.
 */
void blt_colorkey_mirror_stretch(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned char* src,
    int src_x,
    int src_y,
    int src_w,
    int src_h,
    int src_p,
    unsigned int key_low,
    unsigned int key_high,
    BOOL mirror_up_down,
    BOOL mirror_left_right,
    int bpp)
{
    /*
     * bpp < 8 would make bytes_pp zero (integer division by zero below) and
     * an empty destination would divide by zero in the scale factors; none of
     * these cases has any pixel to write.
     */
    if ((bpp != 8 && bpp != 16 && bpp != 32) || dst_w <= 0 || dst_h <= 0)
    {
        return;
    }

    int bytes_pp = bpp / 8;

    /* surface widths in pixels, derived from the byte pitches */
    int dst_surf_w = dst_p / bytes_pp;
    int src_surf_w = src_p / bytes_pp;

    float scale_w = (float)src_w / dst_w;
    float scale_h = (float)src_h / dst_h;

    for (int y = 0; y < dst_h; y++)
    {
        int scaled_y = (int)(y * scale_h);

        if (mirror_up_down)
        {
            scaled_y = src_h - 1 - scaled_y;
        }

        /* pixel index of the first rectangle pixel in each surface row */
        int src_row = src_x + src_surf_w * (scaled_y + src_y);
        int dst_row = dst_x + dst_surf_w * (y + dst_y);

        if (bpp == 8)
        {
            unsigned char key_l = (unsigned char)key_low;
            unsigned char key_h = (unsigned char)key_high;
            const unsigned char* s = (const unsigned char*)src + src_row;
            unsigned char* d = (unsigned char*)dst + dst_row;

            for (int x = 0; x < dst_w; x++)
            {
                int scaled_x = (int)(x * scale_w);

                if (mirror_left_right)
                {
                    scaled_x = src_w - 1 - scaled_x;
                }

                unsigned char c = s[scaled_x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }
        }
        else if (bpp == 16)
        {
            unsigned short key_l = (unsigned short)key_low;
            unsigned short key_h = (unsigned short)key_high;
            const unsigned short* s = (const unsigned short*)src + src_row;
            unsigned short* d = (unsigned short*)dst + dst_row;

            for (int x = 0; x < dst_w; x++)
            {
                int scaled_x = (int)(x * scale_w);

                if (mirror_left_right)
                {
                    scaled_x = src_w - 1 - scaled_x;
                }

                unsigned short c = s[scaled_x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }
        }
        else
        {
            unsigned int key_l = key_low;
            unsigned int key_h = key_high;
            const unsigned int* s = (const unsigned int*)src + src_row;
            unsigned int* d = (unsigned int*)dst + dst_row;

            for (int x = 0; x < dst_w; x++)
            {
                int scaled_x = (int)(x * scale_w);

                if (mirror_left_right)
                {
                    scaled_x = src_w - 1 - scaled_x;
                }

                unsigned int c = s[scaled_x];

                if (c < key_l || c > key_h)
                {
                    d[x] = c;
                }
            }
        }
    }
}
/*
 * Zero-fills `size` bytes starting at `dst`. Buffers of at least 200 KiB go
 * through the __stosb intrinsic, smaller ones through memset.
 */
void blt_clear(
    unsigned char* dst,
    size_t size)
{
    if (size >= 1024 * 200)
    {
        __stosb(dst, 0, size);
    }
    else
    {
        memset(dst, 0, size);
    }
}
/*
 * Fills the dst_w x dst_h pixel rectangle at (dst_x,dst_y) of `dst` with
 * `color`. dst_p is the row pitch in bytes, bpp the pixel depth in bits.
 *
 * When every byte of the pixel value is identical (always true for 8bpp) the
 * fill is done bytewise with memset / __stosb. Otherwise (16 or 32bpp) the
 * first row is written pixel by pixel and then replicated into the remaining
 * rows with memcpy. Depths other than 8, 16 and 32 are a no-op.
 */
void blt_colorfill(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned int color,
    int bpp)
{
    /*
     * Nothing to fill. Without this check the 16/32bpp paths would still
     * write a complete first row for dst_h == 0, and a negative dst_w would
     * turn into a huge byte count.
     */
    if (dst_w <= 0 || dst_h <= 0)
    {
        return;
    }

    int bytes_pp = bpp / 8;
    size_t size = dst_w * bytes_pp;

    dst += (dst_x * bytes_pp) + (dst_p * dst_y);

    unsigned int low_byte = color & 0xFF;

    /* can the pixel value be produced by repeating one single byte? */
    int same_bytes =
        bpp == 8 ||
        (bpp == 16 &&
            low_byte == ((color >> 8) & 0xFF)) ||
        (bpp == 32 &&
            low_byte == ((color >> 8) & 0xFF) &&
            low_byte == ((color >> 16) & 0xFF) &&
            low_byte == ((color >> 24) & 0xFF));

    if (same_bytes)
    {
        if (size == (size_t)dst_p)
        {
            /* rows span the whole pitch: fill the area in one go */
            size_t total = (size_t)dst_p * dst_h;

            if (total >= 1024 * 200)
            {
                __stosb(dst, (unsigned char)low_byte, total);
            }
            else
            {
                memset(dst, (int)low_byte, total);
            }
        }
        else
        {
            for (int i = 0; i < dst_h; i++)
            {
                memset(dst, (int)low_byte, size);
                dst += dst_p;
            }
        }

        return;
    }

    /* remember where the first row starts; dst is advanced below */
    unsigned char* first_row = dst;

    if (bpp == 16)
    {
        unsigned short* row = (unsigned short*)first_row;

        for (int x = 0; x < dst_w; x++)
        {
            row[x] = (unsigned short)color;
        }
    }
    else if (bpp == 32)
    {
        unsigned int* row = (unsigned int*)first_row;

        for (int x = 0; x < dst_w; x++)
        {
            row[x] = color;
        }
    }
    else
    {
        return;
    }

    /* replicate the finished first row into all following rows */
    for (int i = 1; i < dst_h; i++)
    {
        dst += dst_p;
        memcpy(dst, first_row, size);
    }
}
/*
 * Converts a dst_w x dst_h rectangle of 16-bit RGB565 pixels from `src`
 * (at src_x,src_y) into 32-bit pixels in `dst` (at dst_x,dst_y).
 *
 * Output layout per pixel: red in bits 0-7, green in bits 8-15, blue in bits
 * 16-23 and 0xFF in bits 24-31. The 5/6-bit channels are widened by shifting
 * left (3 or 2 bits); the low bits of each channel stay zero.
 *
 * dst_p / src_p are row pitches in bytes.
 */
void blt_rgb565_to_rgba8888(
    unsigned int* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned short* src,
    int src_x,
    int src_y,
    int src_p)
{
    /* row pitches expressed in pixels */
    size_t src_stride = src_p / sizeof(src[0]);
    size_t dst_stride = dst_p / sizeof(dst[0]);

    src += src_x + (src_stride * src_y);
    dst += dst_x + (dst_stride * dst_y);

    for (int y = 0; y < dst_h; y++)
    {
        for (int x = 0; x < dst_w; x++)
        {
            unsigned int pixel = src[x];

            unsigned int r = ((pixel >> 11) & 0x1F) << 3;
            unsigned int g = ((pixel >> 5) & 0x3F) << 2;
            unsigned int b = (pixel & 0x1F) << 3;

            /* unsigned constant: 0xFF << 24 would overflow a signed int */
            dst[x] = 0xFF000000u | (b << 16) | (g << 8) | r;
        }

        src += src_stride;
        dst += dst_stride;
    }
}
/*
 * Copies a dst_w x dst_h rectangle of 32-bit pixels from `src` (at
 * src_x,src_y) to `dst` (at dst_x,dst_y), swapping the channels in bits 0-7
 * and bits 16-23 and forcing bits 24-31 to 0xFF. Bits 8-15 are kept as is.
 *
 * dst_p / src_p are row pitches in bytes.
 */
void blt_bgra8888_to_rgba8888(
    unsigned int* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned int* src,
    int src_x,
    int src_y,
    int src_p)
{
    /* row pitches expressed in pixels */
    size_t src_stride = src_p / sizeof(src[0]);
    size_t dst_stride = dst_p / sizeof(dst[0]);

    src += src_x + (src_stride * src_y);
    dst += dst_x + (dst_stride * dst_y);

    for (int y = 0; y < dst_h; y++)
    {
        for (int x = 0; x < dst_w; x++)
        {
            unsigned int pixel = src[x];

            unsigned int r = (pixel >> 16) & 0xFF;
            unsigned int g = (pixel >> 8) & 0xFF;
            unsigned int b = pixel & 0xFF;

            /* unsigned constant: 0xFF << 24 would overflow a signed int */
            dst[x] = 0xFF000000u | (b << 16) | (g << 8) | r;
        }

        src += src_stride;
        dst += dst_stride;
    }
}
/*
 * Stretches the src_w x src_h source rectangle at (src_x,src_y) onto the
 * dst_w x dst_h destination rectangle at (dst_x,dst_y).
 *
 * Every destination pixel (x,y) is taken from the source pixel at
 * ((int)(x * src_w / dst_w), (int)(y * src_h / dst_h)), i.e. point-sampled
 * without filtering. When consecutive destination rows map to the same
 * source row, the previously produced destination row is duplicated with
 * memcpy instead of being resampled.
 *
 * dst_p / src_p are row pitches in bytes. Only bpp values of 8, 16 and 32 are
 * handled; other depths are a no-op.
 */
void blt_stretch(
    unsigned char* dst,
    int dst_x,
    int dst_y,
    int dst_w,
    int dst_h,
    int dst_p,
    unsigned char* src,
    int src_x,
    int src_y,
    int src_w,
    int src_h,
    int src_p,
    int bpp)
{
    /*
     * bpp < 8 would make bytes_pp zero (integer division by zero below) and
     * an empty destination would divide by zero in the scale factors; none of
     * these cases has any pixel to write.
     */
    if ((bpp != 8 && bpp != 16 && bpp != 32) || dst_w <= 0 || dst_h <= 0)
    {
        return;
    }

    int bytes_pp = bpp / 8;
    size_t size = dst_w * bytes_pp;

    /* surface widths in pixels, derived from the byte pitches */
    int dst_surf_w = dst_p / bytes_pp;
    int src_surf_w = src_p / bytes_pp;

    float scale_w = (float)src_w / dst_w;
    float scale_h = (float)src_h / dst_h;

    /* source row and destination pixel index of the last sampled row */
    int last_y = -1;
    int last_row = -1;

    for (int y = 0; y < dst_h; y++)
    {
        int scaled_y = (int)(y * scale_h);
        int dst_row = dst_x + dst_surf_w * (y + dst_y);

        if (scaled_y == last_y)
        {
            /* same source row as before: reuse the finished output row */
            memcpy(dst + dst_row * bytes_pp, dst + last_row * bytes_pp, size);
            continue;
        }

        last_y = scaled_y;
        last_row = dst_row;

        int src_row = src_x + src_surf_w * (scaled_y + src_y);

        if (bpp == 8)
        {
            unsigned char* d = dst + dst_row;
            const unsigned char* s = src + src_row;

            for (int x = 0; x < dst_w; x++)
            {
                d[x] = s[(int)(x * scale_w)];
            }
        }
        else if (bpp == 16)
        {
            unsigned short* d = (unsigned short*)dst + dst_row;
            const unsigned short* s = (const unsigned short*)src + src_row;

            for (int x = 0; x < dst_w; x++)
            {
                d[x] = s[(int)(x * scale_w)];
            }
        }
        else
        {
            unsigned int* d = (unsigned int*)dst + dst_row;
            const unsigned int* s = (const unsigned int*)src + src_row;

            for (int x = 0; x < dst_w; x++)
            {
                d[x] = s[(int)(x * scale_w)];
            }
        }
    }
}