From 03f8528315fa46c95991a34f3325d7b33ae5538c Mon Sep 17 00:00:00 2001
From: Edward Rudd <urkle@outoforder.cc>
Date: Sat, 02 May 2020 21:48:36 +0000
Subject: [PATCH] Update source to SDL2 2.0.12
---
source/src/video/SDL_blit_N.c | 866 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 819 insertions(+), 47 deletions(-)
diff --git a/source/src/video/SDL_blit_N.c b/source/src/video/SDL_blit_N.c
index d6ec417..39c08f6 100644
--- a/source/src/video/SDL_blit_N.c
+++ b/source/src/video/SDL_blit_N.c
@@ -1,6 +1,6 @@
/*
Simple DirectMedia Layer
- Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
+ Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@@ -20,6 +20,8 @@
*/
#include "../SDL_internal.h"
+#if SDL_HAVE_BLIT_N
+
#include "SDL_video.h"
#include "SDL_endian.h"
#include "SDL_cpuinfo.h"
@@ -27,7 +29,24 @@
#include "SDL_assert.h"
+/* General optimized routines that write char by char */
+#define HAVE_FAST_WRITE_INT8 1
+
+/* On some CPU, it's slower than combining and write a word */
+#if defined(__MIPS__)
+# undef HAVE_FAST_WRITE_INT8
+# define HAVE_FAST_WRITE_INT8 0
+#endif
+
/* Functions to blit from N-bit surfaces to other surfaces */
+
+enum blit_features {
+ BLIT_FEATURE_NONE = 0,
+ BLIT_FEATURE_HAS_MMX = 1,
+ BLIT_FEATURE_HAS_ALTIVEC = 2,
+ BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4,
+ BLIT_FEATURE_HAS_ARM_SIMD = 8
+};
#if SDL_ALTIVEC_BLITTERS
#ifdef HAVE_ALTIVEC_H
@@ -155,6 +174,34 @@
vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
return (vswiz);
}
+
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+/* reorder bytes for PowerPC little endian */
+static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
+{
+ /* The result vector of calc_swizzle32 reorder bytes using vec_perm.
+ The LE transformation for vec_perm has an implicit assumption
+ that the permutation is being used to reorder vector elements,
+ not to reorder bytes within those elements.
+ Unfortunatly the result order is not the expected one for powerpc
+ little endian when the two first vector parameters of vec_perm are
+ not of type 'vector char'. This is because the numbering from the
+ left for BE, and numbering from the right for LE, produces a
+ different interpretation of what the odd and even lanes are.
+ Refer to fedora bug 1392465
+ */
+
+ const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
+ 0x01, 0x00, 0x03, 0x02,
+ 0x05, 0x04, 0x07, 0x06,
+ 0x09, 0x08, 0x0B, 0x0A,
+ 0x0D, 0x0C, 0x0F, 0x0E );
+
+ vector unsigned char vswiz_ppc64le;
+ vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
+ return(vswiz_ppc64le);
+}
+#endif
static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
static void
@@ -649,6 +696,10 @@
/* vsel is set for items that match the key */
vsel = (vector unsigned char) vec_and(vs, vrgbmask);
vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+ /* reorder bytes for PowerPC little endian */
+ vpermute = reorder_ppc64le_vec(vpermute);
+#endif
/* permute the src vec to the dest format */
vs = vec_perm(vs, valpha, vpermute);
/* load the destination vec */
@@ -728,6 +779,10 @@
src += 4;
width -= 4;
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+ /* reorder bytes for PowerPC little endian */
+ vpermute = reorder_ppc64le_vec(vpermute);
+#endif
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
vec_st(vbits, 0, dst); /* store it back out. */
dst += 4;
@@ -819,6 +874,10 @@
src += 4;
width -= 4;
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+ /* reorder bytes for PowerPC little endian */
+ vpermute = reorder_ppc64le_vec(vpermute);
+#endif
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
vec_st(vbits, 0, dst); /* store it back out. */
dst += 4;
@@ -845,25 +904,26 @@
vec_dss(DST_CHAN_DEST);
}
-static Uint32
+static enum blit_features
GetBlitFeatures(void)
{
- static Uint32 features = 0xffffffff;
- if (features == 0xffffffff) {
+ static enum blit_features features = -1;
+ if (features == (enum blit_features) -1) {
/* Provide an override for testing .. */
char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
if (override) {
- features = 0;
- SDL_sscanf(override, "%u", &features);
+ unsigned int features_as_uint = 0;
+ SDL_sscanf(override, "%u", &features_as_uint);
+ features = (enum blit_features) features_as_uint;
} else {
features = (0
/* Feature 1 is has-MMX */
- | ((SDL_HasMMX())? 1 : 0)
+ | ((SDL_HasMMX())? BLIT_FEATURE_HAS_MMX : 0)
/* Feature 2 is has-AltiVec */
- | ((SDL_HasAltiVec())? 2 : 0)
+ | ((SDL_HasAltiVec())? BLIT_FEATURE_HAS_ALTIVEC : 0)
/* Feature 4 is dont-use-prefetch */
/* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
- | ((GetL3CacheSize() == 0) ? 4 : 0)
+ | ((GetL3CacheSize() == 0) ? BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH : 0)
);
}
}
@@ -875,7 +935,39 @@
#endif
#else
/* Feature 1 is has-MMX */
-#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
+#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0) | (SDL_HasARMSIMD() ? BLIT_FEATURE_HAS_ARM_SIMD : 0))
+#endif
+
+#if SDL_ARM_SIMD_BLITTERS
+void Blit_BGR888_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
+
+static void
+Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo * info)
+{
+ int32_t width = info->dst_w;
+ int32_t height = info->dst_h;
+ uint32_t *dstp = (uint32_t *)info->dst;
+ int32_t dststride = width + (info->dst_skip >> 2);
+ uint32_t *srcp = (uint32_t *)info->src;
+ int32_t srcstride = width + (info->src_skip >> 2);
+
+ Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
+}
+
+void Blit_RGB444_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint16_t *src, int32_t src_stride);
+
+static void
+Blit_RGB444_RGB888ARMSIMD(SDL_BlitInfo * info)
+{
+ int32_t width = info->dst_w;
+ int32_t height = info->dst_h;
+ uint32_t *dstp = (uint32_t *)info->dst;
+ int32_t dststride = width + (info->dst_skip >> 2);
+ uint16_t *srcp = (uint16_t *)info->src;
+ int32_t srcstride = width + (info->src_skip >> 1);
+
+ Blit_RGB444_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
+}
#endif
/* This is now endian dependent */
@@ -1360,6 +1452,8 @@
}
+#if SDL_HAVE_BLIT_N_RGB565
+
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
static void
@@ -1376,7 +1470,7 @@
/* Set up some basic variables */
width = info->dst_w;
height = info->dst_h;
- src = (Uint8 *) info->src;
+ src = info->src;
srcskip = info->src_skip;
dst = (Uint32 *) info->dst;
dstskip = info->dst_skip / 4;
@@ -1978,6 +2072,8 @@
Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
+#endif /* SDL_HAVE_BLIT_N_RGB565 */
+
static void
BlitNto1(SDL_BlitInfo * info)
{
@@ -2149,6 +2245,87 @@
}
}
+/* permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel */
+static void
+get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
+ int *_p0 , int *_p1, int *_p2, int *_p3, int *_alpha_channel)
+{
+ int alpha_channel = 0, p0, p1, p2, p3;
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int Pixel = 0x04030201; /* identity permutation */
+#else
+ int Pixel = 0x01020304; /* identity permutation */
+ int srcbpp = srcfmt->BytesPerPixel;
+ int dstbpp = dstfmt->BytesPerPixel;
+#endif
+
+ if (srcfmt->Amask) {
+ RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
+ } else {
+ RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
+ p3 = 0;
+ }
+
+ if (dstfmt->Amask) {
+ if (srcfmt->Amask) {
+ PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
+ } else {
+ PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
+ }
+ } else {
+ PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
+ }
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ p0 = Pixel & 0xFF;
+ p1 = (Pixel >> 8) & 0xFF;
+ p2 = (Pixel >> 16) & 0xFF;
+ p3 = (Pixel >> 24) & 0xFF;
+#else
+ p3 = Pixel & 0xFF;
+ p2 = (Pixel >> 8) & 0xFF;
+ p1 = (Pixel >> 16) & 0xFF;
+ p0 = (Pixel >> 24) & 0xFF;
+#endif
+
+ if (p0 == 0) {
+ p0 = 1;
+ alpha_channel = 0;
+ } else if (p1 == 0) {
+ p1 = 1;
+ alpha_channel = 1;
+ } else if (p2 == 0) {
+ p2 = 1;
+ alpha_channel = 2;
+ } else if (p3 == 0) {
+ p3 = 1;
+ alpha_channel = 3;
+ }
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+#else
+ if (srcbpp == 3 && dstbpp == 4) {
+ if (p0 != 1) p0--;
+ if (p1 != 1) p1--;
+ if (p2 != 1) p2--;
+ if (p3 != 1) p3--;
+ } else if (srcbpp == 4 && dstbpp == 3) {
+ p0 = p1;
+ p1 = p2;
+ p2 = p3;
+ }
+#endif
+ *_p0 = p0 - 1;
+ *_p1 = p1 - 1;
+ *_p2 = p2 - 1;
+ *_p3 = p3 - 1;
+
+ if (_alpha_channel) {
+ *_alpha_channel = alpha_channel;
+ }
+}
+
+
static void
BlitNtoN(SDL_BlitInfo * info)
{
@@ -2163,6 +2340,90 @@
SDL_PixelFormat *dstfmt = info->dst_fmt;
int dstbpp = dstfmt->BytesPerPixel;
unsigned alpha = dstfmt->Amask ? info->a : 0;
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 4->4 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int alpha_channel, p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
+ src += 4;
+ dst += 4;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+#endif
+
+ /* Blit with permutation: 4->3 */
+ if (srcbpp == 4 && dstbpp == 3 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ src += 4;
+ dst += 3;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 3->4 */
+ if (srcbpp == 3 && dstbpp == 4 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int alpha_channel, p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
+ src += 3;
+ dst += 4;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+#endif
while (height--) {
/* *INDENT-OFF* */
@@ -2198,6 +2459,35 @@
SDL_PixelFormat *dstfmt = info->dst_fmt;
int dstbpp = dstfmt->BytesPerPixel;
int c;
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 4->4 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ src += 4;
+ dst += 4;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+#endif
while (height--) {
for (c = width; c; --c) {
@@ -2329,34 +2619,238 @@
int dstbpp = dstfmt->BytesPerPixel;
unsigned alpha = dstfmt->Amask ? info->a : 0;
Uint32 rgbmask = ~srcfmt->Amask;
+ int sfmt = srcfmt->format;
+ int dfmt = dstfmt->format;
/* Set up some basic variables */
ckey &= rgbmask;
- /* Fastpath: same source/destination format, no Amask, bpp 32, loop is vectorized. ~10x faster */
- if (srcfmt->format == dstfmt->format &&
- (srcfmt->format == SDL_PIXELFORMAT_RGB888 || srcfmt->format == SDL_PIXELFORMAT_BGR888)) {
+ /* BPP 4, same rgb */
+ if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
Uint32 *src32 = (Uint32*)src;
Uint32 *dst32 = (Uint32*)dst;
- srcskip /= sizeof(Uint32);
- dstskip /= sizeof(Uint32);
+
+ if (dstfmt->Amask) {
+ /* RGB->RGBA, SET_ALPHA */
+ Uint32 mask = info->a << dstfmt->Ashift;
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ if ((*src32 & rgbmask) != ckey) {
+ *dst32 = *src32 | mask;
+ }
+ ++dst32;
+ ++src32;
+ }, width);
+ /* *INDENT-ON* */
+ src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
+ dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
+ }
+ return;
+ } else {
+ /* RGBA->RGB, NO_ALPHA */
+ Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ if ((*src32 & rgbmask) != ckey) {
+ *dst32 = *src32 & mask;
+ }
+ ++dst32;
+ ++src32;
+ }, width);
+ /* *INDENT-ON* */
+ src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
+ dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
+ }
+ return;
+ }
+ }
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 4->4 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int alpha_channel, p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- if (*src32 != ckey) {
- *dst32 = *src32;
+ Uint32 *src32 = (Uint32*)src;
+
+ if ((*src32 & rgbmask) != ckey) {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
}
- ++src32;
- ++dst32;
- },
- width);
+ src += 4;
+ dst += 4;
+ }, width);
/* *INDENT-ON* */
- src32 += srcskip;
- dst32 += dstskip;
+ src += srcskip;
+ dst += dstskip;
}
return;
}
+#endif
+
+ /* BPP 3, same rgb triplet */
+ if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
+ (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ Uint8 k0 = ckey & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = (ckey >> 16) & 0xFF;
+#else
+ Uint8 k0 = (ckey >> 16) & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = ckey & 0xFF;
+#endif
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 s0 = src[0];
+ Uint8 s1 = src[1];
+ Uint8 s2 = src[2];
+
+ if (k0 != s0 || k1 != s1 || k2 != s2) {
+ dst[0] = s0;
+ dst[1] = s1;
+ dst[2] = s2;
+ }
+ src += 3;
+ dst += 3;
+ },
+ width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+
+ /* BPP 3, inversed rgb triplet */
+ if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
+ (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ Uint8 k0 = ckey & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = (ckey >> 16) & 0xFF;
+#else
+ Uint8 k0 = (ckey >> 16) & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = ckey & 0xFF;
+#endif
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 s0 = src[0];
+ Uint8 s1 = src[1];
+ Uint8 s2 = src[2];
+ if (k0 != s0 || k1 != s1 || k2 != s2) {
+ /* Inversed RGB */
+ dst[0] = s2;
+ dst[1] = s1;
+ dst[2] = s0;
+ }
+ src += 3;
+ dst += 3;
+ },
+ width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+
+ /* Blit with permutation: 4->3 */
+ if (srcbpp == 4 && dstbpp == 3 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint32 *src32 = (Uint32*)src;
+ if ((*src32 & rgbmask) != ckey) {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ }
+ src += 4;
+ dst += 3;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 3->4 */
+ if (srcbpp == 3 && dstbpp == 4 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ Uint8 k0 = ckey & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = (ckey >> 16) & 0xFF;
+#else
+ Uint8 k0 = (ckey >> 16) & 0xFF;
+ Uint8 k1 = (ckey >> 8) & 0xFF;
+ Uint8 k2 = ckey & 0xFF;
+#endif
+
+ /* Find the appropriate permutation */
+ int alpha_channel, p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 s0 = src[0];
+ Uint8 s1 = src[1];
+ Uint8 s2 = src[2];
+
+ if (k0 != s0 || k1 != s1 || k2 != s2) {
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
+ }
+ src += 3;
+ dst += 4;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ return;
+ }
+#endif
while (height--) {
/* *INDENT-OFF* */
@@ -2406,32 +2900,65 @@
ckey &= rgbmask;
/* Fastpath: same source/destination format, with Amask, bpp 32, loop is vectorized. ~10x faster */
- if (srcfmt->format == dstfmt->format &&
- (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
- srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
- srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
- srcfmt->format == SDL_PIXELFORMAT_RGBA8888)) {
- Uint32 *src32 = (Uint32*)src;
- Uint32 *dst32 = (Uint32*)dst;
- srcskip /= sizeof(Uint32);
- dstskip /= sizeof(Uint32);
+ if (srcfmt->format == dstfmt->format) {
+
+ if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
+ srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
+ srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
+ srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {
+
+ Uint32 *src32 = (Uint32*)src;
+ Uint32 *dst32 = (Uint32*)dst;
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ if ((*src32 & rgbmask) != ckey) {
+ *dst32 = *src32;
+ }
+ ++src32;
+ ++dst32;
+ },
+ width);
+ /* *INDENT-ON* */
+ src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+ dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ }
+ }
+ return;
+ }
+
+#if HAVE_FAST_WRITE_INT8
+ /* Blit with permutation: 4->4 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ /* Find the appropriate permutation */
+ int p0, p1, p2, p3;
+ get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
+ Uint32 *src32 = (Uint32*)src;
if ((*src32 & rgbmask) != ckey) {
- *dst32 = *src32;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
}
- ++src32;
- ++dst32;
- },
- width);
+ src += 4;
+ dst += 4;
+ }, width);
/* *INDENT-ON* */
- src32 += srcskip;
- dst32 += dstskip;
+ src += srcskip;
+ dst += dstskip;
}
return;
}
+#endif
while (height--) {
/* *INDENT-OFF* */
@@ -2515,6 +3042,184 @@
}
}
+/* Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet */
+static void
+Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
+{
+ int width = info->dst_w;
+ int height = info->dst_h;
+ Uint8 *src = info->src;
+ int srcskip = info->src_skip;
+ Uint8 *dst = info->dst;
+ int dstskip = info->dst_skip;
+ SDL_PixelFormat *srcfmt = info->src_fmt;
+ int srcbpp = srcfmt->BytesPerPixel;
+ SDL_PixelFormat *dstfmt = info->dst_fmt;
+ int dstbpp = dstfmt->BytesPerPixel;
+
+ if (dstfmt->Amask) {
+ /* SET_ALPHA */
+ Uint32 mask = info->a << dstfmt->Ashift;
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2;
+#else
+ int i0 = srcbpp - 1 - 0;
+ int i1 = srcbpp - 1 - 1;
+ int i2 = srcbpp - 1 - 2;
+#endif
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint32 *dst32 = (Uint32*)dst;
+ Uint8 s0 = src[i0];
+ Uint8 s1 = src[i1];
+ Uint8 s2 = src[i2];
+ *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
+ dst += 4;
+ src += srcbpp;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ } else {
+ /* NO_ALPHA */
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2;
+ int j0 = 0, j1 = 1, j2 = 2;
+#else
+ int i0 = srcbpp - 1 - 0;
+ int i1 = srcbpp - 1 - 1;
+ int i2 = srcbpp - 1 - 2;
+ int j0 = dstbpp - 1 - 0;
+ int j1 = dstbpp - 1 - 1;
+ int j2 = dstbpp - 1 - 2;
+#endif
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 s0 = src[i0];
+ Uint8 s1 = src[i1];
+ Uint8 s2 = src[i2];
+ dst[j0] = s0;
+ dst[j1] = s1;
+ dst[j2] = s2;
+ dst += dstbpp;
+ src += srcbpp;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ }
+}
+
+/* Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet */
+static void
+Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
+{
+ int width = info->dst_w;
+ int height = info->dst_h;
+ Uint8 *src = info->src;
+ int srcskip = info->src_skip;
+ Uint8 *dst = info->dst;
+ int dstskip = info->dst_skip;
+ SDL_PixelFormat *srcfmt = info->src_fmt;
+ int srcbpp = srcfmt->BytesPerPixel;
+ SDL_PixelFormat *dstfmt = info->dst_fmt;
+ int dstbpp = dstfmt->BytesPerPixel;
+
+ if (dstfmt->Amask) {
+ if (srcfmt->Amask) {
+ /* COPY_ALPHA */
+ /* Only to switch ABGR8888 <-> ARGB8888 */
+ while (height--) {
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
+#else
+ int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
+#endif
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint32 *dst32 = (Uint32*)dst;
+ Uint8 s0 = src[i0];
+ Uint8 s1 = src[i1];
+ Uint8 s2 = src[i2];
+ Uint32 alphashift = src[i3] << dstfmt->Ashift;
+ /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+ *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
+ dst += 4;
+ src += 4;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ } else {
+ /* SET_ALPHA */
+ Uint32 mask = info->a << dstfmt->Ashift;
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2;
+#else
+ int i0 = srcbpp - 1 - 0;
+ int i1 = srcbpp - 1 - 1;
+ int i2 = srcbpp - 1 - 2;
+#endif
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint32 *dst32 = (Uint32*)dst;
+ Uint8 s0 = src[i0];
+ Uint8 s1 = src[i1];
+ Uint8 s2 = src[i2];
+ /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+ *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
+ dst += 4;
+ src += srcbpp;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ }
+ } else {
+ /* NO_ALPHA */
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2;
+ int j0 = 2, j1 = 1, j2 = 0;
+#else
+ int i0 = srcbpp - 1 - 0;
+ int i1 = srcbpp - 1 - 1;
+ int i2 = srcbpp - 1 - 2;
+ int j0 = dstbpp - 1 - 2;
+ int j1 = dstbpp - 1 - 1;
+ int j2 = dstbpp - 1 - 0;
+#endif
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 s0 = src[i0];
+ Uint8 s1 = src[i1];
+ Uint8 s2 = src[i2];
+ /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+ dst[j0] = s0;
+ dst[j1] = s1;
+ dst[j2] = s2;
+ dst += dstbpp;
+ src += srcbpp;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+ }
+}
+
/* Normal N to N optimized blitters */
#define NO_ALPHA 1
#define SET_ALPHA 2
@@ -2524,7 +3229,7 @@
Uint32 srcR, srcG, srcB;
int dstbpp;
Uint32 dstR, dstG, dstB;
- Uint32 blit_features;
+ enum blit_features blit_features;
SDL_BlitFunc blitfunc;
Uint32 alpha; /* bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA */
};
@@ -2537,10 +3242,15 @@
#if SDL_ALTIVEC_BLITTERS
/* has-altivec */
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
- 2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
+ BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
{0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
- 2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
+ BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
#endif
+#if SDL_ARM_SIMD_BLITTERS
+ {0x00000F00, 0x000000F0, 0x0000000F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ BLIT_FEATURE_HAS_ARM_SIMD, Blit_RGB444_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA},
+#endif
+#if SDL_HAVE_BLIT_N_RGB565
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
@@ -2549,12 +3259,44 @@
0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
+#endif
/* Default for 16-bit RGB source, used if no other blitter matches */
{0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
static const struct blit_table normal_blit_3[] = {
+ /* 3->4 with same rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__same_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__same_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA},
+ /* 3->4 with inversed rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA},
+ /* 3->3 to switch RGB 24 <-> BGR 24 */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
/* Default for 24-bit RGB source, never optimized */
{0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
@@ -2563,14 +3305,42 @@
#if SDL_ALTIVEC_BLITTERS
/* has-altivec | dont-use-prefetch */
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
- 6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
+ BLIT_FEATURE_HAS_ALTIVEC | BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
/* has-altivec */
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
- 2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
+ BLIT_FEATURE_HAS_ALTIVEC, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
/* has-altivec */
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
- 2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
+ BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
+#if SDL_ARM_SIMD_BLITTERS
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ BLIT_FEATURE_HAS_ARM_SIMD, Blit_BGR888_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA },
+#endif
+ /* 4->3 with same rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ /* 4->3 with inversed rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+ /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA | COPY_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb,
+#if HAVE_FAST_WRITE_INT8
+ NO_ALPHA |
+#endif
+ SET_ALPHA | COPY_ALPHA},
+ /* RGB 888 and RGB 565 */
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
0, Blit_RGB888_RGB565, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
@@ -2623,7 +3393,7 @@
}
} else {
/* Now the meat, choose the blitter we want */
- int a_need = NO_ALPHA;
+ Uint32 a_need = NO_ALPHA;
if (dstfmt->Amask)
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
table = normal_blit[srcfmt->BytesPerPixel - 1];
@@ -2697,4 +3467,6 @@
return NULL;
}
+#endif /* SDL_HAVE_BLIT_N */
+
/* vi: set ts=4 sw=4 expandtab: */
--
Gitblit v1.9.3