Jesse Allen wrote:
On 9/8/07, Martin Owens doctormo@gmail.com wrote:
Does this mean that the current _painfully_ slow Imperialism II, which uses DIB drawing to draw its in-game maps, will work faster? I'm waiting for this functionality and have pledged $20 just in case it can spur on the solution.
Best Regards, Martin Owens
I don't know, you'll have to try it yourself. It depends on what the app is doing. You can get a checkout of the tree here: http://repo.or.cz/w/wine/dibdrv.git
Beware there are probably visual regressions, but it's speed you're checking here right? :)
Jesse
Hi, as part of my struggle to understand the drivers I played a little with winex11.drv and made some simple optimizations. Actually the changes make 'Settlers 3' playable. On my Duron/900 the delay caused by copying bitmaps is hardly noticeable (it's no longer a .5sec lag, now it looks like a frame or two dropped once every few seconds). If anyone wants to give it a try with S3, then there's another problem with a race condition on WaitFor..Object/SetEvent/ResetEvent, but that can be overcome by just disabling ResetEvent (simple return STATUS_SUCCESS; at the beginning of NtResetEvent). I'm trying to get a more generic solution to the ResetEvent problem but it seems related to the lack of thread boost after WaitFor(Single|Multiple)Object on Wine (seems like poor design of game code) and is quite difficult in general. As I'm new to Wine/X development, what I'd appreciate the most is critical comments on the attached patch: is omitting XGetPixel/XSetPixel and directly reading XImage data a correct solution? Am I allowed to use inline assembler in the above case? Any other issues? Thanks & regards Piotr Maceluch
-- ----- AlphaNet - najtaniej w sieci! -------- Odnowienia domen w rewelacyjnych cenach! .pl - 65 zl, .com.pl - 50 zl, reg - 20 zl http://www.domeny.alpha.pl --------------------------------------------
From 9d26b16c37a08aaf54f66712563cdcd523c1a95a Mon Sep 17 00:00:00 2001
From: Piotr Maceluch skrzynka365@konto.pl Date: Mon, 10 Sep 2007 22:56:25 +0200 Subject: [PATCH] Optimized copying of 1bit bitmaps to and from XImage data.
The copying operations for copying 1bit bitmap to memory and other way no longer employ XGetPixel/XSetPixel. Instead memory operations are used. They're implemented in asm memcpy which does bit order swapping on the fly (swapping is lookup table based). --- dlls/winex11.drv/dib.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 137 insertions(+), 0 deletions(-)
diff --git a/dlls/winex11.drv/dib.c b/dlls/winex11.drv/dib.c index 00b1f23..0e938b8 100644 --- a/dlls/winex11.drv/dib.c +++ b/dlls/winex11.drv/dib.c @@ -58,6 +58,9 @@ static PVOID dibs_handler;
static int ximageDepthTable[32];
+static BYTE bit_reversal_lookup_table[0x100]; //bit-order-reversed value for every 8bit value +static int bit_reversal_lookup_table_initialized = 0; + /* This structure holds the arguments for DIB_SetImageBits() */ typedef struct { @@ -101,6 +104,9 @@ static INT X11DRV_DIB_Coerce(X_PHYSBITMAP *,INT,BOOL); static INT X11DRV_DIB_Lock(X_PHYSBITMAP *,INT,BOOL); static void X11DRV_DIB_Unlock(X_PHYSBITMAP *,BOOL);
+static void X11DRV_InitBitReversalLookupTable(); +static void X11DRV_MemoryCopyWithBitOrderSwap(void *dest, const void *src, int byteCount); + /* Some of the following helper functions are duplicated in dlls/gdi/dib.c @@ -508,6 +514,30 @@ static void X11DRV_DIB_SetImageBits_1( int lines, const BYTE *srcbits, width = min(srcwidth, dstwidth);
/* ==== pal 1 dib -> any bmp format ==== */ + if(((bmpImage->bits_per_pixel | bmpImage->depth) == 1) && + (bmpImage->byte_order == bmpImage->bitmap_bit_order)) + { + //optimization for 1to1 copy + int y; + BYTE* bitmapPtr = ((BYTE*)(bmpImage->data)) + (bmpImage->xoffset >> 3) + + ((lines - 1) * bmpImage->bytes_per_line); + const BYTE* dataPtr = srcbits; + + TRACE("1 bit data -> 1 bit bmp, copy: width=%i, lines=%i, image->bpl=%i, dest_bpl=%i\n", + width, lines, bmpImage->bytes_per_line, linebytes); + + X11DRV_InitBitReversalLookupTable(); + + for (y = 0; y != lines; y++) + { + X11DRV_MemoryCopyWithBitOrderSwap(bitmapPtr, dataPtr, (width + 7) >> 3); + + dataPtr += linebytes; + bitmapPtr -= bmpImage->bytes_per_line; + } + } + else + { for (h = lines-1; h >=0; h--) { srcbyte=srcbits; /* FIXME: should avoid putting x<left pixels (minor speed issue) */ @@ -537,6 +567,7 @@ static void X11DRV_DIB_SetImageBits_1( int lines, const BYTE *srcbits, } srcbits += linebytes; } + } }
/*********************************************************************** @@ -561,6 +592,27 @@ static void X11DRV_DIB_GetImageBits_1( int lines, BYTE *dstbits, switch (bmpImage->depth) { case 1: + { + int y; + const BYTE* bitmapPtr = ((BYTE*)(bmpImage->data)) + (bmpImage->xoffset >> 3) + + ((lines - 1) * bmpImage->bytes_per_line); + BYTE* dataPtr = dstbits; + + TRACE("1 bit bmp -> 1 bit data, copy: width=%i, lines=%i, image->bpl=%i, dest_bpl=%i\n", + width, lines, bmpImage->bytes_per_line, linebytes); + + X11DRV_InitBitReversalLookupTable(); + + for (y = 0; y != lines; y++) + { + //used (width + 7) so rounded down will always contain last pixels in line when ((width % 8) != 0) + X11DRV_MemoryCopyWithBitOrderSwap(dataPtr, bitmapPtr, (width + 7) >> 3); + + dataPtr += linebytes; + bitmapPtr -= bmpImage->bytes_per_line; + } + } + break; case 4: if (X11DRV_DIB_CheckMask(bmpImage->red_mask,bmpImage->green_mask,bmpImage->blue_mask) && srccolors) { @@ -4880,3 +4932,88 @@ Pixmap X11DRV_DIB_CreatePixmapFromDIB( HGLOBAL hPackedDIB, HDC hdc ) TRACE("Returning Pixmap %ld\n", pixmap); return pixmap; } + + +/** Returns given input byte with bit order reversed. */ +static BYTE X11DRV_ReverseBits(BYTE input) +{ + BYTE output = 0; + int i; + for(i = 0; i != 8; i++) + { + output |= (input & 1); + input >>= 1; + if(i != 7) + { + output <<= 1; + } + } + return output; +} + +/* Initializes bit_reversal_lookup_table with bit-order-reversed values for + * every possible byte value. */ +static void X11DRV_InitBitReversalLookupTable() +{ + int i; + + if(bit_reversal_lookup_table_initialized) + { + //skip if already initialized + return; + } + + for(i = 0; i != 0x100; i++) + { + bit_reversal_lookup_table[i] = X11DRV_ReverseBits((BYTE)i); + } + + bit_reversal_lookup_table_initialized = 1; +} + +/* Copies byteCount bytes from src to dest and changes bit order of copied + * data bytes. + * FIXME: The function doesn't work when declared as inline or static inline. 
+ * */ +static void X11DRV_MemoryCopyWithBitOrderSwap(void *dest, const void *src, int byteCount) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "cld\n" + "xorl %%edx, %%edx\n" + "shr $2, %%ecx\n" + "jz 2f\n" + + "1:\n" + "lodsl\n" + "movb %%al, %%dl\n" + "movb bit_reversal_lookup_table(%%edx),%%al\n" + "movb %%ah, %%dl\n" + "movb bit_reversal_lookup_table(%%edx),%%ah\n" + "bswap %%eax\n" + "movb %%al, %%dl\n" + "movb bit_reversal_lookup_table(%%edx),%%al\n" + "movb %%ah, %%dl\n" + "movb bit_reversal_lookup_table(%%edx),%%ah\n" + "bswap %%eax\n" + + "stosl\n" + "loop 1b\n" + + "2:\n" + "popl %%ecx\n" + "andl $3, %%ecx\n" + "jz 4f\n" + + "3:\n" + "lodsb\n" + "movb %%al, %%dl\n" + "movb bit_reversal_lookup_table(%%edx),%%al\n" + "stosb\n" + "loop 3b\n" + + "4:\n" + : //output + : "S" (src), "D" (dest), "c" (byteCount) //input + : "eax", "edx", "cc" );//cc - condition codes (flags) +}