I'm talking about this code, from http://source.winehq.org/source/dlls/gdi32/bitblt.c, in function PlgBlt :

577     rect[0].x = nXSrc;
578     rect[0].y = nYSrc;
579     rect[1].x = nXSrc + nWidth;
580     rect[1].y = nYSrc;
581     rect[2].x = nXSrc;
582     rect[2].y = nYSrc + nHeight;
583     /* calc XFORM matrix to transform hdcDest -> hdcSrc (parallelogram to rectangle) */
584     /* determinant */
585     det = rect[1].x*(rect[2].y - rect[0].y) - rect[2].x*(rect[1].y - rect[0].y) - rect[0].x*(rect[2].y - rect[1].y);
586 
587     if (fabs(det) < 1e-5)
588     {
589         SetGraphicsMode(hdcDest,oldgMode);
590         return FALSE;
591     }
592 
593     TRACE("hdcSrc=%p %d,%d,%dx%d -> hdcDest=%p %d,%d,%d,%d,%d,%d\n",
594         hdcSrc, nXSrc, nYSrc, nWidth, nHeight, hdcDest, plg[0].x, plg[0].y, plg[1].x, plg[1].y, plg[2].x, plg[2].y);
595 
596     /* X components */
597     xf.eM11 = (plg[1].x*(rect[2].y - rect[0].y) - plg[2].x*(rect[1].y - rect[0].y) - plg[0].x*(rect[2].y - rect[1].y)) / det;
598     xf.eM21 = (rect[1].x*(plg[2].x - plg[0].x) - rect[2].x*(plg[1].x - plg[0].x) - rect[0].x*(plg[2].x - plg[1].x)) / det;
599     xf.eDx  = (rect[0].x*(rect[1].y*plg[2].x - rect[2].y*plg[1].x) -
600                rect[1].x*(rect[0].y*plg[2].x - rect[2].y*plg[0].x) +
601                rect[2].x*(rect[0].y*plg[1].x - rect[1].y*plg[0].x)
602                ) / det;
603 
604     /* Y components */
605     xf.eM12 = (plg[1].y*(rect[2].y - rect[0].y) - plg[2].y*(rect[1].y - rect[0].y) - plg[0].y*(rect[2].y - rect[1].y)) / det;
606     xf.eM22 = (plg[1].x*(rect[2].y - rect[0].y) - plg[2].x*(rect[1].y - rect[0].y) - plg[0].x*(rect[2].y - rect[1].y)) / det;
607     xf.eDy  = (rect[0].x*(rect[1].y*plg[2].y - rect[2].y*plg[1].y) -
608                rect[1].x*(rect[0].y*plg[2].y - rect[2].y*plg[0].y) +
609                rect[2].x*(rect[0].y*plg[1].y - rect[1].y*plg[0].y)
610                ) / det;


I have a formula that so far seems to give always the correct matrix:

xf.eM11 = (FLOAT) -(rect[1].y*plg[2].x+rect[0].y*(plg[1].x-plg[2].x)-rect[2].y*plg[1].x+plg[0].x*(rect[2].y-rect[1].y)) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);
xf.eM12 = (FLOAT) -(rect[1].y*(plg[2].y-plg[0].y)+rect[0].y*(plg[1].y-plg[2].y)+rect[2].y*(plg[0].y-plg[1].y)) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);
xf.eM21 = (FLOAT) (plg[0].x*(rect[2].x-rect[1].x)-plg[1].x*rect[2].x+plg[2].x*rect[1].x+(plg[1].x-plg[2].x)*rect[0].x) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);
xf.eM22 = (FLOAT) ((plg[0].y-plg[1].y)*rect[2].x+(plg[2].y-plg[0].y)*rect[1].x+(plg[1].y-plg[2].y)*rect[0].x) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);
xf.eDx = (FLOAT) -(rect[0].y*(plg[2].x*rect[1].x-plg[1].x*rect[2].x)+plg[0].x*(rect[1].y*rect[2].x-rect[2].y*rect[1].x)+(rect[2].y*plg[1].x-rect[1].y*plg[2].x)*rect[0].x) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);
xf.eDy = (FLOAT) (rect[0].y*(plg[1].y*rect[2].x-plg[2].y*rect[1].x)-rect[1].y*plg[0].y*rect[2].x+rect[2].y*plg[0].y*rect[1].x+(rect[1].y*plg[2].y-rect[2].y*plg[1].y)*rect[0].x) /(rect[0].y*(rect[2].x-rect[1].x)-rect[1].y*rect[2].x+rect[2].y*rect[1].x+(rect[1].y-rect[2].y)*rect[0].x);


The rect and plg arrays must be of FLOAT points for the formula to work well, like

struct
{
double x;
double y;
}

Otherwise sometimes the compiler will decide to divide integers.

I found out about the bug by converting the rect points using the XFORM matrix, and checking that their the same as the plg points.