[PATCH 5/5] dsound: Get all channel samples in one go.

June 6, 2026

From: Anton Baskanov <baskanov@gmail.com>

Inspired by a patch by Matteo Bruni, which was in turn inspired by a
patch by Giovanni Mascellani.
---
 dlls/dsound/dsound_convert.c | 184 ++++++++++++++++++++++-------------
 dlls/dsound/dsound_private.h |   2 +-
 dlls/dsound/mixer.c          |  68 ++++++++-----
 3 files changed, 160 insertions(+), 94 deletions(-)

diff --git a/dlls/dsound/dsound_convert.c b/dlls/dsound/dsound_convert.c
index 296c1b16e96..c72ee7e5e07 100644
--- a/dlls/dsound/dsound_convert.c
+++ b/dlls/dsound/dsound_convert.c
@@ -55,130 +55,176 @@ WINE_DEFAULT_DEBUG_CHANNEL(dsound);
 #define le32(x) (x)
 #endif
 
-static float get8(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get8(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
+    DWORD channels = dsb->pwfx->nChannels; /* distance, in samples, between successive samples of one channel */
     const BYTE *buf = base + channel;
-    return (buf[0] - 0x80) / (float)0x80;
+    int i;
+    /* Convert 'samples' unsigned 8-bit samples of 'channel', starting at base, into floats stored in dst. */
+    for (i = 0; i < samples; ++i)
+        dst[i] = (buf[i * channels] - 0x80) / (float)0x80; /* re-centre around 0x80 and scale into [-1.0, 1.0) */
 }
 
-static float get16(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get16(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
+    DWORD channels = dsb->pwfx->nChannels; /* distance, in samples, between successive samples of one channel */
     const BYTE *buf = base + 2 * channel;
     const SHORT *sbuf = (const SHORT*)(buf);
-    SHORT sample = (SHORT)le16(*sbuf);
-    return sample / (float)0x8000;
+    int i;
+    /* Convert 'samples' signed 16-bit samples of 'channel', starting at base, into floats stored in dst. */
+    for (i = 0; i < samples; ++i) {
+        SHORT sample = (SHORT)le16(sbuf[i * channels]); /* le16() is defined earlier in the file; presumably a little-endian load */
+        dst[i] = sample / (float)0x8000; /* scale into [-1.0, 1.0) */
+    }
 }
 
-static float get24(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get24(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
-    LONG sample;
+    DWORD channels = dsb->pwfx->nChannels; /* each step of i skips channels * 3 bytes */
     const BYTE *buf = base + 3 * channel;
+    int i; /* dst[i] receives the i-th 24-bit sample of 'channel', converted to float */
 
-    /* The next expression deliberately has an overflow for buf[2] >= 0x80,
-       this is how negative values are made.
-     */
-    sample = (buf[0] << 8) | (buf[1] << 16) | (buf[2] << 24);
-    return sample / (float)0x80000000U;
+    for (i = 0; i < samples; ++i) {
+        /* Pack the three bytes (least significant first) into bits 8..31. The top
+           byte is shifted as DWORD so that values >= 0x80 set bit 31 without a signed
+           shift overflow; storing the result in a LONG makes those samples negative. */
+        LONG sample =
+                (buf[i * channels * 3 + 0] << 8) |
+                (buf[i * channels * 3 + 1] << 16) |
+                ((DWORD)buf[i * channels * 3 + 2] << 24);
+        dst[i] = sample / (float)0x80000000U; /* scale into [-1.0, 1.0) */
+    }
 }
 
-static float get32(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get32(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
+    DWORD channels = dsb->pwfx->nChannels; /* distance, in samples, between successive samples of one channel */
     const BYTE *buf = base + 4 * channel;
     const LONG *sbuf = (const LONG*)(buf);
-    LONG sample = le32(*sbuf);
-    return sample / (float)0x80000000U;
+    int i;
+    /* Convert 'samples' signed 32-bit samples of 'channel', starting at base, into floats stored in dst. */
+    for (i = 0; i < samples; ++i) {
+        LONG sample = le32(sbuf[i * channels]); /* le32() is the identity in the #define visible above this hunk */
+        dst[i] = sample / (float)0x80000000U; /* divide by 2^31 to normalise */
+    }
 }
 
-static float getieee32(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void getieee32(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
+    DWORD channels = dsb->pwfx->nChannels; /* distance, in samples, between successive samples of one channel */
     const BYTE *buf = base + 4 * channel;
     const float *sbuf = (const float*)(buf);
-    /* The value will be clipped later, when put into some non-float buffer */
-    return *sbuf;
+    int i;
+    /* Copy 'samples' float samples of 'channel', starting at base, into dst without scaling. */
+    for (i = 0; i < samples; ++i)
+        /* The value will be clipped later, when put into some non-float buffer */
+        dst[i] = sbuf[i * channels];
 }
 
 const bitsgetfunc getbpp[5] = {get8, get16, get24, get32, getieee32};
 
-static float get8_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get8_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
     DWORD channels = dsb->pwfx->nChannels;
     DWORD c;
-    float val = 0;
-    /* XXX: does Windows include LFE into the mix? */
-    for (c = 0; c < channels; c++) {
-        const BYTE *buf = base + c;
-        val += (buf[0] - 0x80) / (float)0x80;
+    int i;
+    /* Downmix to mono: dst[i] is the mean over all channels of the i-th unsigned 8-bit sample; 'channel' is not used. */
+    for (i = 0; i < samples; ++i) {
+        float val = 0; /* running sum over the channels */
+        /* XXX: does Windows include LFE into the mix? */
+        for (c = 0; c < channels; c++) {
+            const BYTE *buf = base + c; /* first sample of channel c */
+            val += (buf[i * channels] - 0x80) / (float)0x80; /* same scaling as get8() */
+        }
+        val /= channels; /* sum -> average */
+        dst[i] = val;
     }
-    val /= channels;
-    return val;
 }
 
-static float get16_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get16_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
     DWORD channels = dsb->pwfx->nChannels;
     DWORD c;
-    float val = 0;
-    /* XXX: does Windows include LFE into the mix? */
-    for (c = 0; c < channels; c++) {
-        const BYTE *buf = base + 2 * c;
-        const SHORT *sbuf = (const SHORT*)(buf);
-        SHORT sample = (SHORT)le16(*sbuf);
-        val += sample / (float)0x8000;
+    int i;
+    /* Downmix to mono: dst[i] is the mean over all channels of the i-th signed 16-bit sample; 'channel' is not used. */
+    for (i = 0; i < samples; ++i) {
+        float val = 0; /* running sum over the channels */
+        /* XXX: does Windows include LFE into the mix? */
+        for (c = 0; c < channels; c++) {
+            const BYTE *buf = base + 2 * c; /* first sample of channel c */
+            const SHORT *sbuf = (const SHORT*)(buf);
+            SHORT sample = (SHORT)le16(sbuf[i * channels]);
+            val += sample / (float)0x8000; /* same scaling as get16() */
+        }
+        val /= channels; /* sum -> average */
+        dst[i] = val;
     }
-    val /= channels;
-    return val;
 }
 
-static float get24_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get24_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
     DWORD channels = dsb->pwfx->nChannels;
     DWORD c;
-    float val = 0;
-    /* XXX: does Windows include LFE into the mix? */
-    for (c = 0; c < channels; c++) {
-        LONG sample;
-        const BYTE *buf = base + 3 * c;
+    int i; /* dst[i] receives the mean over all channels of the i-th 24-bit sample; 'channel' is not used */
 
-        /* The next expression deliberately has an overflow for buf[2] >= 0x80,
-           this is how negative values are made.
-         */
-        sample = (buf[0] << 8) | (buf[1] << 16) | (buf[2] << 24);
-        val += sample / (float)0x80000000U;
+    for (i = 0; i < samples; ++i) {
+        float val = 0; /* running sum over the channels */
+        /* XXX: does Windows include LFE into the mix? */
+        for (c = 0; c < channels; c++) {
+            LONG sample;
+            const BYTE *buf = base + 3 * c; /* first sample of channel c */
+
+            /* Pack the three bytes (least significant first) into bits 8..31. The top
+               byte is shifted as DWORD so that values >= 0x80 set bit 31 without a signed
+               shift overflow; storing the result in a LONG makes those samples negative. */
+            sample = (buf[i * channels * 3 + 0] << 8) |
+                    (buf[i * channels * 3 + 1] << 16) |
+                    ((DWORD)buf[i * channels * 3 + 2] << 24);
+            val += sample / (float)0x80000000U; /* same scaling as get24() */
+        }
+        val /= channels; /* sum -> average */
+        dst[i] = val;
     }
-    val /= channels;
-    return val;
 }
 
-static float get32_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void get32_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
     DWORD channels = dsb->pwfx->nChannels;
     DWORD c;
-    float val = 0;
-    /* XXX: does Windows include LFE into the mix? */
-    for (c = 0; c < channels; c++) {
-        const BYTE *buf = base + 4 * c;
-        const LONG *sbuf = (const LONG*)(buf);
-        LONG sample = le32(*sbuf);
-        val += sample / (float)0x80000000U;
+    int i;
+    /* Downmix to mono: dst[i] is the mean over all channels of the i-th signed 32-bit sample; 'channel' is not used. */
+    for (i = 0; i < samples; ++i) {
+        float val = 0; /* running sum over the channels */
+        /* XXX: does Windows include LFE into the mix? */
+        for (c = 0; c < channels; c++) {
+            const BYTE *buf = base + 4 * c; /* first sample of channel c */
+            const LONG *sbuf = (const LONG*)(buf);
+            LONG sample = le32(sbuf[i * channels]);
+            val += sample / (float)0x80000000U; /* same scaling as get32() */
+        }
+        val /= channels; /* sum -> average */
+        dst[i] = val;
     }
-    val /= channels;
-    return val;
 }
 
-static float getieee32_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel)
+static void getieee32_mono(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel)
 {
     DWORD channels = dsb->pwfx->nChannels;
     DWORD c;
-    float val = 0;
-    /* XXX: does Windows include LFE into the mix? */
-    for (c = 0; c < channels; c++) {
-        const BYTE *buf = base + 4 * c;
-        const float *sbuf = (const float*)(buf);
-        /* The value will be clipped later, when put into some non-float buffer */
-        val += *sbuf;
+    int i;
+    /* Downmix to mono: dst[i] is the mean over all channels of the i-th float sample; 'channel' is not used. */
+    for (i = 0; i < samples; ++i) {
+        float val = 0; /* running sum over the channels */
+        /* XXX: does Windows include LFE into the mix? */
+        for (c = 0; c < channels; c++) {
+            const BYTE *buf = base + 4 * c; /* first sample of channel c */
+            const float *sbuf = (const float*)(buf);
+            /* The value will be clipped later, when put into some non-float buffer */
+            val += sbuf[i * channels];
+        }
+        val /= channels; /* sum -> average */
+        dst[i] = val;
     }
-    val /= channels;
-    return val;
 }
 
 const bitsgetfunc getbpp_mono[5] = {get8_mono, get16_mono, get24_mono, get32_mono, getieee32_mono};
diff --git a/dlls/dsound/dsound_private.h b/dlls/dsound/dsound_private.h
index b1443bf49ad..569ffc7c234 100644
--- a/dlls/dsound/dsound_private.h
+++ b/dlls/dsound/dsound_private.h
@@ -43,7 +43,7 @@ typedef struct IDirectSoundBufferImpl        IDirectSoundBufferImpl;
 typedef struct DirectSoundDevice             DirectSoundDevice;
 
 /* dsound_convert.h */
-typedef float (*bitsgetfunc)(const IDirectSoundBufferImpl *dsb, BYTE *base, DWORD channel);
+typedef void (*bitsgetfunc)(const IDirectSoundBufferImpl *dsb, BYTE *base, float *dst, unsigned samples, DWORD channel); /* converts 'samples' source samples starting at base into floats in dst */
 typedef void (*bitsputfunc)(const IDirectSoundBufferImpl *dsb, float *src, unsigned samples, DWORD channel);
 extern const bitsgetfunc getbpp[5];
 extern const bitsgetfunc getbpp_mono[5];
diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c
index 7c2d99d65f1..01f1b139c32 100644
--- a/dlls/dsound/mixer.c
+++ b/dlls/dsound/mixer.c
@@ -273,12 +273,32 @@ void DSOUND_CheckEvent(const IDirectSoundBufferImpl *dsb, DWORD playpos, int len
     }
 }
 
-static inline float get_current_sample(const IDirectSoundBufferImpl *dsb,
-        BYTE *buffer, DWORD buflen, DWORD mixpos, DWORD channel)
+static inline void get_samples(const IDirectSoundBufferImpl *dsb, BYTE *buffer, DWORD buflen,
+        DWORD mixpos, DWORD channel, DWORD count, float *dst)
 {
-    if (mixpos >= buflen && !(dsb->playflags & DSBPLAY_LOOPING))
-        return 0.0f;
-    return dsb->get(dsb, buffer + (mixpos % buflen), channel);
+    UINT istride = dsb->pwfx->nBlockAlign; /* size in bytes of one block of the source format */
+    DWORD advance; /* number of samples requested from dsb->get() in one call */
+    DWORD pos; /* number of entries of dst filled so far (looping path) */
+    /* Fill dst[0..count) with samples of 'channel' read from buffer (buflen bytes), starting at byte offset mixpos. */
+    if (!(dsb->playflags & DSBPLAY_LOOPING) || buflen < istride) { /* a buffer shorter than one block cannot loop: the wrap loop below would never advance */
+        if (mixpos >= buflen) { /* already past the end: nothing but silence */
+            memset(dst, 0, count * sizeof(float));
+            return;
+        }
+        advance = min((buflen - mixpos) / istride, count); /* whole blocks left before the end of the buffer */
+        dsb->get(dsb, buffer + mixpos, dst, advance, channel);
+        memset(dst + advance, 0, (count - advance) * sizeof(float)); /* pad the remainder with silence */
+        return;
+    }
+    /* Looping: read up to the end of the buffer, then keep restarting from its beginning until dst is full. */
+    advance = min((buflen - mixpos % buflen) / istride, count);
+    dsb->get(dsb, buffer + mixpos % buflen, dst, advance, channel);
+    pos = advance;
+    while (pos < count) {
+        advance = min(buflen / istride, count - pos); /* at most one full pass over the buffer per iteration */
+        dsb->get(dsb, buffer, dst + pos, advance, channel);
+        pos += advance;
+    }
 }
 
 #ifdef __SSE__
@@ -517,7 +537,7 @@ static void resample(DWORD freq_adjust_num, DWORD freq_adjust_den, DWORD freq_ac
 
 static UINT cp_fields_resample(IDirectSoundBufferImpl *dsb, UINT count, DWORD *freqAccNum)
 {
-    UINT i, channel;
+    UINT channel;
     UINT istride = dsb->pwfx->nBlockAlign;
     UINT committed_samples = 0;
 
@@ -529,7 +549,7 @@ static UINT cp_fields_resample(IDirectSoundBufferImpl *dsb, UINT count, DWORD *f
     UINT required_input = max(
             (freqAcc_start + (count - 1) * dsb->freqAdjustNum) / dsb->freqAdjustDen + FIR_WIDTH,
             (freqAcc_start + (count - 1 + FIR_WIDTH) * dsb->freqAdjustNum) / dsb->freqAdjustDen);
-    float *intermediate, *output, *itmp;
+    float *intermediate, *output;
 
     DWORD len = required_input * channels;
     /* Allocate an output buffer for each channel with padding on both ends as
@@ -563,14 +583,14 @@ static UINT cp_fields_resample(IDirectSoundBufferImpl *dsb, UINT count, DWORD *f
      * if you want -msse3 to have any effect.
      * This is good for CPU cache effects, too.
      */
-    itmp = intermediate;
     for (channel = 0; channel < channels; channel++) {
-        for (i = 0; i < committed_samples; i++)
-            *(itmp++) = get_current_sample(dsb, dsb->committedbuff,
-                dsb->writelead, dsb->committed_mixpos + i * istride, channel);
-        for (; i < required_input; i++)
-            *(itmp++) = get_current_sample(dsb, dsb->buffer->memory,
-                    dsb->buflen, dsb->sec_mixpos + i * istride, channel);
+        get_samples(dsb, dsb->committedbuff, dsb->writelead, dsb->committed_mixpos, channel,
+                committed_samples, intermediate + channel * required_input);
+        if (required_input > committed_samples)
+            get_samples(dsb, dsb->buffer->memory, dsb->buflen,
+                    dsb->sec_mixpos + committed_samples * istride, channel,
+                    required_input - committed_samples,
+                    intermediate + channel * required_input + committed_samples);
     }
 
     for (channel = 0; channel < channels; channel++)
@@ -587,9 +607,9 @@ static UINT cp_fields_resample(IDirectSoundBufferImpl *dsb, UINT count, DWORD *f
 static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count)
 {
     UINT istride = dsb->pwfx->nBlockAlign;
-    float *intermediate, *itmp;
     UINT committed_samples = 0;
-    DWORD channel, i;
+    float *intermediate;
+    DWORD channel;
 
     DWORD len = count * dsb->mix_channels;
     len *= sizeof(float);
@@ -612,14 +632,14 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count)
         committed_samples = committed_samples <= count ? committed_samples : count;
     }
 
-    itmp = intermediate;
-    for (channel = 0; channel < dsb->mix_channels; channel++) {
-        for (i = 0; i < committed_samples; i++)
-            *(itmp++) = get_current_sample(dsb, dsb->committedbuff,
-                dsb->writelead, dsb->committed_mixpos + i * istride, channel);
-        for (; i < count; i++)
-            *(itmp++) = get_current_sample(dsb, dsb->buffer->memory,
-                    dsb->buflen, dsb->sec_mixpos + i * istride, channel);
+    for (channel = 0; channel < dsb->mix_channels; channel++)
+    {
+        get_samples(dsb, dsb->committedbuff, dsb->writelead, dsb->committed_mixpos, channel,
+                committed_samples, intermediate + channel * count);
+        if (count > committed_samples)
+            get_samples(dsb, dsb->buffer->memory, dsb->buflen,
+                    dsb->sec_mixpos + committed_samples * istride, channel,
+                    count - committed_samples, intermediate + channel * count + committed_samples);
     }
 
     for (channel = 0; channel < dsb->mix_channels; channel++)
-- 
GitLab

https://gitlab.winehq.org/wine/wine/-/merge_requests/11082