[PATCH v2 0/5] MR10423: dsound: Speed up resampling, part 5

newer
[PATCH 0/1] MR10524: configure:...

older
[PATCH 0/2] MR10523: winemac.drv:...

Anton Baskanov (＠baskanov)

March 26, 2026

1:52 a.m.

-- v2: dsound: Calculate rem and rem_inv incrementally. https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Show replies by date

Anton Baskanov

March 2026

1:52 a.m.

New subject: [PATCH v2 1/5] dsound: Replace multiplications by fir_step and fir_width with bit shifts.

From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/fir.h | 4 ++++ dlls/dsound/mixer.c | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dlls/dsound/fir.h b/dlls/dsound/fir.h index c03d0b9f237..45ad65d7398 100644 --- a/dlls/dsound/fir.h +++ b/dlls/dsound/fir.h @@ -86,7 +86,9 @@ int main() fprintf(stderr, "q %f\n", (double)output.q); fprintf(stderr, "status %s\n", get_pm_status_str(output.status)); + printf("static const int fir_width_shift = %d;\n", fir_width_shift); printf("static const int fir_width = %d;\n", fir_width); + printf("static const int fir_step_shift = %d;\n", fir_step_shift); printf("static const int fir_step = %d;\n", fir_step); printf("static const float fir[] = {"); // Print the FIR array with an additional row at the end. This simplifies @@ -112,7 +114,9 @@ int main() printf("};\n"); } */ +static const int fir_width_shift = 6; static const int fir_width = 64; +static const int fir_step_shift = 7; static const int fir_step = 128; static const float fir[] = { 0.0000000000e+00, -2.4830013102e-06, 1.9318705150e-06, 2.6614854151e-06, diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 43a9a8c4f91..3003715b413 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -322,8 +322,8 @@ static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 fr /* opos is in the range [-(fir_width - 1), count) */ int opos = opos_num / freq_adjust_num - fir_width; - UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) * fir_step; - UINT idx = idx_num / freq_adjust_num * fir_width; + UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) << fir_step_shift; + UINT idx = (idx_num / freq_adjust_num) << fir_width_shift; float rem = idx_num % freq_adjust_num / (float)freq_adjust_num; float input_value = input[j] * firgain; @@ -345,8 +345,8 @@ static void upsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq LONG64 ipos_num = freq_acc_start + i * freq_adjust_num; UINT ipos = 
ipos_num / freq_adjust_den; - UINT idx_num = ipos_num % freq_adjust_den * fir_step; - UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) * fir_width; + UINT idx_num = (ipos_num % freq_adjust_den) << fir_step_shift; + UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) << fir_width_shift; float rem_inv = idx_num % freq_adjust_den / (float)freq_adjust_den; float rem = 1.0f - rem_inv; -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Anton Baskanov

1:52 a.m.

New subject: [PATCH v2 2/5] dsound: Use a 0.32 fixed point to represent the resampling ratio.

From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 3003715b413..530b8e23d69 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -312,19 +312,19 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) * Note that this function will overwrite up to fir_width - 1 frames before and * after output[]. */ -static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq_acc_start, - float firgain, UINT required_input, float *input, float *output) +static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, + UINT required_input, float *input, float *output) { int j; for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * freq_adjust_den + freq_adjust_num - 1; + LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ - int opos = opos_num / freq_adjust_num - fir_width; + int opos = (int)(opos_num >> 32) - fir_width; - UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) << fir_step_shift; - UINT idx = (idx_num / freq_adjust_num) << fir_width_shift; - float rem = idx_num % freq_adjust_num / (float)freq_adjust_num; + UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; + UINT rem_num = ~(DWORD)opos_num << fir_step_shift; + float rem = rem_num * (1.0f / (1ll << 32)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -336,18 +336,18 @@ static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 fr } } -static void upsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq_acc_start, - UINT count, float *input, float *output) +static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float 
*input, + float *output) { UINT i; for(i = 0; i < count; ++i) { - LONG64 ipos_num = freq_acc_start + i * freq_adjust_num; - UINT ipos = ipos_num / freq_adjust_den; + LONG64 ipos_num = freq_acc_start + i * (LONG64)freq_adjust_num; + UINT ipos = ipos_num >> 32; - UINT idx_num = (ipos_num % freq_adjust_den) << fir_step_shift; - UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) << fir_width_shift; - float rem_inv = idx_num % freq_adjust_den / (float)freq_adjust_den; + UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; + UINT rem_num = (DWORD)ipos_num << fir_step_shift; + float rem_inv = rem_num * (1.0f / (1ll << 32)); float rem = 1.0f - rem_inv; int j; @@ -368,11 +368,21 @@ static void resample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq float firgain, UINT required_input, UINT count, float *input, float *output) { if (freq_adjust_num > freq_adjust_den) { + /* Convert the values to 0.32 fixed point. Round down to prevent output + * buffer overflow. */ + DWORD freq_adjust_fixed_den = (freq_adjust_den << 32) / freq_adjust_num; + DWORD freq_acc_fixed_start = freq_acc_start * freq_adjust_fixed_den / freq_adjust_den; + memset(output, 0, count * sizeof(float)); - downsample(freq_adjust_num, freq_adjust_den, freq_acc_start, firgain, required_input, - input, output); + downsample(freq_adjust_fixed_den, freq_acc_fixed_start, firgain, required_input, input, + output); } else { - upsample(freq_adjust_num, freq_adjust_den, freq_acc_start, count, input, output); + /* Convert the values to 0.32 fixed point. Round down to prevent input + * buffer overflow. */ + DWORD freq_adjust_fixed_num = (freq_adjust_num << 32) / freq_adjust_den; + DWORD freq_acc_fixed_start = (freq_acc_start << 32) / freq_adjust_den; + + upsample(freq_adjust_fixed_num, freq_acc_fixed_start, count, input, output); } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Anton Baskanov

1:53 a.m.

New subject: [PATCH v2 3/5] dsound: Make rem_num signed.

From: Anton Baskanov <baskanov@gmail.com> Both x87 and SSE don't accept unsigned integers directly, requiring extra conversion steps. --- dlls/dsound/mixer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 530b8e23d69..fefc0d2fe05 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -323,8 +323,8 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai int opos = (int)(opos_num >> 32) - fir_width; UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; - UINT rem_num = ~(DWORD)opos_num << fir_step_shift; - float rem = rem_num * (1.0f / (1ll << 32)); + int rem_num = ~(DWORD)opos_num << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -346,8 +346,8 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl UINT ipos = ipos_num >> 32; UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; - UINT rem_num = (DWORD)ipos_num << fir_step_shift; - float rem_inv = rem_num * (1.0f / (1ll << 32)); + int rem_num = (DWORD)ipos_num << fir_step_shift >> 1; + float rem_inv = rem_num * (1.0f / (1ll << 31)); float rem = 1.0f - rem_inv; int j; -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Anton Baskanov

1:53 a.m.

New subject: [PATCH v2 4/5] dsound: Calculate opos_num and ipos_num incrementally.

From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index fefc0d2fe05..e3190535f19 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -315,10 +315,11 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, UINT required_input, float *input, float *output) { + LONG64 opos_num = freq_adjust_den - freq_acc_start + (LONG64)~0u; + DWORD opos_num_step = freq_adjust_den; int j; for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ int opos = (int)(opos_num >> 32) - fir_width; @@ -333,16 +334,19 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai UINT i; for (i = 0; i < fir_width; ++i) output[opos + i] += fir[idx + i] * input_value0 + fir[idx + fir_width + i] * input_value1; + + opos_num += opos_num_step; } } static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float *input, float *output) { + LONG64 ipos_num = freq_acc_start; + DWORD ipos_num_step = freq_adjust_num; UINT i; for(i = 0; i < count; ++i) { - LONG64 ipos_num = freq_acc_start + i * (LONG64)freq_adjust_num; UINT ipos = ipos_num >> 32; UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; @@ -357,6 +361,8 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl for (j = 0; j < fir_width; j++) sum += (fir[idx + j] * rem_inv + fir[idx + j + fir_width] * rem) * cache[j]; output[i] = sum; + + ipos_num += ipos_num_step; } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Anton Baskanov

1:53 a.m.

New subject: [PATCH v2 5/5] dsound: Calculate rem and rem_inv incrementally.

From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index e3190535f19..c7565c03fa6 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -315,17 +315,23 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, UINT required_input, float *input, float *output) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + (LONG64)~0u; - DWORD opos_num_step = freq_adjust_den; + /* Clear the lower bits of opos_num and opos_num_step to keep rem in perfect + * sync with the lower part of opos_num. As rem is always less than 2, its + * fractional part has 23 bits in the worst case. */ + LONG64 opos_num_mask = ~0ull << (32 - 23 - fir_step_shift); + LONG64 opos_num = (freq_adjust_den - freq_acc_start + (LONG64)~0u) & opos_num_mask; + DWORD opos_num_step = freq_adjust_den & (DWORD)opos_num_mask; + + DWORD rem_num = ((DWORD)opos_num ^ (DWORD)opos_num_mask) << fir_step_shift >> 1; + DWORD rem_step_num = -opos_num_step << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31)); + float rem_step = rem_step_num * (1.0f / (1ll << 31)); int j; for (j = 0; j < required_input; ++j) { /* opos is in the range [-(fir_width - 1), count) */ int opos = (int)(opos_num >> 32) - fir_width; - UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; - int rem_num = ~(DWORD)opos_num << fir_step_shift >> 1; - float rem = rem_num * (1.0f / (1ll << 31)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -335,6 +341,9 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai for (i = 0; i < fir_width; ++i) output[opos + i] += fir[idx + i] * input_value0 + fir[idx + fir_width + i] * input_value1; + rem += rem_step; + rem -= rem >= 1.0f ? 
1.0f : 0.0f; + opos_num += opos_num_step; } } @@ -342,16 +351,22 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float *input, float *output) { - LONG64 ipos_num = freq_acc_start; - DWORD ipos_num_step = freq_adjust_num; + /* Clear the lower bits of ipos_num and ipos_num_step to keep rem_inv in + * perfect sync with the lower part of ipos_num. As rem is always less than + * 2, its fractional part has 23 bits in the worst case. */ + DWORD ipos_num_mask = ~0u << (32 - 23 - fir_step_shift); + LONG64 ipos_num = freq_acc_start & ipos_num_mask; + DWORD ipos_num_step = freq_adjust_num & ipos_num_mask; + + DWORD rem_num = (DWORD)ipos_num << fir_step_shift >> 1; + DWORD rem_step_num = ipos_num_step << fir_step_shift >> 1; + float rem_inv = rem_num * (1.0f / (1ll << 31)); + float rem_inv_step = rem_step_num * (1.0f / (1ll << 31)); UINT i; for(i = 0; i < count; ++i) { UINT ipos = ipos_num >> 32; - UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; - int rem_num = (DWORD)ipos_num << fir_step_shift >> 1; - float rem_inv = rem_num * (1.0f / (1ll << 31)); float rem = 1.0f - rem_inv; int j; @@ -362,6 +377,9 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl sum += (fir[idx + j] * rem_inv + fir[idx + j + fir_width] * rem) * cache[j]; output[i] = sum; + rem_inv += rem_inv_step; + rem_inv -= rem_inv >= 1.0f ? 1.0f : 0.0f; + ipos_num += ipos_num_step; } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423

Anton Baskanov (＠baskanov)

1:54 a.m.

v2: - Use `>=` when comparing `rem_inv` to `1.0f`. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_133820

Anton Baskanov (＠baskanov)

1:54 a.m.

On Thu Mar 26 01:52:58 2026 +0000, Anton Baskanov wrote:

...

changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/10423/diffs?diff_id=254387&start_sha=4a16374e84ef5e45cff2ac0b8973a4522afe959b#3611483beebc594b1206bbcc979e9584d27bf7ce_381_381) Done.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_133821

Matteo Bruni (＠Mystral)

12:16 p.m.

Matteo Bruni (@Mystral) commented about dlls/dsound/mixer.c:

...

* Note that this function will overwrite up to fir_width - 1 frames before and * after output[]. */ -static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq_acc_start, - float firgain, UINT required_input, float *input, float *output) +static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, + UINT required_input, float *input, float *output) { int j;

for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * freq_adjust_den + freq_adjust_num - 1; + LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ - int opos = opos_num / freq_adjust_num - fir_width; + int opos = (int)(opos_num >> 32) - fir_width; It might be nice to introduce a couple of `#define`s, like:

#define POS_FRAC_SHIFT 32
#define POS_FRAC_MASK ((1ull << POS_FRAC_SHIFT) - 1)

I'm not attached to the specific naming. The choice here comes from the idea of eventually ending up with something like https://gitlab.winehq.org/Mystral/wine/-/commit/aca8b39927dd75268cb18fe19307..., which still seems like a good improvement to have :slight_smile: -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_134503

Matteo Bruni (＠Mystral)

12:16 p.m.

Matteo Bruni (@Mystral) commented about dlls/dsound/mixer.c:

...

+static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, + UINT required_input, float *input, float *output) { int j;

for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * freq_adjust_den + freq_adjust_num - 1; + LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ - int opos = opos_num / freq_adjust_num - fir_width; + int opos = (int)(opos_num >> 32) - fir_width;

- UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) << fir_step_shift; - UINT idx = (idx_num / freq_adjust_num) << fir_width_shift; - float rem = idx_num % freq_adjust_num / (float)freq_adjust_num; + UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; Note to the interested reader: one's complement here replaces the whole `- 1 - opos_num` part i.e. it seems correct.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_134505

Matteo Bruni (＠Mystral)

12:16 p.m.

Matteo Bruni (@Mystral) commented about dlls/dsound/mixer.c:

...

int opos = (int)(opos_num >> 32) - fir_width;

UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; - UINT rem_num = ~(DWORD)opos_num << fir_step_shift; - float rem = rem_num * (1.0f / (1ll << 32)); + int rem_num = ~(DWORD)opos_num << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31));

Smart! I'd have `#define`s for these conversion numbers as well, maybe something like:

```
#define POS_FRAC_TO_SIGNED_SHIFT 1
#define POS_FRAC_SIGNED_MAX (1ll << 31)
```

I expect the resulting code to be more verbose but also quite a bit more self-explanatory.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_134506

Matteo Bruni (＠Mystral)

12:16 p.m.

Matteo Bruni (@Mystral) commented about dlls/dsound/mixer.c:

...

static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, UINT required_input, float *input, float *output) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + (LONG64)~0u; - DWORD opos_num_step = freq_adjust_den; + /* Clear the lower bits of opos_num and opos_num_step to keep rem in perfect + * sync with the lower part of opos_num. As rem is always less than 2, its + * fractional part has 23 bits in the worst case. */ + LONG64 opos_num_mask = ~0ull << (32 - 23 - fir_step_shift); + LONG64 opos_num = (freq_adjust_den - freq_acc_start + (LONG64)~0u) & opos_num_mask; + DWORD opos_num_step = freq_adjust_den & (DWORD)opos_num_mask; + + DWORD rem_num = ((DWORD)opos_num ^ (DWORD)opos_num_mask) << fir_step_shift >> 1; + DWORD rem_step_num = -opos_num_step << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31)); + float rem_step = rem_step_num * (1.0f / (1ll << 31)); Does this make a large difference in the end? I.e. how bad is it if we store and update the linear interpolation factor as fixed point and convert it to float in SIMD fashion every time? IIRC "sane" int->float conversion (e.g. not unsigned, as taken care of by your earlier patch) is fast in SSE+.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_134507

Age (days ago)

Last active (days ago)

List overview

11 comments

3 participants

participants (3)

Anton Baskanov
Anton Baskanov (＠baskanov)
Matteo Bruni (＠Mystral)

[PATCH v2 0/5] MR10423: dsound: Speed up resampling, part 5

tags

participants (3)