[PATCH v2 0/5] MR10423: dsound: Speed up resampling, part 5
-- v2: dsound: Calculate rem and rem_inv incrementally. https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/fir.h | 4 ++++ dlls/dsound/mixer.c | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dlls/dsound/fir.h b/dlls/dsound/fir.h index c03d0b9f237..45ad65d7398 100644 --- a/dlls/dsound/fir.h +++ b/dlls/dsound/fir.h @@ -86,7 +86,9 @@ int main() fprintf(stderr, "q %f\n", (double)output.q); fprintf(stderr, "status %s\n", get_pm_status_str(output.status)); + printf("static const int fir_width_shift = %d;\n", fir_width_shift); printf("static const int fir_width = %d;\n", fir_width); + printf("static const int fir_step_shift = %d;\n", fir_step_shift); printf("static const int fir_step = %d;\n", fir_step); printf("static const float fir[] = {"); // Print the FIR array with an additional row at the end. This simplifies @@ -112,7 +114,9 @@ int main() printf("};\n"); } */ +static const int fir_width_shift = 6; static const int fir_width = 64; +static const int fir_step_shift = 7; static const int fir_step = 128; static const float fir[] = { 0.0000000000e+00, -2.4830013102e-06, 1.9318705150e-06, 2.6614854151e-06, diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 43a9a8c4f91..3003715b413 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -322,8 +322,8 @@ static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 fr /* opos is in the range [-(fir_width - 1), count) */ int opos = opos_num / freq_adjust_num - fir_width; - UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) * fir_step; - UINT idx = idx_num / freq_adjust_num * fir_width; + UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) << fir_step_shift; + UINT idx = (idx_num / freq_adjust_num) << fir_width_shift; float rem = idx_num % freq_adjust_num / (float)freq_adjust_num; float input_value = input[j] * firgain; @@ -345,8 +345,8 @@ static void upsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq LONG64 ipos_num = freq_acc_start + i * freq_adjust_num; UINT ipos = 
ipos_num / freq_adjust_den; - UINT idx_num = ipos_num % freq_adjust_den * fir_step; - UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) * fir_width; + UINT idx_num = (ipos_num % freq_adjust_den) << fir_step_shift; + UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) << fir_width_shift; float rem_inv = idx_num % freq_adjust_den / (float)freq_adjust_den; float rem = 1.0f - rem_inv; -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 3003715b413..530b8e23d69 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -312,19 +312,19 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) * Note that this function will overwrite up to fir_width - 1 frames before and * after output[]. */ -static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq_acc_start, - float firgain, UINT required_input, float *input, float *output) +static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, + UINT required_input, float *input, float *output) { int j; for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * freq_adjust_den + freq_adjust_num - 1; + LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ - int opos = opos_num / freq_adjust_num - fir_width; + int opos = (int)(opos_num >> 32) - fir_width; - UINT idx_num = (freq_adjust_num - 1 - opos_num % freq_adjust_num) << fir_step_shift; - UINT idx = (idx_num / freq_adjust_num) << fir_width_shift; - float rem = idx_num % freq_adjust_num / (float)freq_adjust_num; + UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; + UINT rem_num = ~(DWORD)opos_num << fir_step_shift; + float rem = rem_num * (1.0f / (1ll << 32)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -336,18 +336,18 @@ static void downsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 fr } } -static void upsample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq_acc_start, - UINT count, float *input, float *output) +static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float 
*input, + float *output) { UINT i; for(i = 0; i < count; ++i) { - LONG64 ipos_num = freq_acc_start + i * freq_adjust_num; - UINT ipos = ipos_num / freq_adjust_den; + LONG64 ipos_num = freq_acc_start + i * (LONG64)freq_adjust_num; + UINT ipos = ipos_num >> 32; - UINT idx_num = (ipos_num % freq_adjust_den) << fir_step_shift; - UINT idx = (fir_step - 1 - idx_num / freq_adjust_den) << fir_width_shift; - float rem_inv = idx_num % freq_adjust_den / (float)freq_adjust_den; + UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; + UINT rem_num = (DWORD)ipos_num << fir_step_shift; + float rem_inv = rem_num * (1.0f / (1ll << 32)); float rem = 1.0f - rem_inv; int j; @@ -368,11 +368,21 @@ static void resample(LONG64 freq_adjust_num, LONG64 freq_adjust_den, LONG64 freq float firgain, UINT required_input, UINT count, float *input, float *output) { if (freq_adjust_num > freq_adjust_den) { + /* Convert the values to 0.32 fixed point. Round down to prevent output + * buffer overflow. */ + DWORD freq_adjust_fixed_den = (freq_adjust_den << 32) / freq_adjust_num; + DWORD freq_acc_fixed_start = freq_acc_start * freq_adjust_fixed_den / freq_adjust_den; + memset(output, 0, count * sizeof(float)); - downsample(freq_adjust_num, freq_adjust_den, freq_acc_start, firgain, required_input, - input, output); + downsample(freq_adjust_fixed_den, freq_acc_fixed_start, firgain, required_input, input, + output); } else { - upsample(freq_adjust_num, freq_adjust_den, freq_acc_start, count, input, output); + /* Convert the values to 0.32 fixed point. Round down to prevent input + * buffer overflow. */ + DWORD freq_adjust_fixed_num = (freq_adjust_num << 32) / freq_adjust_den; + DWORD freq_acc_fixed_start = (freq_acc_start << 32) / freq_adjust_den; + + upsample(freq_adjust_fixed_num, freq_acc_fixed_start, count, input, output); } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
From: Anton Baskanov <baskanov@gmail.com> Neither x87 nor SSE accepts unsigned integers directly, so they require extra conversion steps. --- dlls/dsound/mixer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 530b8e23d69..fefc0d2fe05 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -323,8 +323,8 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai int opos = (int)(opos_num >> 32) - fir_width; UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; - UINT rem_num = ~(DWORD)opos_num << fir_step_shift; - float rem = rem_num * (1.0f / (1ll << 32)); + int rem_num = ~(DWORD)opos_num << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -346,8 +346,8 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl UINT ipos = ipos_num >> 32; UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; - UINT rem_num = (DWORD)ipos_num << fir_step_shift; - float rem_inv = rem_num * (1.0f / (1ll << 32)); + int rem_num = (DWORD)ipos_num << fir_step_shift >> 1; + float rem_inv = rem_num * (1.0f / (1ll << 31)); float rem = 1.0f - rem_inv; int j; -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index fefc0d2fe05..e3190535f19 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -315,10 +315,11 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, UINT required_input, float *input, float *output) { + LONG64 opos_num = freq_adjust_den - freq_acc_start + (LONG64)~0u; + DWORD opos_num_step = freq_adjust_den; int j; for (j = 0; j < required_input; ++j) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + j * (LONG64)freq_adjust_den + ~0u; /* opos is in the range [-(fir_width - 1), count) */ int opos = (int)(opos_num >> 32) - fir_width; @@ -333,16 +334,19 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai UINT i; for (i = 0; i < fir_width; ++i) output[opos + i] += fir[idx + i] * input_value0 + fir[idx + fir_width + i] * input_value1; + + opos_num += opos_num_step; } } static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float *input, float *output) { + LONG64 ipos_num = freq_acc_start; + DWORD ipos_num_step = freq_adjust_num; UINT i; for(i = 0; i < count; ++i) { - LONG64 ipos_num = freq_acc_start + i * (LONG64)freq_adjust_num; UINT ipos = ipos_num >> 32; UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; @@ -357,6 +361,8 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl for (j = 0; j < fir_width; j++) sum += (fir[idx + j] * rem_inv + fir[idx + j + fir_width] * rem) * cache[j]; output[i] = sum; + + ipos_num += ipos_num_step; } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/mixer.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index e3190535f19..c7565c03fa6 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -315,17 +315,23 @@ static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT count) static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgain, UINT required_input, float *input, float *output) { - LONG64 opos_num = freq_adjust_den - freq_acc_start + (LONG64)~0u; - DWORD opos_num_step = freq_adjust_den; + /* Clear the lower bits of opos_num and opos_num_step to keep rem in perfect + * sync with the lower part of opos_num. As rem is always less than 2, its + * fractional part has 23 bits in the worst case. */ + LONG64 opos_num_mask = ~0ull << (32 - 23 - fir_step_shift); + LONG64 opos_num = (freq_adjust_den - freq_acc_start + (LONG64)~0u) & opos_num_mask; + DWORD opos_num_step = freq_adjust_den & (DWORD)opos_num_mask; + + DWORD rem_num = ((DWORD)opos_num ^ (DWORD)opos_num_mask) << fir_step_shift >> 1; + DWORD rem_step_num = -opos_num_step << fir_step_shift >> 1; + float rem = rem_num * (1.0f / (1ll << 31)); + float rem_step = rem_step_num * (1.0f / (1ll << 31)); int j; for (j = 0; j < required_input; ++j) { /* opos is in the range [-(fir_width - 1), count) */ int opos = (int)(opos_num >> 32) - fir_width; - UINT idx = ~(DWORD)opos_num >> (32 - fir_step_shift) << fir_width_shift; - int rem_num = ~(DWORD)opos_num << fir_step_shift >> 1; - float rem = rem_num * (1.0f / (1ll << 31)); float input_value = input[j] * firgain; float input_value0 = (1.0f - rem) * input_value; @@ -335,6 +341,9 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai for (i = 0; i < fir_width; ++i) output[opos + i] += fir[idx + i] * input_value0 + fir[idx + fir_width + i] * input_value1; + rem += rem_step; + rem -= rem >= 1.0f ? 
1.0f : 0.0f; + opos_num += opos_num_step; } } @@ -342,16 +351,22 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float *input, float *output) { - LONG64 ipos_num = freq_acc_start; - DWORD ipos_num_step = freq_adjust_num; + /* Clear the lower bits of ipos_num and ipos_num_step to keep rem_inv in + * perfect sync with the lower part of ipos_num. As rem is always less than + * 2, its fractional part has 23 bits in the worst case. */ + DWORD ipos_num_mask = ~0u << (32 - 23 - fir_step_shift); + LONG64 ipos_num = freq_acc_start & ipos_num_mask; + DWORD ipos_num_step = freq_adjust_num & ipos_num_mask; + + DWORD rem_num = (DWORD)ipos_num << fir_step_shift >> 1; + DWORD rem_step_num = ipos_num_step << fir_step_shift >> 1; + float rem_inv = rem_num * (1.0f / (1ll << 31)); + float rem_inv_step = rem_step_num * (1.0f / (1ll << 31)); UINT i; for(i = 0; i < count; ++i) { UINT ipos = ipos_num >> 32; - UINT idx = ~(DWORD)ipos_num >> (32 - fir_step_shift) << fir_width_shift; - int rem_num = (DWORD)ipos_num << fir_step_shift >> 1; - float rem_inv = rem_num * (1.0f / (1ll << 31)); float rem = 1.0f - rem_inv; int j; @@ -362,6 +377,9 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl sum += (fir[idx + j] * rem_inv + fir[idx + j + fir_width] * rem) * cache[j]; output[i] = sum; + rem_inv += rem_inv_step; + rem_inv -= rem_inv >= 1.0f ? 1.0f : 0.0f; + ipos_num += ipos_num_step; } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10423
v2: - Use `>=` when comparing `rem_inv` to `1.0f`. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_133820
On Thu Mar 26 01:52:58 2026 +0000, Anton Baskanov wrote:
changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/10423/diffs?diff_id=254387&start_sha=4a16374e84ef5e45cff2ac0b8973a4522afe959b#3611483beebc594b1206bbcc979e9584d27bf7ce_381_381) Done.
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10423#note_133821
participants (2)
- Anton Baskanov
- Anton Baskanov (@baskanov)