Skip to content

Commit

Permalink
[mod_dptools:playback] fix audio speed change to preserve pitch
Browse files Browse the repository at this point in the history
When  a  file  is  being played  with  the  playback
application  and  the  speed  is  changed  with  eg.
uuid_fileman  speed:+1  the audio  pitch  increases.
This commit tries to preserve the pitch even when the
speed is changed.

How?
+ Use longer sample chunks (where original freq is preserved)
+ Search for the best cut position using cross-correlation
+ Crossfade adjacent chunks to each other

This updated co-authored commit from the original attempts
to optimize memory usage by allocating memory on the heap
for samples of the previous frame, but using stack memory
for the modified data that is flushed to the buffer.

Co-authored-by: hari <[email protected]>
Co-authored-by: Joseph Nadiv <[email protected]>
  • Loading branch information
3 people committed Jan 9, 2025
1 parent bb682fc commit bc6842a
Showing 1 changed file with 86 additions and 26 deletions.
112 changes: 86 additions & 26 deletions src/switch_ivr_play_say.c
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,14 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess


for (cur = 0; switch_channel_ready(channel) && !done && cur < argc; cur++) {
const int sp_fadeLen = 32;
const int sp_cut_src_rng = 64;
int16_t sp_ovrlap[32];
int sp_has_overlap = 0;
int sp_prev_idx = 0;
int sp_prev_cap = 0;
int16_t *sp_prev = NULL;

file = argv[cur];
eof = 0;

Expand Down Expand Up @@ -1859,42 +1867,91 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess

if (!switch_test_flag(fh, SWITCH_FILE_NATIVE) && fh->speed && do_speed) {
float factor = 0.25f * abs(fh->speed);
switch_size_t newlen, supplement, step;
short *bp = write_frame.data;
switch_size_t wrote = 0;

supplement = (int) (factor * olen);
if (!supplement) {
supplement = 1;
int16_t* bp = write_frame.data;
switch_size_t supplement = (int) (factor * olen);
switch_size_t newlen = (fh->speed > 0) ? olen - supplement : olen + supplement;
int src_rng = (fh->speed > 0 ? supplement : sp_cut_src_rng)* sp_has_overlap;
int datalen = newlen + src_rng + sp_fadeLen;
int extra = datalen - olen;
int16_t data[datalen * 2];
int16_t* currp = NULL;
int best_cut_idx = 0;

memset(data, 0, sizeof(data));

if (!sp_prev) {
sp_prev_cap = olen * 3 + sp_prev_idx;
switch_zmalloc(sp_prev, sp_prev_cap);
}
newlen = (fh->speed > 0) ? olen - supplement : olen + supplement;

step = (fh->speed > 0) ? (newlen / supplement) : (olen / supplement);

if (!fh->sp_audio_buffer) {
switch_buffer_create_dynamic(&fh->sp_audio_buffer, 1024, 1024, 0);
}

while ((wrote + step) < newlen) {
switch_buffer_write(fh->sp_audio_buffer, bp, step * 2);
wrote += step;
bp += step;
if (fh->speed > 0) {
bp++;
if (extra > 0) {
int cont = 0;
currp = data;

if (extra > sp_prev_idx) {
// not enough data
switch_buffer_write(fh->sp_audio_buffer, bp, newlen * 2);
cont = 1;
} else {
float f;
short s;
f = (float) (*bp + *(bp + 1) + *(bp - 1));
f /= 3;
s = (short) f;
switch_buffer_write(fh->sp_audio_buffer, &s, 2);
wrote++;
memcpy(currp, sp_prev + sp_prev_idx - extra, extra * 2);
memcpy(currp + extra, bp, olen * 2);
}

if (olen * 3 + sp_prev_idx > sp_prev_cap) {
sp_prev_cap = olen * 3 + sp_prev_idx;
sp_prev = realloc(sp_prev, sp_prev_cap);
sp_prev_idx = 0;
}

if (sp_prev_idx > olen) {
memmove(sp_prev, sp_prev + sp_prev_idx - olen, olen * 2);
sp_prev_idx = olen;
}

memcpy(sp_prev + sp_prev_idx, bp, olen * 2);
sp_prev_idx += olen;

if (cont) {
continue;
}
} else {
sp_prev_idx = 0;
currp = bp;
}

if (sp_has_overlap) {
double best = INT_MIN;
for (int idx_src = 0; idx_src < src_rng; idx_src++) {
double cc = 0;
for (int i = 0; i < sp_fadeLen; i++) {
cc += sp_ovrlap[i] * (*(currp + idx_src + i));
}
if (cc > best) {
best = cc;
best_cut_idx = idx_src;
}
}
}
if (wrote < newlen) {
switch_size_t r = newlen - wrote;
switch_buffer_write(fh->sp_audio_buffer, bp, r * 2);

currp += best_cut_idx;
switch_buffer_write(fh->sp_audio_buffer, currp, newlen * 2);
currp += newlen;

if (sp_has_overlap) {
int16_t* ret = (int16_t*)((unsigned char*)switch_buffer_get_head_pointer(fh->sp_audio_buffer) + switch_buffer_inuse(fh->sp_audio_buffer) - 2 * newlen);
// crossfade
for (int i = 0; i < (newlen < sp_fadeLen ? newlen : sp_fadeLen); i++) {
double factor = ((double)i) / sp_fadeLen;
*(ret + i) = (int16_t)(sp_ovrlap[i] * (1 - factor) + *(ret + i) * factor);
}
}

memcpy(sp_ovrlap, currp, sp_fadeLen * 2);
sp_has_overlap = 1;
last_speed = fh->speed;
continue;
}
Expand Down Expand Up @@ -2056,6 +2113,9 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess
if (fh->sp_audio_buffer) {
switch_buffer_destroy(&fh->sp_audio_buffer);
}

switch_safe_free(sp_prev);

}

if (switch_core_codec_ready((&codec))) {
Expand Down

0 comments on commit bc6842a

Please sign in to comment.