From 2bb2802458a4d057b6cda5d0c81df856f78eb2e1 Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Wed, 5 Oct 2022 18:57:09 +1100 Subject: [PATCH] SPU: Logic re-write --- desmume/src/NDSSystem.cpp | 9 +- desmume/src/NDSSystem.h | 6 +- desmume/src/SPU.cpp | 1229 ++++++++++++-------- desmume/src/SPU.h | 49 +- desmume/src/frontend/windows/soundView.cpp | 24 +- 5 files changed, 764 insertions(+), 553 deletions(-) diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 58cbb57d9..99c8f3cf5 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1422,9 +1422,12 @@ static void execHardware_hblank() //emulation housekeeping. for some reason we always do this at hblank, //even though it sounds more reasonable to do it at hstart - SPU_Emulate_core(); - driver->AVI_SoundUpdate(SPU_core->outbuf,spu_core_samples); - WAV_WavSoundUpdate(SPU_core->outbuf,spu_core_samples); + int coreSamples = SPU_Emulate_core(355*6); // HLine = 355 dots @ 6c/dot + if(coreSamples) + { + driver->AVI_SoundUpdate(SPU_core->outbuf,coreSamples); + WAV_WavSoundUpdate(SPU_core->outbuf,coreSamples); + } } static void execHardware_hstart_vblankEnd() diff --git a/desmume/src/NDSSystem.h b/desmume/src/NDSSystem.h index f2af566d4..4dbbd779d 100644 --- a/desmume/src/NDSSystem.h +++ b/desmume/src/NDSSystem.h @@ -530,6 +530,7 @@ extern struct TCommonSettings , spuInterpolationMode(2) , manualBackupType(0) , autodetectBackupMethod(0) + , spu_muteChannels(0) , spu_captureMuted(false) , spu_advanced(true) , StylusPressure(50) @@ -543,9 +544,6 @@ extern struct TCommonSettings strcpy(ARM7BIOS, "biosnds7.bin"); strcpy(ExtFirmwarePath, "firmware.bin"); - for(int i=0;i<16;i++) - spu_muteChannels[i] = false; - for(int g=0;g<2;g++) for(int x=0;x<5;x++) dispLayers[g][x]=true; @@ -652,7 +650,7 @@ extern struct TCommonSettings int SPU_sync_mode; int SPU_sync_method; - bool spu_muteChannels[16]; + u16 spu_muteChannels; bool spu_captureMuted; bool spu_advanced; diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 6c15e71cc..1d03e4296 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -48,7 +48,14 @@ static inline s16 read16(u32 addr) { return (s16)_MMU_read16(addr); } static inline s8 read_s8(u32 addr) { return (s8)_MMU_read08(addr); } -#define K_ADPCM_LOOPING_RECOVERY_INDEX 99999 +// Disabling capture when _currentSNDCore == SNDDummy can increase +// performance by disabling all mixing, but could potentially cause +// problems if the software is relying on the capture output. By +// default, we disable all mixing only when capture isn't running, +// as this is guaranteed to be safe. +#define ENABLE_DUMMY_SPU_CAPTURE 1 + +#define K_ADPCM_LOOPING_RECOVERY_INDEX 255 #define CATMULLROM_INTERPOLATION_RESOLUTION_BITS 11 #define CATMULLROM_INTERPOLATION_RESOLUTION (1< static FORCEINLINE T MinMax(T val, T min, T max) @@ -211,19 +211,19 @@ int SPU_Init(int coreid, int newBufferSizeBytes) // If we wanted to, we could stick entirely to integer maths // here, but I doubt it's worth the hassle. double x = i / (double)CATMULLROM_INTERPOLATION_RESOLUTION; - double a = x*(x*(-x + 2) - 1); + double a = x*(x*(x - 2) + 1); double b = x*x*(3*x - 5) + 2; double c = x*(x*(-3*x + 4) + 1); - double d = x*x*(x - 1); - catmullrom_lut[i][0] = (u16)floor((1u<<15) * -0.5*a); - catmullrom_lut[i][1] = (u16)floor((1u<<15) * 0.5*b); - catmullrom_lut[i][2] = (u16)floor((1u<<15) * 0.5*c); - catmullrom_lut[i][3] = (u16)floor((1u<<15) * -0.5*d); + double d = x*x*(1 - x); + catmullrom_lut[i][0] = (u16)floor((double)(1<<15) * 0.5*a); + catmullrom_lut[i][1] = (u16)floor((double)(1<<15) * 0.5*b); + catmullrom_lut[i][2] = (u16)floor((double)(1<<15) * 0.5*c); + catmullrom_lut[i][3] = (u16)floor((double)(1<<15) * 0.5*d); } for (size_t i = 0; i < COSINE_INTERPOLATION_RESOLUTION; i++) - cos_lut[i] = (u16)floor((1u<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); + cos_lut[i] = (u16)floor((double)(1<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); - SPU_core = new SPU_struct((int)ceil(samples_per_hline)); + SPU_core = new SPU_struct(); SPU_Reset(); //create adpcm decode accelerator lookups @@ -285,7 +285,7 @@ void SPU_SetSynchMode(int mode, int method) if (_currentSynchMode == ESynchMode_DualSynchAsynch) { - SPU_user = new SPU_struct(_currentBufferSize); + SPU_user = new SPU_struct(); SPU_CloneUser(); } } @@ -327,15 +327,14 @@ void SPU_Reset(void) for (i = 0x400; i < 0x51D; i++) T1WriteByte(MMU.ARM7_REG, i, 0); - _samples = 0; + _spu_core_cyclesCounter = 0; } //------------------------------------------ void SPU_struct::reset() { - memset(sndbuf,0,bufsize*2*4); - memset(outbuf,0,bufsize*2*2); + memset(outbuf,0,bufsize*sizeof(s16)*2); memset((void *)channels, 0, sizeof(channel_struct) * 16); @@ -347,22 +346,33 @@ void SPU_struct::reset() } } -SPU_struct::SPU_struct(int buffersize) - : bufpos(0) - , buflength(0) - , sndbuf(0) - , outbuf(0) - , bufsize(buffersize) +void SPU_struct::resizeBuffer(int buffersize) { - sndbuf = new s32[buffersize*2]; - outbuf = new s16[buffersize*2]; + if(outbuf) delete[] outbuf; + outbuf = new s16[(size_t)buffersize*2]; + bufsize = buffersize; +} + +SPU_struct::SPU_struct() + : outbuf(NULL) + , bufsize(0) +{ + // mixdata[] must be able to contain: + // struct { + // s32 mixbuf [N][2] + // s32 mutedmixbuf[N][2] + // s16 capbuf [N][2] + // s16 chanbuf [N][2] + // }; + // where N is at most SPUCAPTURE_FIFO_SIZE + mixdata = new s32[SPUCAPTURE_FIFO_SIZE * (sizeof(s32)+sizeof(s32)+sizeof(s16)+sizeof(s16))*2 / sizeof(s32)]; reset(); } SPU_struct::~SPU_struct() { - if(sndbuf) delete[] sndbuf; - if(outbuf) delete[] outbuf; + if(mixdata) delete[] mixdata; + if(outbuf) delete[] outbuf; } void SPU_DeInit(void) @@ -383,12 +393,13 @@ void SPU_struct::ShutUp() channels[i].status = CHANSTAT_STOPPED; } -static FORCEINLINE void adjust_channel_timer(channel_struct *chan) +/*FORCEINLINE*/ static void adjust_channel_timer(channel_struct *chan) { // ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2) / (2^16 - Timer) // = ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2 * (2^16 - Timer)) - // ... and then round up for good measure - u64 sampinc = ((u32)ARM7_CLOCK*(1ull << 32) - 1) / (DESMUME_SAMPLE_RATE * 2ull * (0x10000 - chan->timer)) + 1; + // Make sure to round DOWN, as we'd rather lag behind + // than be ahead, as this causes synchronization issues + u64 sampinc = (ARM7_CLOCK*(1ull << 32)) / (DESMUME_SAMPLE_RATE * 2ull * (0x10000 - chan->timer)); chan->sampincInt = (u32)(sampinc >> 32), chan->sampincFrac = (u32)sampinc; } @@ -418,15 +429,14 @@ void SPU_struct::KeyOn(int channel) { channel_struct &thischan = channels[channel]; thischan.status = CHANSTAT_PLAY; - thischan.totlength = thischan.length + thischan.loopstart; + thischan.totlength_shifted = thischan.totlength << format_shift[thischan.format]; + thischan.sampcntFrac = 0; adjust_channel_timer(&thischan); thischan.pcm16bOffs = 0; - for(int i=0;i static FORCEINLINE s32 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos) +template +FORCEINLINE static s16 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos) { switch (INTERPOLATE_MODE) { @@ -1051,12 +1071,20 @@ template static FORCEINLINE s32 Interpola { // Catmull-Rom spline // Delay: 2 samples, Maximum gain: 1.25 + // NOTE: Ideally, we would just re-scale the resampling + // kernel to have a maximum gain of 1.0. However, this + // would mean reducing the output volume, which can then + // go on to make feedback capture (ie. echo effects) + // decay abnormally quickly. Since Catmull-Rom is more + // of a 'luxury' thing, we should be able to use MinMax + // since if the user is using this interpolation method, + // there's likely enough processing power to handle it. s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 3)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 2)]; s32 c = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 d = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; const u16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; - return (-a*(s32)w[0] + b*(s32)w[1] + c*(s32)w[2] - d*(s32)w[3]) >> 15; + return (s16)MinMax((-a*(s32)w[0] + b*(s32)w[1] + c*(s32)w[2] - d*(s32)w[3]) >> 15, -0x8000, +0x7FFF); } case SPUInterpolation_Cosine: @@ -1065,10 +1093,13 @@ template static FORCEINLINE s32 Interpola // ratio2 = (1 - cos(ratio * M_PI)) / 2 // sampleI = sampleA * (1 - ratio2) + sampleB * ratio2 // Delay: 1 sample, Maximum gain: 1.0 + // NOTE: Always cast the result to s16. (b-a) can + // overflow, but a+(b-a)*subPos can't. So we might + // have garbage in the upper 16 bits. s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; s32 subPos16 = (s32)cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)]; - return a + ((b - a)*subPos16 >> 16); + return (s16)(a + (((b - a)*subPos16) >> 16)); } case SPUInterpolation_Linear: @@ -1076,10 +1107,11 @@ template static FORCEINLINE s32 Interpola // Linear Interpolation Formula: // sampleI = sampleA * (1 - ratio) + sampleB * ratio // Delay: 1 sample, Maximum gain: 1.0 + // NOTE: Always cast the result to s16 (see above). s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; s32 subPos16 = subPos >> (32 - 16); - return a + ((b - a)*subPos16 >> 16); + return (s16)(a + (((b - a)*subPos16) >> 16)); } default: @@ -1088,41 +1120,43 @@ template static FORCEINLINE s32 Interpola } } -static FORCEINLINE s32 Fetch8BitData(channel_struct *chan, s32 pos) +FORCEINLINE static s16 Fetch8BitData(channel_struct *chan, s32 pos) { if(pos < 0) return 0; - return read_s8(chan->addr + pos*1) << 8; + return (s16)read_s8(chan->addr + pos*1) << 8; } -static FORCEINLINE s32 Fetch16BitData(channel_struct *chan, s32 pos) +FORCEINLINE static s16 Fetch16BitData(channel_struct *chan, s32 pos) { if(pos < 0) return 0; return read16(chan->addr + pos*2); } -static FORCEINLINE s32 FetchADPCMData(channel_struct *chan, s32 pos) +// NOTE: The decoding state is updated during this function call +FORCEINLINE static s16 FetchADPCMData(channel_struct *chan, s32 pos) { if(pos < 8) return 0; s16 last = chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)]; - if(pos == (chan->loopstart<<3)) { - //if(chan->loop_index != K_ADPCM_LOOPING_RECOVERY_INDEX) printf("over-snagging\n"); + // Stash loop sample and index + // This saves having to decode to the loop point every time + if(pos == ((s32)chan->loopstart<<3)) { chan->loop_pcm16b = last; chan->loop_index = chan->index; } - const u32 shift = (pos&1) * 4; - const u32 data4bit = ((u32)read08(chan->addr + (pos>>1))) >> shift; + const u8 shift = ((u8)pos&1) * 4; + const u8 data4bit = read08(chan->addr + (pos>>1)) >> shift; const s32 diff = precalcdifftbl [chan->index][data4bit & 0xF]; chan->index = precalcindextbl[chan->index][data4bit & 0x7]; - return MinMax(last + diff, -0x8000, 0x7FFF); + return (s16)MinMax(last + diff, -0x8000, 0x7FFF); } -static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) +FORCEINLINE static s16 FetchPSGData(channel_struct *chan, s32 pos) { if(pos < 0 || chan->num < 8) return 0; @@ -1130,7 +1164,7 @@ static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) if(chan->num < 14) { // Doing this avoids using a LUT - return ((pos%8u) > chan->waveduty) ? (-0x7FFF) : (+0x7FFF); + return (((u8)pos%8u) > chan->waveduty) ? (-0x7FFF) : (+0x7FFF); } else { @@ -1149,43 +1183,22 @@ static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) ////////////////////////////////////////////////////////////////////////////// -static FORCEINLINE void MixL(SPU_struct* SPU, channel_struct *chan, s32 data) -{ - data = spumuldiv7(data, chan->vol) >> volume_shift[chan->volumeDiv]; - SPU->sndbuf[SPU->bufpos<<1] += data; -} - -static FORCEINLINE void MixR(SPU_struct* SPU, channel_struct *chan, s32 data) +// Returns false when the channel needs to stop +// NOTE: Assumes channel has already reached the end of playback +template +/*FORCEINLINE*/ static bool TestForLoop(channel_struct *chan, s32 *pos, s32 totalLength) { - data = spumuldiv7(data, chan->vol) >> volume_shift[chan->volumeDiv]; - SPU->sndbuf[(SPU->bufpos<<1)+1] += data; -} - -static FORCEINLINE void MixLR(SPU_struct* SPU, channel_struct *chan, s32 data) -{ - data = spumuldiv7(data, chan->vol) >> volume_shift[chan->volumeDiv]; - SPU->sndbuf[SPU->bufpos<<1] += spumuldiv7(data, 127 - chan->pan); - SPU->sndbuf[(SPU->bufpos<<1)+1] += spumuldiv7(data, chan->pan); -} - -////////////////////////////////////////////////////////////////////////////// - -template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channel_struct *chan) -{ - // Do nothing if we haven't reached the end - if(chan->sampcntInt < chan->totlength_shifted) return; - // Kill the channel if we don't repeat if(chan->repeat != 1) { - SPU->KeyOff(chan->num); - SPU->bufpos = SPU->buflength; - return; + //SPU->KeyOff(chan->num); // Inlining this avoids having to pass an SPU pointer around + chan->status = CHANSTAT_STOPPED; + return false; } // Wrap sampcnt - u32 loopSize = chan->totlength_shifted - (chan->loopstart << format_shift[FORMAT]); - do chan->sampcntInt -= loopSize; while(chan->sampcntInt >= chan->totlength_shifted) + u32 loopSize = totalLength - (chan->loopstart << format_shift[FORMAT]); + do *pos -= loopSize; while(*pos >= totalLength); // ADPCM needs special handling if(FORMAT == 2) @@ -1194,7 +1207,7 @@ template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channe // smaller values (0..3 words) are causing hang-ups // (busy bit remains set infinite, but no sound output occurs). // fix: 7th Dragon (JP) - http://sourceforge.net/p/desmume/bugs/1357/ - if (chan->totlength < 4) return; + if (totalLength < (4 << format_shift[FORMAT])) return true; // Fetch loop sample and index, and get the "new" current decoding position s32 curpos; @@ -1217,314 +1230,257 @@ template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channe // Decode until we reach the target position // This is really only used for fast seeking (ie. SNDDummy // and loop reset), but makes the code much cleaner. - while(curpos < chan->sampcntInt) + while(curpos < *pos) { *pcm16Dst = FetchADPCMData(chan, curpos); curpos++; } } + return true; } -template FORCEINLINE static void SPU_Mix(SPU_struct* SPU, channel_struct *chan, s32 data) +////////////////////////////////////////////////////////////////////////////// + +//WORK +template +static void __SPU_GenerateChanData(channel_struct* const chan, s16 *chanbuf, int length) { - switch(CHANNELS) + s32 totalLength = chan->totlength_shifted; + + if (!CHANNELS) { - case 0: MixL(SPU, chan, data); break; - case 1: MixLR(SPU, chan, data); break; - case 2: MixR(SPU, chan, data); break; - default: break; + // When we aren't mixing at all, take a much faster path where + // we simply update sampcnt. This can glitch interpolation for + // up to SPUCHAN_PCM16B_SIZE source samples (since we're not + // updating chan->pcm16b[]), but this glitching should really + // only show up when switching from Dual SPU to Sync mode, or + // when switching from SNDDummy core to an actual output core, + // and only for non-ADPCM sources (ADPCM needs to decode data + // all the time, so we keep pcm16b[] filled correctly anyway). + s32 cursampcntInt = chan->sampcntInt; + s64 newsampcnt = (chan->sampcntFrac | (s64) cursampcntInt<<32); + newsampcnt += (chan->sampincFrac | (u64)chan->sampincInt<<32) * length; + s32 newsampcntInt = (s32)(newsampcnt >> 32); + if(FORMAT == 2 && newsampcntInt <= totalLength) + { + // We won't go past the end, so decode until reaching the target position + while(cursampcntInt < newsampcntInt) + { + s16 data = FetchADPCMData(chan, cursampcntInt); + chan->pcm16bOffs++; + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = data; + cursampcntInt++; + } + } + else if(FORMAT != 3 && newsampcntInt >= totalLength) TestForLoop(chan, &newsampcntInt, totalLength); + chan->sampcntFrac = (u32)newsampcnt; + chan->sampcntInt = newsampcntInt; + return; } - SPU->lastdata = data; -} -//WORK -template - FORCEINLINE static void ____SPU_ChanUpdate(SPU_struct* const SPU, channel_struct* const chan) -{ - for (; SPU->bufpos < SPU->buflength; SPU->bufpos++) + // chan->vol is .7fxp, plus .4fxp for chan->volumeDiv (total .11fxp) + // chan->pan is .7fxp + // This gives us .18fxp, but we need at most .16fxp, so we shift down. + s32 vol_shifted = spumuladjust7(chan->vol); + vol_shifted <<= 4; + vol_shifted >>= volume_shift[chan->volumeDiv]; + s32 vol_left = spumuladjust7(127 - chan->pan); + vol_left *= vol_shifted; + vol_left >>= 2; // .16fxp + s32 vol_right = spumuladjust7(chan->pan); + vol_right *= vol_shifted; + vol_right >>= 2; // .16fxp + + // Start mixing loop + u32 sampcntFrac = chan->sampcntFrac; + s32 sampcntInt = chan->sampcntInt; + do { // Advance sampcnt one sample at a time. This is // needed to keep pcm16b[] filled for interpolation. - u32 nSamplesToSkip = chan->sampincInt + AddAndReturnCarry(&chan->sampcntFrac, chan->sampincFrac); + u32 nSamplesToSkip = chan->sampincInt + AddAndReturnCarry(&sampcntFrac, chan->sampincFrac); while(nSamplesToSkip--) { + // If channel stops, fill the rest of the buffer with 0 + if(FORMAT != 3 && sampcntInt >= totalLength && !TestForLoop(chan, &sampcntInt, totalLength)) + { + memset(chanbuf, 0, length*sizeof(s16)*2); + return; + } + s16 data = 0; - s32 pos = chan->sampcntInt; switch(FORMAT) { - case 0: data = Fetch8BitData (chan, pos); break; - case 1: data = Fetch16BitData(chan, pos); break; - case 2: data = FetchADPCMData(chan, pos); break; - case 3: data = FetchPSGData (chan, pos); break; - default: break; + case 0: data = Fetch8BitData (chan, sampcntInt); break; + case 1: data = Fetch16BitData(chan, sampcntInt); break; + case 2: data = FetchADPCMData(chan, sampcntInt); break; + case 3: data = FetchPSGData (chan, sampcntInt); break; } chan->pcm16bOffs++; chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = data; - - chan->sampcntInt++; - if (FORMAT != 3) TestForLoop(SPU, chan); + sampcntInt++; } - if(CHANNELS != -1) + // Because chanbuf[] is aligned to at least 32 bits, we can + // cheat and store a hard-panned sample by writing as 32bit + s16 sample, sampleL = 0, sampleR = 0; // <- Clearing these to 0 shuts the compiler up + sample = Interpolate(chan->pcm16b, chan->pcm16bOffs, sampcntFrac); + if(CHANNELS & (1<<0)) sampleL = (s16)(sample * vol_left >> 16); + if(CHANNELS & (1<<1)) sampleR = (s16)(sample * vol_right >> 16); + switch(CHANNELS) { - s32 data = Interpolate(chan->pcm16b, chan->pcm16bOffs, chan->sampcntFrac); - SPU_Mix(SPU, chan, data); + case (1<<0)|(0<<1): +#ifdef MSB_FIRST + *(u32*)chanbuf = (u32)sampleL << 16; +#else + *(u32*)chanbuf = (u32)sampleL; +#endif + break; + case (0<<0)|(1<<1): +#ifdef MSB_FIRST + *(u32*)chanbuf = (u32)sampleR; +#else + *(u32*)chanbuf = (u32)sampleR << 16; +#endif + break; + case (1<<0)|(1<<1): + chanbuf[0] = sampleL; + chanbuf[1] = sampleR; + break; } + chanbuf += 2; + } while(--length); + chan->sampcntFrac = sampcntFrac; + chan->sampcntInt = sampcntInt; +} + +// Outputs {L,R} into chanbuf[] +// Assumes chanbuf[] is always aligned to at least 32 bits +FORCEINLINE static void _SPU_GenerateChanData(bool actuallyMix, channel_struct* const chan, s16 *chanbuf, int length) +{ + typedef void (*_SPU_GenerateChanData_Func_t)(channel_struct* const chan, s16 *chanbuf, int length); + + // This looks insane and pointless, but compilers generate + // a massive if/elseif block in place of something like this, + // since they don't know the range of values we use. + // Note that we use SPUInterpolation_None in the case of PSG + // channels, as we don't want to interpolate the raw samples. + // We also use SPUInterpolation_None with actuallyMix==false, + // so that we avoid instantiating the exact same code under + // a new template instance. + // The table is acessed as: FuncTable[INTERPOLATE_MODE][FORMAT][CHANNELS] +#define __GENERATE_FUNCTABLE(CHANNELS, FORMAT, INTERPOLATE_MODE) \ + __SPU_GenerateChanData +#define _GENERATE_FUNCTABLE(FORMAT, INTERPOLATE_MODE) \ + { \ + __GENERATE_FUNCTABLE((0<<0 | 0<<1), FORMAT, SPUInterpolation_None), \ + __GENERATE_FUNCTABLE((1<<0 | 0<<1), FORMAT, INTERPOLATE_MODE), \ + __GENERATE_FUNCTABLE((0<<0 | 1<<1), FORMAT, INTERPOLATE_MODE), \ + __GENERATE_FUNCTABLE((1<<0 | 1<<1), FORMAT, INTERPOLATE_MODE), \ } -} - -template - FORCEINLINE static void ___SPU_ChanUpdate(const bool actuallyMix, SPU_struct* const SPU, channel_struct* const chan) -{ - if(!actuallyMix) - ____SPU_ChanUpdate(SPU,chan); - else if (chan->pan == 0) - ____SPU_ChanUpdate(SPU,chan); - else if (chan->pan == 127) - ____SPU_ChanUpdate(SPU,chan); - else - ____SPU_ChanUpdate(SPU,chan); -} - -template - FORCEINLINE static void __SPU_ChanUpdate(const bool actuallyMix, SPU_struct* const SPU, channel_struct* const chan) -{ - // NOTE: PSG doesn't use interpolation, or it would try to - // interpolate between the raw sample points (very bad) - switch(chan->format) - { - case 0: ___SPU_ChanUpdate<0,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 1: ___SPU_ChanUpdate<1,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 2: ___SPU_ChanUpdate<2,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 3: ___SPU_ChanUpdate<3,SPUInterpolation_None>(actuallyMix, SPU, chan); break; - default: assert(false); +#define GENERATE_FUNCTABLE(INTERPOLATE_MODE) \ + { \ + _GENERATE_FUNCTABLE(0, INTERPOLATE_MODE), \ + _GENERATE_FUNCTABLE(1, INTERPOLATE_MODE), \ + _GENERATE_FUNCTABLE(2, INTERPOLATE_MODE), \ + _GENERATE_FUNCTABLE(3, SPUInterpolation_None), \ } -} - -FORCEINLINE static void _SPU_ChanUpdate(const bool actuallyMix, SPU_struct* const SPU, channel_struct* const chan) -{ - switch(CommonSettings.spuInterpolationMode) + static const _SPU_GenerateChanData_Func_t FuncTable[4][4][4] = { - case SPUInterpolation_None: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Linear: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Cosine: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_CatmullRom: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - default: assert(false); - } -} - -//ENTERNEW -static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) -{ - //the advanced spu function correctly handles all sound control mixing options, as well as capture - //this code is not entirely optimal, as it relies on sort of manhandling the core mixing functions - //in order to get the results it needs. - - //THIS IS MAX HACKS!!!! - //AND NEEDS TO BE REWRITTEN ALONG WITH THE DEEPEST PARTS OF THE SPU - //ONCE WE KNOW THAT IT WORKS - - //BIAS gets ignored since our spu is still not bit perfect, - //and it doesnt matter for purposes of capture - - //-----------DEBUG CODE - bool skipcap = false; - //----------------- - - s32 samp0[2] = {0,0}; - - //believe it or not, we are going to do this one sample at a time. - //like i said, it is slower. - for (int samp = 0; samp < length; samp++) + GENERATE_FUNCTABLE(SPUInterpolation_None), + GENERATE_FUNCTABLE(SPUInterpolation_Linear), + GENERATE_FUNCTABLE(SPUInterpolation_Cosine), + GENERATE_FUNCTABLE(SPUInterpolation_CatmullRom), + }; +#undef GENERATE_FUNCTABLE +#undef _GENERATE_FUNCTABLE +#undef __GENERATE_FUNCTABLE + + const _SPU_GenerateChanData_Func_t *Funcs = FuncTable[CommonSettings.spuInterpolationMode][chan->format]; + if(!actuallyMix) Funcs[0](chan, chanbuf, length); + else if(chan->pan == 0) Funcs[1](chan, chanbuf, length); + else if(chan->pan == 127) Funcs[2](chan, chanbuf, length); + else Funcs[3](chan, chanbuf, length); +} + +template +/*FORCEINLINE*/ static bool __SPU_WriteCapture(SPU_struct::REGS::CAP& cap, const channel_struct& srcChan, const s16 *srcBuf, int length) +{ + s32 capLen_shifted = cap.len * (32 / CAP_BITS); + SPU_struct::REGS::CAP::Runtime& runtime = cap.runtime; + s32 pos = runtime.sampcntInt; + do { - SPU->sndbuf[0] = 0; - SPU->sndbuf[1] = 0; - SPU->buflength = 1; - - s32 capmix[2] = {0,0}; - s32 mix[2] = {0,0}; - s32 chanout[16]; - s32 submix[32]; - - //generate each channel, and helpfully mix it at the same time - for (int i = 0; i < 16; i++) + s16 sample = USE_SRCBUF ? (*srcBuf) : 0; + u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&runtime.sampcntFrac, srcChan.sampincFrac); + while(nSamplesToProcess--) { - channel_struct *chan = &SPU->channels[i]; - - if (chan->status == CHANSTAT_PLAY) + if(pos >= capLen_shifted) { - SPU->bufpos = 0; - - bool bypass = false; - if (i==1 && SPU->regs.ctl_ch1bypass) bypass=true; - if (i==3 && SPU->regs.ctl_ch3bypass) bypass=true; - - - //output to mixer unless we are bypassed. - //dont output to mixer if the user muted us - bool outputToMix = true; - if (CommonSettings.spu_muteChannels[i]) outputToMix = false; - if (bypass) outputToMix = false; - bool outputToCap = outputToMix; - if (CommonSettings.spu_captureMuted && !bypass) outputToCap = true; - - //channels 1 and 3 should probably always generate their audio - //internally at least, just in case they get used by the spu output - bool domix = outputToCap || outputToMix || i==1 || i==3; - - //clear the output buffer since this is where _SPU_ChanUpdate wants to accumulate things - SPU->sndbuf[0] = SPU->sndbuf[1] = 0; - - //get channel's next output sample. - _SPU_ChanUpdate(domix, SPU, chan); - chanout[i] = SPU->lastdata >> volume_shift[chan->volumeDiv]; - - //save the panned results - submix[i*2] = SPU->sndbuf[0]; - submix[i*2+1] = SPU->sndbuf[1]; - - //send sample to our capture mix - if (outputToCap) + if(cap.oneshot) { - capmix[0] += submix[i*2]; - capmix[1] += submix[i*2+1]; + cap.active = runtime.running = 0; + return false; } + pos -= capLen_shifted; + } - //send sample to our main mixer - if (outputToMix) + s16 *data = &runtime.pcm16b[SPUCAPTURE_PCM16B_AT(runtime.pcm16bOffs)]; + if(pos >= 0) + { + if (CAP_BITS == 8) { - mix[0] += submix[i*2]; - mix[1] += submix[i*2+1]; + _MMU_write08(runtime.dad + pos*sizeof(s8), (u8)(*data >> 8)); + } + else + { + _MMU_write16(runtime.dad + pos*sizeof(s16), (u16)(*data)); } } - else - { - chanout[i] = 0; - submix[i*2] = 0; - submix[i*2+1] = 0; - } - } //foreach channel - - s32 mixout[2] = {mix[0],mix[1]}; - s32 capmixout[2] = {capmix[0],capmix[1]}; - s32 sndout[2]; - s32 capout[2]; - - //create SPU output - switch (SPU->regs.ctl_left) - { - case SPU_struct::REGS::LOM_LEFT_MIXER: sndout[0] = mixout[0]; break; - case SPU_struct::REGS::LOM_CH1: sndout[0] = submix[1*2+0]; break; - case SPU_struct::REGS::LOM_CH3: sndout[0] = submix[3*2+0]; break; - case SPU_struct::REGS::LOM_CH1_PLUS_CH3: sndout[0] = submix[1*2+0] + submix[3*2+0]; break; - default: break; - } - switch (SPU->regs.ctl_right) - { - case SPU_struct::REGS::ROM_RIGHT_MIXER: sndout[1] = mixout[1]; break; - case SPU_struct::REGS::ROM_CH1: sndout[1] = submix[1*2+1]; break; - case SPU_struct::REGS::ROM_CH3: sndout[1] = submix[3*2+1]; break; - case SPU_struct::REGS::ROM_CH1_PLUS_CH3: sndout[1] = submix[1*2+1] + submix[3*2+1]; break; - default: break; + *data = sample; + runtime.pcm16bOffs++; + pos++; } + // srcBuf[] stores two samples per time unit + // Either {Ch0[+Ch1],Ch2[+Ch3]}, or {LMix,RMix} + if(USE_SRCBUF) srcBuf += 2; + } while(--length); + runtime.sampcntInt = pos; + return true; +} - //generate capture output ("capture bugs" from gbatek are not emulated) - if (SPU->regs.cap[0].source == 0) - capout[0] = capmixout[0]; //cap0 = L-mix - else if (SPU->regs.cap[0].add) - capout[0] = chanout[0] + chanout[1]; //cap0 = ch0+ch1 - else capout[0] = chanout[0]; //cap0 = ch0 - - if (SPU->regs.cap[1].source == 0) - capout[1] = capmixout[1]; //cap1 = R-mix - else if (SPU->regs.cap[1].add) - capout[1] = chanout[2] + chanout[3]; //cap1 = ch2+ch3 - else capout[1] = chanout[2]; //cap1 = ch2 - - capout[0] = MinMax(capout[0],-0x8000,0x7FFF); - capout[1] = MinMax(capout[1],-0x8000,0x7FFF); +// Writes capture output to capture unit destination +// Returns false if capture has stopped +template +FORCEINLINE static bool _SPU_WriteCapture(SPU_struct::REGS::CAP& cap, const channel_struct& srcChan, const s16 *srcBuf, int length) +{ + if(cap.bits8) + return __SPU_WriteCapture< 8,USE_SRCBUF>(cap, srcChan, srcBuf, length); + else + return __SPU_WriteCapture<16,USE_SRCBUF>(cap, srcChan, srcBuf, length); +} - //write the output sample where it is supposed to go - if (samp == 0) +// Advances capture unit destination without writing anything +/*FORCEINLINE*/ static void _SPU_SeekCapture(SPU_struct::REGS::CAP& cap, const channel_struct& srcChan, int length) +{ + s32 capLen_shifted = cap.len * (cap.bits8 ? (32/8) : (32/16)); + SPU_struct::REGS::CAP::Runtime& runtime = cap.runtime; + s64 pos64 = (runtime.sampcntFrac | (s64)runtime.sampcntInt<<32); + pos64 += (srcChan.sampincFrac | (u64)srcChan.sampincInt<<32) * length; + runtime.sampcntFrac = (u32)pos64; + runtime.sampcntInt = (s32)(pos64 >> 32); + if(runtime.sampcntInt >= capLen_shifted) + { + if(cap.oneshot) { - samp0[0] = sndout[0]; - samp0[1] = sndout[1]; + cap.active = runtime.running = 0; } else { - SPU->sndbuf[samp*2+0] = sndout[0]; - SPU->sndbuf[samp*2+1] = sndout[1]; + do runtime.sampcntInt -= capLen_shifted; while(runtime.sampcntInt >= capLen_shifted); } - - for (int capchan = 0; capchan < 2; capchan++) - { - SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; - channel_struct& srcChan = SPU->channels[1 + 2 * capchan]; - if (SPU->regs.cap[capchan].runtime.running) - { - u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); - cap.runtime.sampcntInt += nSamplesToProcess; - while(nSamplesToProcess--) - { - //so, this is a little strange. why go through a fifo? - //it seems that some games will set up a reverb effect by capturing - //to the nearly same address as playback, but ahead by a couple. - //So, playback will always end up being what was captured a couple of samples ago. - //This system counts on playback always having read ahead 16 samples. - //In that case, playback will end up being what was processed at one entire buffer length ago, - //since the 16 samples would have read ahead before they got captured over - - //It's actually the source channels which should have a fifo, but we are - //not going to take the hit in speed and complexity. Save it for a future rewrite. - //Instead, what we do here is delay the capture by 16 samples to create a similar effect. - //Subjectively, it seems to be working. - - //Don't do anything until the fifo is filled, so as to delay it - if (cap.runtime.fifo.size < 16) - { - cap.runtime.fifo.enqueue(capout[capchan]); - continue; - } - - //(actually capture sample from fifo instead of most recently generated) - u32 multiplier; - s32 sample = cap.runtime.fifo.dequeue(); - cap.runtime.fifo.enqueue(capout[capchan]); - - //static FILE* fp = NULL; - //if(!fp) fp = fopen("d:\\capout.raw","wb"); - //fwrite(&sample,2,1,fp); - - if (cap.bits8) - { - s8 sample8 = sample >> 8; - if (skipcap) _MMU_write08<1,MMU_AT_DMA>(cap.runtime.curdad,0); - else _MMU_write08<1,MMU_AT_DMA>(cap.runtime.curdad,sample8); - cap.runtime.curdad++; - multiplier = 4; - } - else - { - s16 sample16 = sample; - if (skipcap) _MMU_write16<1,MMU_AT_DMA>(cap.runtime.curdad,0); - else _MMU_write16<1,MMU_AT_DMA>(cap.runtime.curdad,sample16); - cap.runtime.curdad+=2; - multiplier = 2; - } - - if (cap.runtime.curdad >= cap.runtime.maxdad) - { - cap.runtime.curdad = cap.dad; - cap.runtime.sampcntInt -= cap.len*multiplier; - } - } //sampinc loop - } //if capchan running - } //capchan loop - } //main sample loop - - SPU->sndbuf[0] = samp0[0]; - SPU->sndbuf[1] = samp0[1]; + } } //ENTER @@ -1532,8 +1488,8 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) { if (actuallyMix) { - memset(SPU->sndbuf, 0, length*4*2); - memset(SPU->outbuf, 0, length*2*2); + if(SPU->bufsize < length) SPU->resizeBuffer(length); + memset(SPU->outbuf, 0, length*sizeof(s16)*2); } //we used to use master enable here, and do nothing if audio is disabled. @@ -1542,133 +1498,366 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) //is this still a good idea? zeroing the capture buffers is important... if(!SPU->regs.masteren) return; - bool advanced = CommonSettings.spu_advanced; - - //branch here so that slow computers don't have to take the advanced (slower) codepath. - //it remainds to be seen exactly how much slower it is - //if it isnt much slower then we should refactor everything to be simpler, once it is working - if (advanced && SPU == SPU_core) + // We used to branch here into advanced/non-advanced mode here. + // Hopefully, the current code is good enough to avoid the need now... + + /************************************************/ + + // Overall flow: + // For each channel: + // Generate L/R sample data into chanbuf[] + // If not bypassed: + // If not muted: + // Mix chanbuf[] into mixbuf[] + // Else if capturing muted channels: + // Mix chanbuf[] into mutedmixbuf[] + // If capturing from channels: + // Copy/mix chanbuf[] into capbuf[] + // If not playing from mixer: + // Copy/mix chanbuf[] into outbuf[] + // If capturing from channels: + // Output capbuf[] to capture units + // If playing from mixer: + // Output mixbuf[] to outbuf[] + // If capturing from mixer: + // If capturing muted channels: + // Output mixbuf[]+mutedmixbuf[] to capture units + // Else + // Output mixbuf[] to capture units + + //we used to bail out if speakers were disabled. + //this is technically wrong. sound may still be captured, or something. + //in all likelihood, any game doing this probably master disabled the SPU also + //so, optimization of this case is probably not necessary. + //later, we'll just silence the output + bool speakersOn = T1ReadWord(MMU.ARM7_REG, 0x304) & 0x01; + + // Translate the mixer and capture states. + // This should improve the code generation so that + // it doesn't have to reference a lot of memory and + // can instead just bitwise-test as needed. + // -bypassMixer controls whether chanbuf[] should NOT be added to mixbuf[] + // -capbufFlags0/1 controls the following: + // -The least-significant bit enabled will store UN-PANNED chanbuf[] to capbuf[] + // -All other bits will add UN-PANNED chanbuf[] to capbuf[] + // -If all zero, capture is either disabled or comes from the mixer + // -outbufFlagsL/R controls the following: + // -The least-significant bit enabled will store PANNED chanbuf[] to outbuf[] + // -All other bits will add PANNED chanbuf[] to outbuf[] + // -If all zero, output comes from the mixer + enum { - SPU_MixAudio_Advanced(actuallyMix, SPU, length); - } - else + CAPSRC_NONE, + CAPSRC_MIXER, // Capture mixer output + CAPSRC_CHAN, // Capture channel 1/3 output + CAPSRC_MIXED, // Capture channel 0+1/2+3 output (buggy on hardware) + }; + u8 bypassMixer = 0; + u16 chanMuteFlags = CommonSettings.spu_muteChannels; + u8 capbufFlags0 = 0; + u8 capbufFlags1 = 0; + u8 outbufFlagsL = 0; + u8 outbufFlagsR = 0; + u8 cap0Src = CAPSRC_NONE; + u8 cap1Src = CAPSRC_NONE; + u8 *mixdata = (u8*)SPU->mixdata; + s32 *mixbuf = NULL; + s32 *mutedmixbuf = NULL; + s16 *capbuf = NULL; + s16 *chanbuf = NULL; + s16 *outbuf = SPU->outbuf; + s32 masterVol = spumuladjust7(SPU->regs.mastervol); + int mixdataClearSizeBytes = 0; + if(actuallyMix) { - //non-advanced mode - for (int i = 0; i < 16; i++) - { - channel_struct *chan = &SPU->channels[i]; + if(SPU->regs.ctl_ch1bypass) bypassMixer |= (1 << 1); + if(SPU->regs.ctl_ch3bypass) bypassMixer |= (1 << 3); - if (chan->status != CHANSTAT_PLAY) - continue; + // Translate capture state + if(SPU->regs.cap[0].runtime.running) + { + if(SPU->regs.cap[0].source == 0) cap0Src = CAPSRC_MIXER; + else if(SPU->regs.cap[0].add) cap0Src = CAPSRC_MIXED; + else cap0Src = CAPSRC_CHAN; + } + if(SPU->regs.cap[1].runtime.running) + { + if(SPU->regs.cap[1].source == 0) cap1Src = CAPSRC_MIXER; + else if(SPU->regs.cap[1].add) cap1Src = CAPSRC_MIXED; + else cap1Src = CAPSRC_CHAN; + } + if(cap0Src == CAPSRC_CHAN || cap0Src == CAPSRC_MIXED) capbufFlags0 |= (1 << 0); + if( cap0Src == CAPSRC_MIXED) capbufFlags0 |= (1 << 1); + if(cap1Src == CAPSRC_CHAN || cap1Src == CAPSRC_MIXED) capbufFlags1 |= (1 << 2); + if( cap1Src == CAPSRC_MIXED) capbufFlags1 |= (1 << 3); + bool isCapturing = (cap0Src != CAPSRC_NONE) || (cap1Src != CAPSRC_NONE); + bool captureMuted = isCapturing && CommonSettings.spu_captureMuted; + if(!captureMuted) + { + capbufFlags0 &= ~chanMuteFlags; + capbufFlags1 &= ~chanMuteFlags; + } - SPU->bufpos = 0; - SPU->buflength = length; + // Translate outputs + switch(SPU->regs.ctl_left) + { + case SPU_struct::REGS::LOM_CH1: + outbufFlagsL = (1 << 1); + break; + case SPU_struct::REGS::LOM_CH3: + outbufFlagsL = (1 << 3); + break; + case SPU_struct::REGS::LOM_CH1_PLUS_CH3: + outbufFlagsL = (1 << 1) | (1 << 3); + break; + } + switch(SPU->regs.ctl_right) + { + case SPU_struct::REGS::ROM_CH1: + outbufFlagsR = (1 << 1); + break; + case SPU_struct::REGS::ROM_CH3: + outbufFlagsR = (1 << 3); + break; + case SPU_struct::REGS::ROM_CH1_PLUS_CH3: + outbufFlagsR = (1 << 1) | (1 << 3); + break; + } - // Mix audio - _SPU_ChanUpdate(!CommonSettings.spu_muteChannels[i] && actuallyMix, SPU, chan); + // Generate mixing pointers + // This setup is so we can clear everything in a single memset() call + // PONDER: Can we put these on the stack? + // ie. u8 mixdata[FIFO_SIZE * (sizeof(s32)+sizeof(s32)+sizeof(s16)+sizeof(s16))*2] + int mixBufSize = MIN(length, SPUCAPTURE_FIFO_SIZE); + u8 *nextdata = mixdata; + if(actuallyMix) mixbuf = (s32*)nextdata, nextdata += mixBufSize * sizeof(s32)*2; + if(captureMuted) mutedmixbuf = (s32*)nextdata, nextdata += mixBufSize * sizeof(s32)*2; + if(isCapturing) capbuf = (s16*)nextdata, nextdata += mixBufSize * sizeof(s16)*2; + if(actuallyMix) chanbuf = (s16*)nextdata; // <- Do not increment nextData + mixdataClearSizeBytes = nextdata - mixdata; + } + else + { + // If we end up here, we're either mixing the core SPU while + // in dual SPU mode (meaning we shouldn't output data from + // the capture unit, as we'll do this in the user SPU), or + // the output core is SNDDummy (with the capture units either + // not running, or disabled via ENABLE_DUMMY_SPU_CAPTURE==0). + // In the former case, we can just seek the capture position, + // but in the latter case, we must output silence to avoid + // potentially leaving the capture buffers filled with garbage. + bool captureZeros = (SPU_SoundCore() == &SNDDummy); + if(SPU->regs.cap[0].runtime.running) + { + if(captureZeros) _SPU_WriteCapture(SPU->regs.cap[0], SPU->channels[1], NULL, length); + else _SPU_SeekCapture (SPU->regs.cap[0], SPU->channels[1], length); + } + if(SPU->regs.cap[1].runtime.running) + { + if(captureZeros) _SPU_WriteCapture(SPU->regs.cap[1], SPU->channels[3], NULL, length); + else _SPU_SeekCapture (SPU->regs.cap[1], SPU->channels[3], length); } + } - //zero out capture buffers - effectively transform no-advanced-spu-emulation to capturing-zeroes - //this is needed so when the option is changed (or a state with a different setting is loaded) - //this code is bulkier and slower than it might otherwise be to reduce the chance of bugs - //IDEALLY the non-advanced codepath would be removed (while the advanced codepath was optimized and improved) - //and this code would disappear, to be replaced with code more capable of emitting zeroes at the opportune time. - for (int capchan = 0; capchan < 2; capchan++) + while(length) + { + if(mixdataClearSizeBytes) memset(mixdata, 0, mixdataClearSizeBytes); + + // We can only process at most SPUCAPTURE_FIFO_SIZE samples + // per mixing batch, in case the capture buffers wrap around. + // Technically, we could actually check if this is needed at + // all, but this should work well enough as is. + int thisLength = MIN(length, SPUCAPTURE_FIFO_SIZE); + length -= thisLength; + + // Process each channel in turn + // Note that we are using unsigned overflow to avoid counting + // directly, as we need to keep track of the bit index anyway + channel_struct *chan = SPU->channels; + for(u16 chanBit=1; chanBit != 0; chan++, chanBit <<= 1) { - SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; - channel_struct& srcChan = SPU->channels[1 + 2 * capchan]; - if (cap.runtime.running) + if (chan->status != CHANSTAT_PLAY) continue; + + // Generate data into chanbuf[] + // NOTE: If actuallyMix==false, the channel is updated but no data is generated. + _SPU_GenerateChanData(actuallyMix, chan, chanbuf, thisLength); + if(!actuallyMix) continue; + + // Bypass means we must NOT mix this channel into mixbuf[] OR mutedmixbuf[] + if((bypassMixer & chanBit) == 0) { - for (int samp = 0; samp < length; samp++) + s32 *mixtarget = ((chanMuteFlags & chanBit) == 0) ? mixbuf : mutedmixbuf; + if(mixtarget) { - u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); - cap.runtime.sampcntInt += nSamplesToProcess; - while (nSamplesToProcess--) - { - if (cap.bits8) - { - _MMU_write08<1,MMU_AT_DMA>(cap.runtime.curdad,0); - cap.runtime.curdad++; - } - else - { - _MMU_write16<1,MMU_AT_DMA>(cap.runtime.curdad,0); - cap.runtime.curdad+=2; - } - - if (cap.runtime.curdad >= cap.runtime.maxdad) - { - cap.runtime.curdad = cap.dad; - cap.runtime.sampcntInt -= cap.len*(cap.bits8?4:2); - } - } + for(int n=0; n < thisLength*2; n++) mixtarget[n] += chanbuf[n]; } } - } - } //non-advanced branch - //we used to bail out if speakers were disabled. - //this is technically wrong. sound may still be captured, or something. - //in all likelihood, any game doing this probably master disabled the SPU also - //so, optimization of this case is probably not necessary. - //later, we'll just silence the output - bool speakers = T1ReadWord(MMU.ARM7_REG, 0x304) & 0x01; + // Generate outputs for channel capture + // Yes, we have to undo the panning here, but that's fine. + // Incidentally, this emulates the ch(a)+ch(b) overflow bug + if((capbufFlags0 & chanBit) != 0) + { + if((capbufFlags0 & (chanBit-1)) == 0) + for(int n=0; n < thisLength; n++) + capbuf[n*2+0] = chanbuf[n*2+0] + chanbuf[n*2+1]; + else + for(int n=0; n < thisLength; n++) + capbuf[n*2+0] += chanbuf[n*2+0] + chanbuf[n*2+1]; + } + if((capbufFlags1 & chanBit) != 0) + { + if((capbufFlags1 & (chanBit-1)) == 0) + for(int n=0; n < thisLength; n++) + capbuf[n*2+1] = chanbuf[n*2+0] + chanbuf[n*2+1]; + else + for(int n=0; n < thisLength; n++) + capbuf[n*2+1] += chanbuf[n*2+0] + chanbuf[n*2+1]; + } - u8 vol = SPU->regs.mastervol; + // If speakers are turned off or the channel is muted, we can skip + // setting outbuf[]. Note that if the channel is muted and we are + // generating outbuf[] from chanbuf[], outbuf[] must already have + // been cleared to silence. + if(!speakersOn || (chanMuteFlags & chanBit) != 0) continue; - // convert from 32-bit->16-bit - if (actuallyMix && speakers) - for (int i = 0; i < length*2; i++) + // Set outbuf[] from chanbuf[] when L/R source is not the mixer + // Note that Ch1+Ch3 mode clips as intended; only capture has overflow bugs + if((outbufFlagsL & chanBit) != 0) + { + if((outbufFlagsL & (chanBit-1)) == 0) + for(int n=0; n < thisLength; n++) + outbuf[n*2+0] = (chanbuf[n*2+0] * masterVol >> 7); + else + for(int n=0; n < thisLength; n++) + outbuf[n*2+0] = MinMax(outbuf[n*2+0] + (chanbuf[n*2+0] * masterVol >> 7), -0x8000, +0x7FFF); + } + if((outbufFlagsR & chanBit) != 0) + { + if((outbufFlagsR & (chanBit-1)) == 0) + for(int n=0; n < thisLength; n++) + outbuf[n*2+1] = (chanbuf[n*2+1] * masterVol >> 7); + else + for(int n=0; n < thisLength; n++) + outbuf[n*2+1] = MinMax(outbuf[n*2+1] + (chanbuf[n*2+1] * masterVol >> 7), -0x8000, +0x7FFF); + } + } + + // Generate mixer output to outbuf[] + if(mixbuf && speakersOn) { - // Apply Master Volume - SPU->sndbuf[i] = spumuldiv7(SPU->sndbuf[i], vol); - s16 outsample = MinMax(SPU->sndbuf[i],-0x8000,0x7FFF); - SPU->outbuf[i] = outsample; + if(outbufFlagsL == 0) + { + for(int n=0; n < thisLength; n++) + outbuf[n*2+0] = MinMax(mixbuf[n*2+0] * masterVol >> 7, -0x8000, +0x7FFF); + } + if(outbufFlagsR == 0) + { + for(int n=0; n < thisLength; n++) + outbuf[n*2+1] = MinMax(mixbuf[n*2+1] * masterVol >> 7, -0x8000, +0x7FFF); + } } + // Generate final capture output + if(cap0Src != CAPSRC_NONE) + { + if(cap0Src == CAPSRC_MIXER) + { + if(mutedmixbuf) + for(int n=0; n < thisLength; n++) + capbuf[n*2+0] = MinMax(mixbuf[n*2+0] + mutedmixbuf[n*2+0], -0x8000, +0x7FFF); + else + for(int n=0; n < thisLength; n++) + capbuf[n*2+0] = MinMax(mixbuf[n*2+0], -0x8000, +0x7FFF); + } + bool run = _SPU_WriteCapture(SPU->regs.cap[0], SPU->channels[1], capbuf, thisLength); + if(!run) cap0Src = CAPSRC_NONE, capbufFlags0 = 0; + } + if(cap1Src != CAPSRC_NONE) + { + if(cap1Src == CAPSRC_MIXER) + { + if(mutedmixbuf) + for(int n=0; n < thisLength; n++) + capbuf[n*2+1] = MinMax(mixbuf[n*2+1] + mutedmixbuf[n*2+1], -0x8000, +0x7FFF); + else + for(int n=0; n < thisLength; n++) + capbuf[n*2+1] = MinMax(mixbuf[n*2+1], -0x8000, +0x7FFF); + } + bool run = _SPU_WriteCapture(SPU->regs.cap[1], SPU->channels[3], capbuf+1, thisLength); + if(!run) cap1Src = CAPSRC_NONE, capbufFlags1 = 0; + } + // Advance buffer + outbuf += (size_t)thisLength*2; + } } ////////////////////////////////////////////////////////////////////////////// -//emulates one hline of the cpu core. -//this will produce a variable number of samples, calculated to keep a 44100hz output -//in sync with the emulator framerate -void SPU_Emulate_core() +// Emulates the cpu core for the specified number of ARM7 cycles. +// This will produce a variable number of samples to sync to DESMUME_SAMPLE_RATE +int SPU_Emulate_core(u32 numberOfARM7Cycles) { - bool needToMix = true; SoundInterface_struct *soundProcessor = SPU_SoundCore(); - - _samples += samples_per_hline; - spu_core_samples = (int)(_samples); - _samples -= spu_core_samples; - + // We don't need to mix audio for Dual Synch/Asynch mode since we do this // later in SPU_Emulate_user(). Disable mixing here to speed up processing. - // However, recording still needs to mix the audio, so make sure we're also - // not recording before we disable mixing. - if ( _currentSynchMode == ESynchMode_DualSynchAsynch && - !(driver->AVI_IsRecording() || driver->WAV_IsRecording()) ) + // If we are outputting to the dummy core, we can disable all mixing if + // we are not capturing the output at all, increasing performance. + bool needToMix = false; + if(soundProcessor != &SNDDummy) + needToMix = (_currentSynchMode != ESynchMode_DualSynchAsynch); +#if ENABLE_DUMMY_SPU_CAPTURE + else if(SPU_core->regs.cap[0].runtime.running || SPU_core->regs.cap[1].runtime.running) + needToMix = true; +#endif + + // If we are recording, we will need to mix the core SPU regardless of anything else + // NOTE: Technically, we should be checking wavWriter.mode==WAVMODE_CORE, but that + // is only enabled with DEVELOPER_MENU_ITEMS, and we won't break anything anyway. + needToMix = needToMix || driver->AVI_IsRecording() || driver->WAV_IsRecording(); + + // NOTE: We used to keep a double-type counter here, and pre-divided by + // ARM7_CLOCK. This is probably enough for most cases, but for the sake + // of perfect accuracy (at least in regards to this), we use a cycles + // counter instead here, and figure out the sample count from there. + int samplesToMix; { - needToMix = false; + // minMixSize controls the mixing latency, which reduces the + // overhead of the update routines at the cost of synchronicity. + // NOTE: minMixSize must be <= 128, or else _spu_core_cyclesCounter + // would cause 32bit overflow if we postpone mixing for long enough. + // We could use a 64bit counter instead, but 128 samples should be plenty. + static const u32 doMix_minMixSize = 1; // <- Setting this too high can break streams, so keep at minimum + static const u32 noMix_minMixSize = 64; + u64 minDeltaCycles = (u64)(needToMix ? doMix_minMixSize : noMix_minMixSize) * ARM7_CLOCK; + u64 cycles64 = _spu_core_cyclesCounter + (u64)numberOfARM7Cycles*DESMUME_SAMPLE_RATE; + if(cycles64 < minDeltaCycles) + { + _spu_core_cyclesCounter = (u32)cycles64; + return 0; + } + samplesToMix = (int)(cycles64 / ARM7_CLOCK); + _spu_core_cyclesCounter = (u32)(cycles64 % ARM7_CLOCK); } - SPU_MixAudio(needToMix, SPU_core, spu_core_samples); + SPU_MixAudio(needToMix, SPU_core, samplesToMix); - if (soundProcessor == NULL) + if (soundProcessor != NULL) { - return; - } - - if (soundProcessor->FetchSamples != NULL) - { - soundProcessor->FetchSamples(SPU_core->outbuf, spu_core_samples, _currentSynchMode, _currentSynchronizer); - } - else - { - SPU_DefaultFetchSamples(SPU_core->outbuf, spu_core_samples, _currentSynchMode, _currentSynchronizer); + if (soundProcessor->FetchSamples != NULL) + { + soundProcessor->FetchSamples(SPU_core->outbuf, samplesToMix, _currentSynchMode, _currentSynchronizer); + } + else + { + SPU_DefaultFetchSamples(SPU_core->outbuf, samplesToMix, _currentSynchMode, _currentSynchronizer); + } } + + return samplesToMix; } void SPU_Emulate_user(bool mix) @@ -1687,16 +1876,16 @@ void SPU_Emulate_user(bool mix) // Check to see how many free samples are available. // If there are some, fill up the output buffer. freeSampleCount = soundProcessor->GetAudioSpace(); - if (freeSampleCount == 0) - { - return; - } //printf("mix %i samples\n", audiosize); if (freeSampleCount > _currentBufferSize) { freeSampleCount = _currentBufferSize; } + if (freeSampleCount == 0) + { + return; + } // If needed, resize the post-process buffer to guarantee that // we can store all the sound data. @@ -1877,7 +2066,7 @@ void WavWriter::update(void* soundData, int numSamples) { if(!spufp) return; //TODO - big endian for the s16 samples?? - size_t elems_written = fwrite(soundData, numSamples*2, 2, spufp); + size_t elems_written = fwrite(soundData, sizeof(s16)*2, numSamples, spufp); } bool WavWriter::isRecording() const @@ -1929,14 +2118,14 @@ void WAV_WavSoundUpdate(void* soundData, int numSamples, WAVMode mode) void spu_savestate(EMUFILE &os) { //version - os.write_32LE(7); + os.write_32LE(8); SPU_struct *spu = SPU_core; + os.write_u8(SPUCHAN_PCM16B_SIZE); for (int j = 0; j < 16; j++) { channel_struct &chan = spu->channels[j]; - os.write_32LE(chan.num); os.write_u8(chan.vol); os.write_u8(chan.volumeDiv); os.write_u8(chan.hold); @@ -1952,15 +2141,13 @@ void spu_savestate(EMUFILE &os) os.write_32LE(chan.length); os.write_32LE(chan.sampcntFrac); os.write_32LE(chan.sampcntInt); - os.write_32LE(chan.sampincFrac); - os.write_32LE(chan.sampincInt); - for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) os.write_16LE(chan.pcm16b[i]); - os.write_32LE(chan.index); + for (int i = 0; i < SPUCHAN_PCM16B_SIZE; i++) os.write_16LE(chan.pcm16b[i]); + os.write_u8(chan.index); os.write_16LE(chan.x); os.write_u8(chan.keyon); } - os.write_doubleLE(_samples); + os.write_32LE(_spu_core_cyclesCounter); os.write_u8(spu->regs.mastervol); os.write_u8(spu->regs.ctl_left); @@ -1980,23 +2167,22 @@ void spu_savestate(EMUFILE &os) os.write_32LE(spu->regs.cap[i].dad); os.write_16LE(spu->regs.cap[i].len); os.write_u8(spu->regs.cap[i].runtime.running); - os.write_32LE(spu->regs.cap[i].runtime.curdad); - os.write_32LE(spu->regs.cap[i].runtime.maxdad); + os.write_32LE(spu->regs.cap[i].runtime.dad); os.write_32LE(spu->regs.cap[i].runtime.sampcntFrac); os.write_32LE(spu->regs.cap[i].runtime.sampcntInt); } + os.write_u8(SPUCAPTURE_FIFO_SIZE); for (int i = 0; i < 2; i++) - spu->regs.cap[i].runtime.fifo.save(os); + { + os.write_u8(spu->regs.cap[i].runtime.pcm16bOffs); + for (int n = 0; n < SPUCAPTURE_FIFO_SIZE; n++) + os.write_16LE(spu->regs.cap[i].runtime.pcm16b[n]); + } } bool spu_loadstate(EMUFILE &is, int size) { - //note! if we load a state created with advanced spu logic on a system without it, - //there's a high likelihood of captured data existing. - //this would get played back forever without being replaced by captured data. - //it's been solved by capturing zeroes though even when advanced spu logic is disabled. - //read version u32 version; if (is.read_32LE(version) != 1) return false; @@ -2004,10 +2190,11 @@ bool spu_loadstate(EMUFILE &is, int size) SPU_struct *spu = SPU_core; reconstruct(&SPU_core->regs); + int pcm16bSz_Chan = (version >= 8) ? (int)is.read_u8() : 4; for (int j = 0; j < 16; j++) { channel_struct &chan = spu->channels[j]; - is.read_32LE(chan.num); + if(version < 8) is.read_32LE(chan.num); else chan.num = j; is.read_u8(chan.vol); is.read_u8(chan.volumeDiv); if (chan.volumeDiv == 4) chan.volumeDiv = 3; @@ -2017,7 +2204,7 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_u8(chan.repeat); is.read_u8(chan.format); is.read_u8(chan.status); - if (version >= 7) is.read_u8(chan.pcm16bOffs); else chan.pcm16bOffs = 0; + if (version >= 7) chan.pcm16bOffs = SPUCHAN_PCM16B_AT(is.read_u8()); is.read_32LE(chan.addr); is.read_16LE(chan.timer); is.read_16LE(chan.loopstart); @@ -2027,54 +2214,45 @@ bool spu_loadstate(EMUFILE &is, int size) if(version >= 7) { is.read_32LE(chan.sampcntFrac); is.read_32LE(chan.sampcntInt); - is.read_32LE(chan.sampincFrac); - is.read_32LE(chan.sampincInt); + if(version < 8) is.fseek(8, SEEK_CUR); // chan.sampincFrac (LE32), chan.sampincInt (LE32) } - else if (version >= 2) + else /*if (version >= 2)*/ // <- This check (and its broken else clause) was never needed { - double temp; - s64 temp2; - is.read_doubleLE(temp); temp2 = (s64)(temp * (1ll << 32)); - chan.sampcntFrac = (u32)temp2; - chan.sampcntInt = (s32)(temp2 >> 32); - is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); // Intentionally unsigned - chan.sampincFrac = (u32)temp2; - chan.sampincInt = (u32)(temp2 >> 32); - } - else - { - // FIXME - // What even is supposed to be happening here? - // sampcnt and sampinc were double type before - // I even made any changes, so this is broken. - chan.sampcntFrac = 0; - is.read_32LE(chan.sampcntInt); - chan.sampincFrac = 0; - is.read_32LE(chan.sampincInt); + s64 temp = (s64)(is.read_doubleLE() * (double)(1ll << 32)); + chan.sampcntFrac = (u32)temp; + chan.sampcntInt = (s32)(temp >> 32); + is.fseek(8, SEEK_CUR); // chan.sampinc (LEdouble) } if (version >= 7) { - for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) is.read_16LE(chan.pcm16b[i]); + for (int i = 0; i < pcm16bSz_Chan; i++) is.read_16LE(chan.pcm16b[SPUCHAN_PCM16B_AT(i)]); } else { is.fseek(4, SEEK_CUR); // chan.lastsampcnt (LE32) is.read_16LE(chan.pcm16b[0]); // chan.pcm16b - is.fseek(2, SEEK_CUR); // chan.pcm16b_last + is.fseek(2, SEEK_CUR); // chan.pcm16b_last (LE16) } - is.read_32LE(chan.index); + chan.index = (version >= 8) ? is.read_u8() : (u8)is.read_s32LE(); is.read_16LE(chan.x); if (version < 7) is.fseek(2, SEEK_CUR); // chan.psgnoise_last (LE16) if (version >= 4) is.read_u8(chan.keyon); + // Because we don't save sampinc, we need to recalculate it + adjust_channel_timer(&chan); + //hopefully trigger a recovery of the adpcm looping system chan.loop_index = K_ADPCM_LOOPING_RECOVERY_INDEX; } - if (version >= 2) + if (version >= 8) { - is.read_doubleLE(_samples); + is.read_32LE(_spu_core_cyclesCounter); + } + else if (version >= 2) + { + _spu_core_cyclesCounter = (u32)(is.read_doubleLE() * ARM7_CLOCK); // _samples (doubleLE) } if (version >= 4) @@ -2100,27 +2278,52 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_32LE(spu->regs.cap[i].dad); is.read_16LE(spu->regs.cap[i].len); is.read_u8(spu->regs.cap[i].runtime.running); - is.read_32LE(spu->regs.cap[i].runtime.curdad); - is.read_32LE(spu->regs.cap[i].runtime.maxdad); + if (version >= 8) is.read_32LE(spu->regs.cap[i].dad); + else { + is.fseek(4, SEEK_CUR); // regs.cap[i].runtime.curdad (LE32) + is.read_32LE(spu->regs.cap[i].dad); // regs.cap[i].runtime.maxdad + spu->regs.cap[i].dad -= spu->regs.cap[i].len*4; + } if (version >= 7) { is.read_32LE(spu->regs.cap[i].runtime.sampcntFrac); is.read_32LE(spu->regs.cap[i].runtime.sampcntInt); } else { - double temp; - u64 temp2; - is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); - spu->regs.cap[i].runtime.sampcntFrac = (u32)temp2; - spu->regs.cap[i].runtime.sampcntInt = (u32)(temp2 >> 32); + s64 temp = (s64)(is.read_doubleLE() * (double)(1ll << 32)); + spu->regs.cap[i].runtime.sampcntFrac = (u32)temp; + spu->regs.cap[i].runtime.sampcntInt = (s32)(temp >> 32); + } + if(version <= 7) + { + // Before, sampcnt incremented "as expected" and the FIFO + // delay was implemented within the SndFifo construct. + // Now, though, we create the delay by setting sampcnt to + // -FIFO_SIZE on starting capture, so account for this here. + spu->regs.cap[i].runtime.sampcntInt -= SPUCAPTURE_FIFO_SIZE; } } } - if (version >= 6) - for (int i=0;i<2;i++) spu->regs.cap[i].runtime.fifo.load(is); - else - for (int i=0;i<2;i++) spu->regs.cap[i].runtime.fifo.reset(); + int pcm16bSz_Capture = (version >= 8) ? (int)is.read_u8() : 16; + if (version >= 8) + for (int i=0;i<2;i++) + { + spu->regs.cap[i].runtime.pcm16bOffs = SPUCAPTURE_PCM16B_AT(is.read_u8()); + for (int n = 0; n < pcm16bSz_Capture; n++) + is.read_16LE(spu->regs.cap[i].runtime.pcm16b[SPUCAPTURE_PCM16B_AT(n)]); + } + else if (version >= 6) + for (int i=0;i<2;i++) + { + // Setting pcm16bOffs to -fifo.size ensures that we always + // fill at the correct offset relative to the FIFO queue size + SPUFifo fifo; + fifo.load(is); + spu->regs.cap[i].runtime.pcm16bOffs = (u8)(-fifo.size); + for (int n = 0; n < 16; n++) + spu->regs.cap[i].runtime.pcm16b[SPUCAPTURE_PCM16B_AT(n)] = fifo.dequeue(); + } //older versions didnt store a mastervol; //we must reload this or else games will start silent diff --git a/desmume/src/SPU.h b/desmume/src/SPU.h index 5ba6d1c96..7ab7a470f 100644 --- a/desmume/src/SPU.h +++ b/desmume/src/SPU.h @@ -36,12 +36,16 @@ class EMUFILE; #define CHANSTAT_STOPPED 0 #define CHANSTAT_PLAY 1 -#define SPUINTERPOLATION_TAPS 4 // Must be at least 4 for Catmull-Rom interpolation +#define SPUCHAN_PCM16B_SIZE 4 // Must be 2^n, and at least 4 for Catmull-Rom interpolation +#define SPUCAPTURE_FIFO_SIZE 16 // Must be 2^n -//who made these static? theyre used in multiple places. -FORCEINLINE s32 spumuldiv7(s32 val, u8 multiplier) { - assert(multiplier <= 127); - return (multiplier == 127) ? val : ((val * multiplier) >> 7); +// This converts a value of 127/128 into 128/128. Needed for volume/pan/etc. calculations +template +FORCEINLINE T spumuladjust7(T x) +{ + // Using >= can result in better code on some platforms + assert(x <= 127); + return x + (x >= (T)127); } enum SPUInterpolationMode @@ -95,6 +99,7 @@ struct channel_struct sampcntInt(0), sampincFrac(0), sampincInt(0), + pcm16b(), loop_pcm16b(0), index(0), loop_index(0), @@ -121,11 +126,11 @@ struct channel_struct s32 sampcntInt; u32 sampincFrac; u32 sampincInt; - s16 pcm16b[SPUINTERPOLATION_TAPS]; + s16 pcm16b[SPUCHAN_PCM16B_SIZE]; // ADPCM specific s16 loop_pcm16b; - s32 index; - int loop_index; + u8 index; + u8 loop_index; // PSG noise u16 x; }; @@ -146,12 +151,9 @@ class SPUFifo class SPU_struct { public: - SPU_struct(int buffersize); - u32 bufpos; - u32 buflength; - s32 *sndbuf; - s32 lastdata; //the last sample that a channel generated - s16 *outbuf; + SPU_struct(); + s32 *mixdata; // Mixing buffers + s16 *outbuf; // Device output source (L,R) u32 bufsize; channel_struct channels[16]; @@ -192,19 +194,22 @@ class SPU_struct u16 len; struct Runtime { Runtime() - : running(0), curdad(0), maxdad(0) + : running(0), pcm16bOffs(0), dad(0), len(0), sampcntFrac(0), sampcntInt(0), pcm16b() {} + u8 running; - u32 curdad; - u32 maxdad; + u8 pcm16bOffs; + u32 dad; + u32 len; u32 sampcntFrac; - u32 sampcntInt; - SPUFifo fifo; + s32 sampcntInt; + s16 pcm16b[SPUCAPTURE_FIFO_SIZE]; } runtime; } cap[2]; } regs; void reset(); + void resizeBuffer(int buffersize); ~SPU_struct(); void KeyOff(int channel); void KeyOn(int channel); @@ -223,7 +228,6 @@ class SPU_struct }; extern SPU_struct *SPU_core, *SPU_user; -extern int spu_core_samples; int SPU_ChangeSoundCore(int coreid, int newBufferSizeBytes); SoundInterface_struct *SPU_SoundCore(); @@ -236,7 +240,7 @@ void SPU_SetSynchMode(int mode, int method); void SPU_ClearOutputBuffer(void); void SPU_Reset(void); void SPU_DeInit(void); -void SPU_KeyOn(int channel); + static FORCEINLINE void SPU_WriteByte(u32 addr, u8 val) { addr &= 0xFFF; @@ -264,7 +268,8 @@ static FORCEINLINE void SPU_WriteLong(u32 addr, u32 val) static FORCEINLINE u8 SPU_ReadByte(u32 addr) { return SPU_core->ReadByte(addr & 0x0FFF); } static FORCEINLINE u16 SPU_ReadWord(u32 addr) { return SPU_core->ReadWord(addr & 0x0FFF); } static FORCEINLINE u32 SPU_ReadLong(u32 addr) { return SPU_core->ReadLong(addr & 0x0FFF); } -void SPU_Emulate_core(void); + +int SPU_Emulate_core(u32 numberOfARM7Cycles); void SPU_Emulate_user(bool mix = true); void SPU_DefaultFetchSamples(s16 *sampleBuffer, size_t sampleCount, ESynchMode synchMode, ISynchronizingAudioBuffer *theSynchronizer); size_t SPU_DefaultPostProcessSamples(s16 *postProcessBuffer, size_t requestedSampleCount, ESynchMode synchMode, ISynchronizingAudioBuffer *theSynchronizer); diff --git a/desmume/src/frontend/windows/soundView.cpp b/desmume/src/frontend/windows/soundView.cpp index 786b789a1..d0e9a1424 100644 --- a/desmume/src/frontend/windows/soundView.cpp +++ b/desmume/src/frontend/windows/soundView.cpp @@ -141,7 +141,7 @@ void SoundView_Refresh(bool forceRedraw) InvalidateRect(GetDlgItem(hDlg, IDC_SOUND0PANBAR+chanId), NULL, FALSE); if(thischan.status != CHANSTAT_STOPPED) { - volBar[chan] = spumuldiv7(128, thischan.vol) >> volume_shift[thischan.volumeDiv]; + volBar[chan] = spumuladjust7(thischan.vol) >> volume_shift[thischan.volumeDiv]; InvalidateRect(GetDlgItem(hDlg, IDC_SOUND0VOLBAR+chanId), NULL, FALSE); if(SoundView_Data->volModeAlternate) @@ -274,7 +274,7 @@ void SoundView_Refresh(bool forceRedraw) sprintf(buf,"%08X",cap0.len); SetDlgItemText(hDlg,IDC_CAP0_LEN,buf); - sprintf(buf,"%08X",cap0.runtime.curdad); + sprintf(buf,"%08X",cap0.runtime.dad+cap0.runtime.sampcntInt*(cap0.bits8 ? 1 : 2)); SetDlgItemText(hDlg,IDC_CAP0_CURDAD,buf); memcpy(&oldCap[0], &cap0, sizeof(SPU_struct::REGS::CAP)); @@ -306,7 +306,7 @@ void SoundView_Refresh(bool forceRedraw) sprintf(buf,"%08X",cap1.len); SetDlgItemText(hDlg,IDC_CAP1_LEN,buf); - sprintf(buf,"%08X",cap1.runtime.curdad); + sprintf(buf,"%08X",cap1.runtime.dad+cap1.runtime.sampcntInt*(cap1.bits8 ? 1 : 2)); SetDlgItemText(hDlg,IDC_CAP1_CURDAD,buf); memcpy(&oldCap[1], &cap1, sizeof(SPU_struct::REGS::CAP)); @@ -319,19 +319,23 @@ void SoundView_Refresh(bool forceRedraw) static void updateMute_toSettings(HWND hDlg, int chan) { for(int chanId = 0; chanId < 8; chanId++) - CommonSettings.spu_muteChannels[chanId+chanOfs()] = IsDlgButtonChecked(hDlg, IDC_SOUND0MUTE+chanId) == BST_CHECKED; + { + u16 bit = 1 << (chanId+chanOfs()); + CommonSettings.spu_muteChannels &= ~bit; + CommonSettings.spu_muteChannels |= bit * (IsDlgButtonChecked(hDlg, IDC_SOUND0MUTE+chanId) == BST_CHECKED); + } } static void updateMute_allFromSettings(HWND hDlg) { for(int chanId = 0; chanId < 16; chanId++) - CheckDlgItem(hDlg,IDC_SOUND0MUTE+chanId,CommonSettings.spu_muteChannels[chanId]); + CheckDlgItem(hDlg,IDC_SOUND0MUTE+chanId,(CommonSettings.spu_muteChannels & (1 << chanId)) != 0); } static void updateMute_fromSettings(HWND hDlg) { for(int chanId = 0; chanId < 8; chanId++) - CheckDlgItem(hDlg,IDC_SOUND0MUTE+chanId,CommonSettings.spu_muteChannels[chanId+chanOfs()]); + CheckDlgItem(hDlg,IDC_SOUND0MUTE+chanId,(CommonSettings.spu_muteChannels & (1 << (chanId+chanOfs()))) != 0); } static void SoundView_SwitchChanOfs(SoundView_DataStruct *data) { @@ -435,7 +439,7 @@ static INT_PTR CALLBACK SoundView_DlgProc(HWND hDlg, UINT uMsg, WPARAM wParam, L } for(int chanId = 0; chanId < 8; chanId++) { - if(CommonSettings.spu_muteChannels[chanId]) + if((CommonSettings.spu_muteChannels & (1<