So far I've managed to implement Dolby's Matrix Decoder based on the following specifications:
Left Right
Center 0.707 0.707
Left 1 0
Right 0 1
SLeft 0.871 0.489
SRight 0.489 0.871
But after testing my matrix I've discovered there's a lot clipping, and it sounds nothing like Dolby's decoder. I'm relatively new to DSP, although I'm pretty sure I understand most of the basics; but I'm still left clueless as to what's causing this to happen, am I missing something in the specifications, or is it just my code?
My current matrix decoder,
private static void DolbyProLogicII(List<float> leftSamples, List<float> rightSamples, int sampleRate, string outputDirectory)
{
// WavFileWrite is a wrapper class for NAudio to create Wav files.
var meta = new WaveFormat(sampleRate, 16, 1);
var c = new WavFileWrite { MetaData = meta, FileName = Path.Combine(outputDirectory, "c.wav") };
var l = new WavFileWrite { MetaData = meta, FileName = Path.Combine(outputDirectory, "l.wav") };
var r = new WavFileWrite { MetaData = meta, FileName = Path.Combine(outputDirectory, "r.wav") };
var sl = new WavFileWrite { MetaData = meta, FileName = Path.Combine(outputDirectory, "sl.wav") };
var sr = new WavFileWrite { MetaData = meta, FileName = Path.Combine(outputDirectory, "sr.wav") };
var ii = (leftSamples.Count > rightSamples.Count ? rightSamples.Count : leftSamples.Count);
// Process center channel.
for (var i = 0; i < ii; i++)
{
c.MonoChannelAudioData.Add((leftSamples[i] * 0.707) + (rightSamples[i] * 0.707));
}
c.Flush();
// Process left channel.
l.MonoChannelAudioData = leftSamples;
l.Flush();
// Process right channel.
r.MonoChannelAudioData = rightSamples;
r.Flush();
// Process surround left channel.
for (var i = 0; i < ii - 1; i++)
{
sl.MonoChannelAudioData.Add((leftSamples[i] * 0.871) + (rightSamples[i] * 0.489));
}
sl.Flush();
// Process surround right channel.
for (var i = 0; i < ii - 1; i++)
{
sr.MonoChannelAudioData.Add((leftSamples[i] * 0.489) + (rightSamples[i] * 0.871));
}
sr.Flush();
}
While the Dolby Pro Logic codec has lost some of its significance, it's still used for the playback of older recordings but also for creating a surround effect from television programs which are often still transmitted in stereo. For this reason, it's common to see it's inclusion with even newer A/V receivers.
Dolby Digital offers greater dynamic range than Pro-Logic, better frequency response and improved separation between channels.
DPL II processes any high-quality stereo signal source into five separate full frequency channels (right front, center, left front, right rear and left rear), while also decoding five channels from stereo signals encoded in traditional four-channel Dolby Surround.
While most AV preamps can decode Dolby Pro Logic today, it is not considered the best format for surround sound, as there are both lossless and lossy codec, such as Dolby TrueHD, DTS Master Audio and even Dolby Digital that do a better job with surround effects, specifically providing less compression to the sound and ...
To strictly compare your implementation with Dolby's specifications, you're missing several things,
The decoder's mixing levels (not the ratios) are incorrect,
The LFE channel isn't being processed,
The surround left & surround right channels aren't phase shifted, delayed, or passed through a HPF,
And the center channel isn't passed through a Bandpass.
The clipping problem is the result of your maxtrix mixing levels (the ratios are fine), for example, the center channel is being over amp'd by 41.4%
since 0.707 + 0.707 = 1.414
; so, to retain the correct ratios simply halve your mixing levels.
With that in mind, your maxtrix should now look like this,
Left Right
Center 0.354 0.354
Left 0.5 0
Right 0 0.5
SLeft 0.436 0.245
SRight 0.245 0.436
If you're not intentionally leaving out the LFE channel, you can decode it just like the center channel, but you must apply an LPF at 120Hz (Dolby's standard).
To process the surround left and surround right channels, you'll need to pass the left and right channels through a HPF at 100 Hz then apply a 90o phase shift (then invert the left side); mix, and then add a delay (I believe Dolby doesn't specify the exact amount of time, but after some testing I've found the "sweetspot" appears to be around 10 milliseconds).
(If you're going to play around with the delay, I'd recommend doing so between 5ms to 12.5ms (feel free to experiment). If the delay is too small you'll end up with a "compressed/condensed" sounding mix, too long, and you'll end up with an awful echoing of higher frequencies in the background. Ideally, the end result should sound as "airy/open" as possible, but without any hint of echoing/reverberation.)
leftSamples -> HPF -> phase shift -> invert -> mix \
-> delay
rightSamples -> HPF -> phase shift -> mix /
Dolby also specifies that the center channel needs to be passed through a bandpass at 70Hz to 20kHz (after mixing).
So, putting that all together you should end up with the following,
public void DolbyProLogicII(float[] leftSamples, float[] rightSamples, int sampleRate)
{
var ii = Math.Min(leftSamples.Length, rightSamples.Length);
var c = new float[ii];
var l = new float[ii];
var r = new float[ii];
var sl = new float[ii];
var sr = new float[ii];
var lfe = new float[ii];
// Process center channel
for (var i = 0; i < ii; i++)
{
// Best to be as precise as possible.
c[i] = (leftSamples[i] * 0.35355339059327376220042218105242f) + (rightSamples[i] * 0.35355339059327376220042218105242f);
}
c = LinkwitzRileyHighPass(c, sampleRate, 70);
c = LinkwitzRileyLowPass(c, sampleRate, 20000);
// Process front left channel
for (var i = 0; i < ii; i++)
{
l[i] = leftSamples[i] * 0.5f;
}
// Process front right channel
for (var i = 0; i < ii; i++)
{
r[i] = rightSamples[i] * 0.5f;
}
// Process left samples for SL channel
var slL = new float[ii];
for (var i = 0; i < ii; i++)
{
slL[ii] = leftSamples[i] * 0.43588989435406735522369819838596f;
}
slL = LinkwitzRileyHighPass(slL, sampleRate, 100);
slL = PhaseShift(slL, sampleRate, true);
// Process right samples for SL channel.
var slR = new float[ii];
for (var i = 0; i < ii; i++)
{
slR[i] = rightSamples[i] * 0.24494897427831780981972840747059f;
}
slR = LinkwitzRileyHighPass(slR, sampleRate, 100);
slR = PhaseShift(slR, sampleRate);
// Combine new left & right samples for SL channel
for (var i = 0; i < ii - 1; i++)
{
sl[i] = slL[i] + slR[i];
}
sl = Delay(sl, sampleRate, 10);
// Process left samples for SR channel
var srL = new float[ii];
for (var i = 0; i < ii; i++)
{
srL[i] = leftSamples[i] * 0.24494897427831780981972840747059f;
}
srL = LinkwitzRileyHighPass(srL, sampleRate, 100);
srL = PhaseShift(srL, sampleRate, true);
// Process right samples for SR channel
var srR = new float[ii];
for (var i = 0; i < ii; i++)
{
srR[i] = rightSamples[i] * 0.43588989435406735522369819838596f;
}
srR = LinkwitzRileyHighPass(srR, sampleRate, 100);
srR = PhaseShift(srR, sampleRate);
// Combine new left & right samples for SR channel
for (var i = 0; i < ii - 1; i++)
{
sr[i] = srL[i] + srR[i];
}
sr = Delay(sr, sampleRate, 10);
// Process LFE channel.
for (var i = 0; i < ii; i++)
{
lfe[i] = (leftSamples[i] * 0.35355339059327376220042218105242f) + (rightSamples[i] * 0.35355339059327376220042218105242f);
}
lfe = LinkwitzRileyLowPass(lfe, sampleRate, 120);
}
public float[] PhaseShift(float[] samples, double sampleRate, bool invertOutput = false)
{
var depth = 4.0;
var delay = 100.0;
var rate = 0.1;
var newSamples = new float[samples.Length];
double wp, min_wp, max_wp, range, coef, sweepfac;
double inval, x1, outval = 0.0;
double lx1 = 0.0, ly1 = 0.0, lx2 = 0.0, ly2 = 0.0, lx3 = 0.0, ly3 = 0.0, lx4 = 0.0, ly4 = 0.0;
// calc params for sweeping filters
wp = min_wp = (Math.PI * delay) / sampleRate;
range = Math.Pow(2.0, depth);
max_wp = (Math.PI * delay * range) / sampleRate;
rate = Math.Pow(range, rate / (sampleRate / 2));
sweepfac = rate;
for (var i = 0; i < samples.Length; i++)
{
coef = (1.0 - wp) / (1.0 + wp); // calc coef for current freq
x1 = (inval = (double)samples[i]);
ly1 = coef * (ly1 + x1) - lx1; // do 1st filter
lx1 = x1;
ly2 = coef * (ly2 + ly1) - lx2; // do 2nd filter
lx2 = ly1;
ly3 = coef * (ly3 + ly2) - lx3; // do 3rd filter
lx3 = ly2;
ly4 = coef * (ly4 + ly3) - lx4; // do 4th filter
lx4 = ly3;
// final output
outval = ly4;
if (invertOutput)
{
newSamples[i] = -(float)outval;
}
else
{
newSamples[i] = (float)outval;
}
wp *= sweepfac; // adjust freq of filters
if (wp > max_wp) // max?
{
sweepfac = 1.0 / rate; // sweep back down
}
else
{
if (wp < min_wp) // min?
{
sweepfac = rate; // sweep back up
}
}
}
return newSamples;
}
public float[] Delay(float[] samples, int sampleRate, double milliseconds)
{
var output = new List<float>(samples);
var ii = (sampleRate / 1000) * milliseconds;
for (var i = 0; i < ii; i++)
{
output.Insert(0, 0);
}
return output.ToArray();
}
public float[] LinkwitzRileyHighPass(float[] samples, int sampleRate, double cutoff)
{
if (cutoff <= 0 && cutoff >= sampleRate / 2)
{
throw new ArgumentOutOfRangeException("cutoff", "The cutoff frequency must be between 0 and \"sampleRate\" / 2.");
}
if (sampleRate <= 0)
{
throw new ArgumentOutOfRangeException("sampleRate", "The sample rate must be more than 0.");
}
if (samples == null || samples.Length == 0)
{
throw new ArgumentNullException("samples", "\"samples\" can not be null or empty.");
}
var newSamples = new float[samples.Length];
var wc = 2 * Math.PI * cutoff;
var wc2 = wc * wc;
var wc3 = wc2 * wc;
var wc4 = wc2 * wc2;
var k = wc / Math.Tan(Math.PI * cutoff / sampleRate);
var k2 = k * k;
var k3 = k2 * k;
var k4 = k2 * k2;
var sqrt2 = Math.Sqrt(2);
var sq_tmp1 = sqrt2 * wc3 * k;
var sq_tmp2 = sqrt2 * wc * k3;
var a_tmp = 4 * wc2 * k2 + 2 * sq_tmp1 + k4 + 2 * sq_tmp2 + wc4;
var b1 = (4 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp;
var b2 = (6 * wc4 - 8 * wc2 * k2 + 6 * k4) / a_tmp;
var b3 = (4 * (wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp;
var b4 = (k4 - 2 * sq_tmp1 + wc4 - 2 * sq_tmp2 + 4 * wc2 * k2) / a_tmp;
var a0 = k4 / a_tmp;
var a1 = -4 * k4 / a_tmp;
var a2 = 6 * k4 / a_tmp;
var a3 = a1;
var a4 = a0;
double ym1 = 0.0, ym2 = 0.0, ym3 = 0.0, ym4 = 0.0, xm1 = 0.0, xm2 = 0.0, xm3 = 0.0, xm4 = 0.0, tempy = 0.0;
for (var i = 0; i < samples.Length; i++)
{
var tempx = samples[i];
tempy = a0 * tempx + a1 * xm1 + a2 * xm2 + a3 * xm3 + a4 * xm4 - b1 * ym1 - b2 * ym2 - b3 * ym3 - b4 * ym4;
xm4 = xm3;
xm3 = xm2;
xm2 = xm1;
xm1 = tempx;
ym4 = ym3;
ym3 = ym2;
ym2 = ym1;
ym1 = tempy;
newSamples[i] = (float)tempy;
}
return newSamples;
}
public float[] LinkwitzRileyLowPass(float[] samples, int sampleRate, double cutoff)
{
if (cutoff <= 0 && cutoff >= sampleRate / 2)
{
throw new ArgumentOutOfRangeException("cutoff", "The cutoff frequency must be between 0 and \"sampleRate\" / 2.");
}
if (sampleRate <= 0)
{
throw new ArgumentOutOfRangeException("sampleRate", "The sample rate must be more than 0.");
}
if (samples == null || samples.Length == 0)
{
throw new ArgumentNullException("samples", "\"samples\" can not be null or empty.");
}
var newSamples = new float[samples.Length];
var wc = 2 * Math.PI * cutoff;
var wc2 = wc * wc;
var wc3 = wc2 * wc;
var wc4 = wc2 * wc2;
var k = wc / Math.Tan(Math.PI * cutoff / sampleRate);
var k2 = k * k;
var k3 = k2 * k;
var k4 = k2 * k2;
var sqrt2 = Math.Sqrt(2);
var sq_tmp1 = sqrt2 * wc3 * k;
var sq_tmp2 = sqrt2 * wc * k3;
var a_tmp = 4 * wc2 * k2 + 2 * sq_tmp1 + k4 + 2 * sq_tmp2 + wc4;
var b1 = (4 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp;
var b2 = (6 * wc4 - 8 * wc2 * k2 + 6 * k4) / a_tmp;
var b3 = (4 * (wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp;
var b4 = (k4 - 2 * sq_tmp1 + wc4 - 2 * sq_tmp2 + 4 * wc2 * k2) / a_tmp;
var a0 = wc4 / a_tmp;
var a1 = 4 * wc4 / a_tmp;
var a2 = 6 * wc4 / a_tmp;
var a3 = a1;
var a4 = a0;
double ym1 = 0.0, ym2 = 0.0, ym3 = 0.0, ym4 = 0.0, xm1 = 0.0, xm2 = 0.0, xm3 = 0.0, xm4 = 0.0, tempy = 0.0;
for (var i = 0; i < samples.Length; i++)
{
var tempx = samples[i];
tempy = a0 * tempx + a1 * xm1 + a2 * xm2 + a3 * xm3 + a4 * xm4 - b1 * ym1 - b2 * ym2 - b3 * ym3 - b4 * ym4;
xm4 = xm3;
xm3 = xm2;
xm2 = xm1;
xm1 = tempx;
ym4 = ym3;
ym3 = ym2;
ym2 = ym1;
ym1 = tempy;
newSamples[i] = (float)tempy;
}
return newSamples;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With