Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Circular moving average filter in LINQ

I'm looking for an elegant way to implement a moving average filter in c#. Now, that would be easy, but at the borders, the averaging window shall wrap around the start/end. This kind of made my code ugly and unintuitive, and I was wondering whether there was a smarter way to solve this using LINQ or so.

So what I currently have is:

// input is a List<double> y, output is List<double> yfiltered
int yLength = y.Count;
for (int i = 0; i < yLength; i++)
{
    double sum = 0.0;
    for (int k = i - halfWindowWidth; k <= i + halfWindowWidth; k++)
    {
        if (k < 0)
        {
            // k is negative, wrap around
            sum += y[yLength - 1 + k];
        }
        else if (k >= yLength)
        {
            // k exceeds y length, wrap around
            sum += y[k - yLength];
        }
        else
        {
            // k within y.Count
            sum += y[k];
        }
    }
    yfiltered[i] = sum / (2 * halfWindowWidth + 1);
}
like image 995
Efrain Avatar asked Dec 17 '22 00:12

Efrain


1 Answers

Here is a completely other suggestion -

I was trying to actually make it better, rather than more readable.

The problem with your current code is that it sums up many numbers again and again, when not really needed.

Comparing both approaches after the implementation code...

I'm only summing a bunch for the first time, and then subtracting the tail and adding the head, again and again:

double sum = 0;

// sum = Enumerable.Range(i - halfWindowWidth, halfWindowWidth * 2 + 1)
//    .Select(k => y[(k + yLength) % yLength]).Sum();

for (var i = -halfWindowWidth; i <= halfWindowWidth; i++)
{
    sum += y[(i + yLength) % yLength];
}

yFiltered[0] = sum / (2 * halfWindowWidth + 1);

for (var i = 1; i < yLength; i++)
{
    sum = sum -
          y[(i - halfWindowWidth - 1 + yLength) % yLength] +
          y[(i + halfWindowWidth) % yLength];

    yFiltered[i] = sum / (2 * halfWindowWidth + 1);
}

And here are the speed tests, comparing the full-recalculation approach vs. this one:

private static double[] Foo1(IList<double> y, int halfWindowWidth)
{
    var yfiltered = new double[y.Count];

    var yLength = y.Count;

    for (var i = 0; i < yLength; i++)
    {
        var sum = 0.0;

        for (var k = i - halfWindowWidth; k <= i + halfWindowWidth; k++)
        {
            sum += y[(k + yLength) % yLength];
        }

        yfiltered[i] = sum / (2 * halfWindowWidth + 1);
    }

    return yfiltered;
}

private static double[] Foo2(IList<double> y, int halfWindowWidth)
{
    var yFiltered = new double[y.Count];
    var windowSize = 2 * halfWindowWidth + 1;

    double sum = 0;

    for (var i = -halfWindowWidth; i <= halfWindowWidth; i++)
    {
        sum += y[(i + y.Count) % y.Count];
    }

    yFiltered[0] = sum / windowSize;

    for (var i = 1; i < y.Count; i++)
    {
        sum = sum -
              y[(i - halfWindowWidth - 1 + y.Count) % y.Count] +
              y[(i + halfWindowWidth) % y.Count];

        yFiltered[i] = sum / windowSize;
    }

    return yFiltered;
}

private static TimeSpan TestFunc(Func<IList<double>, int, double[]> func, IList<double> y, int halfWindowWidth, int iteration
{
    var sw = Stopwatch.StartNew();

    for (var i = 0; i < iterations; i++)
    {
        var yFiltered = func(y, halfWindowWidth);
    }

    sw.Stop();
    return sw.Elapsed;
}

private static void RunTests()
{
    var y = new List<double>();
    var rand = new Random();

    for (var i = 0; i < 1000; i++)
    {
        y.Add(rand.Next());
    }

    var foo1Res = Foo1(y, 100);
    var foo2Res = Foo2(y, 100);

    Debug.WriteLine("Results are equal: " + foo1Res.SequenceEqual(foo2Res));

    Debug.WriteLine("Foo1: " + TestFunc(Foo1, y, 100, 1000));
    Debug.WriteLine("Foo2: " + TestFunc(Foo2, y, 100, 1000));
}

Time complexities:

MyWay: O(n + m)

OtherWay: O(n * m)

Since Foo1 is O(n * m) and Foo2 is O(n + m) it's really not surprising that the difference is huge.

Results on this not really crazy big scale are:

Results are equal: True

Foo1: 5.52 seconds

Foo2: 61.1 milliseconds

And on a bigger scale (replaced 1000 with 10000 on both iterations and count):

Foo1: Stopped after 10 minutes...

Foo2: 6.9 seconds

like image 78
SimpleVar Avatar answered Dec 29 '22 00:12

SimpleVar