Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why is Parallel.Invoke much faster if the call is in a separate method?

I implemented the QuickSort-Algorithm 3 times and measured the time for sorting 50 million random numbers:

  1. sequential (took ~14 seconds)

  2. With Parallel.Invoke() in the same method as the sorting algorithm (took ~12 seconds)

  3. With Parallel.Invoke() in a separate method (took ~7 seconds)

So my question is: Why is Parallel.Invoke() much faster if the call is in a separate method? On my computer the 3. example was more than twice as fast as the 2.

2. With Parallel.Invoke() in the same method as the sorting algorithm

public class ParallelQuickSort
{

    private const int Threshold = 100;

    public static void Sort(int[] array)
    {
        if (array == null || array.Length == 0)
        {
            new ArgumentException("number array must be at least of length 1");
        }
        QuickSort(array, 0, array.Length - 1);
    }

    private static void QuickSort(int[] array, int left, int right)
    {
        var i = left;
        var j = right;
        var m = array[(left + right) / 2];
        while (i <= j)
        {
            while (array[i] < m) { i++; }
            while (array[j] > m) { j--; }
            if (i <= j)
            {
                var t = array[i]; array[i] = array[j]; array[j] = t;
                i++; j--;
            }
        }
        if (j - left > Threshold && right - i > Threshold)
        {
            Parallel.Invoke(
                () => QuickSort(array, left, j),
                () => QuickSort(array, i, right)
            );
        }
        else
        {
            if (j > left) { QuickSort(array, left, j); }
            if (i < right) { QuickSort(array, i, right); }
        }
    }

}

3. With Parallel.Invoke() in a separate method

public class ParallelSeparateMethodQuickSort
{

    private const int Threshold = 100;

    public static void Sort(int[] array)
    {
        if (array == null || array.Length == 0)
        {
            new ArgumentException("number array must be at least of length 1");
        }
        QuickSort(array, 0, array.Length - 1);
    }

    private static void QuickSort(int[] array, int left, int right)
    {
        var i = left;
        var j = right;
        var m = array[(left + right) / 2];
        while (i <= j)
        {
            while (array[i] < m) { i++; }
            while (array[j] > m) { j--; }
            if (i <= j)
            {
                var t = array[i]; array[i] = array[j]; array[j] = t;
                i++; j--;
            }
        }
        if (j - left > Threshold && right - i > Threshold)
        {
            ParallelInvoke(array, left, j, i, right);
        }
        else
        {
            if (j > left) { QuickSort(array, left, j); }
            if (i < right) { QuickSort(array, i, right); }
        }
    }

    private static void ParallelInvoke(int[] array, int left, int j, int i, int right)
    {
        Parallel.Invoke(
                () => QuickSort(array, left, j),
                () => QuickSort(array, i, right)
            );
    }

}

Full Code

using System;
using System.Threading.Tasks;
using System.Diagnostics;

namespace parallelQuicksort
{
    class Program
    {
        static void Main(string[] args)
        {
            const int N = 50_000_000;
            for (int i = 0; i < 5; i++)
            {
                var array = GetRandomArray(N);
                Measure("Sequential\t", () => SequentialQuickSort.Sort(array));
                array = GetRandomArray(N);
                Measure("Parallel\t", () => ParallelQuickSort.Sort(array));
                array = GetRandomArray(N);
                Measure("P. Separate Method", () => ParallelSeparateMethodQuickSort.Sort(array));
            }
        }

        private static int[] GetRandomArray(int length)
        {
            var random = new Random(4711);
            var array = new int[length];
            for (int i = 0; i < length; i++)
            {
                array[i] = random.Next();
            }
            return array;
        }

        public static void Measure(string name, Action action)
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            action();
            stopwatch.Stop();
            var time = stopwatch.ElapsedMilliseconds;
            Console.WriteLine($"{name}: \tElapsed={time}");
        }
    }

    public class SequentialQuickSort
    {
        public static void Sort(int[] array)
        {
            if (array == null || array.Length == 0)
            {
                new ArgumentException("number array must be at least of length 1");
            }
            QuickSort(array, 0, array.Length - 1);
        }

        private static void QuickSort(int[] array, int left, int right)
        {
            var i = left;
            var j = right;
            var m = array[(left + right) / 2];
            while (i <= j)
            {
                while (array[i] < m) { i++; }
                while (array[j] > m) { j--; }
                if (i <= j)
                {
                    var t = array[i]; array[i] = array[j]; array[j] = t;
                    i++; j--;
                }
            }

            if (j > left) { QuickSort(array, left, j); }
            if (i < right) { QuickSort(array, i, right); }
        }
    }

    public class ParallelQuickSort
    {

        private const int Threshold = 100;

        public static void Sort(int[] array)
        {
            if (array == null || array.Length == 0)
            {
                new ArgumentException("number array must be at least of length 1");
            }
            QuickSort(array, 0, array.Length - 1);
        }

        private static void QuickSort(int[] array, int left, int right)
        {
            var i = left;
            var j = right;
            var m = array[(left + right) / 2];
            while (i <= j)
            {
                while (array[i] < m) { i++; }
                while (array[j] > m) { j--; }
                if (i <= j)
                {
                    var t = array[i]; array[i] = array[j]; array[j] = t;
                    i++; j--;
                }
            }
            if (j - left > Threshold && right - i > Threshold)
            {
                Parallel.Invoke(
                    () => QuickSort(array, left, j),
                    () => QuickSort(array, i, right)
                );
            }
            else
            {
                if (j > left) { QuickSort(array, left, j); }
                if (i < right) { QuickSort(array, i, right); }
            }
        }

    }

    public class ParallelSeparateMethodQuickSort
    {

        private const int Threshold = 100;

        public static void Sort(int[] array)
        {
            if (array == null || array.Length == 0)
            {
                new ArgumentException("number array must be at least of length 1");
            }
            QuickSort(array, 0, array.Length - 1);
        }

        private static void QuickSort(int[] array, int left, int right)
        {
            var i = left;
            var j = right;
            var m = array[(left + right) / 2];
            while (i <= j)
            {
                while (array[i] < m) { i++; }
                while (array[j] > m) { j--; }
                if (i <= j)
                {
                    var t = array[i]; array[i] = array[j]; array[j] = t;
                    i++; j--;
                }
            }
            if (j - left > Threshold && right - i > Threshold)
            {
                ParallelInvoke(array, left, j, i, right);
            }
            else
            {
                if (j > left) { QuickSort(array, left, j); }
                if (i < right) { QuickSort(array, i, right); }
            }
        }

        private static void ParallelInvoke(int[] array, int left, int j, int i, int right)
        {
            Parallel.Invoke(
                    () => QuickSort(array, left, j),
                    () => QuickSort(array, i, right)
                );
        }

    }

}

You find my code here: https://github.com/Lazzaretti/ParallelQuicksort

Output

Sequential      :       Elapsed=14534
Parallel        :       Elapsed=11960
P. Separate Method:     Elapsed=6353
Sequential      :       Elapsed=14620
Parallel        :       Elapsed=11954
P. Separate Method:     Elapsed=6647
Sequential      :       Elapsed=14529
Parallel        :       Elapsed=11870
P. Separate Method:     Elapsed=6389
Sequential      :       Elapsed=14512
Parallel        :       Elapsed=11787
P. Separate Method:     Elapsed=6590
Sequential      :       Elapsed=16203
Parallel        :       Elapsed=11738
P. Separate Method:     Elapsed=6674
like image 213
fab Avatar asked Apr 13 '18 13:04

fab


2 Answers

After fixing that problem with sorting already sorted array mentioned in comments, your problem still reproduces.

I think the reason is how and what is captured by lambdas you pass to Parallel.Invoke.

In first case (when Parallel.Invoke is inside QuickSort method):

Parallel.Invoke(
    () => QuickSort(array, left, j),
    () => QuickSort(array, i, right)
);

You capture 5 variables, all of which are used throughout the whole QuickSort method. All those variables become fields of compiler generated class. That means whole QuickSort method now works with object fields and not local variables. If you decompile that method you'll see something like this:

  var cDisplayClass20 = new SomeCompilerGeneratedClass();
  cDisplayClass20.array = array;
  cDisplayClass20.left = left;
  cDisplayClass20.right = right;
  cDisplayClass20.i = cDisplayClass20.left;
  cDisplayClass20.j = cDisplayClass20.right;
  int num1 = cDisplayClass20.array[(cDisplayClass20.left + cDisplayClass20.right) / 2];
  while (cDisplayClass20.i <= cDisplayClass20.j) // field access
  {
    while (cDisplayClass20.array[cDisplayClass20.i] < num1) // field access
      cDisplayClass20.i++;
    while (cDisplayClass20.array[cDisplayClass20.j] > num1) // and again
      cDisplayClass20.j--;
    if (cDisplayClass20.i <= cDisplayClass20.j) // again field access
    {
      // they are everywhere
      int num2 = cDisplayClass20.array[cDisplayClass20.i];
      cDisplayClass20.array[cDisplayClass20.i] = cDisplayClass20.array[cDisplayClass20.j];
      cDisplayClass20.array[cDisplayClass20.j] = num2;
      cDisplayClass20.i++;
      cDisplayClass20.j--;
    }
  }

Which confirms point above.

However if you move Parallel.Invoke to separate method, that is no longer the case. 5 variables are still captured, but that does not affect whole QuickSort method, because capture now happens inside separate ParallelInvoke method, and so is localized. The QuickSort still works with local variables and not fields of compiler generated class. If you decompile version with separate method, it will look exactly like you wrote:

  int index1 = left;
  int index2 = right;
  int num1 = array[(left + right) / 2];
  while (index1 <= index2) // local variables
  {
    while (array[index1] < num1) // num1 is local variable
      ++index1;
    while (array[index2] > num1)
      --index2;
    if (index1 <= index2) // local variables again
    {
      int num2 = array[index1];
      array[index1] = array[index2];
      array[index2] = num2;
      ++index1;
      --index2;
    }
  }
  ...

Now, I assume that accessing object fields (which are generally on heap) is somewhat slower than accessing local variables (which are generally on stack \ in CPU register), hence version with separate method is faster. Eric Lippert also notes in comments that:

The jitter will likely do a worse job with the fields than it will with the locals because it will not enregister them as aggressively.

You can confirm the above by modifying your first version like this:

    private static void QuickSort(int[] array, int left, int right) {
        var i = left;
        var j = right;
        var m = array[(left + right) / 2];
        while (i <= j) {
            while (array[i] < m) {
                i++;
            }

            while (array[j] > m) {
                j--;
            }

            if (i <= j) {
                var t = array[i];
                array[i] = array[j];
                array[j] = t;
                i++;
                j--;
            }
        }

        if (j - left > Threshold && right - i > Threshold) {
            // copy all variables you pass to lambda
            // so that their capture does not affect the whole method
            var tmpArray = array;
            var tmpLeft = left;
            var tmpJ = j;
            var tmpI = i;
            var tmpRight = right;
            Parallel.Invoke(
                () => QuickSort(tmpArray, tmpLeft, tmpJ),
                () => QuickSort(tmpArray, tmpI, tmpRight)
            );
        }
        else {
            if (j > left) {
                QuickSort(array, left, j);
            }

            if (i < right) {
                QuickSort(array, i, right);
            }
        }
    }
}

Then execution time of both approaches will be the same.

As @Eugene mentions in comments and in his answer - there might be other things that slow this down besides field vs local variable access - such as construction and (potentially) garbage collection of compiler-generated classes mentioned above. However, these are all consequences of the same source root - capturing local variables in closure in different ways.

like image 86
Evk Avatar answered Nov 17 '22 05:11

Evk


!!!!!!!!! This answer is not actual for now !!!!! I know this is not a proper answer, just want to make it visible: try to change order of tests:

Measure("P. Separate Method", () => ParallelSeparateMethodQuickSort.Sort(array));
Measure("Sequential\t", () => SequentialQuickSort.Sort(array));
Measure("Parallel\t", () => ParallelQuickSort.Sort(array));

And you will see:

P. Separate Method:     Elapsed=8710
Sequential      :       Elapsed=4140
Parallel        :       Elapsed=7928
P. Separate Method:     Elapsed=9033
Sequential      :       Elapsed=4098
Parallel        :       Elapsed=7881

So I think your tests are wrong and this question does not make sense. And quick investigation shows that in every of the tests you change your source array, so each next test has already sorted array.

p.s. But I think this question really exists. If you try to correct the code you will see, that calling separate method works faster!

p.p.s plz let me know if somebody has an answer or if question was corrected

like image 3
Evgeny Gorbovoy Avatar answered Nov 17 '22 06:11

Evgeny Gorbovoy