I implemented the QuickSort-Algorithm 3 times and measured the time for sorting 50 million random numbers:
sequential (took ~14 seconds)
With Parallel.Invoke()
in the same method as the sorting algorithm (took ~12 seconds)
With Parallel.Invoke()
in a separate method (took ~7 seconds)
So my question is: Why is Parallel.Invoke()
much faster if the call is in a separate method?
On my computer the 3. example was more than twice as fast as the 2.
Parallel.Invoke()
in the same method as the sorting algorithmpublic class ParallelQuickSort
{
private const int Threshold = 100;
public static void Sort(int[] array)
{
if (array == null || array.Length == 0)
{
new ArgumentException("number array must be at least of length 1");
}
QuickSort(array, 0, array.Length - 1);
}
private static void QuickSort(int[] array, int left, int right)
{
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j)
{
while (array[i] < m) { i++; }
while (array[j] > m) { j--; }
if (i <= j)
{
var t = array[i]; array[i] = array[j]; array[j] = t;
i++; j--;
}
}
if (j - left > Threshold && right - i > Threshold)
{
Parallel.Invoke(
() => QuickSort(array, left, j),
() => QuickSort(array, i, right)
);
}
else
{
if (j > left) { QuickSort(array, left, j); }
if (i < right) { QuickSort(array, i, right); }
}
}
}
Parallel.Invoke()
in a separate method
public class ParallelSeparateMethodQuickSort
{
private const int Threshold = 100;
public static void Sort(int[] array)
{
if (array == null || array.Length == 0)
{
new ArgumentException("number array must be at least of length 1");
}
QuickSort(array, 0, array.Length - 1);
}
private static void QuickSort(int[] array, int left, int right)
{
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j)
{
while (array[i] < m) { i++; }
while (array[j] > m) { j--; }
if (i <= j)
{
var t = array[i]; array[i] = array[j]; array[j] = t;
i++; j--;
}
}
if (j - left > Threshold && right - i > Threshold)
{
ParallelInvoke(array, left, j, i, right);
}
else
{
if (j > left) { QuickSort(array, left, j); }
if (i < right) { QuickSort(array, i, right); }
}
}
private static void ParallelInvoke(int[] array, int left, int j, int i, int right)
{
Parallel.Invoke(
() => QuickSort(array, left, j),
() => QuickSort(array, i, right)
);
}
}
using System;
using System.Threading.Tasks;
using System.Diagnostics;
namespace parallelQuicksort
{
class Program
{
static void Main(string[] args)
{
const int N = 50_000_000;
for (int i = 0; i < 5; i++)
{
var array = GetRandomArray(N);
Measure("Sequential\t", () => SequentialQuickSort.Sort(array));
array = GetRandomArray(N);
Measure("Parallel\t", () => ParallelQuickSort.Sort(array));
array = GetRandomArray(N);
Measure("P. Separate Method", () => ParallelSeparateMethodQuickSort.Sort(array));
}
}
private static int[] GetRandomArray(int length)
{
var random = new Random(4711);
var array = new int[length];
for (int i = 0; i < length; i++)
{
array[i] = random.Next();
}
return array;
}
public static void Measure(string name, Action action)
{
Stopwatch stopwatch = Stopwatch.StartNew();
action();
stopwatch.Stop();
var time = stopwatch.ElapsedMilliseconds;
Console.WriteLine($"{name}: \tElapsed={time}");
}
}
public class SequentialQuickSort
{
public static void Sort(int[] array)
{
if (array == null || array.Length == 0)
{
new ArgumentException("number array must be at least of length 1");
}
QuickSort(array, 0, array.Length - 1);
}
private static void QuickSort(int[] array, int left, int right)
{
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j)
{
while (array[i] < m) { i++; }
while (array[j] > m) { j--; }
if (i <= j)
{
var t = array[i]; array[i] = array[j]; array[j] = t;
i++; j--;
}
}
if (j > left) { QuickSort(array, left, j); }
if (i < right) { QuickSort(array, i, right); }
}
}
public class ParallelQuickSort
{
private const int Threshold = 100;
public static void Sort(int[] array)
{
if (array == null || array.Length == 0)
{
new ArgumentException("number array must be at least of length 1");
}
QuickSort(array, 0, array.Length - 1);
}
private static void QuickSort(int[] array, int left, int right)
{
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j)
{
while (array[i] < m) { i++; }
while (array[j] > m) { j--; }
if (i <= j)
{
var t = array[i]; array[i] = array[j]; array[j] = t;
i++; j--;
}
}
if (j - left > Threshold && right - i > Threshold)
{
Parallel.Invoke(
() => QuickSort(array, left, j),
() => QuickSort(array, i, right)
);
}
else
{
if (j > left) { QuickSort(array, left, j); }
if (i < right) { QuickSort(array, i, right); }
}
}
}
public class ParallelSeparateMethodQuickSort
{
private const int Threshold = 100;
public static void Sort(int[] array)
{
if (array == null || array.Length == 0)
{
new ArgumentException("number array must be at least of length 1");
}
QuickSort(array, 0, array.Length - 1);
}
private static void QuickSort(int[] array, int left, int right)
{
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j)
{
while (array[i] < m) { i++; }
while (array[j] > m) { j--; }
if (i <= j)
{
var t = array[i]; array[i] = array[j]; array[j] = t;
i++; j--;
}
}
if (j - left > Threshold && right - i > Threshold)
{
ParallelInvoke(array, left, j, i, right);
}
else
{
if (j > left) { QuickSort(array, left, j); }
if (i < right) { QuickSort(array, i, right); }
}
}
private static void ParallelInvoke(int[] array, int left, int j, int i, int right)
{
Parallel.Invoke(
() => QuickSort(array, left, j),
() => QuickSort(array, i, right)
);
}
}
}
You find my code here: https://github.com/Lazzaretti/ParallelQuicksort
Sequential : Elapsed=14534
Parallel : Elapsed=11960
P. Separate Method: Elapsed=6353
Sequential : Elapsed=14620
Parallel : Elapsed=11954
P. Separate Method: Elapsed=6647
Sequential : Elapsed=14529
Parallel : Elapsed=11870
P. Separate Method: Elapsed=6389
Sequential : Elapsed=14512
Parallel : Elapsed=11787
P. Separate Method: Elapsed=6590
Sequential : Elapsed=16203
Parallel : Elapsed=11738
P. Separate Method: Elapsed=6674
After fixing that problem with sorting already sorted array mentioned in comments, your problem still reproduces.
I think the reason is how and what is captured by lambdas you pass to Parallel.Invoke
.
In first case (when Parallel.Invoke
is inside QuickSort
method):
Parallel.Invoke(
() => QuickSort(array, left, j),
() => QuickSort(array, i, right)
);
You capture 5 variables, all of which are used throughout the whole QuickSort
method. All those variables become fields of compiler generated class. That means whole QuickSort
method now works with object fields and not local variables. If you decompile that method you'll see something like this:
var cDisplayClass20 = new SomeCompilerGeneratedClass();
cDisplayClass20.array = array;
cDisplayClass20.left = left;
cDisplayClass20.right = right;
cDisplayClass20.i = cDisplayClass20.left;
cDisplayClass20.j = cDisplayClass20.right;
int num1 = cDisplayClass20.array[(cDisplayClass20.left + cDisplayClass20.right) / 2];
while (cDisplayClass20.i <= cDisplayClass20.j) // field access
{
while (cDisplayClass20.array[cDisplayClass20.i] < num1) // field access
cDisplayClass20.i++;
while (cDisplayClass20.array[cDisplayClass20.j] > num1) // and again
cDisplayClass20.j--;
if (cDisplayClass20.i <= cDisplayClass20.j) // again field access
{
// they are everywhere
int num2 = cDisplayClass20.array[cDisplayClass20.i];
cDisplayClass20.array[cDisplayClass20.i] = cDisplayClass20.array[cDisplayClass20.j];
cDisplayClass20.array[cDisplayClass20.j] = num2;
cDisplayClass20.i++;
cDisplayClass20.j--;
}
}
Which confirms point above.
However if you move Parallel.Invoke
to separate method, that is no longer the case. 5 variables are still captured, but that does not affect whole QuickSort
method, because capture now happens inside separate ParallelInvoke
method, and so is localized. The QuickSort
still works with local variables and not fields of compiler generated class. If you decompile version with separate method, it will look exactly like you wrote:
int index1 = left;
int index2 = right;
int num1 = array[(left + right) / 2];
while (index1 <= index2) // local variables
{
while (array[index1] < num1) // num1 is local variable
++index1;
while (array[index2] > num1)
--index2;
if (index1 <= index2) // local variables again
{
int num2 = array[index1];
array[index1] = array[index2];
array[index2] = num2;
++index1;
--index2;
}
}
...
Now, I assume that accessing object fields (which are generally on heap) is somewhat slower than accessing local variables (which are generally on stack \ in CPU register), hence version with separate method is faster. Eric Lippert also notes in comments that:
The jitter will likely do a worse job with the fields than it will with the locals because it will not enregister them as aggressively.
You can confirm the above by modifying your first version like this:
private static void QuickSort(int[] array, int left, int right) {
var i = left;
var j = right;
var m = array[(left + right) / 2];
while (i <= j) {
while (array[i] < m) {
i++;
}
while (array[j] > m) {
j--;
}
if (i <= j) {
var t = array[i];
array[i] = array[j];
array[j] = t;
i++;
j--;
}
}
if (j - left > Threshold && right - i > Threshold) {
// copy all variables you pass to lambda
// so that their capture does not affect the whole method
var tmpArray = array;
var tmpLeft = left;
var tmpJ = j;
var tmpI = i;
var tmpRight = right;
Parallel.Invoke(
() => QuickSort(tmpArray, tmpLeft, tmpJ),
() => QuickSort(tmpArray, tmpI, tmpRight)
);
}
else {
if (j > left) {
QuickSort(array, left, j);
}
if (i < right) {
QuickSort(array, i, right);
}
}
}
}
Then execution time of both approaches will be the same.
As @Eugene mentions in comments and in his answer - there might be other things that slow this down besides field vs local variable access - such as construction and (potentially) garbage collection of compiler-generated classes mentioned above. However, these are all consequences of the same source root - capturing local variables in closure in different ways.
!!!!!!!!! This answer is not actual for now !!!!! I know this is not a proper answer, just want to make it visible: try to change order of tests:
Measure("P. Separate Method", () => ParallelSeparateMethodQuickSort.Sort(array));
Measure("Sequential\t", () => SequentialQuickSort.Sort(array));
Measure("Parallel\t", () => ParallelQuickSort.Sort(array));
And you will see:
P. Separate Method: Elapsed=8710
Sequential : Elapsed=4140
Parallel : Elapsed=7928
P. Separate Method: Elapsed=9033
Sequential : Elapsed=4098
Parallel : Elapsed=7881
So I think your tests are wrong and this question does not make sense. And quick investigation shows that in every of the tests you change your source array, so each next test has already sorted array.
p.s. But I think this question really exists. If you try to correct the code you will see, that calling separate method works faster!
p.p.s plz let me know if somebody has an answer or if question was corrected
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With