I'm trying to get an intuitive understanding of how much I could speed up merge sort if I parallelize it.
My thoughts so far:
If N is the number of elements in the array to be sorted, then log(base 2)N is the highest number of cores I'd need. I believe this is the case because there are 2*log(base 2)N + 1 levels in mergesort. First, you break it up by dividing it by two over and over, then you merge two sorted arrays over and over until you have an array of N items again (and now it is sorted).
I'm trying to figure out how much this would actually improve performance. I'm thinking that the increase in performance due to additional cores will increase as we move towards the middle of the algorithm, because we can use more cores. Say I have 16 items in an unsorted array. I only need to use one core to break it up into two 8 item arrays, and then I can use two cores to break those into four 4 item arrays, etc.
So, the performance will increase by a factor of two for every level of splitting, and then decrease by two for every level of merging... right? Am I on the right track here?
Also, why couldn't we just start by merging the first two items in the unsorted array, then the next two and so on. Basically get rid of the first half of the algorithm?
Thoughts?
Should I ask this at math.stackexchange.com instead? Sorry if so... I didn't really know
Dual Pivot QuickSort is known to have beaten Merge Sort in serial versions. I think the parallel form of DPQ could really be the best fastest sorting algorithm ever created. Reason is that it has low constant factors than MergeSort and its worst case time complexity has the probability of occuring only 1/(n!). If N is large, prefer DPQ, maybe multithread it, if its possible. But parallelism has a pivot or limit, below the limit its slow due to Thread Management. Beyond the limit it is much faster. In case you are interested, serial code is below( both ascending and descending)
protected static void ASC(int[]a, int left, int right, int div)
{
int len = 1 + right - left;
if (len < 27)
{
// insertion sort for small array
int P1 = left + 1;
int P2 = left;
while ( P1 <= right )
{
div = a[P1];
while(( P2 >= left )&&( a[P2] > div ))
{
a[P2 + 1] = a[P2];
P2--;
}
a[P2 + 1] = div;
P2 = P1;
P1++;
}
return;
}
int third = len / div;
// "medians"
int P1 = left + third;
int P2 = right - third;
if (P1 <= left)
{
P1 = left + 1;
}
if (P2 >= right)
{
P2 = right - 1;
}
int temp;
if (a[P1] < a[P2])
{
temp = a[P1]; a[P1] = a[left]; a[left] = temp;
temp = a[P2]; a[P2] = a[right]; a[right] = temp;
}
else
{
temp = a[P1]; a[P1] = a[right]; a[right] = temp;
temp = a[P2]; a[P2] = a[left]; a[left] = temp;
}
// pivots
int pivot1 = a[left];
int pivot2 = a[right];
// pointers
int less = left + 1;
int great = right - 1;
// sorting
for (int k = less; k <= great; k++)
{
if (a[k] < pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] > pivot2)
{
while (k < great && a[great] > pivot2)
{
great--;
}
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] < pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
int dist = great - less;
if (dist < 13)
{
div++;
}
temp = a[less-1]; a[less-1] = a[left]; a[left] = temp;
temp = a[great+1]; a[great+1] = a[right]; a[right] = temp;
// subarrays
ASC(a, left, less - 2, div);
ASC(a, great + 2, right, div);
// equal elements
if (dist > len - 13 && pivot1 != pivot2)
{
for (int k = less; k <= great; k++)
{
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] == pivot2)
{
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
}
// subarray
if (pivot1 < pivot2)
{
ASC(a, less, great, div);
}
}
protected static void DSC(int[]a, int left, int right, int div)
{
int len = 1 + right - left;
if (len < 27)
{
// insertion sort for large array
int P1 = left + 1;
int P2 = left;
while ( P1 <= right )
{
div = a[P1];
while(( P2 >= left )&&( a[P2] < div ))
{
a[P2 + 1] = a[P2];
P2--;
}
a[P2 + 1] = div;
P2 = P1;
P1++;
}
return;
}
int third = len / div;
// "medians"
int P1 = left + third;
int P2 = right - third;
if (P1 >= left)
{
P1 = left + 1;
}
if (P2 <= right)
{
P2 = right - 1;
}
int temp;
if (a[P1] > a[P2])
{
temp = a[P1]; a[P1] = a[left]; a[left] = temp;
temp = a[P2]; a[P2] = a[right]; a[right] = temp;
}
else
{
temp = a[P1]; a[P1] = a[right]; a[right] = temp;
temp = a[P2]; a[P2] = a[left]; a[left] = temp;
}
// pivots
int pivot1 = a[left];
int pivot2 = a[right];
// pointers
int less = left + 1;
int great = right - 1;
// sorting
for (int k = less; k <= great; k++)
{
if (a[k] > pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] < pivot2)
{
while (k < great && a[great] < pivot2)
{
great--;
}
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] > pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
int dist = great - less;
if (dist < 13)
{
div++;
}
temp = a[less-1]; a[less-1] = a[left]; a[left] = temp;
temp = a[great+1]; a[great+1] = a[right]; a[right] = temp;
// subarrays
DSC(a, left, less - 2, div);
DSC(a, great + 2, right, div);
// equal elements
if (dist > len - 13 && pivot1 != pivot2)
{
for (int k = less; k <= great; k++)
{
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] == pivot2)
{
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
}
// subarray
if (pivot1 > pivot2)
{
DSC(a, less, great, div);
}
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With