Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Hand-coded quicksort is slower on smaller integers

Tags:

c++

sorting

When comparing my quicksort implementation with std::sort on my compiler and my implementation of mergesort, I noticed an odd pattern on large data sets: when operating on 64 bit integers, quicksort is consistently faster than mergesort; however, on smaller int sizes quicksort gets slower and mergesort gets faster.

Here is the testing code:

#include <iostream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <utility>
#include <random>
#include <chrono>
#include <limits>
#include <functional>

#include <cstdint>


template <typename Iterator>
void insertion_sort(Iterator first, Iterator last)
{
    using namespace std;

    Iterator head = first;
    Iterator new_position;

    while(head != last)
    {
        new_position = head;
        while(new_position != first && *new_position < *prev(new_position))
        {
            swap(*new_position, *prev(new_position));
            --new_position;
        }
        ++head;
    }
}

template <typename Iterator>
void recursive_mergesort_impl(Iterator first, Iterator last, std::vector<typename Iterator::value_type>& temp)
{
    if(last - first > 32)
    {
        auto middle = first + (last-first)/2;
        recursive_mergesort_impl(first, middle, temp);
        recursive_mergesort_impl(middle, last, temp);
        auto last_merged = merge_move(first, middle, middle, last, temp.begin());
        std::move(temp.begin(), last_merged, first);
    }
    else
    {
        insertion_sort(first, last);
    }
}

template <typename Iterator>
void recursive_mergesort(Iterator first, Iterator last)
{
    std::vector<typename Iterator::value_type> temp(last-first);
    recursive_mergesort_impl(first, last, temp);
}

// Pick a pivot and move it to front of range
template <typename Iterator>
template <typename Iterator>
void quicksort_pivot_back(Iterator first, Iterator last)
{
    using namespace std;

    auto middle = first + (last-first)/2;
    auto last_elem = prev(last);
    Iterator pivot;

    if(*first < *middle)
    {
        if(*middle < *last_elem)
            pivot = middle;
        else if(*first < *last_elem)
            pivot = last_elem;
        else
            pivot = first;
    }
    else if(*first < *last_elem)
        pivot = first;
    else if(*middle < *last_elem)
        pivot = last_elem;
    else
        pivot = middle;

    swap(*last_elem, *pivot);
}

template <typename Iterator, typename Function>
std::pair<Iterator, Iterator> quicksort_partition(Iterator first, Iterator last, Function pivot_select)
{
    using namespace std;

    pivot_select(first, last);

    auto pivot = prev(last);
    auto bottom = first;
    auto top = pivot;

    while(bottom != top)
    {
        if(*bottom < *pivot) ++bottom;
        else swap(*bottom, *--top);
    }

    swap(*pivot, *top++);

    return make_pair(bottom, top);
}

template <typename Iterator>
void quicksort_loop(Iterator first, Iterator last)
{
    using namespace std;

    while(last - first > 32)
    {
        auto bounds = quicksort_partition(first, last, quicksort_pivot_back<Iterator>);

        quicksort_loop(bounds.second, last);
        last = bounds.first;
    }
}


template <typename Iterator>
void quicksort(Iterator first, Iterator last)
{
    quicksort_loop(first, last);
    insertion_sort(first, last);
}

template <typename IntType = uint64_t, typename Duration = std::chrono::microseconds, typename Timer = std::chrono::high_resolution_clock, typename Function, typename Generator>
void run_trial(Function sort_func, Generator gen, std::string name, std::size_t trial_size, std::size_t trial_count)
{
    using namespace std;
    using namespace chrono;

    vector<IntType> data(trial_size);

    Duration elapsed(0);

    cout << "Sorting with " << name << endl;

    for(unsigned int i = 0; i < trial_count; ++i)
    {
        generate(data.begin(), data.end(), gen);

        auto start = Timer::now();
        sort_func(data.begin(), data.end());
        auto finish = Timer::now();

        elapsed += duration_cast<Duration>(finish-start);
    }

    cout << "Done. Average elapsed time: " << elapsed.count() / trial_count << endl;
    cout << "Is correct: " << is_sorted(data.begin(), data.end()) << endl << endl;
}

int main()
{
    using namespace std;
    using namespace chrono;

    using int_type = uint64_t;
    const size_t trial_size = 12800000;
    const int trial_count = 15;

    vector<int_type> data(trial_size);
    uniform_int_distribution<int_type> distr;
    mt19937_64 rnd;

    run_trial<int_type>(recursive_mergesort<vector<int_type>::iterator>, bind(distr, rnd), "recursive mergesort", trial_size, trial_count);
    run_trial<int_type>(quicksort<vector<int_type>::iterator>, bind(distr, rnd), "quicksort", trial_size, trial_count);
    run_trial<int_type>(sort<vector<int_type>::iterator>, bind(distr, rnd), "std::sort", trial_size, trial_count);
}

Here are times from 15 trials of 12800000 elements:

uint64_t:

Sorting with recursive mergesort
Done. Average elapsed time: 1725431
Is correct: 1

Sorting with quicksort
Done. Average elapsed time: 1238070
Is correct: 1

Sorting with std::sort
Done. Average elapsed time: 1131464
Is correct: 1

uint16_t:

Sorting with recursive mergesort
Done. Average elapsed time: 1186467
Is correct: 1

Sorting with quicksort
Done. Average elapsed time: 2368535
Is correct: 1

Sorting with std::sort
Done. Average elapsed time: 888517
Is correct: 1

I have a feeling that the problem has to do with unaligned memory accesses, however that still makes me wonder why the other algorithms get a speedup while quicksort gets slowed down.

like image 588
chbaker0 Avatar asked May 06 '14 17:05

chbaker0


1 Answers

With uint16_t, you're going to get a lot of duplicates in such a large array: 195 occurrences of each of 0 though 65535, in expectation. Without a three-way ("fat") partition, or at least one that returns the middle of the repeated occurrences of the pivot value in the subarray it's processing, that causes quicksort to go quadratic. (Try a pencil-and-paper execution of a naive quicksort on an array of only zeros to see the effect.)

like image 140
Fred Foo Avatar answered Oct 20 '22 01:10

Fred Foo