//----------------------------------------------------------------------------
|
/// @file parallel_sort.hpp
|
/// @brief Contains the parallel_sort class, which is part of the
|
/// block_indirect_sort algorithm
|
///
|
/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
|
/// Distributed under the Boost Software License, Version 1.0.\n
|
/// ( See accompanying file LICENSE_1_0.txt or copy at
|
/// http://www.boost.org/LICENSE_1_0.txt )
|
/// @version 0.1
|
///
|
/// @remarks
|
//-----------------------------------------------------------------------------
|
#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
|
#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
|
|
#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
|
#include <boost/sort/pdqsort/pdqsort.hpp>
|
#include <boost/sort/common/pivot.hpp>
|
|
namespace boost
|
{
|
namespace sort
|
{
|
namespace blk_detail
|
{
|
|
//----------------------------------------------------------------------------
|
// USING SENTENCES
|
//----------------------------------------------------------------------------
|
namespace bsc = boost::sort::common;
|
namespace bscu = bsc::util;
|
using bscu::nbits64;
|
using bsc::pivot9;
|
using boost::sort::pdqsort;
|
//
|
///---------------------------------------------------------------------------
|
/// @struct parallel_sort
|
/// @brief This class do a parallel sort, using the quicksort filtering,
|
/// splitting the data until the number of elements is smaller than a
|
/// predefined value (max_per_thread)
|
//----------------------------------------------------------------------------
|
template<uint32_t Block_size, class Iter_t, class Compare>
|
struct parallel_sort
|
{
|
//-------------------------------------------------------------------------
|
// D E F I N I T I O N S
|
//-------------------------------------------------------------------------
|
typedef typename std::iterator_traits<Iter_t>::value_type value_t;
|
typedef std::atomic<uint32_t> atomic_t;
|
typedef std::function<void(void)> function_t;
|
typedef backbone<Block_size, Iter_t, Compare> backbone_t;
|
|
//------------------------------------------------------------------------
|
// V A R I A B L E S
|
//------------------------------------------------------------------------
|
// reference to a object with all the data to sort
|
backbone_t &bk;
|
|
// maximun number of element to sort woth 1 thread
|
size_t max_per_thread;
|
|
// atomic counter for to detect the end of the works created inside
|
// the object
|
atomic_t counter;
|
|
//------------------------------------------------------------------------
|
// F U N C T I O N S
|
//------------------------------------------------------------------------
|
parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last);
|
|
void divide_sort(Iter_t first, Iter_t last, uint32_t level);
|
//
|
//------------------------------------------------------------------------
|
// function : function_divide_sort
|
/// @brief create a function_t with a call to divide_sort, and inser in
|
/// the stack of the backbone
|
//
|
/// @param first : iterator to the first element of the range to divide
|
/// @param last : iterator to the next element after the last element of
|
/// the range to divide
|
/// @param level : level of depth in the division.When zero call to
|
/// pdqsort
|
/// @param counter : atomic variable which is decremented when finish
|
/// the function. This variable is used for to know
|
/// when are finished all the function_t created
|
/// inside an object
|
/// @param error : global indicator of error.
|
//------------------------------------------------------------------------
|
void function_divide_sort(Iter_t first, Iter_t last, uint32_t level,
|
atomic_t &counter, bool &error)
|
{
|
bscu::atomic_add(counter, 1);
|
function_t f1 = [this, first, last, level, &counter, &error]( )
|
{
|
if (not error)
|
{
|
try
|
{
|
this->divide_sort (first, last, level);
|
}
|
catch (std::bad_alloc &)
|
{
|
error = true;
|
};
|
};
|
bscu::atomic_sub (counter, 1);
|
};
|
bk.works.emplace_back(f1);
|
};
|
|
//--------------------------------------------------------------------------
|
};// end struct parallel_sort
|
//--------------------------------------------------------------------------
|
//
|
//############################################################################
|
// ##
|
// ##
|
// N O N I N L I N E F U N C T I O N S ##
|
// ##
|
// ##
|
//############################################################################
|
//
|
//------------------------------------------------------------------------
|
// function : parallel_sort
|
/// @brief constructor of the class
|
/// @param [in] bkbn : backbone struct with all the information to sort
|
/// @param [in] first : iterator to the first element to sort
|
/// @param [in] last : iterator to the next element after the last
|
//------------------------------------------------------------------------
|
template<uint32_t Block_size, class Iter_t, class Compare>
|
parallel_sort<Block_size, Iter_t, Compare>
|
::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last)
|
: bk(bkbn), counter(0)
|
{
|
assert((last - first) >= 0);
|
size_t nelem = size_t(last - first);
|
|
//------------------- check if sort --------------------------------------
|
bool sorted = true;
|
for (Iter_t it1 = first, it2 = first + 1;
|
it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
|
if (sorted) return;
|
|
//------------------- check if reverse sort ---------------------------
|
sorted = true;
|
for (Iter_t it1 = first, it2 = first + 1;
|
it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++);
|
|
if (sorted)
|
{
|
size_t nelem2 = nelem >> 1;
|
Iter_t it1 = first, it2 = last - 1;
|
for (size_t i = 0; i < nelem2; ++i)
|
std::swap(*(it1++), *(it2--));
|
return;
|
};
|
|
//-------------------max_per_thread ---------------------------
|
uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1;
|
if (nbits_size > 5) nbits_size = 5;
|
max_per_thread = 1 << (18 - nbits_size);
|
|
uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2;
|
|
//---------------- check if only single thread -----------------------
|
if (nelem < (max_per_thread))
|
{
|
pdqsort(first, last, bk.cmp);
|
return;
|
};
|
if (not bk.error) divide_sort(first, last, level);
|
|
// wait until all the parts are finished
|
bk.exec(counter);
|
};
|
|
//------------------------------------------------------------------------
|
// function : divide_sort
|
/// @brief this function divide the data in two part, for to be sorted in
|
/// a parallel mode
|
/// @param first : iterator to the first element to sort
|
/// @param last : iterator to the next element after the last
|
/// @param level : level of depth before call to pdqsort
|
//------------------------------------------------------------------------
|
template<uint32_t Block_size, class Iter_t, class Compare>
|
void parallel_sort<Block_size, Iter_t, Compare>
|
::divide_sort(Iter_t first, Iter_t last, uint32_t level)
|
{
|
//------------------- check if sort -----------------------------------
|
bool sorted = true;
|
for (Iter_t it1 = first, it2 = first + 1;
|
it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
|
if (sorted) return;
|
|
//---------------- check if finish the subdivision -------------------
|
size_t nelem = last - first;
|
if (level == 0 or nelem < (max_per_thread))
|
{
|
return pdqsort(first, last, bk.cmp);
|
};
|
|
//-------------------- pivoting ----------------------------------
|
pivot9(first, last, bk.cmp);
|
const value_t &val = const_cast<value_t &>(*first);
|
Iter_t c_first = first + 1, c_last = last - 1;
|
|
while (bk.cmp(*c_first, val)) ++c_first;
|
while (bk.cmp(val, *c_last)) --c_last;
|
|
while (c_first < c_last)
|
{
|
std::swap(*(c_first++), *(c_last--));
|
while (bk.cmp(*c_first, val))
|
++c_first;
|
while (bk.cmp(val, *c_last))
|
--c_last;
|
};
|
|
std::swap(*first, *c_last);
|
|
// insert the work of the second half in the stack of works
|
function_divide_sort(c_first, last, level - 1, counter, bk.error);
|
if (bk.error) return;
|
|
// The first half is done by the same thread
|
function_divide_sort(first, c_last, level - 1, counter, bk.error);
|
};
|
//
|
//****************************************************************************
|
};// End namespace blk_detail
|
};// End namespace sort
|
};// End namespace boost
|
//****************************************************************************
|
//
|
#endif
|