/* Copyright (C) 2013 to 2016 Chris Vine

The library comprised in this file or of which this file is part is
distributed by Chris Vine under the GNU Lesser General Public
License as follows:

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License, version 2.1, for more details.

   You should have received a copy of the GNU Lesser General Public
   License, version 2.1, along with this library (see the file LGPL.TXT
   which came with this source code package in the c++-gtk-utils
   sub-directory); if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

However, it is not intended that the object code of a program whose
source code instantiates a template from this file or uses macros or
inline functions (of any length) should by reason only of that
instantiation or use be subject to the restrictions of use in the GNU
Lesser General Public License.  With that in mind, the words "and
macros, inline functions and instantiations of templates (of any
length)" shall be treated as substituted for the words "and small
macros and small inline functions (ten lines or less in length)" in
the fourth paragraph of section 5 of that licence.  This does not
affect any other reason why object code may be subject to the
restrictions in that licence (nor for the avoidance of doubt does it
affect the application of section 2 of that licence to modifications
of the source code in this file).

*/

#ifndef CGU_PARALLEL_H
#define CGU_PARALLEL_H

#include <utility>     // for std::move, std::forward and std::pair
#include <memory>      // for std::unique_ptr
#include <iterator>    // for std::iterator_traits and std::distance
#include <exception>   // for std::exception
#include <functional>  // for std::bind
#include <type_traits> // for std::remove_reference and std::remove_const
#include <limits>      // for std::numeric_limits
#include <algorithm>   // for std::min
#include <tuple>

#include <c++-gtk-utils/callback.h>
#include <c++-gtk-utils/task_manager.h>
#include <c++-gtk-utils/mutex.h>
#include <c++-gtk-utils/cgu_config.h>

namespace Cgu {

namespace Thread {

/**
 * Exception type thrown by the parallel algorithms in this file once
 * all tasks have been attempted, where the callable object applied to
 * one or more elements did not complete normally.
 */
struct ParallelError: public std::exception {
  // 'noexcept' replaces the deprecated dynamic exception specification
  // 'throw()'; 'override' makes the compiler check that this really
  // overrides std::exception::what()
  const char* what() const noexcept override {return "ParallelError\n";}
};

#ifndef DOXYGEN_PARSING

// in version 2.2.2, this was the ParallelHelper namespace.  Because
// the meaning of the DiffType* argument of these functions has
// changed in version 2.2.3 (it is incremented rather than
// decremented), this is now the ParallelHelper2 namespace so that no
// ODR issues arise on library linking with old binaries
namespace ParallelHelper2 {

// Task body used by parallel_for_each(): applies the shared functor
// 's_task' to the element referred to by 'iter' and then, holding the
// mutex 'm', increments the completion counter and signals 'cond'.
template <class ArgRefType, class DiffType, class Iterator>
void for_each_cb_func(const Cgu::Callback::SafeFunctorArg<ArgRefType>& s_task,
                      Iterator iter,
                      Mutex* m,
                      Cond* cond,
                      DiffType* done_count) {
  // the user's callable executes before the lock is taken
  s_task(*iter);

  // the counter is only modified, and the condition variable only
  // signalled, while the mutex is held
  Mutex::Lock lock{*m};
  ++(*done_count);
  cond->signal();
}

// Adapter which gives a unary callable an out-parameter interface:
// 'func' is invoked with 'arg' and whatever it returns is assigned to
// 'res'.
template <class FType, class ArgRefType, class DestType>
void transform1_func(const FType& func, ArgRefType arg, DestType& res) {
  res = func(arg);
}


// Task body used by the unary parallel_transform(): runs 's_task' on
// the element referred to by 'source_iter', collecting the value it
// produces in a local object, and then, holding the mutex 'm', moves
// that value into '*result', increments the completion counter and
// signals 'cond'.
template <class ArgRefType, class DestType, class DiffType, class SourceIterator>
void transform1_cb_func(const Cgu::Callback::SafeFunctorArg<ArgRefType, DestType&>& s_task,
                        SourceIterator source_iter,
                        Mutex* m,
                        Cond* cond,
                        DiffType* done_count,
                        DestType* result) {
  // the computation itself takes place before the lock is acquired
  DestType value;
  s_task(*source_iter, value);

  Mutex::Lock lock{*m};
  // the move into 'result' is made with the mutex held because
  // g++ <= 4.7 does not correctly implement the C++11 memory model on
  // some 64 bit platforms (this is a slight pessimization for
  // gcc >= 4.8)
  *result = std::move(value);
  ++(*done_count);
  cond->signal();
}

// Adapter which gives a binary callable an out-parameter interface:
// 'func' is invoked with 'arg1' and 'arg2' and whatever it returns is
// assigned to 'res'.
template <class FType, class Arg1RefType, class Arg2RefType, class DestType>
void transform2_func(const FType& func, Arg1RefType arg1, Arg2RefType arg2, DestType& res) {
  res = func(arg1, arg2);
}


// Task body used by the binary parallel_transform(): runs 's_task' on
// the elements referred to by 'source_iter1' and 'source_iter2',
// collecting the value it produces in a local object, and then,
// holding the mutex 'm', moves that value into '*result', increments
// the completion counter and signals 'cond'.
template <class Arg1RefType, class Arg2RefType, class DestType,
          class DiffType, class SourceIterator1, class SourceIterator2>
void transform2_cb_func(const Cgu::Callback::SafeFunctorArg<Arg1RefType, Arg2RefType, DestType&>& s_task,
                        SourceIterator1 source_iter1,
                        SourceIterator2 source_iter2,
                        Mutex* m,
                        Cond* cond,
                        DiffType* done_count,
                        DestType* result) {
  // the computation itself takes place before the lock is acquired
  DestType value;
  s_task(*source_iter1, *source_iter2, value);

  Mutex::Lock lock{*m};
  // the move into 'result' is made with the mutex held because
  // g++ <= 4.7 does not correctly implement the C++11 memory model on
  // some 64 bit platforms (this is a slight pessimization for
  // gcc >= 4.8)
  *result = std::move(value);
  ++(*done_count);
  cond->signal();
}

template <class DiffType>
void fail_func(Mutex* m, Cond* cond,
	       bool* error, DiffType* done_count) noexcept {
  Mutex::Lock l{*m};
  ++*done_count;
  *error = true;
  cond->signal();
}

} // namespace ParallelHelper2

#endif // DOXYGEN_PARSING

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function applies a callable object to each element of a
 * container in the range ['first', 'last'), by executing each such
 * application as a task of a Thread::TaskManager object.  Tasks are
 * added to the Thread::TaskManager object in the order in which the
 * respective elements appear in the container (and if a task mutates
 * its argument, it will do so in respect of the correct element of
 * the container), but no other ordering arises, and the tasks will
 * execute in parallel to the extent that the Thread::TaskManager
 * object has sufficient threads available to do so.
 *
 * Apart from that, and that this function returns void, it does the
 * same as std::for_each().  It can mutate container elements if the
 * callable object takes its argument by non-const reference.  It will
 * not return until the callable object has been applied to all of the
 * elements in the range ['first', 'last').
 *
 * This function can be called by a task running on the same
 * TaskManager object.  However if that is done, as the task would end
 * up blocking on its sub-tasks, the maximum number of threads running
 * on the TaskManager object should be incremented by one temporarily
 * while this function is executing using the TaskManager::IncHandle
 * scoped handle class in order to prevent any deadlock through thread
 * starvation. (Another approach where a result is to be delivered to
 * a glib main loop is to call this function in a task running on a
 * Cgu::Thread::Future object and to set a 'when' callback on the
 * future object which passes the result to the main loop.)
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first The beginning of the range to which 'func' is to be
 * applied.
 * @param last One past the last element to which 'func' is to be
 * applied.
 * @param func A callable object to be applied to each element in the
 * range ['first', 'last'), such as formed by a lambda expression or
 * the result of std::bind.  It should take a single unbound argument
 * of the value type of the container to which 'first' and 'last'
 * relate or a const or non-const reference to that type.  Any return
 * value is discarded.  If an exception propagates from 'func', the
 * exception will be consumed while the for each loop is running, and
 * an attempt will still be made to apply 'func' to all remaining
 * elements in the range ['first', 'last'), and only after that
 * attempt has completed will the exception Cgu::Thread::ParallelError
 * be thrown.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the container.
 * Such an exception will not stop an attempt being made to apply
 * 'func' (successfully or unsuccessfully) to all elements in the
 * range ['first', 'last').  Cgu::Thread::ParallelError will be thrown
 * after such attempted application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable object throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.19/2.2.2
 */
template <class Iterator, class Func>
void parallel_for_each(TaskManager& tm,
                       Iterator first,
                       Iterator last,
                       Func&& func) {

  using ArgRefType = typename std::iterator_traits<Iterator>::reference;
  using DiffType = typename std::iterator_traits<Iterator>::difference_type;

  // state shared, by address, with every task queued below
  Mutex mutex;
  Cond cond;
  DiffType start_count = 0; // number of tasks handed to 'tm'
  DiffType done_count = 0;  // incremented by each task or by fail_func
  bool error = false;       // set to true by fail_func

  // the user's callable and the failure handler are each wrapped once
  // in a SafeFunctorArg/SafeFunctor object, so that the same object
  // can be shared between the different tasks
  Cgu::Callback::SafeFunctorArg<ArgRefType> s_task{
    Cgu::Callback::lambda<ArgRefType>(std::forward<Func>(func))
  };
  Cgu::Callback::SafeFunctor s_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
                        &mutex,
                        &cond,
                        &error,
                        &done_count)
  };

  // queue one task per element, in container order
  //
  // NOTE(review): add_task() may throw after earlier tasks have been
  // queued, and those tasks hold the addresses of the locals declared
  // above - confirm that TaskManager guarantees they cannot still run
  // once this frame has unwound
  for (; first != last; ++first, ++start_count) {
    std::unique_ptr<const Cgu::Callback::Callback> task_cb(
      Cgu::Callback::make_ref(&ParallelHelper2::for_each_cb_func<ArgRefType, DiffType, Iterator>,
                              s_task,
                              first,
                              &mutex,
                              &cond,
                              &done_count)
    );
    std::unique_ptr<const Cgu::Callback::Callback> fail_cb(
      Cgu::Callback::lambda<>([s_fail] () {s_fail();})
    );

    tm.add_task(std::move(task_cb), std::move(fail_cb));
  }

  // block until every queued task has been counted as done, then
  // report any failure recorded by fail_func
  Mutex::Lock lock{mutex};
  while (start_count > done_count) cond.wait(mutex);
  if (error) throw ParallelError();
}

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function maps over a container in the range ['first', 'last'),
 * applying a unary callable object to each element of the container
 * in that range and storing the result in the destination range, by
 * executing each such application as a task of a Thread::TaskManager
 * object.  Tasks are added to the Thread::TaskManager object in the
 * order in which the respective elements appear in the source
 * container, and the final result appears in the destination
 * container in the same order as the source range from which it is
 * generated (including if a back_inserter iterator is used), but no
 * other ordering arises, and the tasks will execute in parallel to
 * the extent that the Thread::TaskManager object has sufficient
 * threads available to do so.
 *
 * Apart from that, this function does the same as the version of
 * std::transform() taking a unary function, except that it returns
 * void (see Thread::parallel_transform_partial() for a function which
 * returns a destination iterator and an iterator to the source
 * range).  It will not return until the callable object has been
 * applied to all of the elements in the range ['first', 'last').
 *
 * This function can be called by a task running on the same
 * TaskManager object, perhaps with a view to delivering a result
 * asynchronously to a glib main loop.  However if that is done, as
 * the task would end up blocking on its sub-tasks, the maximum number
 * of threads running on the TaskManager object should be incremented
 * by one temporarily while this function is executing using the
 * TaskManager::IncHandle scoped handle class in order to prevent any
 * deadlock through thread starvation.  (Another approach where a
 * result is to be delivered to a glib main loop is to call this
 * function in a task running on a Cgu::Thread::Future object and to
 * set a 'when' callback on the future object which passes the result
 * to the main loop.)
 *
 * A task can carry out a map-reduce operation by passing the result
 * of calling this function to std::accumulate() to perform a
 * fold-left or fold-right on that result.
 *
 * A separate overload of this function takes a binary callable
 * object.
 *
 * Here is a trivial example of a map-reduce operation which maps over
 * a vector by multiplying each element by 2 in separate tasks, and
 * then folds-left using std::accumulate() (std::accumulate() can fold
 * using any callable object, but in this example the default of
 * addition is used):
 *
 * @code
 *   using namespace Cgu;
 *   std::vector<int> v{1, 2, 3, 4, 5};
 *   Thread::TaskManager tm;
 *
 *   Thread::parallel_transform(tm,
 *                              v.begin(),
 *                              v.end(),
 *                              v.begin(),
 *                              [] (int elt) {return elt * 2;});
 *  // res will be equal to 30
 *  int res = std::accumulate(v.begin(), v.end(), 0);
 * @endcode
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first The beginning of the range to which 'func' is to be
 * applied.
 * @param last One past the last element to which 'func' is to be
 * applied.
 * @param dest The beginning of the range to which the result of
 * applying 'func' to the elements in the range ['first', 'last') is
 * to be stored.  As in the case of std::transform, this can overlap
 * with or be the same as the source range.  It may also be an insert
 * iterator.
 * @param func A unary callable object to be applied to each element
 * in the range ['first', 'last'), such as formed by a lambda
 * expression or the result of std::bind.  It should take a single
 * unbound argument of the value type of the container to which
 * 'first' and 'last' relate or a const or non-const reference to that
 * type.  If an exception propagates from 'func', the exception will
 * be consumed while the transform loop is running, and an attempt
 * will still be made to apply 'func' to all remaining elements in the
 * range ['first', 'last'), and only after that attempt has completed
 * will the exception Cgu::Thread::ParallelError be thrown.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the source
 * container.  Such an exception will not stop an attempt being made
 * to apply 'func' (successfully or unsuccessfully) to all elements in
 * the range ['first', 'last').  Cgu::Thread::ParallelError will be
 * thrown after such attempted application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable object throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.19/2.2.2
 */
template <class SourceIterator, class DestIterator, class Func>
void parallel_transform(TaskManager& tm,
                        SourceIterator first,
                        SourceIterator last,
                        DestIterator dest,
                        Func&& func) {

  // an empty source range needs no tasks and no results array
  if (first == last) return;

  using ArgRefType = typename std::iterator_traits<SourceIterator>::reference;
  using DiffType = typename std::iterator_traits<SourceIterator>::difference_type;
  using FType = typename std::remove_const<typename std::remove_reference<Func>::type>::type;
  // this function will fail to compile if DestType is a reference
  // type: that is a feature, not a bug, as a function returning a
  // reference lacks referential transparency, is unlikely to be
  // thread-safe and is unsuitable for use as a task function
  using DestType = decltype(func(*first));

  // state shared, by address, with every task queued below
  Mutex mutex;
  Cond cond;
  DiffType start_count = 0; // number of tasks handed to 'tm'
  DiffType done_count = 0;  // incremented by each task or by fail_func
  bool error = false;       // set to true by fail_func

  // intermediate results have to be held in an array so destination
  // ordering can be enforced when using insert iterators.  This
  // causes some inefficiency for non-random access iterators
  std::unique_ptr<DestType[]> results(new DestType[std::distance(first, last)]);

  // the user's callable and the failure handler are each wrapped once
  // in a SafeFunctorArg/SafeFunctor object, so that the same object
  // can be shared between the different tasks
  Cgu::Callback::SafeFunctorArg<ArgRefType, DestType&> s_task{
    Cgu::Callback::make_ref(&ParallelHelper2::transform1_func<FType, ArgRefType, DestType>,
                            std::forward<Func>(func))
  };
  Cgu::Callback::SafeFunctor s_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
                        &mutex,
                        &cond,
                        &error,
                        &done_count)
  };

  // queue one task per source element, in container order; the task
  // for the n-th element writes to the n-th slot of 'results'
  //
  // NOTE(review): add_task() may throw after earlier tasks have been
  // queued, and those tasks hold the addresses of the locals declared
  // above and of slots in 'results' - confirm that TaskManager
  // guarantees they cannot still run once this frame has unwound
  for (; first != last; ++first, ++start_count) {
    std::unique_ptr<const Cgu::Callback::Callback> task_cb(
      Cgu::Callback::lambda<>(std::bind(&ParallelHelper2::transform1_cb_func<ArgRefType, DestType, DiffType, SourceIterator>,
                                        s_task,
                                        first,
                                        &mutex,
                                        &cond,
                                        &done_count,
                                        results.get() + start_count))
    );
    std::unique_ptr<const Cgu::Callback::Callback> fail_cb(
      Cgu::Callback::lambda<>([s_fail] () {s_fail();})
    );

    tm.add_task(std::move(task_cb), std::move(fail_cb));
  }

  // block until every queued task has been counted as done; nothing
  // is written to 'dest' if fail_func recorded a failure
  Mutex::Lock lock{mutex};
  while (start_count > done_count) cond.wait(mutex);
  if (error) throw ParallelError();

  // hand the results over to the destination in source order
  DiffType index = 0;
  while (index < start_count) {
    *dest = std::move(results[index]);
    ++dest;
    ++index;
  }
}

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function maps over two containers, one in the range ['first1',
 * 'last1') and the other beginning at 'first2', applying a binary
 * callable object to each element of the containers in those ranges
 * and storing the result in the destination range, by executing each
 * such application as a task of a Thread::TaskManager object.  Tasks
 * are added to the Thread::TaskManager object in the order in which
 * the respective elements appear in the source containers, and the
 * final result appears in the destination container in the same order
 * as the source ranges from which it is generated (including if a
 * back_inserter iterator is used), but no other ordering arises, and
 * the tasks will execute in parallel to the extent that the
 * Thread::TaskManager object has sufficient threads available to do
 * so.
 *
 * Apart from that, this function does the same as the version of
 * std::transform() taking a binary function, except that it returns
 * void (see Thread::parallel_transform_partial() for a function which
 * returns a destination iterator and iterators to the source ranges).
 * It will not return until the callable object has been applied to
 * all of the elements in the source ranges.
 *
 * This function can be called by a task running on the same
 * TaskManager object, perhaps with a view to delivering a result
 * asynchronously to a glib main loop.  However if that is done, as
 * the task would end up blocking on its sub-tasks, the maximum number
 * of threads running on the TaskManager object should be incremented
 * by one temporarily while this function is executing using the
 * TaskManager::IncHandle scoped handle class in order to prevent any
 * deadlock through thread starvation.  (Another approach where a
 * result is to be delivered to a glib main loop is to call this
 * function in a task running on a Cgu::Thread::Future object and to
 * set a 'when' callback on the future object which passes the result
 * to the main loop.)
 *
 * A task can carry out a map-reduce operation by passing the result
 * of calling this function to std::accumulate() to perform a
 * fold-left or fold-right on that result.
 *
 * A separate overload of this function takes a unary callable object.
 *
 * Here is a trivial example of a map-reduce operation which maps over
 * two vectors by adding respective elements of the vectors in
 * separate tasks, and then folds-left using std::accumulate()
 * (std::accumulate() can fold using any callable object, but in this
 * example the default of addition is used):
 *
 * @code
 *   using namespace Cgu;
 *   std::vector<int> v1{2, 4, 6, 8, 10};
 *   std::vector<int> v2{10, 20, 30, 40, 50};
 *   std::vector<int> v3;
 *   Thread::TaskManager tm;
 *
 *   Thread::parallel_transform(tm,
 *                              v1.begin(),
 *                              v1.end(),
 *                              v2.begin(),
 *                              std::back_inserter(v3),
 *                              std::plus<int>());
 *  // res will be equal to 180
 *  int res = std::accumulate(v3.begin(), v3.end(), 0);
 * @endcode
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first1 The beginning of the range which is to be passed as
 * the first argument of 'func'.
 * @param last1 One past the last element of the range which is to be
 * passed as the first argument of 'func'.
 * @param first2 The beginning of the range which is to be passed as
 * the second argument of 'func'.
 * @param dest The beginning of the range to which the result of
 * applying 'func' to the elements in the source ranges is to be
 * stored.  As in the case of std::transform, this can overlap with or
 * be the same as one of the source ranges.  It may also be an insert
 * iterator.
 * @param func A binary callable object to be applied to each element
 * in the source ranges, such as formed by a lambda expression or the
 * result of std::bind.  It should take two unbound arguments of the
 * value types of the containers to which 'first1' and 'first2' relate
 * or const or non-const references to those types.  If an exception
 * propagates from 'func', the exception will be consumed while the
 * transform loop is running, and an attempt will still be made to
 * apply 'func' to all remaining elements of the source ranges, and
 * only after that attempt has completed will the exception
 * Cgu::Thread::ParallelError be thrown.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the source
 * ranges.  Such an exception will not stop an attempt being made to
 * apply 'func' (successfully or unsuccessfully) to all elements in
 * the source ranges.  Cgu::Thread::ParallelError will be thrown after
 * such attempted application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable object throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.19/2.2.2
 */
template <class SourceIterator1, class SourceIterator2, class DestIterator, class Func>
void parallel_transform(TaskManager& tm,
			SourceIterator1 first1,
			SourceIterator1 last1,
			SourceIterator2 first2,
			DestIterator dest,
			Func&& func) {

  // an empty first source range means there is nothing to transform
  if (first1 == last1) return;

  using Arg1RefType = typename std::iterator_traits<SourceIterator1>::reference;
  using DiffType = typename std::iterator_traits<SourceIterator1>::difference_type;
  using Arg2RefType = typename std::iterator_traits<SourceIterator2>::reference;
  using FType = typename std::remove_const<typename std::remove_reference<Func>::type>::type;
  // DestType is whatever 'func' yields.  Should that be a reference
  // type, the array allocation below cannot compile: this is
  // deliberate, because a function returning a reference lacks
  // referential transparency, is unlikely to be thread-safe and is
  // unsuitable for use as a task function
  using DestType = decltype(func(*first1, *first2));

  // synchronisation state shared (by address) with every task queued
  // below
  Mutex mutex;
  Cond cond;
  DiffType queued = 0;    // tasks handed to the task manager so far
  DiffType finished = 0;  // passed by address to the helper callbacks
  bool failed = false;    // passed by address to fail_func

  // each task writes into its own slot of this scratch array, and the
  // slots are copied to 'dest' in order afterwards: this is what lets
  // destination ordering be enforced when using insert iterators, at
  // the cost of some inefficiency for non-random access iterators
  std::unique_ptr<DestType[]> buffer(new DestType[std::distance(first1, last1)]);

  // wrap the user function and the failure handler in SafeFunctor
  // objects so that a single instance of each can be shared between
  // all of the tasks
  Cgu::Callback::SafeFunctorArg<Arg1RefType, Arg2RefType, DestType&> shared_task{
    Cgu::Callback::make_ref(&ParallelHelper2::transform2_func<FType, Arg1RefType, Arg2RefType, DestType>,
			    std::forward<Func>(func))
  };
  Cgu::Callback::SafeFunctor shared_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
			&mutex,
			&cond,
			&failed,
			&finished)
  };

  // NOTE(review): the callbacks built in this loop keep the addresses
  // of the locals above.  If add_task() throws part-way through, tasks
  // already queued may still refer to them after this function has
  // unwound - confirm against the TaskManager implementation.
  while (first1 != last1) {
    DestType* const slot = buffer.get() + queued;
    std::unique_ptr<const Cgu::Callback::Callback> run_cb(
      Cgu::Callback::lambda<>(std::bind(&ParallelHelper2::transform2_cb_func<Arg1RefType, Arg2RefType, DestType, DiffType, SourceIterator1, SourceIterator2>,
					shared_task,
					first1,
					first2,
					&mutex,
					&cond,
					&finished,
					slot))
    );
    std::unique_ptr<const Cgu::Callback::Callback> failure_cb(
      Cgu::Callback::lambda<>([shared_fail] () {shared_fail();})
    );

    tm.add_task(std::move(run_cb), std::move(failure_cb));

    ++first1;
    ++first2;
    ++queued;
  }

  // block until as many completions have been counted as tasks were
  // queued; the lock remains held while the results are copied out
  Mutex::Lock l{mutex};
  while (queued > finished) cond.wait(mutex);
  if (failed) throw ParallelError();

  // move the results to the destination in source order
  DestType* const buffer_end = buffer.get() + queued;
  for (DestType* item = buffer.get(); item != buffer_end; ++item, ++dest) {
    *dest = std::move(*item);
  }
}

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function applies a callable object to each element of a
 * container in the range ['first', 'last') subject to a maximum, by
 * executing each such application as a task of a Thread::TaskManager
 * object.  Tasks are added to the Thread::TaskManager object in the
 * order in which the respective elements appear in the container (and
 * if a task mutates its argument, it will do so in respect of the
 * correct element of the container), but no other ordering arises,
 * and the tasks will execute in parallel to the extent that the
 * Thread::TaskManager object has sufficient threads available to do
 * so.
 *
 * This function does the same as Thread::parallel_for_each(), except
 * that it returns an iterator to the element past the last element to
 * which the callable object has been applied, and it has a 'max'
 * parameter to limit the maximum number of elements to which the
 * callable object will be applied on any one call to this function
 * even if the range ['first', 'last') is greater than this maximum.
 * Whether this limitation has had effect on any one call can be
 * tested by checking whether the return value is equal to the 'last'
 * parameter.  If it is not, a further call to this function can be
 * made.
 *
 * The main purpose of this additional function is to enable the
 * application of the callable object to the elements of a container
 * to be dealt with in chunks, possibly to enable other tasks to be
 * interleaved at reasonable intervals.  For a container which does
 * not support random access iterators, the 'last' parameter can be
 * set to, say, the end of the container and the chunk size set with
 * the 'max' parameter, with the return value being used as the
 * 'first' parameter for subsequent calls to this function.  This
 * avoids having to increment the iterator for each "chunk" by
 * stepping through the container by hand.  In this usage, it
 * therefore represents a minor efficiency improvement.
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first The beginning of the range to which 'func' is to be
 * applied.
 * @param last One past the last element to which 'func' is to be
 * applied, subject to any maximum specified as the 'max' parameter.
 * @param max The maximum number of elements of the source container
 * to which 'func' will be applied on this call.  It is not an error
 * if it is greater than the distance between 'first' and 'last'.  If
 * it is equal to or greater than that distance, it follows that the
 * iterator returned will be equal to 'last'.  The same results if
 * 'max' is a negative number (in that case no maximum will take
 * effect and each element in the range ['first', 'last') will have
 * 'func' applied to it).
 * @param func A callable object to be applied (subject to the
 * maximum) to each element in the range ['first', 'last'), such as
 * formed by a lambda expression or the result of std::bind.  It
 * should take a single unbound argument of the value type of the
 * container to which 'first' and 'last' relate or a const or
 * non-const reference to that type.  Any return value is discarded.
 * If an exception propagates from 'func', the exception will be
 * consumed while the for each loop is running, and an attempt will
 * still be made to apply 'func' to all remaining elements in the
 * range ['first', 'last') subject to the maximum, and only after that
 * attempt has completed will the exception Cgu::Thread::ParallelError
 * be thrown.
 * @return An iterator representing the element past the last element
 * of the range to which 'func' was applied, which may be passed as
 * the 'first' argument of a subsequent call to this function.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the container.
 * Such an exception will not stop an attempt being made to apply
 * 'func' (successfully or unsuccessfully) to all elements in the
 * range ['first', 'last') subject to the maximum.
 * Cgu::Thread::ParallelError will be thrown after such attempted
 * application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable objects throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.20/2.2.3
 */
template <class Iterator, class Func>
Iterator parallel_for_each_partial(TaskManager& tm,
				   Iterator first,
				   Iterator last,
				   int max,
				   Func&& func) {

  // an empty range or a maximum of zero means no work: hand back the
  // start iterator untouched
  if (first == last || max == 0) return first;

  using ArgRefType = typename std::iterator_traits<Iterator>::reference;
  using DiffType = typename std::iterator_traits<Iterator>::difference_type;

  // synchronisation state shared (by address) with every task queued
  // below
  Mutex mutex;
  Cond cond;
  DiffType queued = 0;    // tasks handed to the task manager so far
  DiffType finished = 0;  // passed by address to the helper callbacks
  bool failed = false;    // passed by address to fail_func

  // a negative 'max' means "no limit", represented here by the
  // largest value the difference type can hold.  A specialization of
  // std::numeric_limits::max() for all arithmetic types is required
  // by §3.9.1/8 of the standard, the iterator difference type must be
  // a signed integer type (§24.2.1/1), and all signed integer types
  // are arithmetic types (§3.9.1/2, §3.9.1/7 and §3.9.1/8).
  const DiffType limit =
    (max < 0) ? std::numeric_limits<DiffType>::max() : static_cast<DiffType>(max);

  // wrap the user function and the failure handler in SafeFunctor
  // objects so that a single instance of each can be shared between
  // all of the tasks
  Cgu::Callback::SafeFunctorArg<ArgRefType> shared_task{
    Cgu::Callback::lambda<ArgRefType>(std::forward<Func>(func))
  };
  Cgu::Callback::SafeFunctor shared_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
			&mutex,
			&cond,
			&failed,
			&finished)
  };

  // NOTE(review): the callbacks built in this loop keep the addresses
  // of the locals above.  If add_task() throws part-way through, tasks
  // already queued may still refer to them after this function has
  // unwound - confirm against the TaskManager implementation.
  while (first != last && queued < limit) {
    std::unique_ptr<const Cgu::Callback::Callback> run_cb(
      Cgu::Callback::make_ref(&ParallelHelper2::for_each_cb_func<ArgRefType, DiffType, Iterator>,
			      shared_task,
			      first,
			      &mutex,
			      &cond,
			      &finished)
    );
    std::unique_ptr<const Cgu::Callback::Callback> failure_cb(
      Cgu::Callback::lambda<>([shared_fail] () {shared_fail();})
    );

    tm.add_task(std::move(run_cb), std::move(failure_cb));

    ++first;
    ++queued;
  }

  // block until as many completions have been counted as tasks were
  // queued
  Mutex::Lock l{mutex};
  while (queued > finished) cond.wait(mutex);
  if (failed) throw ParallelError();

  // 'first' now refers to the element after the last one for which a
  // task was queued
  return first;
}

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function maps over a container in the range ['first', 'last')
 * subject to a maximum, applying a unary callable object to each
 * element of the container in that range (subject to the maximum) and
 * storing the result in the destination range, by executing each such
 * application as a task of a Thread::TaskManager object.  Tasks are
 * added to the Thread::TaskManager object in the order in which the
 * respective elements appear in the source container, and the final
 * result appears in the destination container in the same order as
 * the source range from which it is generated (including if a
 * back_inserter iterator is used), but no other ordering arises, and
 * the tasks will execute in parallel to the extent that the
 * Thread::TaskManager object has sufficient threads available to do
 * so.
 *
 * A separate overload of this function takes a binary callable
 * object.
 *
 * This function does the same as the version of
 * Thread::parallel_transform() taking a unary callable object, except
 * that it returns a std::pair object containing an iterator to the
 * element past the last element of the source range transformed, and
 * a destination iterator to the element past the last element stored
 * in the destination range, and it has a 'max' parameter to limit the
 * maximum number of elements which will be so transformed on any one
 * call to this function.  Whether this limitation has had effect on
 * any one call can be tested by checking whether the first value of
 * the pair returned is equal to the 'last' parameter.  If it is not,
 * a further call to this function can be made.
 *
 * The main purpose of this additional function is to enable the
 * parallel transform of the elements of a container to be dealt with
 * in chunks, possibly to enable other tasks to be interleaved at
 * reasonable intervals.  For source or destination containers which
 * do not support random access iterators, the 'last' parameter can be
 * set to, say, the end of the container and the chunk size set with
 * the 'max' parameter, with the values of the returned pair being
 * used as the 'first' and 'dest' parameters for subsequent calls to
 * this function.  This avoids having to increment the source and
 * destination iterators for each "chunk" by stepping through the
 * respective containers by hand.  In this usage, it therefore
 * represents a minor efficiency improvement.
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first The beginning of the range to which 'func' is to be
 * applied.
 * @param last One past the last element to which 'func' is to be
 * applied, subject to any maximum specified as the 'max' parameter.
 * @param dest The beginning of the range to which the result of
 * applying 'func' to the elements in the source range is to be
 * stored.  As in the case of std::transform, this can overlap with or
 * be the same as the source range.  It may also be an insert
 * iterator.
 * @param max The maximum number of elements of the source container
 * which will be transformed on this call.  It is not an error if it
 * is greater than the distance between 'first' and 'last'.  If it is
 * equal to or greater than that distance, it follows that the first
 * value of the pair returned by this function will be equal to
 * 'last'.  The same results if 'max' is a negative number (in that
 * case no maximum will take effect and all elements in the range
 * ['first', 'last') will be transformed), so passing -1 might be
 * useful as a means of obtaining a destination iterator (the second
 * value) for other purposes, particularly where it is not a random
 * access iterator.
 * @param func A unary callable object to be applied (subject to the
 * maximum) to each element in the range ['first', 'last'), such as
 * formed by a lambda expression or the result of std::bind.  It
 * should take a single unbound argument of the value type of the
 * container to which 'first' and 'last' relate or a const or
 * non-const reference to that type.  If an exception propagates from
 * 'func', the exception will be consumed while the transform loop is
 * running, and an attempt will still be made to apply 'func' to all
 * remaining elements in the range ['first', 'last') subject to the
 * maximum, and only after that attempt has completed will the
 * exception Cgu::Thread::ParallelError be thrown.
 * @return A std::pair object.  Its first member is an iterator
 * representing the element past the last element of the source range
 * transformed, which may be passed as the 'first' argument of a
 * subsequent call to this function; and its second member is an
 * iterator representing the element past the last element stored in
 * the destination range, which may be passed as the 'dest' argument
 * of the subsequent call.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the source
 * container.  Such an exception will not stop an attempt being made
 * to apply 'func' (successfully or unsuccessfully) to all elements in
 * the range ['first', 'last') subject to the maximum.
 * Cgu::Thread::ParallelError will be thrown after such attempted
 * application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable objects throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.20/2.2.3
 */
template <class SourceIterator, class DestIterator, class Func>
std::pair<SourceIterator, DestIterator>
  parallel_transform_partial(TaskManager& tm,
			     SourceIterator first,
			     SourceIterator last,
			     DestIterator dest,
			     int max,
			     Func&& func) {

  // an empty range or a maximum of zero means no work: hand back the
  // iterators untouched
  if (first == last || max == 0) {
    return std::pair<SourceIterator, DestIterator>(first, dest);
  }

  using ArgRefType = typename std::iterator_traits<SourceIterator>::reference;
  using DiffType = typename std::iterator_traits<SourceIterator>::difference_type;
  using FType = typename std::remove_const<typename std::remove_reference<Func>::type>::type;
  // DestType is whatever 'func' yields.  Should that be a reference
  // type, the array allocation below cannot compile: this is
  // deliberate, because a function returning a reference lacks
  // referential transparency, is unlikely to be thread-safe and is
  // unsuitable for use as a task function
  using DestType = decltype(func(*first));

  // synchronisation state shared (by address) with every task queued
  // below
  Mutex mutex;
  Cond cond;
  DiffType queued = 0;    // tasks handed to the task manager so far
  DiffType finished = 0;  // passed by address to the helper callbacks
  bool failed = false;    // passed by address to fail_func

  // a negative 'max' means "no limit", represented here by the
  // largest value the difference type can hold.  A specialization of
  // std::numeric_limits::max() for all arithmetic types is required
  // by §3.9.1/8 of the standard, the iterator difference type must be
  // a signed integer type (§24.2.1/1), and all signed integer types
  // are arithmetic types (§3.9.1/2, §3.9.1/7 and §3.9.1/8).
  const DiffType limit =
    (max < 0) ? std::numeric_limits<DiffType>::max() : static_cast<DiffType>(max);

  // each task writes into its own slot of this scratch array, and the
  // slots are copied to 'dest' in order afterwards: this is what lets
  // destination ordering be enforced when using insert iterators, at
  // the cost of some inefficiency for non-random access iterators.
  // The array needs one slot per element that can be processed on
  // this call.
  const DiffType capacity = std::min(limit, std::distance(first, last));
  std::unique_ptr<DestType[]> buffer(new DestType[capacity]);

  // wrap the user function and the failure handler in SafeFunctor
  // objects so that a single instance of each can be shared between
  // all of the tasks
  Cgu::Callback::SafeFunctorArg<ArgRefType, DestType&> shared_task{
    Cgu::Callback::make_ref(&ParallelHelper2::transform1_func<FType, ArgRefType, DestType>,
			    std::forward<Func>(func))
  };
  Cgu::Callback::SafeFunctor shared_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
			&mutex,
			&cond,
			&failed,
			&finished)
  };

  // NOTE(review): the callbacks built in this loop keep the addresses
  // of the locals above.  If add_task() throws part-way through, tasks
  // already queued may still refer to them after this function has
  // unwound - confirm against the TaskManager implementation.
  while (first != last && queued < limit) {
    DestType* const slot = buffer.get() + queued;
    std::unique_ptr<const Cgu::Callback::Callback> run_cb(
      Cgu::Callback::lambda<>(std::bind(&ParallelHelper2::transform1_cb_func<ArgRefType, DestType, DiffType, SourceIterator>,
					shared_task,
					first,
					&mutex,
					&cond,
					&finished,
					slot))
    );
    std::unique_ptr<const Cgu::Callback::Callback> failure_cb(
      Cgu::Callback::lambda<>([shared_fail] () {shared_fail();})
    );

    tm.add_task(std::move(run_cb), std::move(failure_cb));

    ++first;
    ++queued;
  }

  // block until as many completions have been counted as tasks were
  // queued; the lock remains held while the results are copied out
  Mutex::Lock l{mutex};
  while (queued > finished) cond.wait(mutex);
  if (failed) throw ParallelError();

  // move the results to the destination in source order
  DestType* const buffer_end = buffer.get() + queued;
  for (DestType* item = buffer.get(); item != buffer_end; ++item, ++dest) {
    *dest = std::move(*item);
  }

  // both iterators now point one past the last element handled, ready
  // to be fed to a subsequent call
  return std::pair<SourceIterator, DestIterator>(first, dest);
}

/**
 * \#include <c++-gtk-utils/parallel.h>
 * @sa Cgu::IntIter
 *
 * This function maps over two containers, one in the range ['first1',
 * 'last1') subject to a maximum, and the other beginning at 'first2',
 * applying a binary callable object to each element of the containers
 * in those ranges (subject to the maximum) and storing the result in
 * the destination range, by executing each such application as a task
 * of a Thread::TaskManager object.  Tasks are added to the
 * Thread::TaskManager object in the order in which the respective
 * elements appear in the source containers, and the final result
 * appears in the destination container in the same order as the
 * source ranges from which it is generated (including if a
 * back_inserter iterator is used), but no other ordering arises, and
 * the tasks will execute in parallel to the extent that the
 * Thread::TaskManager object has sufficient threads available to do
 * so.
 *
 * A separate overload of this function takes a unary callable object.
 *
 * This function does the same as the version of
 * Thread::parallel_transform() taking a binary callable object,
 * except that it returns a std::tuple object containing iterators to
 * the element past the last elements of the source ranges
 * transformed, and a destination iterator to the element past the
 * last element stored in the destination range, and it has a 'max'
 * parameter to limit the maximum number of elements which will be so
 * transformed on any one call to this function.  Whether this
 * limitation has had effect on any one call can be tested by checking
 * whether the first value of the tuple returned is equal to the
 * 'last' parameter.  If it is not, a further call to this function
 * can be made.
 *
 * The main purpose of this additional function is to enable the
 * parallel transform of the elements of a container to be dealt with
 * in chunks, possibly to enable other tasks to be interleaved at
 * reasonable intervals.  For source or destination containers which
 * do not support random access iterators, the 'last' parameter can be
 * set to, say, the end of the container and the chunk size set with
 * the 'max' parameter, with the values of the returned tuple being
 * used as the 'first1', 'first2' and 'dest' parameters for subsequent
 * calls to this function.  This avoids having to increment the source
 * and destination iterators for each "chunk" by stepping through the
 * respective containers by hand.  In this usage, it therefore
 * represents a minor efficiency improvement.
 *
 * @param tm The Thread::TaskManager object on which the tasks will
 * run.
 * @param first1 The beginning of the range which is to be passed as
 * the first argument of 'func'.
 * @param last1 One past the last element of the range which is to be
 * passed as the first argument of 'func', subject to any maximum
 * specified as the 'max' parameter.
 * @param first2 The beginning of the range which is to be passed as
 * the second argument of 'func'.
 * @param dest The beginning of the range to which the result of
 * applying 'func' to the elements in the source ranges is to be
 * stored.  As in the case of std::transform, this can overlap with or
 * be the same as one of the source ranges.  It may also be an insert
 * iterator.
 * @param max The maximum number of elements of the source containers
 * which will be transformed on this call.  It is not an error if it
 * is greater than the distance between 'first1' and 'last1'.  If it
 * is equal to or greater than that distance, it follows that the
 * first value of the tuple returned by this function will be equal to
 * 'last1'.  The same results if 'max' is a negative number (in that
 * case no maximum will take effect and all elements in the range
 * ['first1', 'last1') will be transformed), so passing -1 might be
 * useful as a means of obtaining a second source iterator (the second
 * value of the tuple) or destination iterator (the third value) for
 * other purposes, particularly where they are not random access
 * iterators.
 * @param func A binary callable object to be applied (subject to the
 * maximum) to each element in the source ranges, such as formed by a
 * lambda expression or the result of std::bind.  It should take two
 * unbound arguments of the value types of the containers to which
 * 'first1' and 'first2' relate or const or non-const references to
 * those types.  If an exception propagates from 'func', the exception
 * will be consumed while the transform loop is running, and an
 * attempt will still be made to apply 'func' to all remaining
 * elements of the source ranges subject to the maximum, and only
 * after that attempt has completed will the exception
 * Cgu::Thread::ParallelError be thrown.
 * @return A std::tuple object.  Its first value is an iterator
 * representing the element past the last element of the first source
 * range transformed, which may be passed as the 'first1' argument of
 * a subsequent call to this function; its second value is an iterator
 * representing the element past the last element of the second source
 * range transformed, which may be passed as the 'first2' argument of
 * the subsequent call; and its third value is an iterator
 * representing the element past the last element stored in the
 * destination range, which may be passed as the 'dest' argument of
 * the subsequent call.
 * @exception std::bad_alloc This exception will be thrown if memory
 * is exhausted and the system throws in that case.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion).  See also the
 * documentation for the Cgu::Thread::TaskManager::get_max_tasks()
 * method about the possibility of std::length_error being thrown.  If
 * std::bad_alloc or std::length_error is thrown, some tasks may
 * nonetheless have already started by virtue of the call to this
 * function, but subsequent ones will not.
 * @exception Cgu::Thread::TaskError This exception will be thrown if
 * stop_all() has previously been called on the Thread::TaskManager
 * object, or if another thread calls stop_all() after this method is
 * called but before it has returned.  It will also be thrown if the
 * Thread::TaskManager object's is_error() method would return true
 * because its internal thread pool loop implementation has thrown
 * std::bad_alloc, or a thread has failed to start correctly because
 * pthread has run out of resources.  (On systems with
 * over-commit/lazy-commit combined with virtual memory (swap), it is
 * rarely useful to check for memory exhaustion, and if a reasonable
 * maximum thread count has been chosen for the Thread::TaskManager
 * object pthread should not run out of other resources, but there may
 * be some specialized cases where the return value of is_error() is
 * useful.)  If this exception is thrown, some tasks may nonetheless
 * have already started by virtue of the call to this function.
 * @exception Cgu::Thread::ParallelError This exception will be thrown
 * if an exception propagates from the 'func' callable object when it
 * executes on being applied to one or more elements of the source
 * ranges.  Such an exception will not stop an attempt being made to
 * apply 'func' (successfully or unsuccessfully) to all elements in
 * the source ranges subject to the maximum.
 * Cgu::Thread::ParallelError will be thrown after such attempted
 * application has finished.
 * @exception Cgu::Thread::MutexError This exception will be thrown if
 * initialization of a mutex used by this function fails.  (It is
 * often not worth checking for this, as it means either memory is
 * exhausted or pthread has run out of other resources to create new
 * mutexes.)  If this exception is thrown, no tasks will start.
 * @exception Cgu::Thread::CondError This exception will be thrown if
 * initialization of a condition variable used by this function fails.
 * (It is often not worth checking for this, as it means either memory
 * is exhausted or pthread has run out of other resources to create
 * new condition variables.)  If this exception is thrown, no tasks
 * will start.
 * @note 1. An exception might also be thrown if the copy or move
 * constructor of the 'func' callable objects throws.  If such an
 * exception is thrown, no tasks will start.
 * @note 2. Prior to version 2.0.27 and 2.2.10, this function could
 * not take a source iterator to const.  This was fixed in versions
 * 2.0.27 and 2.2.10.
 * 
 * Since 2.0.20/2.2.3
 */
template <class SourceIterator1, class SourceIterator2, class DestIterator, class Func>
std::tuple<SourceIterator1, SourceIterator2, DestIterator>
  parallel_transform_partial(TaskManager& tm,
			     SourceIterator1 first1,
			     SourceIterator1 last1,
			     SourceIterator2 first2,
			     DestIterator dest,
			     int max,
			     Func&& func) {

  // nothing to do for an empty first range or a maximum of 0: hand
  // the iterators back unchanged without creating any tasks
  if (first1 == last1 || !max) return std::make_tuple(first1, first2, dest);

  typedef typename std::iterator_traits<SourceIterator1>::reference Arg1RefType;
  typedef typename std::iterator_traits<SourceIterator1>::difference_type DiffType;
  typedef typename std::iterator_traits<SourceIterator2>::reference Arg2RefType;
  // the callable's type stripped of reference and const qualification
  typedef typename std::remove_const<typename std::remove_reference<Func>::type>::type FType;
  // this function will fail to compile if DestType is a reference
  // type: that is a feature, not a bug, as a function returning a
  // reference lacks referential transparency, is unlikely to be
  // thread-safe and is unsuitable for use as a task function
  typedef decltype(func(*first1, *first2)) DestType;

  // state shared with the tasks: the tasks are handed the addresses
  // of 'mutex', 'cond', 'done_count' and (via the fail callback)
  // 'error'.  'start_count' is only read and written in this function
  Mutex mutex;
  Cond cond;
  DiffType start_count = 0;
  DiffType done_count = 0;
  bool error = false;

  // a negative 'max' means "no limit": it is mapped below to the
  // largest value DiffType can hold
  //
  // a specialization of std::numeric_limits::max() for all arithmetic
  // types is required by §3.9.1/8 of the standard.  The iterator
  // difference type must be a signed integer type (§24.2.1/1).  All
  // signed integer types are arithmetic types (§3.9.1/2, §3.9.1/7 and
  // §3.9.1/8).
  const DiffType local_max =
    (max >= 0) ? max : std::numeric_limits<DiffType>::max();

  // intermediate results have to be held in an array so destination
  // ordering can be enforced when using insert iterators.  This
  // causes some inefficiency for non-random access iterators
  // (std::distance() has to walk the range).  The array holds one
  // default-constructed DestType per task which will be started
  std::unique_ptr<DestType[]> results(new DestType[std::min(local_max,
							    std::distance(first1, last1))]);

  // construct SafeFunctorArg objects so that they can be shared
  // between different tasks
  Cgu::Callback::SafeFunctorArg<Arg1RefType, Arg2RefType, DestType&> s_task{
    Cgu::Callback::make_ref(&ParallelHelper2::transform2_func<FType, Arg1RefType, Arg2RefType, DestType>,
			    std::forward<Func>(func))
  };
  // the fail functor is given the addresses of the shared state.
  // NOTE(review): ParallelHelper2::fail_func is defined outside this
  // section; presumably it sets 'error' and increments 'done_count'
  // under 'mutex' and signals 'cond' - confirm against its definition
  Cgu::Callback::SafeFunctor s_fail{
    Cgu::Callback::make(&ParallelHelper2::fail_func<DiffType>,
			&mutex,
			&cond,
			&error,
			&done_count)
  };

  // queue one task per pair of source elements, stopping at the end
  // of the first range or once 'local_max' tasks have been queued.
  // NOTE(review): if anything in this loop throws after some tasks
  // have been queued, the locals above are destroyed during unwinding
  // while those tasks may still hold pointers to them - confirm how
  // callers/TaskManager are expected to cope with that
  for (; first1 != last1 && start_count < local_max; ++first1, ++first2, ++start_count) {
    // std::bind stores copies of the functor and of both iterators at
    // their current positions, plus the address of this element's
    // slot in 'results'
    std::unique_ptr<const Cgu::Callback::Callback> task_cb(
      Cgu::Callback::lambda<>(std::bind(&ParallelHelper2::transform2_cb_func<Arg1RefType, Arg2RefType, DestType, DiffType, SourceIterator1, SourceIterator2>,
					s_task,
					first1,
					first2,
					&mutex,
					&cond,
					&done_count,
					results.get() + start_count))
    );
    // each task gets its own fail callback holding a copy of 's_fail'
    std::unique_ptr<const Cgu::Callback::Callback> fail_cb(
      Cgu::Callback::lambda<>([s_fail] () {s_fail();})
    );

    // both callbacks are moved into add_task()
    tm.add_task(std::move(task_cb), std::move(fail_cb));
  }
  
  // block until 'done_count' has caught up with the number of tasks
  // queued; the condition is re-checked after every wake-up
  Mutex::Lock l{mutex};
  while (start_count > done_count) cond.wait(mutex);
  // on failure nothing is written through 'dest'
  if (error) throw ParallelError();
  // move the results out in source order so that the destination
  // sees them in the same order as the source elements
  for (DiffType index = 0; index < start_count; ++dest, ++index) {
    *dest = std::move(results[index]);
  }
  // all three iterators now sit one past the last element processed
  return std::make_tuple(first1, first2, dest);
}

} // namespace Thread

/**
 * @defgroup IntIterHelpers IntIterHelpers
 *
 * @class IntIter parallel.h c++-gtk-utils/parallel.h
 * @brief An iterator class providing a lazy integer range over a virtual container.
 * @sa IntIterHelpers
 *
 * This class acts as an iterator which iterates over a range of
 * integers lazily, as if over a virtual container of incrementing
 * ints constructed using std::iota.  It is principally intended for
 * use in constructing parallel for loops using
 * Cgu::Thread::parallel_for_each() or
 * Cgu::Thread::parallel_transform(), which is why it is in the
 * c++-gtk-utils/parallel.h header, but it can be used whenever a lazy
 * range of integers is required.
 *
 * It behaves as a random access iterator to const int, and has the
 * normal increment, decrement and other random access functions.
 * When used with Cgu::Thread::parallel_for_each() and
 * Cgu::Thread::parallel_transform(), because it acts as an iterator
 * to const int, the callable object passed to those functions must
 * take an int or const int& argument.  Any IntIter object compares
 * equal to any other IntIter object which at the time in question
 * references the same int value, so it can be used as the beginning
 * iterator or end iterator of a range for a standard algorithm; and
 * one IntIter object is less than another IntIter object if it
 * references an int value less than the other, and so on as regards
 * the other comparison operators.
 *
 * Here is an example of its use with
 * Cgu::Thread::parallel_transform(), as a parallelized equivalent of
 * a for loop which increments a count integer on each iteration
 * through the loop.  In this example, the count integer, as
 * incremented on each iteration, is squared and the result stored in
 * a std::vector object (in practice you would not want to use this
 * construction for such a trivial case as it would be slower than the
 * single threaded version - it is for use where some significant work
 * is done in the for loop, here represented by the lambda
 * expression):
 *
 * @code
 *   using namespace Cgu;
 *   std::vector<int> v;
 *   Thread::TaskManager tm{4};
 *   Thread::parallel_transform(tm,
 *                              IntIter{0},   // beginning of range
 *                              IntIter{10},  // one past end of range
 *                              std::back_inserter(v),
 *                              [](int i) {return i * i;});
 *   for (auto elt: v) std::cout << elt << ' ';
 *   std::cout << std::endl;
 * @endcode
 *
 * Although unlikely to be useful very often, the iterator can count
 * backwards using std::reverse_iterator:
 *
 * @code
 *   using namespace Cgu;
 *   typedef std::reverse_iterator<IntIter> RIntIter;
 *   std::vector<int> v;
 *   Thread::TaskManager tm{4};
 *   Thread::parallel_transform(tm,
 *                              RIntIter{IntIter{10}},  // one past beginning of range
 *                              RIntIter{IntIter{0}},   // end of range
 *                              std::back_inserter(v),
 *                              [](int i) {return i * i;});
 *   for (auto elt: v) std::cout << elt << ' ';
 *   std::cout << std::endl;
 * @endcode
 *
 * @ingroup IntIterHelpers
 *
 * Since 2.0.27 and 2.2.10.
 */
class IntIter {
public:
  // member types looked up by std::iterator_traits
  typedef std::random_access_iterator_tag iterator_category;
  typedef int value_type;
  typedef int difference_type;
  typedef int reference;  // read only
  typedef void pointer;   // read only
private:
  // the integer this iterator currently stands for
  value_type pos;
public:
  /**
   * Constructs an iterator representing the integer passed as the
   * argument.  As the argument defaults to 0, this also serves as the
   * default constructor.  It does not throw.
   *
   * Since 2.0.27 and 2.2.10.
   */
  explicit IntIter(value_type start = 0) noexcept : pos(start) {}

  /**
   * The copy constructor does not throw.
   *
   * Since 2.0.27 and 2.2.10.
   */
  // gcc-4.6 will error if we make this noexcept
  IntIter(const IntIter&) = default;

  /**
   * The copy assignment operator does not throw.  It carries out no
   * locking: if the iterator is accessed in more than one thread, the
   * user must provide synchronization.  (That is not necessary where
   * an IntIter object is only used by
   * Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   *
   * Since 2.0.27 and 2.2.10.
   */
  // gcc-4.6 will error if we make this noexcept
  IntIter& operator=(const IntIter&) = default;

  /**
   * Pre-increment: moves the iterator on to the next integer.  It
   * does not throw.  It carries out no locking: if the iterator is
   * accessed in more than one thread, the user must provide
   * synchronization.  (That is not necessary where an IntIter object
   * is only used by Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A reference to the iterator after being incremented.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter& operator++() noexcept {
    pos += 1;
    return *this;
  }

  /**
   * Post-increment: moves the iterator on to the next integer and
   * hands back its previous state.  It does not throw.  It carries
   * out no locking: if the iterator is accessed in more than one
   * thread, the user must provide synchronization.  (That is not
   * necessary where an IntIter object is only used by
   * Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A copy of the iterator prior to being incremented.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter operator++(int) noexcept {
    // the temporary is built from the old value before it is bumped
    return IntIter{pos++};
  }

  /**
   * Pre-decrement: moves the iterator back to the previous integer.
   * It does not throw.  It carries out no locking: if the iterator is
   * accessed in more than one thread, the user must provide
   * synchronization.  (That is not necessary where an IntIter object
   * is only used by Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A reference to the iterator after being decremented.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter& operator--() noexcept {
    pos -= 1;
    return *this;
  }

  /**
   * Post-decrement: moves the iterator back to the previous integer
   * and hands back its previous state.  It does not throw.  It
   * carries out no locking: if the iterator is accessed in more than
   * one thread, the user must provide synchronization.  (That is not
   * necessary where an IntIter object is only used by
   * Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A copy of the iterator prior to being decremented.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter operator--(int) noexcept {
    // the temporary is built from the old value before it is lowered
    return IntIter{pos--};
  }

  /**
   * Advances the iterator by adding the argument to the integer value
   * which the iterator currently represents.  It does not throw.  It
   * carries out no locking: if the iterator is accessed in more than
   * one thread, the user must provide synchronization.  (That is not
   * necessary where an IntIter object is only used by
   * Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A reference to the iterator after addition.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter& operator+=(difference_type n) noexcept {
    pos = pos + n;
    return *this;
  }

  /**
   * Moves the iterator back by subtracting the argument from the
   * integer value which the iterator currently represents.  It does
   * not throw.  It carries out no locking: if the iterator is
   * accessed in more than one thread, the user must provide
   * synchronization.  (That is not necessary where an IntIter object
   * is only used by Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return A reference to the iterator after subtraction.
   *
   * Since 2.0.27 and 2.2.10.
   */
  IntIter& operator-=(difference_type n) noexcept {
    pos = pos - n;
    return *this;
  }

  /**
   * Offset dereference: yields the integer lying 'n' places from the
   * one currently represented, leaving the iterator itself untouched.
   * It does not throw.  It carries out no locking: if the iterator is
   * accessed in more than one thread and one of them calls a
   * non-const method, the user must provide synchronization.  (That
   * is not necessary where an IntIter object is only used by
   * Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return The integer value at the given offset.
   *
   * Since 2.0.27 and 2.2.10.
   */
  reference operator[](difference_type n) const noexcept {
    return pos + n;
  }

  /**
   * Dereference: yields, by value, the integer which the iterator
   * currently represents.  It does not throw.  It carries out no
   * locking: if the iterator is accessed in more than one thread and
   * one of them calls a non-const method, the user must provide
   * synchronization.  (That is not necessary where an IntIter object
   * is only used by Cgu::Thread::parallel_for_each(),
   * Cgu::Thread::parallel_for_each_partial(),
   * Cgu::Thread::parallel_transform() or
   * Cgu::Thread::parallel_transform_partial(), as those functions
   * only access source and destination iterators in the thread which
   * calls them.)
   * @return The integer value currently represented by the iterator.
   *
   * Since 2.0.27 and 2.2.10.
   */
  reference operator*() const noexcept {
    return pos;
  }

/* Only has effect if --with-glib-memory-slices-compat or
 * --with-glib-memory-slices-no-compat option picked */
  CGU_GLIB_MEMORY_SLICES_FUNCS
};

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator==(IntIter iter1, IntIter iter2) noexcept {
  // two iterators are equal when they currently stand for the same
  // integer
  const IntIter::value_type lhs = *iter1;
  const IntIter::value_type rhs = *iter2;
  return lhs == rhs;
}

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator!=(IntIter iter1, IntIter iter2) noexcept {
  // unequal when the two iterators stand for different integers
  return *iter1 != *iter2;
}

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator<(IntIter iter1, IntIter iter2) noexcept {
  // ordering of iterators follows the ordering of the integers they
  // currently stand for
  const IntIter::value_type lhs = *iter1;
  const IntIter::value_type rhs = *iter2;
  return lhs < rhs;
}

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator>(IntIter iter1, IntIter iter2) noexcept {
  // true when the first iterator stands for the larger integer
  return *iter1 > *iter2;
}

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls the functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator<=(IntIter iter1, IntIter iter2) noexcept {
  // true when the first iterator's integer does not exceed the
  // second's
  return *iter1 <= *iter2;
}

/**
 * @ingroup IntIterHelpers
 *
 * This comparison operator does not throw.  No locking is carried
 * out, so if one of the iterators is accessed in more than one thread
 * and a thread calls a non-const method, the user must provide
 * synchronization (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline bool operator>=(IntIter iter1, IntIter iter2) noexcept {
  // true when the first iterator's integer is not below the second's
  return *iter1 >= *iter2;
}

/**
 * @ingroup IntIterHelpers
 *
 * This operator does not throw.  No locking is carried out, so if one
 * of the iterators is accessed in more than one thread and a thread
 * calls a non-const method, the user must provide synchronization
 * (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline IntIter::difference_type operator-(IntIter iter1, IntIter iter2) noexcept {
  // distance between two iterators: the difference of the integers
  // they currently stand for
  const IntIter::difference_type gap = *iter1 - *iter2;
  return gap;
}

/**
 * @ingroup IntIterHelpers
 *
 * This operator does not throw.  No locking is carried out, so if one
 * of the iterators is accessed in more than one thread and a thread
 * calls a non-const method, the user must provide synchronization
 * (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline IntIter operator+(IntIter iter, IntIter::difference_type n) noexcept {
  // 'iter' is this function's own copy, so it can be advanced in
  // place and handed back; the caller's iterator is untouched
  iter += n;
  return iter;
}

/**
 * @ingroup IntIterHelpers
 *
 * This operator does not throw.  No locking is carried out, so if one
 * of the iterators is accessed in more than one thread and a thread
 * calls a non-const method, the user must provide synchronization
 * (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline IntIter operator-(IntIter iter, IntIter::difference_type n) noexcept {
  // 'iter' is this function's own copy, so it can be moved back in
  // place and handed back; the caller's iterator is untouched
  iter -= n;
  return iter;
}

/**
 * @ingroup IntIterHelpers
 *
 * This operator does not throw.  No locking is carried out, so if one
 * of the iterators is accessed in more than one thread and a thread
 * calls a non-const method, the user must provide synchronization
 * (but Cgu::Thread::parallel_for_each(),
 * Cgu::Thread::parallel_for_each_partial(),
 * Cgu::Thread::parallel_transform() and
 * Cgu::Thread::parallel_transform_partial() only access source and
 * destination iterators in the thread which calls those functions, so
 * use of an IntIter object only by one of those functions does not
 * require synchronization).
 *
 * Since 2.0.27 and 2.2.10.
 */
inline IntIter operator+(IntIter::difference_type n, IntIter iter) noexcept {
  // offset-first form of addition: the result stands for the
  // iterator's integer plus 'n'
  return IntIter{*iter + n};
}

} // namespace Cgu

#endif // CGU_PARALLEL_H
