Libraries: introsort - array sorting algorithm

 

introsort - array sorting algorithm:

hybrid sorting algorithm that provides both fast average performance and (asymptotically) optimal worst-case performance

Author: DMITRII PECHERITSA

 

Hi, DMITRII PECHERITSA.

Thank you for the introsort algorithm.

I found some errors in your implementation when I sort large arrays (e.g., 5000 or more elements):

here is the corrected version (also, attached at the end):

File: <Mqh\Algorithms\SortIntro\Introsort.mqh>


//+------------------------------------------------------------------+
//|                                                    IntroSort.mqh |
//|                                    2019-2021, dimitri pecheritsa |
//|                                         mql5.com/en/users/dmipec |
//|------------------------------------------------------------------|
//|   c| array sorting algorithm                                     |
//|  introsort or introspective sort is a hybrid sorting algorithm   |
//|that provides both fast average performance and (asymptotically)  |
//|optimal worst-case performance. it begins with quicksort, it      |
//|switches to heapsort when the recursion depth exceeds a level     |
//|based on (the logarithm of) the number of elements being sorted   |
//|and it switches to insertion sort when the number of elements is  |
//|below some threshold. this combines the good parts of the three   |
//|algorithms, with practical performance comparable to quicksort on |
//|typical data sets and worst-case o(n log n) runtime due to the    |
//|heap sort. since the three algorithms it uses are comparison      |
//|sorts, it is also a comparison sort.                              |
//|  introsort was invented by david musser in 1997, in which he also|
//|introduced introselect, a hybrid selection algorithm based on     |
//|quickselect (a variant of quicksort), which falls back to median  |
//|of medians and thus provides worst-case linear complexity, which  |
//|is optimal. both algorithms were introduced with the purpose of   |
//|providing generic algorithms for the c++ standard library which   |
//|had both fast average performance and optimal worst-case          |
//|performance, thus allowing the performance requirements to be     |
//|tightened. introsort is in place and not stable                   |
//|  the most popular c++ stl algorithm sort() uses introsort.       |
//+------------------------------------------------------------------+
#include <Mqh\Algorithms\ASorter.mqh>
template<typename TKey,typename TItem>
class CIntroSort : public ASorter<TKey,TItem>
  {
public:
                     CIntroSort(void) {}
                    ~CIntroSort(void) {}
   string            Name(void) {return "introsort";}
   //--- 010
   void              Sort(void);
protected:
   void              IntroSort(int lo,int hi,int maxdepth);
   //--- 030 quick sort sub-algorithm
   int               Partition(int lo,int hi);
   //--- 040 heap sort sub-algorithm
   void              HeapSort(int lo,int hi);
   void              Heapify(int lo,int hi);
   void              SiftDown(int start,int end,int offset);
   //--- 070 insertion sort sub-algorithm for extreme cases
   void              InsertionSort(int lo,int hi);
  };
//+------------------------------------------------------------------+
//| 010| sort an array from low to high with introsort at some max   |
//|depth. the factor 2 in the maximum depth is arbitrary; it can be  |
//|tuned for practical performance.                                  |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::Sort(void)
  {
   int maxdepth=2*(int)floor(log(ArraySize(keys)/M_LN2));
   IntroSort(0,ArraySize(keys)-1,maxdepth);
  }
//+------------------------------------------------------------------+
//| 020| intro sort. the reason behind the “fallback” from quicksort |
//|to heapsort is that if quicksort was not able to get the solution |
//|after 2*log(n) recursions for a branch, probably it hit its worst |
//|case and it is degrading to complexity o(n^2). to optimise even   |
//|further this algorithm, an extra step is introduced in each       |
//|recursion where the partition is sorted using insertion sort if   |
//|the count of the partition is less than 20.                       |
//|  the number 20 is an empiric number obtained observing the       |
//|behaviour of insertionsort with lists of this size.               |
//|  ->| lo,hi   | index range                                       |
//|  ->| maxdepth| recursion depth                                   |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::IntroSort(int lo,int hi,int maxdepth)
  {
   int n=hi-lo+1;
//--- base case
   if(n<=1)
      return;
//--- use insertion sort based on the number of elements
   if(n<=20)
     {
      InsertionSort(lo,hi);
      return;
     }
//--- select heapsort or quicksort based on the current recursion 
//depth
   if(maxdepth==0)
      //--- max depth level has been exeeded
      HeapSort(lo,hi);
   else
     {
      //--- recursion depth is not exceeded yet. use quicksort. like 
      //others, hoare's partitioning doesn't produce a stable sort. in
      //this scheme, the pivot's final location is not necessarily at 
      //the index that is returned, as the pivot and elements equal to
      //the pivot can end up anywhere within the partition after a 
      //partition step, and may not be sorted until the base case of a
      //partition with a single element is reached via recursion. the 
      //next two segments that the main algorithm recurs on are 
      //(lo..p) (elements ≤ pivot) and (p+1..hi) (elements ≥ pivot) as
      //opposed to (lo..p-1) and (p+1..hi) as in lomuto's scheme. 
      //however, the partitioning algorithm guarantees lo ≤ p < hi 
      //which implies both resulting partitions are non-empty, hence 
      //there's no risk of infinite recursion
      int p=Partition(lo,hi);
      IntroSort(lo,p,maxdepth-1);
      IntroSort(p+1,hi,maxdepth-1);
     }
  }
//+------------------------------------------------------------------+
//| 030| the original partition scheme described by tony hoare uses  |
//|two indices that start at the ends of the array being partitioned,|
//|then move toward each other, until they detect an inversion: a    |
//|pair of elements, one greater than or equal to the pivot, one less|
//|than or equal, that are in the wrong order relative to each other.|
//|the inverted elements are then swapped. when the indices meet, the|
//|algorithm stops and returns the final index. hoare's scheme is    |
//|more efficient than lomuto's partition scheme because it does     |
//|three times fewer swaps on average, and it creates efficient      |
//|partitions even when all values are equal.                        |
//|  like lomuto's partition scheme, hoare's partitioning also would |
//|cause quicksort to degrade to o(n^2) for already sorted input, if |
//|the pivot was chosen as the first or the last element. with the   |
//|middle element as the pivot, however, sorted data results with    |
//|(almost) no swaps in equally sized partitions leading to best case|
//|behavior of quicksort, i.e. o(n log(n)).                          |
//|  <-| partition value                                             |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
#include <Mqh\Universal\CompareFunction.mqh>
template<typename TKey,typename TItem>
int CIntroSort::Partition(int lo,int hi)
  {
   TKey pivot=keys[(hi+lo)/2];
   int i=lo-1;
   int j=hi+1;
   while(true)
     {
      do
         i=i+1;
      while(Compare(keys[i],pivot)==-1);
      do
         j=j-1;
      while(Compare(keys[j],pivot)==1);
      if(i>=j)
         return j;
      Swap(i,j);
     }
  }
//+------------------------------------------------------------------+
//| 040| heapsort uses two subroutines, heapify and siftdown.        |
//|  the former is the common in-place heap construction routine,    |
//|while the latter is a common subroutine for implementing heapify. |
//|  in the first step, a heap is built out of the data. the heap is |
//|often placed in an array with the layout of a complete binary     |
//|tree. the complete binary tree maps the binary tree structure into|
//|the array indices; each array index represents a node; the index  |
//|of the node's parent, left child branch, or right child branch are|
//|simple expressions. for a zero-based array, the root node is      |
//|stored at index lo.                                               |
//|  in the second step, a sorted array is created by repeatedly     |
//|removing the largest element from the heap (the root of the heap),|
//|and inserting it into the array. the heap is updated after each   |
//|removal to maintain the heap property. once all objects have been |
//|removed from the heap, the result is a sorted array.              |
//|  heapsort can be performed in place. the array can be split into |
//|two parts, the sorted array and the heap. the heap's invariant is |
//|preserved after each extraction, so the only cost is that of      |
//|extraction.                                                       |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::HeapSort(int lo,int hi)
  {
//--- build the heap in array keys so that largest value is at the
//root
   Heapify(lo,hi);
//--- the following loop maintains the invariants that keys[lo:end] is
//a heap and every element beyond end is greater than everything
//before it (so keys[end:count] is in sorted order)
   for(int end=hi-lo; end>0;)
     {
      //--- keys[lo] is the root and largest value. the swap moves it
      //in front of the sorted elements.
      Swap(lo+end,lo);
      //--- the heap size is reduced by one
      end--;
      //--- the swap ruined the heap property, so restore it
      SiftDown(0,end,lo);
     }
  }
//+------------------------------------------------------------------+
//| 050| heapify puts elements in heap order, in-place. after sifting|
//|down the root all nodes/elements are in heap order                |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::Heapify(int lo,int hi)
  {
   int count=hi-lo+1;
//--- start is assigned the index in 'keys' of the last parent node.
//the last element in a 0-based array is at index count-1; find the
//parent of that element
   for(int start=(int)floor((count-2)/2.); start>=0; start--)
      //--- sift down the node at index 'start' to the proper place
      //such that all nodes below the start index are in heap order.
      //then go to the next parent node
      SiftDown(start,count-1,lo);
  }
//+------------------------------------------------------------------+
//| 060| sift down repairs the heap whose root element is at index   |
//|start, assuming the heaps rooted at its children are valid        |
//|  ->| start,end| index range                                      |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::SiftDown(int start,int end,int offset)
  {
//--- the root has at least one child
   for(int root=start; (root<<1)+1<=end;)
     {
      //--- left child of root
      int child=(root<<1)+1;
      //--- keeps track of child to swap with
      int swap=root;
      if(Compare(keys[offset+swap],keys[offset+child])==-1)
         swap=child;
      //--- there is a right child and that child is greater
      if(child+1<=end && Compare(keys[offset+swap],keys[offset+child+1])==-1)
         swap=child+1;
      //--- the root holds the largest element. since we assume the
      //heaps rooted at the children are valid, this means that we
      //are done.
      if(swap==root)
         return;
      else
        {
         Swap(offset+root,offset+swap);
         root=swap;
         //--- repeat to continue sifting down the child now
        }
     }
  }
//+------------------------------------------------------------------+
//| 070| insertion sort iterates, consuming one input element each   |
//|repetition, and grows a sorted output list. at each iteration,    |
//|insertion sort removes one element from the input data, finds the |
//|location it belongs within the sorted list, and inserts it there. |
//|it repeats until no input elements remain.                        |
//|  sorting is typically done in-place, by iterating up the array,  |
//|growing the sorted list behind it. at each array-position, it     |
//|checks the value there against the largest value in the sorted    |
//|list (which happens to be next to it, in the previous             |
//|array-position checked). if larger, it leaves the element in place|
//|and moves to the next. if smaller, it finds the correct position  |
//|within the sorted list, shifts all the larger values up to make a |
//|space, and inserts into that correct position.                    |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
#include <Mqh\Universal\CompareFunction.mqh>
template<typename TKey,typename TItem>
void CIntroSort::InsertionSort(int lo,int hi)
  {
   for(int i=lo+1; i<=hi; i++)
      for(int j=i; j>0 && Compare(keys[j-1],keys[j])==1; j--)
         Swap(j,j-1);
  }
//+------------------------------------------------------------------+
Files:
 

Wondering if this issue has been fixed in the attached code in the original post?

What lines of code changed in the corrected file?

 

Does an addition like this make sense?

This is when the keys and items are different.

//+------------------------------------------------------------------+
//|                                                    Functions.mqh |
//|                                    2019-2021, dimitri pecheritsa |
//|                                         mql5.com/en/users/dmipec |
//|------------------------------------------------------------------|
//|   f| introsort example                                           |
//| 005| enum sort order                                             |
//| 010| array sort (keys[<>], algorithm, order, prn)                |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//| 005| enum sort order parameter                                   |
//+------------------------------------------------------------------+
enum ENUM_SORT_ORDER
  {
   SORT_ORDER_ACCENDING,
   SORT_ORDER_DESCENDING
  };
//+------------------------------------------------------------------+
//| 010| sort keys with a sorting algorithm                          |
//|  ->| keys     | array which values are compared and sorted       |
//|  ->| algorithm| any compatible sorting algorithm                 |
//|  ->| order    | sorting order flag                               |
//+------------------------------------------------------------------+
#include <Mqh\Algorithms\ASorter.mqh>
template<typename TKey>
void ArraySort(TKey& keys[],ASorter<TKey,TKey>* algorithm,ENUM_SORT_ORDER order)
  {
   if(!CheckPointer(algorithm))
      return;
//--- prepare algorithm
   ArrayCopy(algorithm.keys,keys);
//--- sort keys
   algorithm.Sort();
//--- save sorted items
   ArrayCopy(keys,algorithm.keys);
//--- keep order
   if(order==SORT_ORDER_DESCENDING)
      ArrayReverse(keys);
//--- clear
   if(CheckPointer(algorithm)==POINTER_DYNAMIC)
      delete algorithm;
  }
template<typename TKey,typename TItem>
void ArraySort(TKey& keys[],TItem& items[],ASorter<TKey,TItem>* algorithm,ENUM_SORT_ORDER order)
  {
   if(!CheckPointer(algorithm))
      return;
//--- prepare algorithm
   ArrayCopy(algorithm.keys,keys);
   ArrayCopy(algorithm.items,items);
//--- sort keys
   algorithm.Sort();
//--- save sorted items
   ArrayCopy(keys,algorithm.keys);
   ArrayCopy(items,algorithm.items);
//--- keep order
   if(order==SORT_ORDER_DESCENDING) 
      {
       ArrayReverse(keys);
       ArrayReverse(items);
      }
//--- clear
   if(CheckPointer(algorithm)==POINTER_DYNAMIC)
      delete algorithm;
  }
//+------------------------------------------------------------------+
Reason: