Libraries: introsort - array sorting algorithm

Automated-Trading 2021.02.10 06:33

hybrid sorting algorithm that provides both fast average performance and (asymptotically) optimal worst-case performance

Author: DMITRII PECHERITSA

amrali 2021.03.12 13:56 #1

Hi, DMITRII PECHERITSA.

Thank you for the introsort algorithm.

I found some errors in your implementation when I sort large arrays (e.g., 5000 or more elements):

here is the corrected version (also, attached at the end):

File: <Mqh\Algorithms\SortIntro\Introsort.mqh>

//+------------------------------------------------------------------+
//|                                                    IntroSort.mqh |
//|                                    2019-2021, dimitri pecheritsa |
//|                                         mql5.com/en/users/dmipec |
//|------------------------------------------------------------------|
//|   c| array sorting algorithm                                     |
//|  introsort or introspective sort is a hybrid sorting algorithm   |
//|that provides both fast average performance and (asymptotically)  |
//|optimal worst-case performance. it begins with quicksort, it      |
//|switches to heapsort when the recursion depth exceeds a level     |
//|based on (the logarithm of) the number of elements being sorted   |
//|and it switches to insertion sort when the number of elements is  |
//|below some threshold. this combines the good parts of the three   |
//|algorithms, with practical performance comparable to quicksort on |
//|typical data sets and worst-case o(n log n) runtime due to the    |
//|heap sort. since the three algorithms it uses are comparison      |
//|sorts, it is also a comparison sort.                              |
//|  introsort was invented by david musser in 1997, in which he also|
//|introduced introselect, a hybrid selection algorithm based on     |
//|quickselect (a variant of quicksort), which falls back to median  |
//|of medians and thus provides worst-case linear complexity, which  |
//|is optimal. both algorithms were introduced with the purpose of   |
//|providing generic algorithms for the c++ standard library which   |
//|had both fast average performance and optimal worst-case          |
//|performance, thus allowing the performance requirements to be     |
//|tightened. introsort is in place and not stable                   |
//|  the most popular c++ stl algorithm sort() uses introsort.       |
//+------------------------------------------------------------------+
#include <Mqh\Algorithms\ASorter.mqh>
template<typename TKey,typename TItem>
class CIntroSort : public ASorter<TKey,TItem>
  {
public:
                     CIntroSort(void) {}
                    ~CIntroSort(void) {}
   string            Name(void) {return "introsort";}
   //--- 010
   void              Sort(void);
protected:
   void              IntroSort(int lo,int hi,int maxdepth);
   //--- 030 quick sort sub-algorithm
   int               Partition(int lo,int hi);
   //--- 040 heap sort sub-algorithm
   void              HeapSort(int lo,int hi);
   void              Heapify(int lo,int hi);
   void              SiftDown(int start,int end,int offset);
   //--- 070 insertion sort sub-algorithm for extreme cases
   void              InsertionSort(int lo,int hi);
  };
//+------------------------------------------------------------------+
//| 010| sort an array from low to high with introsort at some max   |
//|depth. the factor 2 in the maximum depth is arbitrary; it can be  |
//|tuned for practical performance.                                  |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::Sort(void)
  {
   int maxdepth=2*(int)floor(log(ArraySize(keys)/M_LN2));
   IntroSort(0,ArraySize(keys)-1,maxdepth);
  }
//+------------------------------------------------------------------+
//| 020| intro sort. the reason behind the “fallback” from quicksort |
//|to heapsort is that if quicksort was not able to get the solution |
//|after 2*log(n) recursions for a branch, probably it hit its worst |
//|case and it is degrading to complexity o(n^2). to optimise even   |
//|further this algorithm, an extra step is introduced in each       |
//|recursion where the partition is sorted using insertion sort if   |
//|the count of the partition is less than 20.                       |
//|  the number 20 is an empiric number obtained observing the       |
//|behaviour of insertionsort with lists of this size.               |
//|  ->| lo,hi   | index range                                       |
//|  ->| maxdepth| recursion depth                                   |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::IntroSort(int lo,int hi,int maxdepth)
  {
   int n=hi-lo+1;
//--- base case
   if(n<=1)
      return;
//--- use insertion sort based on the number of elements
   if(n<=20)
     {
      InsertionSort(lo,hi);
      return;
     }
//--- select heapsort or quicksort based on the current recursion 
//depth
   if(maxdepth==0)
      //--- max depth level has been exeeded
      HeapSort(lo,hi);
   else
     {
      //--- recursion depth is not exceeded yet. use quicksort. like 
      //others, hoare's partitioning doesn't produce a stable sort. in
      //this scheme, the pivot's final location is not necessarily at 
      //the index that is returned, as the pivot and elements equal to
      //the pivot can end up anywhere within the partition after a 
      //partition step, and may not be sorted until the base case of a
      //partition with a single element is reached via recursion. the 
      //next two segments that the main algorithm recurs on are 
      //(lo..p) (elements ≤ pivot) and (p+1..hi) (elements ≥ pivot) as
      //opposed to (lo..p-1) and (p+1..hi) as in lomuto's scheme. 
      //however, the partitioning algorithm guarantees lo ≤ p < hi 
      //which implies both resulting partitions are non-empty, hence 
      //there's no risk of infinite recursion
      int p=Partition(lo,hi);
      IntroSort(lo,p,maxdepth-1);
      IntroSort(p+1,hi,maxdepth-1);
     }
  }
//+------------------------------------------------------------------+
//| 030| the original partition scheme described by tony hoare uses  |
//|two indices that start at the ends of the array being partitioned,|
//|then move toward each other, until they detect an inversion: a    |
//|pair of elements, one greater than or equal to the pivot, one less|
//|than or equal, that are in the wrong order relative to each other.|
//|the inverted elements are then swapped. when the indices meet, the|
//|algorithm stops and returns the final index. hoare's scheme is    |
//|more efficient than lomuto's partition scheme because it does     |
//|three times fewer swaps on average, and it creates efficient      |
//|partitions even when all values are equal.                        |
//|  like lomuto's partition scheme, hoare's partitioning also would |
//|cause quicksort to degrade to o(n^2) for already sorted input, if |
//|the pivot was chosen as the first or the last element. with the   |
//|middle element as the pivot, however, sorted data results with    |
//|(almost) no swaps in equally sized partitions leading to best case|
//|behavior of quicksort, i.e. o(n log(n)).                          |
//|  <-| partition value                                             |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
#include <Mqh\Universal\CompareFunction.mqh>
template<typename TKey,typename TItem>
int CIntroSort::Partition(int lo,int hi)
  {
   TKey pivot=keys[(hi+lo)/2];
   int i=lo-1;
   int j=hi+1;
   while(true)
     {
      do
         i=i+1;
      while(Compare(keys[i],pivot)==-1);
      do
         j=j-1;
      while(Compare(keys[j],pivot)==1);
      if(i>=j)
         return j;
      Swap(i,j);
     }
  }
//+------------------------------------------------------------------+
//| 040| heapsort uses two subroutines, heapify and siftdown.        |
//|  the former is the common in-place heap construction routine,    |
//|while the latter is a common subroutine for implementing heapify. |
//|  in the first step, a heap is built out of the data. the heap is |
//|often placed in an array with the layout of a complete binary     |
//|tree. the complete binary tree maps the binary tree structure into|
//|the array indices; each array index represents a node; the index  |
//|of the node's parent, left child branch, or right child branch are|
//|simple expressions. for a zero-based array, the root node is      |
//|stored at index lo.                                               |
//|  in the second step, a sorted array is created by repeatedly     |
//|removing the largest element from the heap (the root of the heap),|
//|and inserting it into the array. the heap is updated after each   |
//|removal to maintain the heap property. once all objects have been |
//|removed from the heap, the result is a sorted array.              |
//|  heapsort can be performed in place. the array can be split into |
//|two parts, the sorted array and the heap. the heap's invariant is |
//|preserved after each extraction, so the only cost is that of      |
//|extraction.                                                       |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::HeapSort(int lo,int hi)
  {
//--- build the heap in array keys so that largest value is at the
//root
   Heapify(lo,hi);
//--- the following loop maintains the invariants that keys[lo:end] is
//a heap and every element beyond end is greater than everything
//before it (so keys[end:count] is in sorted order)
   for(int end=hi-lo; end>0;)
     {
      //--- keys[lo] is the root and largest value. the swap moves it
      //in front of the sorted elements.
      Swap(lo+end,lo);
      //--- the heap size is reduced by one
      end--;
      //--- the swap ruined the heap property, so restore it
      SiftDown(0,end,lo);
     }
  }
//+------------------------------------------------------------------+
//| 050| heapify puts elements in heap order, in-place. after sifting|
//|down the root all nodes/elements are in heap order                |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::Heapify(int lo,int hi)
  {
   int count=hi-lo+1;
//--- start is assigned the index in 'keys' of the last parent node.
//the last element in a 0-based array is at index count-1; find the
//parent of that element
   for(int start=(int)floor((count-2)/2.); start>=0; start--)
      //--- sift down the node at index 'start' to the proper place
      //such that all nodes below the start index are in heap order.
      //then go to the next parent node
      SiftDown(start,count-1,lo);
  }
//+------------------------------------------------------------------+
//| 060| sift down repairs the heap whose root element is at index   |
//|start, assuming the heaps rooted at its children are valid        |
//|  ->| start,end| index range                                      |
//+------------------------------------------------------------------+
template<typename TKey,typename TItem>
void CIntroSort::SiftDown(int start,int end,int offset)
  {
//--- the root has at least one child
   for(int root=start; (root<<1)+1<=end;)
     {
      //--- left child of root
      int child=(root<<1)+1;
      //--- keeps track of child to swap with
      int swap=root;
      if(Compare(keys[offset+swap],keys[offset+child])==-1)
         swap=child;
      //--- there is a right child and that child is greater
      if(child+1<=end && Compare(keys[offset+swap],keys[offset+child+1])==-1)
         swap=child+1;
      //--- the root holds the largest element. since we assume the
      //heaps rooted at the children are valid, this means that we
      //are done.
      if(swap==root)
         return;
      else
        {
         Swap(offset+root,offset+swap);
         root=swap;
         //--- repeat to continue sifting down the child now
        }
     }
  }
//+------------------------------------------------------------------+
//| 070| insertion sort iterates, consuming one input element each   |
//|repetition, and grows a sorted output list. at each iteration,    |
//|insertion sort removes one element from the input data, finds the |
//|location it belongs within the sorted list, and inserts it there. |
//|it repeats until no input elements remain.                        |
//|  sorting is typically done in-place, by iterating up the array,  |
//|growing the sorted list behind it. at each array-position, it     |
//|checks the value there against the largest value in the sorted    |
//|list (which happens to be next to it, in the previous             |
//|array-position checked). if larger, it leaves the element in place|
//|and moves to the next. if smaller, it finds the correct position  |
//|within the sorted list, shifts all the larger values up to make a |
//|space, and inserts into that correct position.                    |
//|  ->| lo,hi| index range                                          |
//+------------------------------------------------------------------+
#include <Mqh\Universal\CompareFunction.mqh>
template<typename TKey,typename TItem>
void CIntroSort::InsertionSort(int lo,int hi)
  {
   for(int i=lo+1; i<=hi; i++)
      for(int j=i; j>0 && Compare(keys[j-1],keys[j])==1; j--)
         Swap(j,j-1);
  }
//+------------------------------------------------------------------+

Files:

Introsort_old.mqh 27 kb

Introsort.mqh 27 kb

Features of the mql5 Discussion of article "Statistical Using CArrayList gives error

Suminda Dharmasena 2022.04.14 09:32 #2

Wondering if this issue has been fixed in the attached code in the original post?

What lines of code changed in the corrected file?

Suminda Dharmasena 2022.04.14 16:50 #3

Does an addition like this make sense?

This is when the keys and items are different.

//+------------------------------------------------------------------+
//|                                                    Functions.mqh |
//|                                    2019-2021, dimitri pecheritsa |
//|                                         mql5.com/en/users/dmipec |
//|------------------------------------------------------------------|
//|   f| introsort example                                           |
//| 005| enum sort order                                             |
//| 010| array sort (keys[<>], algorithm, order, prn)                |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//| 005| enum sort order parameter                                   |
//+------------------------------------------------------------------+
enum ENUM_SORT_ORDER
  {
   SORT_ORDER_ACCENDING,
   SORT_ORDER_DESCENDING
  };
//+------------------------------------------------------------------+
//| 010| sort keys with a sorting algorithm                          |
//|  ->| keys     | array which values are compared and sorted       |
//|  ->| algorithm| any compatible sorting algorithm                 |
//|  ->| order    | sorting order flag                               |
//+------------------------------------------------------------------+
#include <Mqh\Algorithms\ASorter.mqh>
template<typename TKey>
void ArraySort(TKey& keys[],ASorter<TKey,TKey>* algorithm,ENUM_SORT_ORDER order)
  {
   if(!CheckPointer(algorithm))
      return;
//--- prepare algorithm
   ArrayCopy(algorithm.keys,keys);
//--- sort keys
   algorithm.Sort();
//--- save sorted items
   ArrayCopy(keys,algorithm.keys);
//--- keep order
   if(order==SORT_ORDER_DESCENDING)
      ArrayReverse(keys);
//--- clear
   if(CheckPointer(algorithm)==POINTER_DYNAMIC)
      delete algorithm;
  }
template<typename TKey,typename TItem>
void ArraySort(TKey& keys[],TItem& items[],ASorter<TKey,TItem>* algorithm,ENUM_SORT_ORDER order)
  {
   if(!CheckPointer(algorithm))
      return;
//--- prepare algorithm
   ArrayCopy(algorithm.keys,keys);
   ArrayCopy(algorithm.items,items);
//--- sort keys
   algorithm.Sort();
//--- save sorted items
   ArrayCopy(keys,algorithm.keys);
   ArrayCopy(items,algorithm.items);
//--- keep order
   if(order==SORT_ORDER_DESCENDING) 
      {
       ArrayReverse(keys);
       ArrayReverse(items);
      }
//--- clear
   if(CheckPointer(algorithm)==POINTER_DYNAMIC)
      delete algorithm;
  }
//+------------------------------------------------------------------+

New comment