@@ -38,64 +38,125 @@ impl<T> Drop for CopyOnDrop<T> {
3838 }
3939}
4040
41- /// Sorts a slice using insertion sort, which is `O(n^2)` worst-case .
42- fn insertion_sort < T , F > ( v : & mut [ T ] , is_less : & mut F )
41+ /// Shifts the first element to the right until it encounters a greater or equal element .
42+ fn shift_head < T , F > ( v : & mut [ T ] , is_less : & mut F )
4343 where F : FnMut ( & T , & T ) -> bool
4444{
4545 let len = v. len ( ) ;
46+ unsafe {
47+ // If the first two elements are out-of-order...
48+ if len >= 2 && is_less ( v. get_unchecked ( 1 ) , v. get_unchecked ( 0 ) ) {
49+ // Read the first element into a stack-allocated variable. If a following comparison
50+ // operation panics, `hole` will get dropped and automatically write the element back
51+ // into the slice.
52+ let mut tmp = NoDrop { value : ptr:: read ( v. get_unchecked ( 0 ) ) } ;
53+ let mut hole = CopyOnDrop {
54+ src : & mut tmp. value ,
55+ dest : v. get_unchecked_mut ( 1 ) ,
56+ } ;
57+ ptr:: copy_nonoverlapping ( v. get_unchecked ( 1 ) , v. get_unchecked_mut ( 0 ) , 1 ) ;
4658
47- for i in 1 ..len {
48- unsafe {
49- if is_less ( v. get_unchecked ( i) , v. get_unchecked ( i - 1 ) ) {
50- // There are three ways to implement insertion here:
51- //
52- // 1. Swap adjacent elements until the first one gets to its final destination.
53- // However, this way we copy data around more than is necessary. If elements are
54- // big structures (costly to copy), this method will be slow.
55- //
56- // 2. Iterate until the right place for the first element is found. Then shift the
57- // elements succeeding it to make room for it and finally place it into the
58- // remaining hole. This is a good method.
59- //
60- // 3. Copy the first element into a temporary variable. Iterate until the right
61- // place for it is found. As we go along, copy every traversed element into the
62- // slot preceding it. Finally, copy data from the temporary variable into the
63- // remaining hole. This method is very good. Benchmarks demonstrated slightly
64- // better performance than with the 2nd method.
65- //
66- // All methods were benchmarked, and the 3rd showed best results. So we chose that
67- // one.
68- let mut tmp = NoDrop { value : ptr:: read ( v. get_unchecked ( i) ) } ;
69-
70- // Intermediate state of the insertion process is always tracked by `hole`, which
71- // serves two purposes:
72- // 1. Protects integrity of `v` from panics in `is_less`.
73- // 2. Fills the remaining hole in `v` in the end.
74- //
75- // Panic safety:
76- //
77- // If `is_less` panics at any point during the process, `hole` will get dropped and
78- // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object
79- // it initially held exactly once.
80- let mut hole = CopyOnDrop {
81- src : & mut tmp. value ,
82- dest : v. get_unchecked_mut ( i - 1 ) ,
83- } ;
84- ptr:: copy_nonoverlapping ( v. get_unchecked ( i - 1 ) , v. get_unchecked_mut ( i) , 1 ) ;
85-
86- for h in ( 0 ..i-1 ) . rev ( ) {
87- if !is_less ( & tmp. value , v. get_unchecked ( h) ) {
88- break ;
89- }
90- ptr:: copy_nonoverlapping ( v. get_unchecked ( h) , v. get_unchecked_mut ( h + 1 ) , 1 ) ;
91- hole. dest = v. get_unchecked_mut ( h) ;
59+ for i in 2 ..len {
60+ if !is_less ( & v[ i] , & tmp. value ) {
61+ break ;
9262 }
93- // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
63+
64+ // Move `i`-th element one place to the left, thus shifting the hole to the right.
65+ ptr:: copy_nonoverlapping ( v. get_unchecked ( i) , v. get_unchecked_mut ( i - 1 ) , 1 ) ;
66+ hole. dest = v. get_unchecked_mut ( i) ;
9467 }
68+ // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
9569 }
9670 }
9771}
9872
73+ /// Shifts the last element to the left until it encounters a smaller or equal element.
74+ fn shift_tail < T , F > ( v : & mut [ T ] , is_less : & mut F )
75+ where F : FnMut ( & T , & T ) -> bool
76+ {
77+ let len = v. len ( ) ;
78+ unsafe {
79+ // If the last two elements are out-of-order...
80+ if len >= 2 && is_less ( v. get_unchecked ( len - 1 ) , v. get_unchecked ( len - 2 ) ) {
81+ // Read the last element into a stack-allocated variable. If a following comparison
82+ // operation panics, `hole` will get dropped and automatically write the element back
83+ // into the slice.
84+ let mut tmp = NoDrop { value : ptr:: read ( v. get_unchecked ( len - 1 ) ) } ;
85+ let mut hole = CopyOnDrop {
86+ src : & mut tmp. value ,
87+ dest : v. get_unchecked_mut ( len - 2 ) ,
88+ } ;
89+ ptr:: copy_nonoverlapping ( v. get_unchecked ( len - 2 ) , v. get_unchecked_mut ( len - 1 ) , 1 ) ;
90+
91+ for i in ( 0 ..len-2 ) . rev ( ) {
92+ if !is_less ( & tmp. value , v. get_unchecked ( i) ) {
93+ break ;
94+ }
95+
96+ // Move `i`-th element one place to the right, thus shifting the hole to the left.
97+ ptr:: copy_nonoverlapping ( v. get_unchecked ( i) , v. get_unchecked_mut ( i + 1 ) , 1 ) ;
98+ hole. dest = v. get_unchecked_mut ( i) ;
99+ }
100+ // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
101+ }
102+ }
103+ }
104+
105+ /// Partially sorts a slice by shifting several out-of-order elements around.
106+ ///
107+ /// Returns true if the slice is sorted at the end. This function is `O(n)` worst-case.
108+ #[ cold]
109+ fn partial_insertion_sort < T , F > ( v : & mut [ T ] , is_less : & mut F ) -> bool
110+ where F : FnMut ( & T , & T ) -> bool
111+ {
112+ // Maximum number of adjacent out-of-order pairs that will get shifted.
113+ const MAX_STEPS : usize = 5 ;
114+ // If the slice is shorter than this, don't shift any elements.
115+ const SHORTEST_SHIFTING : usize = 50 ;
116+
117+ let len = v. len ( ) ;
118+ let mut i = 1 ;
119+
120+ for _ in 0 ..MAX_STEPS {
121+ unsafe {
122+ // Find the next pair of adjacent out-of-order elements.
123+ while i < len && !is_less ( v. get_unchecked ( i) , v. get_unchecked ( i - 1 ) ) {
124+ i += 1 ;
125+ }
126+ }
127+
128+ // Are we done?
129+ if i == len {
130+ return true ;
131+ }
132+
133+ // Don't shift elements on short arrays, that has a performance cost.
134+ if len < SHORTEST_SHIFTING {
135+ return false ;
136+ }
137+
138+ // Swap the found pair of elements. This puts them in correct order.
139+ v. swap ( i - 1 , i) ;
140+
141+ // Shift the smaller element to the left.
142+ shift_tail ( & mut v[ ..i] , is_less) ;
143+ // Shift the greater element to the right.
144+ shift_head ( & mut v[ i..] , is_less) ;
145+ }
146+
147+ // Didn't manage to sort the slice in the limited number of steps.
148+ false
149+ }
150+
151+ /// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
152+ fn insertion_sort < T , F > ( v : & mut [ T ] , is_less : & mut F )
153+ where F : FnMut ( & T , & T ) -> bool
154+ {
155+ for i in 2 ..v. len ( ) +1 {
156+ shift_tail ( & mut v[ ..i] , is_less) ;
157+ }
158+ }
159+
99160/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
100161#[ cold]
101162fn heapsort < T , F > ( v : & mut [ T ] , is_less : & mut F )
@@ -180,6 +241,9 @@ fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
180241 let mut end_r = ptr:: null_mut ( ) ;
181242 let mut offsets_r: [ u8 ; BLOCK ] = unsafe { mem:: uninitialized ( ) } ;
182243
244+ // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
245+ // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
246+
183247 // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
184248 fn width < T > ( l : * mut T , r : * mut T ) -> usize {
185249 assert ! ( mem:: size_of:: <T >( ) > 0 ) ;
@@ -470,10 +534,10 @@ fn break_patterns<T>(v: &mut [T]) {
470534fn choose_pivot < T , F > ( v : & mut [ T ] , is_less : & mut F ) -> ( usize , bool )
471535 where F : FnMut ( & T , & T ) -> bool
472536{
473- // Minimal length to choose the median-of-medians method.
537+ // Minimum length to choose the median-of-medians method.
474538 // Shorter slices use the simple median-of-three method.
475- const SHORTEST_MEDIAN_OF_MEDIANS : usize = 80 ;
476- // Maximal number of swaps that can be performed in this function.
539+ const SHORTEST_MEDIAN_OF_MEDIANS : usize = 50 ;
540+ // Maximum number of swaps that can be performed in this function.
477541 const MAX_SWAPS : usize = 4 * 3 ;
478542
479543 let len = v. len ( ) ;
@@ -522,7 +586,7 @@ fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
522586 if swaps < MAX_SWAPS {
523587 ( b, swaps == 0 )
524588 } else {
525- // The maximal number of swaps was performed. Chances are the slice is descending or mostly
589+ // The maximum number of swaps was performed. Chances are the slice is descending or mostly
526590 // descending, so reversing will probably help sort it faster.
527591 v. reverse ( ) ;
528592 ( len - 1 - b, true )
@@ -575,8 +639,9 @@ fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &mut F, mut pred: Option<&'a T
575639 // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
576640 // selection predicts the slice is likely already sorted...
577641 if was_balanced && was_partitioned && likely_sorted {
578- // Check whether the slice really is sorted. If so, we're done.
579- if v. windows ( 2 ) . all ( |w| !is_less ( & w[ 1 ] , & w[ 0 ] ) ) {
642+ // Try identifying several out-of-order elements and shifting them to correct
643+ // positions. If the slice ends up being completely sorted, we're done.
644+ if partial_insertion_sort ( v, is_less) {
580645 return ;
581646 }
582647 }
0 commit comments