From 5907f61b586ad0f6de728e98ee5f50bac1aea03e Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Mon, 23 Oct 2023 23:32:15 +0200 Subject: [PATCH 1/8] feat: add timsort sorting algorithm implementation --- sort/timsort.go | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 sort/timsort.go diff --git a/sort/timsort.go b/sort/timsort.go new file mode 100644 index 000000000..53116eecc --- /dev/null +++ b/sort/timsort.go @@ -0,0 +1,102 @@ +// Implementation of Timsort algorithm +// Reference: https://en.wikipedia.org/wiki/Timsort + +package sort + +import ( + "fmt" + + "github.com/TheAlgorithms/Go/constraints" +) + +const runSizeThreshold = 8 + +// Timsort is a simple generic implementation of Timsort algorithm. +func Timsort[T constraints.Ordered](data []T) []T { + runSize := calculateRunSize(len(data)) + insertionSortRuns(data, runSize) + mergeRuns(data, runSize) + return data +} + +func calculateRunSize(dataLength int) int { + remainder := 0 + for dataLength >= runSizeThreshold { + if dataLength%2 == 1 { + remainder = 1 + } + + dataLength = dataLength / 2 + } + + return dataLength + remainder +} + +func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { + for lower := 0; lower < len(data); lower += runSize { + upper := lower + runSize + if upper >= len(data) { + upper = len(data) + } + + insertionSortRun(data[lower:upper]) + } +} + +func insertionSortRun[T constraints.Ordered](data []T) { + for i := 1; i < len(data); i++ { + temp := data[i] + j := i + for ; j > 0 && data[j-1] > temp; j-- { + data[j] = data[j-1] + } + + data[j] = temp + } +} + +func mergeRuns[T constraints.Ordered](data []T, runSize int) { + fmt.Println(runSize) + for size := runSize; size < len(data); size *= 2 { + for lowerBound := 0; lowerBound < len(data); lowerBound += size * 2 { + middleBound := lowerBound + size - 1 + upperBound := lowerBound + 2*size - 1 + if len(data)-1 < upperBound { + upperBound = len(data) - 1 + } + + mergeRun(data, lowerBound, middleBound, upperBound) + } + } +} + +func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { + left := make([]T, mid-lower+1) + right := make([]T, upper-mid) + copy(left, data[lower:mid+1]) + copy(right, data[mid+1:upper+1]) + i, j, k := 0, 0, lower + for i < len(left) && j < len(right) { + if left[i] <= right[j] { + data[k] = left[i] + i++ + } else { + data[k] = right[j] + j++ + } + + k++ + } + + for i < len(left) { + data[k] = left[i] + k++ + i++ + } + + for j < len(right) { + data[k] = right[j] + k++ + j++ + } +} From 6e03b288b62cc574221b0d6ea6bac742a839af22 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Mon, 23 Oct 2023 23:33:16 +0200 Subject: [PATCH 2/8] test: add timsort sorting algorithm to tests --- sort/sorts_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sort/sorts_test.go b/sort/sorts_test.go index 8df55373f..7c27671c7 100644 --- a/sort/sorts_test.go +++ b/sort/sorts_test.go @@ -178,7 +178,11 @@ func TestCycle(t *testing.T) { testFramework(t, sort.Cycle[int]) } -//END TESTS +func TestTimsort(t *testing.T) { + testFramework(t, sort.Timsort[int]) +} + +// END TESTS func benchmarkFramework(b *testing.B, f func(arr []int) []int) { var sortTests = []struct { @@ -303,3 +307,7 @@ func BenchmarkPatience(b *testing.B) { func BenchmarkCycle(b *testing.B) { benchmarkFramework(b, sort.Cycle[int]) } + +func BenchmarkTimsort(b *testing.B) { + benchmarkFramework(b, sort.Timsort[int]) +} From 6786c2519612c80b12b78d5c62399cf5de91a93e Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Mon, 23 Oct 2023 23:53:26 +0200 Subject: [PATCH 3/8] chore: remove left-over print statement --- sort/timsort.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/sort/timsort.go b/sort/timsort.go index 53116eecc..c15a02a27 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -4,8 +4,6 @@ package sort import ( - "fmt" - "github.com/TheAlgorithms/Go/constraints" ) @@ -56,7 +54,6 @@ func insertionSortRun[T constraints.Ordered](data []T) { } func mergeRuns[T constraints.Ordered](data []T, runSize int) { - fmt.Println(runSize) for size := runSize; size < len(data); size *= 2 { for lowerBound := 0; lowerBound < len(data); lowerBound += size * 2 { middleBound := lowerBound + size - 1 From 8e1bb41ae2916c315909611680c7d632f9defcc3 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Tue, 24 Oct 2023 00:00:49 +0200 Subject: [PATCH 4/8] refactor: change insertionSortRun temp variable name --- sort/timsort.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sort/timsort.go b/sort/timsort.go index c15a02a27..2e08f4c94 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -43,13 +43,14 @@ func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { func insertionSortRun[T constraints.Ordered](data []T) { for i := 1; i < len(data); i++ { - temp := data[i] + value := data[i] j := i for ; j > 0 && data[j-1] > temp; j-- { + for ; j > 0 && data[j-1] > value; j-- { data[j] = data[j-1] } - data[j] = temp + data[j] = value } } From ca120c1ce0ecad8e091d44ded0d063f7f684b555 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Tue, 24 Oct 2023 00:01:39 +0200 Subject: [PATCH 5/8] docs: add concise documentation to timsort algorithm --- sort/timsort.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sort/timsort.go b/sort/timsort.go index 2e08f4c94..30d33ad15 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -17,6 +17,8 @@ func Timsort[T constraints.Ordered](data []T) []T { return data } +// calculateRunSize returns a run size parameter that is further used +// to slice the data slice. func calculateRunSize(dataLength int) int { remainder := 0 for dataLength >= runSizeThreshold { @@ -30,6 +32,7 @@ func calculateRunSize(dataLength int) int { return dataLength + remainder } +// insertionSortRuns runs insertion sort on all the data runs one by one. func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { for lower := 0; lower < len(data); lower += runSize { upper := lower + runSize @@ -41,11 +44,13 @@ func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { } } +// insertionSortRuns runs insertion sort on a single data run slice. func insertionSortRun[T constraints.Ordered](data []T) { for i := 1; i < len(data); i++ { value := data[i] j := i - for ; j > 0 && data[j-1] > temp; j-- { + // return to the sorted part of slice by decrementing the j index and stop + // upon reaching a smaller value for ; j > 0 && data[j-1] > value; j-- { data[j] = data[j-1] } @@ -54,6 +59,7 @@ func insertionSortRun[T constraints.Ordered](data []T) { } } +// mergeRuns merge sorts all the data runs into a single sorted data slice. func mergeRuns[T constraints.Ordered](data []T, runSize int) { for size := runSize; size < len(data); size *= 2 { for lowerBound := 0; lowerBound < len(data); lowerBound += size * 2 { @@ -68,12 +74,15 @@ func mergeRuns[T constraints.Ordered](data []T, runSize int) { } } +// mergeRun uses merge sort to sort adjacent data runs. func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { left := make([]T, mid-lower+1) right := make([]T, upper-mid) copy(left, data[lower:mid+1]) copy(right, data[mid+1:upper+1]) i, j, k := 0, 0, lower + // checks the top of left and right slice, chooses the smallest value + // and increments proper slice index until one reaches the slice's length for i < len(left) && j < len(right) { if left[i] <= right[j] { data[k] = left[i] @@ -86,12 +95,14 @@ func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { k++ } + // completes the merge sort with left-over values from left slice for i < len(left) { data[k] = left[i] k++ i++ } + // completes the merge sort with left-over values from right slice for j < len(right) { data[k] = right[j] k++ From d135489855520c0b590c58bbadaf28e89206d411 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Wed, 25 Oct 2023 08:56:48 +0200 Subject: [PATCH 6/8] refactor: reuse insertion sort algorithm --- sort/timsort.go | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/sort/timsort.go b/sort/timsort.go index 30d33ad15..becc642e0 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -40,22 +40,7 @@ func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { upper = len(data) } - insertionSortRun(data[lower:upper]) - } -} - -// insertionSortRuns runs insertion sort on a single data run slice. -func insertionSortRun[T constraints.Ordered](data []T) { - for i := 1; i < len(data); i++ { - value := data[i] - j := i - // return to the sorted part of slice by decrementing the j index and stop - // upon reaching a smaller value - for ; j > 0 && data[j-1] > value; j-- { - data[j] = data[j-1] - } - - data[j] = value + Insertion(data[lower:upper]) } } From bd33618c4ca199e9a95abd3802e9cae5756955a9 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Wed, 25 Oct 2023 09:04:06 +0200 Subject: [PATCH 7/8] refactor: reuse merge sort algorithm helper function --- sort/timsort.go | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/sort/timsort.go b/sort/timsort.go index becc642e0..a17b90e1e 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -65,32 +65,9 @@ func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { right := make([]T, upper-mid) copy(left, data[lower:mid+1]) copy(right, data[mid+1:upper+1]) - i, j, k := 0, 0, lower - // checks the top of left and right slice, chooses the smallest value - // and increments proper slice index until one reaches the slice's length - for i < len(left) && j < len(right) { - if left[i] <= right[j] { - data[k] = left[i] - i++ - } else { - data[k] = right[j] - j++ - } - - k++ - } - - // completes the merge sort with left-over values from left slice - for i < len(left) { - data[k] = left[i] - k++ - i++ - } - - // completes the merge sort with left-over values from right slice - for j < len(right) { - data[k] = right[j] - k++ - j++ + merged := merge(left, right) + // rewrite original data slice values with sorted values from merged slice + for i, value := range merged { + data[lower+i] = value } } From 8726a0a4cca02bea6a5ced2f83da450bacc2c60e Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Wed, 25 Oct 2023 10:30:38 +0200 Subject: [PATCH 8/8] refactor: remove slice copying in merge run --- sort/timsort.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sort/timsort.go b/sort/timsort.go index a17b90e1e..95520219a 100644 --- a/sort/timsort.go +++ b/sort/timsort.go @@ -61,10 +61,8 @@ func mergeRuns[T constraints.Ordered](data []T, runSize int) { // mergeRun uses merge sort to sort adjacent data runs. func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { - left := make([]T, mid-lower+1) - right := make([]T, upper-mid) - copy(left, data[lower:mid+1]) - copy(right, data[mid+1:upper+1]) + left := data[lower : mid+1] + right := data[mid+1 : upper+1] merged := merge(left, right) // rewrite original data slice values with sorted values from merged slice for i, value := range merged {