Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IMPLEMENTATION] Compressor RLE #726

Merged
merged 4 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions compression/rlecoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
rlecoding.go
description: run length encoding and decoding
details:
Run-length encoding (RLE) is a simple form of data compression in which runs of data are stored as a single data value and count, rather than as the original run. This is useful when the data contains many repeated values. For example, the data "WWWWWWWWWWWWBWWWWWWWWWWWWBBB" can be compressed to "12W1B12W3B". The algorithm is simple and can be implemented in a few lines of code.
author(s) [ddaniel27](https://github.com/ddaniel27)
*/
package compression

import (
"bytes"
"fmt"
"regexp"
"strconv"
"strings"
)

// RLEncode returns the run-length encoding of data.
//
// Each maximal run of identical characters is written as the decimal run
// length followed by the character itself, e.g. "AABCCC" becomes "2A1B3C".
// Runs are detected per Unicode code point rather than per byte, so
// multi-byte UTF-8 characters are kept intact ("ééé" becomes "3é").
// Bytes that are not valid UTF-8 are emitted as U+FFFD.
//
// An empty input yields an empty string. Note that the textual format is
// inherently ambiguous when the input itself contains digits.
func RLEncode(data string) string {
	runes := []rune(data)

	// A builder avoids the quadratic cost of repeated string concatenation.
	var sb strings.Builder
	count := 1
	for i, r := range runes {
		if i+1 < len(runes) && r == runes[i+1] {
			count++
			continue
		}
		// Writes to a strings.Builder never fail, so the result is ignored.
		fmt.Fprintf(&sb, "%d%c", count, r)
		count = 1
	}
	return sb.String()
}

// rleRunPattern matches one encoded run: a decimal count followed by exactly
// one character. The (?s) flag lets "." match a newline as well, so any
// character can be used as a run symbol. It is compiled once at package
// initialisation instead of on every call.
var rleRunPattern = regexp.MustCompile(`(?s)(\d+)(.)`)

// RLEdecode expands a run-length encoded string produced by RLEncode.
//
// The input is scanned for "<count><character>" pairs and each pair is
// expanded to count copies of the character; text that does not form such a
// pair is ignored. Any character (including spaces, punctuation, newlines
// and multi-byte UTF-8 characters) is accepted as the run symbol.
//
// Runs whose count cannot be represented as an int are skipped rather than
// expanded. An empty input yields an empty string.
func RLEdecode(data string) string {
	var sb strings.Builder

	for _, match := range rleRunPattern.FindAllStringSubmatch(data, -1) {
		num, err := strconv.Atoi(match[1])
		if err != nil {
			// The only possible failure is an out-of-range count; expanding
			// it would exhaust memory, so the run is dropped.
			continue
		}
		sb.WriteString(strings.Repeat(match[2], num))
	}

	return sb.String()
}

// RLEncodebytes returns the run-length encoding of data as a byte slice.
//
// The output is a sequence of (count, value) byte pairs. Because the count
// is stored in a single byte, a run longer than 255 is split into several
// consecutive pairs for the same value (e.g. 300 x 'A' becomes
// {255, 'A', 45, 'A'}), so the count never wraps around.
//
// A nil or empty input yields a nil slice.
func RLEncodebytes(data []byte) []byte {
	// Largest run length that fits in the one-byte count field.
	const maxRun = 255

	var result []byte
	var count byte = 1

	for i := 0; i < len(data); i++ {
		// Extend the current run only while the counter still has room;
		// otherwise flush it and start a new pair for the same value.
		if i+1 < len(data) && data[i] == data[i+1] && count < maxRun {
			count++
			continue
		}
		result = append(result, count, data[i])
		count = 1
	}

	return result
}

// RLEdecodebytes expands a run-length encoded byte slice as produced by
// RLEncodebytes.
//
// The input is read as consecutive (count, value) byte pairs and each pair
// is expanded to count copies of value. A trailing byte that has no partner
// (odd-length input) is ignored instead of causing an out-of-range panic.
//
// A nil or empty input yields a nil slice.
func RLEdecodebytes(data []byte) []byte {
	var result []byte

	// Only step onto positions where both the count and the value exist.
	for i := 0; i+1 < len(data); i += 2 {
		count := int(data[i])
		result = append(result, bytes.Repeat([]byte{data[i+1]}, count)...)
	}

	return result
}
161 changes: 161 additions & 0 deletions compression/rlecoding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package compression_test

import (
"bytes"
"testing"

"github.com/TheAlgorithms/Go/compression"
)

// TestCompressionRLEncode runs RLEncode over a table of plain strings and
// compares each result with its expected encoding.
func TestCompressionRLEncode(t *testing.T) {
	type testCase struct {
		label    string
		input    string
		expected string
	}

	cases := []testCase{
		{label: "test 1", input: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB", expected: "12W1B12W3B"},
		{label: "test 2", input: "AABCCCDEEEE", expected: "2A1B3C1D4E"},
		{label: "test 3", input: "AAAABBBCCDA", expected: "4A3B2C1D1A"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got := compression.RLEncode(tc.input)
			if got == tc.expected {
				return
			}
			t.Errorf("RLEncode() = %v, want %v", got, tc.expected)
		})
	}
}

// TestCompressionRLEDecode runs RLEdecode over a table of encoded strings and
// compares each result with the expected original text.
func TestCompressionRLEDecode(t *testing.T) {
	type testCase struct {
		label    string
		input    string
		expected string
	}

	cases := []testCase{
		{label: "test 1", input: "12W1B12W3B", expected: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB"},
		{label: "test 2", input: "2A1B3C1D4E", expected: "AABCCCDEEEE"},
		{label: "test 3", input: "4A3B2C1D1A", expected: "AAAABBBCCDA"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got := compression.RLEdecode(tc.input)
			if got == tc.expected {
				return
			}
			t.Errorf("RLEdecode() = %v, want %v", got, tc.expected)
		})
	}
}

// TestCompressionRLEncodeBytes runs RLEncodebytes over a table of byte
// slices and compares each result with the expected (count, value) pairs.
func TestCompressionRLEncodeBytes(t *testing.T) {
	type testCase struct {
		label    string
		input    []byte
		expected []byte
	}

	cases := []testCase{
		{
			label:    "test 1",
			input:    []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"),
			expected: []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'},
		},
		{
			label:    "test 2",
			input:    []byte("AABCCCDEEEE"),
			expected: []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'},
		},
		{
			label:    "test 3",
			input:    []byte("AAAABBBCCDA"),
			expected: []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'},
		},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got := compression.RLEncodebytes(tc.input)
			if bytes.Equal(got, tc.expected) {
				return
			}
			t.Errorf("RLEncodebytes() = %v, want %v", got, tc.expected)
		})
	}
}

// TestCompressionRLEDecodeBytes runs RLEdecodebytes over a table of
// (count, value) pair slices and compares each result with the expected
// expanded bytes.
func TestCompressionRLEDecodeBytes(t *testing.T) {
	type testCase struct {
		label    string
		input    []byte
		expected []byte
	}

	cases := []testCase{
		{
			label:    "test 1",
			input:    []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'},
			expected: []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"),
		},
		{
			label:    "test 2",
			input:    []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'},
			expected: []byte("AABCCCDEEEE"),
		},
		{
			label:    "test 3",
			input:    []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'},
			expected: []byte("AAAABBBCCDA"),
		},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got := compression.RLEdecodebytes(tc.input)
			if bytes.Equal(got, tc.expected) {
				return
			}
			t.Errorf("RLEdecodebytes() = %v, want %v", got, tc.expected)
		})
	}
}

/* --- BENCHMARKS --- */
// BenchmarkRLEncode times RLEncode on a fixed 28-character sample string.
func BenchmarkRLEncode(b *testing.B) {
	const sample = "WWWWWWWWWWWWBWWWWWWWWWWWWBBB"

	for iter := 0; iter < b.N; iter++ {
		_ = compression.RLEncode(sample)
	}
}

// BenchmarkRLEDecode times RLEdecode on a fixed encoded sample string.
func BenchmarkRLEDecode(b *testing.B) {
	const encoded = "12W1B12W3B"

	for iter := 0; iter < b.N; iter++ {
		_ = compression.RLEdecode(encoded)
	}
}

// BenchmarkRLEncodeBytes times RLEncodebytes on a fixed sample; the
// string-to-bytes conversion is performed inside the loop on every iteration.
func BenchmarkRLEncodeBytes(b *testing.B) {
	const sample = "WWWWWWWWWWWWBWWWWWWWWWWWWBBB"

	for iter := 0; iter < b.N; iter++ {
		_ = compression.RLEncodebytes([]byte(sample))
	}
}

// BenchmarkRLEDecodeBytes times RLEdecodebytes on a fixed sequence of
// (count, value) pairs; the input slice is built inside the loop on every
// iteration.
func BenchmarkRLEDecodeBytes(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		encoded := []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'}
		_ = compression.RLEdecodebytes(encoded)
	}
}
Loading