-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharray.go
91 lines (77 loc) · 2.83 KB
/
array.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/**
The intention of this code is to experiment with cpu cache behaviour and resulting performance.
A matrix multiplication demands processing of many bytes from memory.
To avoid optimizations from a specialised matrix multiplications library a manual multiplication is performed here.
First experiment is the multiplication of matrix with itself.
Due to the nature of matrix multiplications, one matrix is read per row, the other matrix is read per column while calculating the new result value
The complexity of the standard algorithm is O(i * j * k). For quadratic matrices the run time is cubic, of the order O(n^3)
The representation of a matrix in memory is per row.
Optimal use of the cpu cache is achieved when the data that is going to be used is stored in consecutive order in memory.
So typically one matrix has the optimal data representation in memory, the second doesn't.
To test the significance of that factor, this program prepares the second matrix with pivoting the data, so this second matrix can be accessed per row as well and therefore support efficient caching for both matrices.
The necessary processing time gets displayed.
*/
package main
import (
"fmt"
"math/rand"
"time"
)
const (
x = 1200 // column
y = 1200 // row
)
var (
// [row][column]
matrix [y][x]int
matrixTurned [y][x]int
matrixResult[y][x]int
matrixTurnedResult[y][x]int
)
func main() {
fmt.Println(`Let's have some fun with matrix multiplication.`)
var i, j, k, tmp int
// initialize matrix and "turned" matrix with the same random data
for i := 0; i<y; i++ { // row
for j := 0; j<x; j++ { // column
tmp = rand.Intn(100) // can be surprisingly large numbers (e.g. 10^10), without significantly slowing down the calculation
matrix[i][j] = tmp
matrixTurned[j][i] = tmp
}
}
// calculate result matrix with regular matrix multiplication without optimization
fmt.Printf("\n\ncalculate matrix X matrix\n")
start := time.Now()
for i = 0; i<y; i++ { // row
for j = 0; j<x; j++ { // column
tmp = 0
for k = 0; k<x; k++ {
tmp += matrix[k][j] * matrix[i][k]
}
matrixResult[i][j] = tmp
}
}
fmt.Printf("Time elapsed: %s\n", time.Since(start))
// calculate result matrix with regular and optimized matrix
fmt.Printf("\n\ncalculate matrix X matrixTurned\n")
start = time.Now()
for i = 0; i<y; i++ { // row
for j = 0; j<x; j++ { // column
tmp = 0
for k = 0; k<x; k++ {
tmp += matrix[i][k] * matrixTurned[j][k]
}
matrixTurnedResult[i][j] = tmp
}
}
fmt.Printf("Time elapsed: %s\n", time.Since(start))
// make sure the same results were calculated
for i = 0; i<y; i++ {
for j = 0; j<x; j++ {
if matrixTurnedResult[i][j] != matrixResult[i][j] {
fmt.Printf("CRASH BURN\n %d %d doesn't match\n", matrixResult[i][j], matrixTurnedResult[i][j])
return
}
}
}
}