-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdispatch.go
143 lines (123 loc) · 4.54 KB
/
dispatch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Copyright (c) 2020, 2024 Robert Clausecker <[email protected]>
// Positional population counts.
//
// This package contains a set of functions to compute positional
// population counts for arrays of uint8, uint16, uint32, or uint64.
// Optimised assembly optimisations are provided for amd64 (AVX-512,
// AVX2, SSE2), 386 (AVX2, SSE2), and ARM64 (NEON). An optimal
// implementation constrainted by the instruction set extensions
// available on your CPU is chosen automatically at runtime. If no
// assembly implementation exists, a generic fallback implementation
// will be used. The pospop package thus works on all architectures
// supported by the Go toolchain.
//
// The kernels works on a block size of 240, 480, or 960 bytes. A
// buffer size that is a multiple of 64 bytes and at least 10 kB in size
// is recommended. The author's benchmarks show that a buffer size
// around 100 kB appears optimal.
//
// See the example on the Count8 function for what the positional
// population count operation does.
package pospop
import "unsafe"
// each platform must provide arrays count8funcs, coun16funcs,
// count32funcs, and count64funcs of type count8impl, ... listing
// the available implementations. The member available indicates that
// the function would run on this machine. The dispatch code picks the
// lowest-numbered function in the array for which available is true.
// The generic implementation should be available under all
// circumstances so it can be run by the unit tests. The name field
// should be the name of the implementation and should not repeat the
// "count#" prefix.
type count8impl struct {
count8 func(*[8]int, []uint8)
name string
available bool
}
type count16impl struct {
count16 func(*[16]int, []uint16)
name string
available bool
}
type count32impl struct {
count32 func(*[32]int, []uint32)
name string
available bool
}
type count64impl struct {
count64 func(*[64]int, []uint64)
name string
available bool
}
// optimal count8 implementation selected at runtime
var count8func = func() func(*[8]int, []uint8) {
for _, f := range count8funcs {
if f.available {
return f.count8
}
}
panic("no implementation of count8 available")
}()
// optimal count16 implementation selected at runtime
var count16func = func() func(*[16]int, []uint16) {
for _, f := range count16funcs {
if f.available {
return f.count16
}
}
panic("no implementation of count16 available")
}()
// optimal count32 implementation selected at runtime
var count32func = func() func(*[32]int, []uint32) {
for _, f := range count32funcs {
if f.available {
return f.count32
}
}
panic("no implementation of count32 available")
}()
// optimal count64 implementation selected at runtime
var count64func = func() func(*[64]int, []uint64) {
for _, f := range count64funcs {
if f.available {
return f.count64
}
}
panic("no implementation of count64 available")
}()
// Count the number of corresponding set bits of the bytes in str and
// add the results to counts. Each element of counts keeps track of a
// different place; counts[0] for 0x01, counts[1] for 0x02, and so on to
// counts[7] for 0x80.
func CountString(counts *[8]int, str string) {
buf := unsafe.Slice(unsafe.StringData(str), len(str))
count8func(counts, buf)
}
// Count the number of corresponding set bits of the bytes in buf and
// add the results to counts. Each element of counts keeps track of a
// different place; counts[0] for 0x01, counts[1] for 0x02, and so on to
// counts[7] for 0x80.
func Count8(counts *[8]int, buf []uint8) {
count8func(counts, buf)
}
// Count the number of corresponding set bits of the values in buf and
// add the results to counts. Each element of counts keeps track of a
// different place; counts[0] for 0x0001, counts[1] for 0x0002, and so
// on to counts[15] for 0x8000.
func Count16(counts *[16]int, buf []uint16) {
count16func(counts, buf)
}
// Count the number of corresponding set bits of the values in buf and
// add the results to counts. Each element of counts keeps track of a
// different place; counts[0] for 0x0000001, counts[1] for 0x00000002,
// and so on to counts[31] for 0x80000000.
func Count32(counts *[32]int, buf []uint32) {
count32func(counts, buf)
}
// Count the number of corresponding set bits of the values in buf and
// add the results to counts. Each element of counts keeps track of a
// different place; counts[0] for 0x000000000000001, counts[1] for
// 0x0000000000000002, and so on to counts[63] for 0x8000000000000000.
func Count64(counts *[64]int, buf []uint64) {
count64func(counts, buf)
}