From a939dd459bdf652ff20a0da8fcf62aff13ab942c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 1 Dec 2024 16:06:51 +0000 Subject: [PATCH 1/2] use a strings.Replacer to reverse names in internal/abi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way, rather than using a double loop quadratic algorithm to search for each name to replace in a string, we can make use of the reasonably efficient generic replacer which makes use of tries. Copying some code from the strings package is not ideal, but it beats having to re-implement such an algorithm ourselves. Not only is the algorithm much faster, as we are no longer quadratic, but the replacer also appends into a buffer to avoid repeated string copies, which means we allocate fewer bytes per operation. │ old │ new │ │ sec/op │ sec/op vs base │ AbiOriginalNames-8 135708.0n ± 0% 391.1n ± 5% -99.71% (p=0.001 n=7) │ old │ new │ │ B/s │ B/s vs base │ AbiOriginalNames-8 2.565Mi ± 0% 890.112Mi ± 4% +34597.03% (p=0.001 n=7) │ old │ new │ │ B/op │ B/op vs base │ AbiOriginalNames-8 5464.0 ± 0% 848.0 ± 0% -84.48% (p=0.001 n=7) │ old │ new │ │ allocs/op │ allocs/op vs base │ AbiOriginalNames-8 18.00 ± 0% 16.00 ± 0% -11.11% (p=0.001 n=7) --- bench_test.go | 12 +- reflect_abi_code.go | 267 ++++++++++++++++++++++++++++++++++++++----- reflect_abi_patch.go | 9 +- 3 files changed, 253 insertions(+), 35 deletions(-) diff --git a/bench_test.go b/bench_test.go index af5005b3..40b14e88 100644 --- a/bench_test.go +++ b/bench_test.go @@ -178,14 +178,15 @@ func BenchmarkAbiOriginalNames(b *testing.B) { for n := range 2000 { name := fmt.Sprintf("name_%d", n) garbled := hashWithCustomSalt(salt, name) - _originalNamePairs = append(_originalNamePairs, [2]string{garbled, name}) + _originalNamePairs = append(_originalNamePairs, garbled, name) } - // Pick twenty names at random to use as inputs below. 
+ _originalNamesInit() + // Pick twenty obfuscated names at random to use as inputs below. // Use a deterministic random source so it's stable between benchmark runs. rnd := rand.New(rand.NewPCG(1, 2)) var chosen []string - for _, pair := range _originalNamePairs { - chosen = append(chosen, pair[0]) + for i := 0; i < len(_originalNamePairs); i += 2 { + chosen = append(chosen, _originalNamePairs[i]) } rnd.Shuffle(len(chosen), func(i, j int) { chosen[i], chosen[j] = chosen[j], chosen[i] @@ -223,5 +224,6 @@ func BenchmarkAbiOriginalNames(b *testing.B) { } } }) - _originalNamePairs = [][2]string{} + _originalNamePairs = []string{} + _originalNamesReplacer = nil } diff --git a/reflect_abi_code.go b/reflect_abi_code.go index ce7b8781..53f467cb 100644 --- a/reflect_abi_code.go +++ b/reflect_abi_code.go @@ -1,7 +1,6 @@ package main -// The "name" internal/abi passes to this function doesn't have to be a simple "someName" - +// The "name" internal/abi passes to this function doesn't have to be a simple "someName", // it can also be for function names like "*pkgName.FuncName" (obfuscated) // or for structs the entire struct definition, like // @@ -22,41 +21,253 @@ package main // Injected code below this line. +// Each pair is the obfuscated and then the real name. +// The slice is sorted from shortest to longest obfuscated name. +var _originalNamePairs = []string{} + +var _originalNamesReplacer *_genericReplacer + +//disabledgo:linkname _originalNamesInit internal/abi._originalNamesInit +func _originalNamesInit() { + _originalNamesReplacer = _makeGenericReplacer(_originalNamePairs) +} + //disabledgo:linkname _originalNames internal/abi._originalNames func _originalNames(name string) string { - // We can stop once there aren't enough bytes to fit another obfuscated name. - for i := 0; i <= len(name)-minHashLength; { - switch name[i] { - case ' ', '.', '*', '{', '}', '[', ']': - // These characters never start an obfuscated name. 
- i++ - continue + return _originalNamesReplacer.Replace(name) +} + +// -- Lifted from internal/stringslite -- + +func _hasPrefix(s, prefix string) bool { + return len(s) >= len(prefix) && s[0:len(prefix)] == prefix +} + +// -- Lifted from strings as of Go 1.23 -- +// +// With minor modifications to avoid type assertions, +// as any reflection in internal/abi causes a recursive call to the runtime +// which locks up the entire runtime. Moreover, we can't import strings. +// +// Updating the code below should not be necessary in general, +// unless upstream Go makes significant improvements to this replacer implementation. + +// _trieNode is a node in a lookup trie for prioritized key/value pairs. Keys +// and values may be empty. For example, the trie containing keys "ax", "ay", +// "bcbc", "x" and "xy" could have eight nodes: +// +// n0 - +// n1 a- +// n2 .x+ +// n3 .y+ +// n4 b- +// n5 .cbc+ +// n6 x+ +// n7 .y+ +// +// n0 is the root node, and its children are n1, n4 and n6; n1's children are +// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked +// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 +// (marked with a trailing "+") are complete keys. +type _trieNode struct { + // value is the value of the trie node's key/value pair. It is empty if + // this node is not a complete key. + value string + // priority is the priority (higher is more important) of the trie node's + // key/value pair; keys are not necessarily matched shortest- or longest- + // first. Priority is positive if this node is a complete key, and zero + // otherwise. In the example above, positive/zero priorities are marked + // with a trailing "+" or "-". + priority int + + // A trie node may have zero, one or more child nodes: + // * if the remaining fields are zero, there are no children. + // * if prefix and next are non-zero, there is one child in next. + // * if table is non-zero, it defines all the children. 
+ // + // Prefixes are preferred over tables when there is one child, but the + // root node always uses a table for lookup efficiency. + + // prefix is the difference in keys between this trie node and the next. + // In the example above, node n4 has prefix "cbc" and n4's next node is n5. + // Node n5 has no children and so has zero prefix, next and table fields. + prefix string + next *_trieNode + + // table is a lookup table indexed by the next byte in the key, after + // remapping that byte through _genericReplacer.mapping to create a dense + // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and + // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and + // _genericReplacer.tableSize will be 5. Node n0's table will be + // []*_trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped + // 'a', 'b' and 'x'. + table []*_trieNode +} + +func (t *_trieNode) add(key, val string, priority int, r *_genericReplacer) { + if key == "" { + if t.priority == 0 { + t.value = val + t.priority = priority } - remLen := len(name[i:]) - found := false - for _, pair := range _originalNamePairs { - obfName := pair[0] - real := pair[1] - keyLen := len(obfName) - if remLen < keyLen { - // Since the pairs are sorted from shortest to longest name, - // we know that the rest of the pairs are at least just as long. 
+ return + } + + if t.prefix != "" { + var n int // length of the longest common prefix + for ; n < len(t.prefix) && n < len(key); n++ { + if t.prefix[n] != key[n] { break } - if name[i:i+keyLen] == obfName { - name = name[:i] + real + name[i+keyLen:] - found = true - i += len(real) + } + if n == len(t.prefix) { + t.next.add(key[n:], val, priority, r) + } else if n == 0 { + var prefixNode *_trieNode + if len(t.prefix) == 1 { + prefixNode = t.next + } else { + prefixNode = &_trieNode{ + prefix: t.prefix[1:], + next: t.next, + } + } + keyNode := new(_trieNode) + t.table = make([]*_trieNode, r.tableSize) + t.table[r.mapping[t.prefix[0]]] = prefixNode + t.table[r.mapping[key[0]]] = keyNode + t.prefix = "" + t.next = nil + keyNode.add(key[1:], val, priority, r) + } else { + // Insert new node after the common section of the prefix. + next := &_trieNode{ + prefix: t.prefix[n:], + next: t.next, + } + t.prefix = t.prefix[:n] + t.next = next + next.add(key[n:], val, priority, r) + } + } else if t.table != nil { + // Insert into existing table. + m := r.mapping[key[0]] + if t.table[m] == nil { + t.table[m] = new(_trieNode) + } + t.table[m].add(key[1:], val, priority, r) + } else { + t.prefix = key + t.next = new(_trieNode) + t.next.add("", val, priority, r) + } +} + +func (r *_genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { + // Iterate down the trie to the end, and grab the value and keylen with + // the highest priority. 
+ bestPriority := 0 + node := &r.root + n := 0 + for node != nil { + if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { + bestPriority = node.priority + val = node.value + keylen = n + found = true + } + + if s == "" { + break + } + if node.table != nil { + index := r.mapping[s[0]] + if int(index) == r.tableSize { break } + node = node.table[index] + s = s[1:] + n++ + } else if node.prefix != "" && _hasPrefix(s, node.prefix) { + n += len(node.prefix) + s = s[len(node.prefix):] + node = node.next + } else { + break } - if !found { - i++ + } + return +} + +type _genericReplacer struct { + root _trieNode + // tableSize is the size of a trie node's lookup table. It is the number + // of unique key bytes. + tableSize int + // mapping maps from key bytes to a dense index for _trieNode.table. + mapping [256]byte +} + +func _makeGenericReplacer(oldnew []string) *_genericReplacer { + r := new(_genericReplacer) + // Find each byte used, then assign them each an index. + for i := 0; i < len(oldnew); i += 2 { + key := oldnew[i] + for j := 0; j < len(key); j++ { + r.mapping[key[j]] = 1 } } - return name + + for _, b := range r.mapping { + r.tableSize += int(b) + } + + var index byte + for i, b := range r.mapping { + if b == 0 { + r.mapping[i] = byte(r.tableSize) + } else { + r.mapping[i] = index + index++ + } + } + // Ensure root node uses a lookup table (for performance). + r.root.table = make([]*_trieNode, r.tableSize) + + for i := 0; i < len(oldnew); i += 2 { + r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) + } + return r } -// Each pair is the obfuscated and then the real name. -// The slice is sorted from shortest to longest obfuscated name. -var _originalNamePairs = [][2]string{} +func (r *_genericReplacer) Replace(s string) string { + dst := make([]byte, 0, len(s)) + var last int + var prevMatchEmpty bool + for i := 0; i <= len(s); { + // Fast path: s[i] is not a prefix of any pattern. 
+ if i != len(s) && r.root.priority == 0 { + index := int(r.mapping[s[i]]) + if index == r.tableSize || r.root.table[index] == nil { + i++ + continue + } + } + + // Ignore the empty match iff the previous loop found the empty match. + val, keylen, match := r.lookup(s[i:], prevMatchEmpty) + prevMatchEmpty = match && keylen == 0 + if match { + dst = append(dst, s[last:i]...) + dst = append(dst, val...) + i += keylen + last = i + continue + } + i++ + } + if last != len(s) { + dst = append(dst, s[last:]...) + } + return string(dst) +} diff --git a/reflect_abi_patch.go b/reflect_abi_patch.go index 1ecd80f4..30392e74 100644 --- a/reflect_abi_patch.go +++ b/reflect_abi_patch.go @@ -26,6 +26,11 @@ func abiNamePatch(path string) (string, error) { originalNames := ` //go:linkname _originalNames func _originalNames(name string) string + +//go:linkname _originalNamesInit +func _originalNamesInit() + +func init() { _originalNamesInit() } ` return str + originalNames, nil @@ -60,7 +65,7 @@ func reflectMainPrePatch(path string) ([]byte, error) { // mappings after all packages have been analyzed. 
func reflectMainPostPatch(file []byte, lpkg *listedPackage, pkg pkgCache) []byte { obfVarName := hashWithPackage(lpkg, "_originalNamePairs") - namePairs := fmt.Appendf(nil, "%s = [][2]string{", obfVarName) + namePairs := fmt.Appendf(nil, "%s = []string{", obfVarName) keys := slices.SortedFunc(maps.Keys(pkg.ReflectObjectNames), func(a, b string) int { if c := cmp.Compare(len(a), len(b)); c != 0 { @@ -70,7 +75,7 @@ func reflectMainPostPatch(file []byte, lpkg *listedPackage, pkg pkgCache) []byte }) namePairsFilled := bytes.Clone(namePairs) for _, obf := range keys { - namePairsFilled = fmt.Appendf(namePairsFilled, "{%q, %q},", obf, pkg.ReflectObjectNames[obf]) + namePairsFilled = fmt.Appendf(namePairsFilled, "%q, %q,", obf, pkg.ReflectObjectNames[obf]) } return bytes.Replace(file, namePairs, namePairsFilled, 1) From acaa702967b89f15660f335eb90ea3e745e573da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 1 Dec 2024 16:12:45 +0000 Subject: [PATCH 2/2] go back to sorting _originalNamePairs lexicographically Now that we only use the list to create a replacer at init time, we no longer need to spend extra effort sorting by length first. The benchmark shows no measurable difference in performance. --- reflect_abi_code.go | 2 +- reflect_abi_patch.go | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/reflect_abi_code.go b/reflect_abi_code.go index 53f467cb..ad944737 100644 --- a/reflect_abi_code.go +++ b/reflect_abi_code.go @@ -22,7 +22,7 @@ package main // Injected code below this line. // Each pair is the obfuscated and then the real name. -// The slice is sorted from shortest to longest obfuscated name. +// The pairs are sorted by obfuscated name, lexicographically. 
var _originalNamePairs = []string{} var _originalNamesReplacer *_genericReplacer diff --git a/reflect_abi_patch.go b/reflect_abi_patch.go index 30392e74..7915d235 100644 --- a/reflect_abi_patch.go +++ b/reflect_abi_patch.go @@ -2,7 +2,6 @@ package main import ( "bytes" - "cmp" _ "embed" "fmt" "maps" @@ -67,12 +66,7 @@ func reflectMainPostPatch(file []byte, lpkg *listedPackage, pkg pkgCache) []byte obfVarName := hashWithPackage(lpkg, "_originalNamePairs") namePairs := fmt.Appendf(nil, "%s = []string{", obfVarName) - keys := slices.SortedFunc(maps.Keys(pkg.ReflectObjectNames), func(a, b string) int { - if c := cmp.Compare(len(a), len(b)); c != 0 { - return c - } - return cmp.Compare(a, b) - }) + keys := slices.Sorted(maps.Keys(pkg.ReflectObjectNames)) namePairsFilled := bytes.Clone(namePairs) for _, obf := range keys { namePairsFilled = fmt.Appendf(namePairsFilled, "%q, %q,", obf, pkg.ReflectObjectNames[obf])