-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpinyin.go
99 lines (89 loc) · 1.98 KB
/
pinyin.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package runeconverter
import (
"bufio"
"bytes"
"errors"
"os"
"sort"
"strconv"
"strings"
)
// Pinyin output styles, passed as the style argument of NewPinyin.
const (
// PinyinStyleNormal makes Convert write the first reading of a rune with its
// last byte removed.
// NOTE(review): presumably that byte is a trailing tone digit; confirm against
// the table file format.
PinyinStyleNormal = iota
// PinyinStyleTone makes Convert write the first reading of a rune unchanged.
PinyinStyleTone
)
// pinyinItem is one row of the lookup table: a code and the readings that
// were listed for it.
type pinyinItem struct {
	Code    uint32    // numeric code the readings belong to
	Cnt     int       // number of populated entries in Pinyins
	Pinyins [3][]byte // readings, in the order they were supplied
}

// pinyinTable is a slice of items that implements sort.Interface, ordering
// its elements by ascending Code.
type pinyinTable []pinyinItem

// Len reports the number of items in the table.
func (t pinyinTable) Len() int { return len(t) }

// Less reports whether the item at index a has a smaller Code than the item
// at index b.
func (t pinyinTable) Less(a, b int) bool { return t[a].Code < t[b].Code }

// Swap exchanges the items at indexes a and b.
func (t pinyinTable) Swap(a, b int) {
	tmp := t[a]
	t[a] = t[b]
	t[b] = tmp
}
// Pinyin converts runes to pinyin using a lookup table loaded by NewPinyin.
type Pinyin struct {
// table holds 1024 buckets. An item is stored in the bucket selected by the
// low 10 bits of its Code, and NewPinyin sorts every bucket by ascending Code
// so that Convert can binary-search it.
table [1024][]pinyinItem
// style is the value passed to NewPinyin; Convert compares it with
// PinyinStyleNormal to choose the output form.
style int
}
// NewPinyin loads a pinyin lookup table from the text file at dbpath and
// returns a converter that renders matches in the given style
// (PinyinStyleNormal or PinyinStyleTone).
//
// Every line of the file must have the form "code,reading[,reading...]".
// The code is parsed by strconv.ParseUint with base 0, so both "0x4e2d" and
// "20013" are accepted, and it must fit in 32 bits. Only as many readings as
// pinyinItem.Pinyins can hold are kept; any further ones are ignored.
//
// An error is returned when the file cannot be opened or read to the end
// (including a line too long for bufio.Scanner), when a line has fewer than
// two comma-separated fields, or when a code fails to parse. The returned
// *Pinyin is nil whenever the error is non-nil.
func NewPinyin(dbpath string, style int) (*Pinyin, error) {
	f, err := os.Open(dbpath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	ret := &Pinyin{style: style}
	// The bucket is chosen by masking the code with len(table)-1; Convert uses
	// the same mask for lookups.
	mask := uint32(len(ret.table)) - 1

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Split(scanner.Text(), ",")
		if len(fields) < 2 {
			return nil, errors.New("Invalid table file")
		}
		code, err := strconv.ParseUint(fields[0], 0, 32)
		if err != nil {
			return nil, err
		}

		item := pinyinItem{
			Code: uint32(code),
			Cnt:  len(fields) - 1,
		}
		if item.Cnt > len(item.Pinyins) {
			item.Cnt = len(item.Pinyins)
		}
		for i := 0; i < item.Cnt; i++ {
			item.Pinyins[i] = []byte(fields[i+1])
		}

		idx := item.Code & mask
		ret.table[idx] = append(ret.table[idx], item)
	}
	// Scan returns false on a read error or an over-long line as well as at
	// EOF; without this check a truncated table would be returned as success.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// Convert binary-searches each bucket, so every bucket must be ordered by
	// Code. A stable sort keeps duplicate codes in file order, which makes the
	// first entry in the file the one that Convert finds.
	for i := range ret.table {
		sort.Stable(pinyinTable(ret.table[i]))
	}
	return ret, nil
}
// Convert appends the pinyin of r to buf.
//
// If r is present in the table, its first reading is written: unchanged for
// any style other than PinyinStyleNormal, and with its final byte removed for
// PinyinStyleNormal. If r is not in the table, r itself is written to buf
// as UTF-8.
//
// NOTE(review): the byte dropped in PinyinStyleNormal is presumably a trailing
// tone digit; confirm against the table file format.
func (py *Pinyin) Convert(r rune, buf *bytes.Buffer) {
	code := uint32(r)
	// Same bucket selection as the loader: mask with len(table)-1.
	bucket := py.table[int(r)&(len(py.table)-1)]

	// Binary search for the first item whose Code is >= code.
	i := sort.Search(len(bucket), func(k int) bool {
		return bucket[k].Code >= code
	})
	if i == len(bucket) || bucket[i].Code != code {
		buf.WriteRune(r)
		return
	}

	reading := bucket[i].Pinyins[0]
	// Guard the trim: slicing an empty reading to [:len-1] would panic.
	if py.style == PinyinStyleNormal && len(reading) > 0 {
		reading = reading[:len(reading)-1]
	}
	buf.Write(reading)
}