-
Notifications
You must be signed in to change notification settings - Fork 4
/
correct.go
67 lines (53 loc) · 1.5 KB
/
correct.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// BUG(polaris): 一个段落英文开头的大小写转换有问题,比如 go中文网 中的 go 不会转为 Go。
package autocorrect
import (
"strings"
"unicode"
"unicode/utf8"
)
// otherDicts accumulates the custom dictionary entries registered via AddDict.
var otherDicts = map[string]string{}

// AddDict merges the entries of dict into the package-level custom
// dictionary. A key that is already present is overwritten with the new
// value. Passing a nil or empty map is a no-op.
func AddDict(dict map[string]string) {
	for from, to := range dict {
		otherDicts[from] = to
	}
}
// AutoSpace inserts a single space at every boundary between a rune that is
// encoded in one UTF-8 byte (ASCII) and a rune that needs several bytes
// (e.g. CJK), provided neither of the two runes is whitespace or punctuation.
//
// Encoded width is the only criterion, so any multi-byte rune (including
// accented Latin letters such as 'é') is treated like CJK text.
//
// The result is the same as folding addSpaceAtBoundary over the input, but
// the output is accumulated in a strings.Builder so the work is linear in
// len(str) instead of re-copying the whole prefix for every rune.
func AutoSpace(str string) string {
	var out strings.Builder
	// The result is at least as long as the input; spaces may add a bit more.
	out.Grow(len(str))

	var prev rune
	first := true
	for _, r := range str {
		if !first &&
			// Exactly one of the two neighbours is a single-byte rune.
			(utf8.RuneLen(prev) == 1) != (utf8.RuneLen(r) == 1) &&
			// Never pad next to existing whitespace or punctuation.
			!unicode.IsSpace(prev) && !unicode.IsPunct(prev) &&
			!unicode.IsSpace(r) && !unicode.IsPunct(r) {
			out.WriteByte(' ')
		}
		out.WriteRune(r)
		prev = r
		first = false
	}
	return out.String()
}
// AutoCorrect rewrites common English words to their customary spelling,
// e.g. " go " -> " Go ", using the built-in dictionary plus any entries
// registered through AddDict.
//
// A word is only matched when it has a space on both sides, so a word at
// the very start or end of str is left unchanged.
func AutoCorrect(str string) string {
	// Length 0 with capacity for every old/new pair. Using make with a
	// non-zero length would prepend empty-string pairs to the replacer.
	oldNews := make([]string, 0, 2*(len(dicts)+len(otherDicts)))

	// Custom entries are added first: strings.NewReplacer compares old
	// strings in argument order, so a user-supplied spelling takes
	// precedence over a built-in one for the same word.
	for from, to := range otherDicts {
		oldNews = append(oldNews, " "+from+" ", " "+to+" ")
	}
	for from, to := range dicts {
		oldNews = append(oldNews, " "+from+" ", " "+to+" ")
	}

	return strings.NewReplacer(oldNews...).Replace(str)
}
// Convert 先执行 AutoSpace,然后执行 AutoCorrect
func Convert(str string) string {
return AutoSpace(AutoCorrect(str))
}
// addSpaceAtBoundary appends nextChar to prefix and returns the result.
// A single space is placed in front of nextChar when the last rune of prefix
// and nextChar fall on different sides of the isLatin test (judged by their
// UTF-8 encoded size) and both runes pass isAllowSpace.
// An empty prefix yields just nextChar.
func addSpaceAtBoundary(prefix string, nextChar rune) string {
	next := string(nextChar)
	if prefix == "" {
		return next
	}

	last, lastSize := utf8.DecodeLastRuneInString(prefix)
	sameClass := isLatin(lastSize) == isLatin(utf8.RuneLen(nextChar))
	if sameClass || !isAllowSpace(nextChar) || !isAllowSpace(last) {
		return prefix + next
	}
	return prefix + " " + next
}
// isLatin reports whether size, the UTF-8 encoded length of a rune in bytes,
// is exactly one byte.
func isLatin(size int) bool {
	const singleByte = 1
	return size == singleByte
}
// isAllowSpace reports whether r is neither a whitespace rune nor a
// punctuation rune, as classified by the unicode package.
func isAllowSpace(r rune) bool {
	return !(unicode.IsSpace(r) || unicode.IsPunct(r))
}