This repository has been archived by the owner on Nov 25, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
string_helper.rb
147 lines (133 loc) · 2.53 KB
/
string_helper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
require 'active_support/all'
require 'uri'
# Extensions to String for converting Vietnamese text to a Telex-style
# transcription, plus two small URL helpers used by a crawler.
#
# The case helpers rely on ActiveSupport's String#mb_chars.
class String
  # Maps an upper-case Vietnamese letter carrying diacritics to its Telex
  # spelling (base letter, optional shape modifier, optional tone letter).
  CHARMAPTELEX = {
    "Á" => "AS",
    "À" => "AF",
    "Ả" => "AR",
    "Ã" => "AX",
    "Ạ" => "AJ",
    "Ă" => "AW",
    "Ắ" => "AWS",
    "Ằ" => "AWF",
    "Ẳ" => "AWR",
    "Ẵ" => "AWX",
    "Ặ" => "AWJ",
    "Â" => "AA",
    "Ấ" => "AAS",
    "Ầ" => "AAF",
    "Ẩ" => "AAR",
    "Ẫ" => "AAX",
    "Ậ" => "AAJ",
    "Đ" => "DD",
    "É" => "ES",
    "È" => "EF",
    "Ẻ" => "ER",
    "Ẽ" => "EX",
    "Ẹ" => "EJ",
    "Ê" => "EE",
    "Ế" => "EES",
    "Ề" => "EEF",
    "Ể" => "EER",
    "Ễ" => "EEX",
    "Ệ" => "EEJ",
    "Í" => "IS",
    "Ì" => "IF",
    "Ỉ" => "IR",
    "Ĩ" => "IX",
    "Ị" => "IJ",
    "Ơ" => "OW",
    "Ó" => "OS",
    "Ò" => "OF",
    "Ỏ" => "OR",
    "Õ" => "OX",
    "Ọ" => "OJ",
    "Ô" => "OO",
    "Ố" => "OOS",
    "Ồ" => "OOF",
    "Ổ" => "OOR",
    "Ỗ" => "OOX",
    "Ộ" => "OOJ",
    "Ớ" => "OWS",
    "Ờ" => "OWF",
    "Ở" => "OWR",
    "Ỡ" => "OWX",
    "Ợ" => "OWJ",
    "Ư" => "UW",
    "Ú" => "US",
    "Ù" => "UF",
    "Ủ" => "UR",
    "Ũ" => "UX",
    "Ụ" => "UJ",
    "Ứ" => "UWS",
    "Ừ" => "UWF",
    "Ử" => "UWR",
    "Ữ" => "UWX",
    "Ự" => "UWJ",
    "Ý" => "YS",
    "Ỳ" => "YF",
    "Ỷ" => "YR",
    "Ỹ" => "YX",
    "Ỵ" => "YJ"
  }.freeze

  # Consonant clusters: nGh nG gH cH pH kH tH nH tR qU
  # Key: an upper-case letter; value: the units which, when they are the
  # unit emitted immediately before that letter, absorb the letter into a
  # single unit (e.g. "T" followed by "R" becomes "TR").
  # The constant name (sic) is kept as-is because it is publicly visible.
  # NOTE(review): "ngh" is listed above, but 'H' does not accept a preceding
  # "NG", so "ngh" currently comes out as "ng h" - confirm whether intended.
  MULTICOSONANT = {
    'G' => ['N'].freeze,
    'H' => ['G', 'C', 'P', 'K', 'T', 'N'].freeze,
    'R' => ['T'].freeze,
    'U' => ['Q'].freeze
  }.freeze

  # Returns a lower-cased copy of the string, computed through
  # ActiveSupport's multibyte proxy.
  def uni_downcase
    mb_chars.downcase.wrapped_string
  end

  # Returns an upper-cased copy of the string, computed through
  # ActiveSupport's multibyte proxy.
  def uni_upcase
    mb_chars.upcase.wrapped_string
  end

  # Returns the spaced Telex transcription of the string, in lower case.
  #
  # Every character is upcased and turned into one unit: its CHARMAPTELEX
  # spelling when it has one, otherwise the character itself. A letter listed
  # in MULTICOSONANT is appended to the previous unit instead when that unit
  # is one of its accepted predecessors. Units are joined with single spaces,
  # e.g. "TRỜI" => "tr owf i".
  def get_converted_suffix
    units = []
    each_char do |char|
      letter = char.uni_upcase
      previous = units.last
      predecessors = MULTICOSONANT[letter]
      if previous && predecessors && predecessors.include?(previous)
        # Second letter of a consonant cluster: merge into the previous unit.
        units[-1] = previous + letter
      else
        units << CHARMAPTELEX.fetch(letter, letter)
      end
    end
    units.join(" ").downcase
  end

  # Returns the compact Telex transcription: the spaced form with every
  # character other than ASCII letters and digits removed,
  # e.g. "TRỜI" => "trowfi".
  def get_converted_prefix
    get_converted_suffix.delete('^A-Za-z0-9')
  end

  # Returns one line holding the compact form, space padding, then the spaced
  # form, e.g. "TRỜI" => "trowfi          tr owf i".
  #
  # The compact form is padded with spaces to a 16-character column. When it
  # already fills or exceeds that column, a single space is used so the two
  # forms always stay separated (a negative pad count would otherwise make
  # String#* raise ArgumentError).
  def to_telex
    column_width = 16
    suffix = get_converted_suffix
    prefix = suffix.delete('^A-Za-z0-9')
    padding = [column_width - prefix.length, 1].max
    prefix + (" " * padding) + suffix
  end

  # Returns the string with the pause marker " sp" appended.
  def with_pause
    self + " sp"
  end

  # Not implemented; currently returns nil.
  def to_vni
    # TODO: implement when in need
  end

  # Crawler helper: a string is treated as a category path when it starts
  # with "/".
  def is_category
    start_with?("/")
  end

  # Returns true when the string matches URI.regexp or is a category path
  # (see #is_category), false otherwise. The match is not anchored here, so
  # the URI pattern may match anywhere inside the string.
  def is_valid_url
    !!(self =~ URI::regexp) || is_category
  end
end