-
Notifications
You must be signed in to change notification settings - Fork 1
/
kanbun.lua
245 lines (229 loc) · 9.16 KB
/
kanbun.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
-- Copyright 2022 Yuanhao Chen
-- This work may be distributed and/or modified under the
-- conditions of the LaTeX Project Public License, either version 1.3
-- of this license or (at your option) any later version.
-- The latest version of this license is in
-- http://www.latex-project.org/lppl.txt
-- and version 1.3 or later is part of all distributions of LaTeX
-- version 2005/12/01 or later.
--
-- This work has the LPPL maintenance status `maintained'.
--
-- The Current Maintainer of this work is Yuanhao Chen.
--
-- This work consists of the files kanbun.sty, kanbun.lua,
-- kanbun-example.tex and kanbun.tex.
--- Pass a piece of TeX code to whoever resumed the running coroutine.
-- The function suspends the current coroutine with `str` as the yielded
-- value and returns nothing once the coroutine is resumed again.
-- @param str value to yield (the callers in this file pass TeX source strings)
function directtex(str)
  local tex_code = str
  coroutine.yield(tex_code)
end
--- Typeset `str` into the TeX box register \kanbun_lua_box and return it.
-- The TeX code is handed over through directtex (i.e. by yielding), so this
-- must run inside the coroutine that is driven from the TeX side.
-- The box register is allocated only on first use: LaTeX's \newbox does not
-- check for an existing definition, so allocating on every call would burn
-- one box register per call.
-- @param str TeX source to be set with \sbox (tokenised under \ExplSyntaxOn)
-- @return whatever tex.getbox('kanbun_lua_box') returns after the \sbox
function to_TeX_box(str)
  directtex(
    "\\ExplSyntaxOn"
    .. "\\ifdefined\\kanbun_lua_box\\else\\newbox\\kanbun_lua_box\\fi"
    .. "\\sbox\\kanbun_lua_box{" .. str .. "}"
    .. "\\ExplSyntaxOff"
  )
  return tex.getbox('kanbun_lua_box')
end
--- Tell whether `val` is one of the sequence entries of `tab`.
-- Only the entries visited by ipairs (tab[1], tab[2], ... up to the first
-- nil) are examined; comparison uses ==.
-- @param tab table to search
-- @param val value to look for
-- @return true when an equal entry exists, false otherwise
function has_value (tab, val)
  local found = false
  for _, entry in ipairs(tab) do
    if entry == val then
      found = true
      break
    end
  end
  return found
end
-- Split a string into a sequence of its UTF-8 encoded characters.
local function utf8_chars(s)
  local chars = {}
  for c in s:gmatch(utf8.charpattern) do
    chars[#chars + 1] = c
  end
  return chars
end

--- Translate the annotated source lines in the global `verb_table` into
-- \kanjiunit TeX code.
--
-- Every piece of TeX is handed over through directtex (which yields), so
-- this function has to run inside a coroutine. It works in two passes:
--   1. each character outside user brackets is classified with \matchkana
--      and wrapped either as okurigana or as a kanji unit “⌊c⌋”;
--   2. each line is normalised, cut into units, and every unit is rewritten
--      as a \kanjiunit{...} call.
-- At the end it defines \printkanbun, \printkanbuncode, \printkanbunnopar
-- and \printkanbunnoparcode and emits \continuefalse.
--
-- Reads the globals `verb_table` and `kanbunzwtosp`. The configuration and
-- result tables (brackets, left_brackets, right_brackets, punctuation_str,
-- left_punctuation_str, lines_chars_table, tex_kana_bool, tex_true_bool,
-- annotated_lines_table, output, nopar_output) are deliberately left global;
-- `output` and `nopar_output` are read by the \directlua code emitted below.
-- All scratch variables are local.
function main_loop()
  -- User-facing bracket pairs. The markers ⌊ ⌋, ‘ ’ and “ ” are used
  -- internally below for kanji, multi-kanji groups and units.
  brackets = {}
  brackets["furigana"] = {"(", ")"}
  brackets["okurigana"] = {"{","}"} -- allow user to omit
  brackets["kaeriten"] = {"[","]"}
  brackets["furigana4saidokumoji"] = {"‹","›"}
  brackets["okurigana4saidokumoji"] = {"«","»"}
  brackets["punctuation"] = {"⦉","⦊"}

  left_brackets = {}
  right_brackets = {}
  for _, pair in pairs(brackets) do
    table.insert(left_brackets, pair[1])
    table.insert(right_brackets, pair[2])
  end

  punctuation_str = "㆐〻―—・、,。…「」『』!?:"
  left_punctuation_str = "「『"
  lines_chars_table = {}
  tex_kana_bool = token.create("g_kana_bool")
  tex_true_bool = token.create("c_true_bool")

  ---------------------------------------------------------------------------
  -- Pass 1: classify every character that is outside user brackets.
  ---------------------------------------------------------------------------
  for _, l in ipairs(verb_table) do
    local split_line = utf8_chars(l)
    -- some unknown bug (in \matchkana ?) solved by changing the last entry
    -- to an empty string if not ascii
    -- (#split_line is computed per line, so an empty line no longer reuses
    -- the previous line's length or fails on nil arithmetic.)
    local last_index = #split_line
    if utf8.codepoint(split_line[last_index] or " ") < 128 then
      split_line[last_index + 1] = ""
    else
      split_line[last_index] = ""
    end

    -- Index of the currently open left bracket, 0 when outside brackets.
    local last_bracket_index = 0
    for j, c in ipairs(split_line) do
      if has_value(left_brackets, c) then
        last_bracket_index = j
      end
      if last_bracket_index == 0 then
        directtex("\\matchkana{"..c.."}")
        if tex_kana_bool.mode == tex_true_bool.mode then
          split_line[j] = brackets["okurigana"][1]..c..brackets["okurigana"][2]
        else
          split_line[j] = "“⌊"..c.."⌋”"
        end
      end
      if has_value(right_brackets, c) then
        last_bracket_index = 0
      end
    end
    table.insert(lines_chars_table, split_line)
  end

  ---------------------------------------------------------------------------
  -- Pass 2: normalise each line, cut it into units, emit \kanjiunit calls.
  ---------------------------------------------------------------------------
  annotated_lines_table = {}
  -- Private-use characters that temporarily enclose the multi-kanji count.
  local tmp_number_of_multikanji_braces = {utf8.char(61442), utf8.char(61443)}

  for _, l in ipairs(lines_chars_table) do
    local line = table.concat(l, "")
    -- merge adjacent okurigana groups
    line = string.gsub(line, brackets["okurigana"][2]..brackets["okurigana"][1], "")
    -- pull annotations that follow a unit inside that unit
    for _, v in pairs(brackets) do
      line = string.gsub(line, "”(%"..v[1]..")", "%1")
      line = string.gsub(line, "(%"..v[2]..")(“)", "%1”%2")
    end
    -- a line ending in an annotation still needs its unit closed
    local last
    for c in line:gmatch(utf8.charpattern) do
      last = c
    end
    if has_value(right_brackets, last) then
      -- was `str = str .. '”'`, which concatenated onto an undefined global
      line = line .. '”'
    end
    line = string.gsub(line, "⌊‘⌋”", "‘")
    line = string.gsub(line, "“⌊’⌋", "’")
    for p in punctuation_str:gmatch(utf8.charpattern) do
      line = string.gsub(line, "”“⌊("..p..")⌋", "⦉%1⦊")
    end
    line = string.gsub(line, "⦊⦉", "")
    -- reverse the makeshift bug fix to \matchkana
    line = string.gsub(line, "“⌊⌋”", "")

    -- process line into units: collect what lies between “ and ”
    local units = {}
    local unit_content = {}
    local depth = 0
    for _, c in ipairs(utf8_chars(line)) do
      if c == "”" then
        depth = depth - 1
      end
      if depth < 1 then
        table.insert(units, table.concat(unit_content, ""))
        unit_content = {}
      else
        table.insert(unit_content, c)
      end
      if c == "“" then
        depth = depth + 1
      end
    end

    -- account for multikanji
    -- NOTE: `units` is modified while ipairs walks it; ipairs re-reads
    -- units[i] on every step, and the statement order here is kept as is.
    for j, u in ipairs(units) do
      local split_unit = utf8_chars(u)
      local number_of_multikanji = 0
      local last_k
      for k, c in ipairs(split_unit) do
        if c == "“" then
          number_of_multikanji = number_of_multikanji + 1
          last_k = k
        end
      end
      if last_k then
        -- tag the last nested unit with the number of kanji in the group
        split_unit[last_k] = "“"..tmp_number_of_multikanji_braces[1]..number_of_multikanji..tmp_number_of_multikanji_braces[2]
      end
      units[j] = table.concat(split_unit, "")
      if number_of_multikanji > 0 then
        units[j] = string.gsub(units[j], "“(.-)”’(.*)", "“%1%2”")
        units[j] = string.gsub(units[j], "‘", "")
        local tmp_unit = units[j]
        table.remove(units, j)
        -- separate counter instead of assigning to the loop variable
        local insert_at = j
        for new_unit in tmp_unit:gmatch"“(.-)”" do
          table.insert(units, insert_at, new_unit)
          insert_at = insert_at + 1
        end
      end
    end

    -- drop empty units; iterate backwards so removals do not skip entries
    for j = #units, 1, -1 do
      if units[j] == "" then
        table.remove(units, j)
      end
    end

    -- An opening quote found in one unit's punctuation is carried over to
    -- the following unit as its left punctuation.
    local next_left_punct_ = ""
    for j, u in ipairs(units) do
      local right_okuri_ = u:match("%{(.-)%}") or ""
      local kanji_ = u:match("⌊(.-)⌋") or ""
      local right_furi_ = u:match("%((.-)%)") or ""
      local left_furi_ = u:match("‹(.-)›") or ""
      local left_okuri_ = u:match("«(.-)»") or ""
      local punct_ = u:match("⦉(.-)⦊") or ""
      local kaeriten_ = u:match("%[(.-)%]") or ""
      local multikanji_ = tonumber(u:match(tmp_number_of_multikanji_braces[1].."(.-)"..tmp_number_of_multikanji_braces[2]) or "") or 0

      -- plain search: kanji_ is document text, not a Lua pattern
      if punctuation_str:find(kanji_, 1, true) then
        punct_ = kanji_
        kanji_ = ""
      end

      local left_punct_ = next_left_punct_
      next_left_punct_ = ""
      for p_ in punct_:gmatch(utf8.charpattern) do
        if left_punctuation_str:find(p_, 1, true) then
          next_left_punct_ = next_left_punct_ .. p_
        end
      end
      for p_ in left_punctuation_str:gmatch(utf8.charpattern) do
        punct_ = string.gsub(punct_, p_, "")
      end

      if u:match(tmp_number_of_multikanji_braces[1]) then
        -- sum the widths of the preceding units of the group, measured in
        -- units of kanbunzwtosp
        local multiruby_raise_by_ = 0
        for trace_back_index = 1, multikanji_ - 1 do
          multiruby_raise_by_ = multiruby_raise_by_ + to_TeX_box(units[j - trace_back_index]).width/(kanbunzwtosp)
        end
        -- open an \hbox at the first unit of the group; the extra closing
        -- brace at the end of the string below closes it
        units[j - multikanji_ + 1] = units[j - multikanji_ + 1]:gsub("\\kanjiunit", "\\hbox{\\kanjiunit")
        units[j] = "\\kanjiunit{\\multifuriokuri["..multiruby_raise_by_.."]{"..right_furi_.."}{"..right_okuri_.."}}{"..left_punct_.."}{"..kanji_.."}{"..punct_.."}{"..kaeriten_.."}{\\multifuriokuri["..multiruby_raise_by_.."]{"..left_furi_.."}{"..left_okuri_.."}}}"
      else
        if kanji_ == "" then
          units[j] = ""
        else
          units[j] = "\\kanjiunit{\\furiokuri{"..right_furi_.."}{"..right_okuri_.."}}{"..left_punct_.."}{"..kanji_.."}{"..punct_.."}{"..kaeriten_.."}{\\furiokuri{"..left_furi_.."}{"..left_okuri_.."}}"
        end
      end
    end

    table.insert(annotated_lines_table, table.concat(units, ""))
  end

  -- output
  output = "{\\kanbunfont"..table.concat(annotated_lines_table, "\\par").."\\par}"
  directtex("\\def\\printkanbun{"..output.."}")
  directtex("\\def\\printkanbuncode{\\directlua{print('')print(output)}}")
  -- nopar output
  nopar_output = table.concat(annotated_lines_table, "\\\\")
  directtex("\\def\\printkanbunnopar{"..nopar_output.."}")
  directtex("\\def\\printkanbunnoparcode{\\directlua{print('')print(nopar_output)}}")
  -- end loop in TeX
  directtex("\\continuefalse")
end