Skip to content

Commit

Permalink
拆分校正20240625
Browse files Browse the repository at this point in the history
  • Loading branch information
cppxiaozhu committed Jun 25, 2024
1 parent 15543c3 commit 2ce709e
Show file tree
Hide file tree
Showing 39 changed files with 319,609 additions and 109,887 deletions.
Binary file modified GB18030-27533.txt
Binary file not shown.
Binary file modified Wubi98-Unicode15.1.txt
Binary file not shown.
6 changes: 6 additions & 0 deletions fcitx5-table-python/auto.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@echo off
rem Launcher for mk-fcitx5-auto.py: runs the generator from this script's own
rem directory so the relative table paths used by the Python script resolve.
rem The messages below are UTF-8 text, so switch the console code page first.
chcp 65001 >nul
cd /d "%~dp0"
echo 当前目录是: %cd%
python mk-fcitx5-auto.py
rem Do not report success when the generator exited with an error.
if errorlevel 1 (
    echo "处理失败"
    exit /b 1
)
echo "处理完毕"
::pause
89 changes: 89 additions & 0 deletions fcitx5-table-python/mk-fcitx5-auto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# pip安装模块
# pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple pandas tqdm
import os
import re
import time
import linecache
import tkinter as tk
from tkinter import filedialog
import shutil
# All inputs and outputs are resolved against the current working directory.
current_path = os.getcwd()
# Output directory for the generated tables; an existing one is reused.
os.makedirs(f"{current_path}/生成结果/", exist_ok=True)
print("已新建【生成结果】文件夹!")

# Create the Tk root window and hide it immediately.
root = tk.Tk()
root.withdraw()
# Source table that the rest of the script sorts, de-duplicates and converts.
table_path = f"{current_path}/超集-单义表.txt"

# Re-sort: load every "character<TAB>code" entry of the table, order the
# entries by code, then overwrite the table with the sorted result.

myDicList = []

with open(table_path, 'r', encoding='utf-16') as afile:
    for aline in afile:
        aline = aline.rstrip()
        if not aline:
            # A blank line (for example a stray empty line at the end of the
            # file) carries no entry; indexing its fields would raise
            # IndexError and abort the whole run.
            continue
        alst = aline.split('\t')
        # Column 0 is the character string, column 1 is its code.  A non-blank
        # line without a tab still fails loudly with IndexError.
        myDicList.append({"汉字": alst[0], "编码": alst[1]})

print("创建词典列表,以便做汇总排序!")

# sorted() is stable, so entries sharing a code keep their relative file order.
sorted_DicList = sorted(myDicList, key=lambda x: x["编码"])
del myDicList

# The context manager flushes and closes the table even if a write fails.
with open(table_path, 'w', encoding='utf-16') as sp23:
    sp23.writelines(
        phrase['汉字'] + '\t' + phrase['编码'] + '\n' for phrase in sorted_DicList
    )
print("已【排序】成功,并覆盖写入!")
del sorted_DicList

import pandas as pd

# De-duplicate: drop repeated (character, code) rows and overwrite the table.
# dtype=str together with na_filter=False keeps every field as literal text.
# With the read_csv defaults, a code spelled like a missing-value marker
# (e.g. "null" or "nan") is parsed as NaN and then written back as 'nan',
# silently changing the code.
data = pd.read_csv(table_path, sep='\t', header=None, encoding='utf-16',
                   dtype=str, na_filter=False)
data.columns = ["val", "code"]
data = data.drop_duplicates()
data.to_csv(table_path, sep='\t', index=False, header=False, na_rep='nan', encoding='utf-16')
print("已【去重】处理,并覆盖写入!")

from tqdm import tqdm
import time

print("准备制作【fcitx5原生码表】文件,")

# Plain fcitx5 table, named after the source table with an "fcitx5_" prefix.
mkText = f"{current_path}/生成结果/fcitx5_{os.path.basename(table_path)}"
fcitx5_file = open(mkText, mode='w', encoding='utf-8')

# Seed wubi98-large.txt with a copy of the header template, then open it in
# append mode so that entries written afterwards land after the header.
title_file = current_path + "/生成结果/wubi98-large.txt"
shutil.copy(current_path + "/wubi98-large-title.txt", title_file)
wubi98_large_file = open(title_file, mode='a+', encoding='utf-8')

def fcitx5_progress(content_f1):
    """Write every table entry to both output files as ``code character``.

    Each line of ``content_f1`` is expected to look like
    ``character<TAB>code``.  The two fields are swapped, joined with a single
    space and written to the module-level ``fcitx5_file`` and
    ``wubi98_large_file`` handles, while a tqdm progress bar tracks the pass.

    Args:
        content_f1: Raw lines read from the source table.

    Raises:
        IndexError: If a non-blank line contains no tab separator.
    """
    # One pass over the lines is sufficient; iterating the lines directly
    # avoids an index variable shared between nested loops.
    for raw_line in tqdm(content_f1):
        line = raw_line.strip()
        if not line:
            # Blank lines carry no entry; skip them instead of crashing.
            continue
        alst = line.split('\t')
        # Build the output line once and reuse it for both files.
        entry = alst[1] + ' ' + alst[0] + '\n'
        fcitx5_file.write(entry)
        wubi98_large_file.write(entry)

# Load the sorted, de-duplicated table into memory as raw lines.
with open(table_path, 'r', encoding='utf-16') as f1:
    content_f1 = f1.readlines()

if __name__ == "__main__":
    try:
        fcitx5_progress(content_f1)
    finally:
        # Release the table lines and close both outputs even when the
        # conversion fails, so everything written so far is flushed to disk.
        content_f1 = []
        fcitx5_file.close()
        wubi98_large_file.close()
    print("【fcitx5原生码表】已生成!")
11 changes: 11 additions & 0 deletions fcitx5-table-python/wubi98-large-title.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
;fcitx通用码表
;收录 unicode15.1 规范中的全部汉字
;https://github.com/yanhuacuo/98wubi-unicode
KeyCode=abcdefghijklmnopqrstuvwxy
Length=4
Pinyin=@
[Rule]
e2=p11+p12+p21+p22
e3=p11+p21+p31+p32
a4=p11+p21+p31+n11
[Data]
8 changes: 4 additions & 4 deletions fcitx5-table-python/常见细分/4-生僻字-1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4275,7 +4275,7 @@
塯 fqyl
霤 fqyl
怷 fqyn
㫄 fqyr
㫄 yqyr
圴 fqyy
㽌 fqyy
堫 frbt
Expand Down Expand Up @@ -15933,7 +15933,7 @@
㣠 thtu
辠 thuj
箎 thwb
臮 thwy
臮 thww
篊 tiaw
笵 tibb
筂 tibb
Expand Down Expand Up @@ -19618,7 +19618,7 @@
糺 xnn
紀 xnn
彑 xnng
黽 xnng
黽 xhng
毌 xnnh
繏 xnnw
糹 xnny
Expand All @@ -19634,7 +19634,7 @@
熲 xodm
颎 xodm
纏 xojf
纒 xojf
纒 xdjf
継 xonn
緂 xooy
繗 xoqg
Expand Down
17 changes: 7 additions & 10 deletions fcitx5-table-python/生成结果/fcitx5_超集-单义表.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,6 @@ afqy 靮
afqy 韂
afqy 䩚
afqy 䪌
afqy 𦱘
afqy 𦮖
afqy 𦮢
afr 靳
Expand Down Expand Up @@ -23847,14 +23846,12 @@ fqxf 黿
fqxf 𰐠
fqxn 𭎀
fqy 朮
fqyc 𦫣
fqye 𡑩
fqye 𱗥
fqyi 朮
fqyl 塯
fqyl 霤
fqyn 怷
fqyr 㫄
fqyy 圴
fqyy 㽌
fqyy 𡋝
Expand Down Expand Up @@ -26313,7 +26310,6 @@ gfrf 𦓸
gfsf 𦔍
gfsl 𣌃
gfss 𪼬
gfsw 𦓹
gfsy 𤧀
gfsy 𮴳
gft 琽
Expand Down Expand Up @@ -73903,7 +73899,6 @@ rfqn 𢶉
rfqn 𭽋
rfqo 𬷂
rfqt 𢷛
rfqy 𢮔
rfqy 𢫖
rfsf 𱠲
rfsh 𢹩
Expand Down Expand Up @@ -79542,6 +79537,7 @@ skko 𣛫
skko 𣟭
skks 橾
skkt 欕
skkw 𬃅
skkx 𭇷
skky 𪳏
sklf 𪾌
Expand Down Expand Up @@ -79610,7 +79606,6 @@ sksk 𡃭
sksk 𠾳
sksk 𣗥
sksk 𣠆
sksk 𬃅
sksk 𬃷
sksn 𢣱
skso 𪃿
Expand Down Expand Up @@ -83930,6 +83925,7 @@ tfsm 𩓕
tfsu 筙
tfsu 𦓫
tfsu 𫂋
tfsw 𦓹
tfsy 𬕽
tfsy 𭛧
tft 箸
Expand Down Expand Up @@ -85193,9 +85189,9 @@ thwo 𤏀
thwp 𥳠
thws 𦈀
thwv 𮍔
thww 臮
thww 𠂹
thww 𦤈
thwy 臮
thwy 𧬑
thwy 𥟭
thwy 𦆗
Expand Down Expand Up @@ -102728,6 +102724,7 @@ xdi 𢎴
xdiy 𦂥
xdj 弼
xdjf 緾
xdjf 纒
xdjf 𫮸
xdjf 𱺦
xdjg 絔
Expand Down Expand Up @@ -103365,6 +103362,7 @@ xhmy 緽
xhn 顚
xhnb 𦗁
xhne 𭛄
xhng 黽
xhnm 顚
xhnn 𢏍
xhno 鷆
Expand Down Expand Up @@ -103963,7 +103961,6 @@ xnn 𫄙
xnn 紀
xnng 纟
xnng 彑
xnng 黽
xnng ⺔
xnng ⺰
xnng 𦁁
Expand Down Expand Up @@ -104046,7 +104043,6 @@ xoiy 𦅧
xoj 缠
xojf 缠
xojf 纏
xojf 纒
xokg 𦀭
xokl 𬑂
xolx 𢐮
Expand Down Expand Up @@ -106427,7 +106423,6 @@ yfq 䛃
yfqh 𫴬
yfqn 䛃
yfqo 𱈱
yfqy 𧩂
yfs 谋
yfsb 𬣌
yfsy 诔
Expand Down Expand Up @@ -108393,11 +108388,13 @@ yqwy 𪴩
yqxb 𬽇
yqxn 𮗽
yqy 訋
yqyc 𦫣
yqyf 𣁼
yqyg 𰵀
yqyj 𧨥
yqyl 𧪭
yqyq 𧫘
yqyr 㫄
yqyy 訋
yr 义
yr 离
Expand Down
Loading

0 comments on commit 2ce709e

Please sign in to comment.