-
Notifications
You must be signed in to change notification settings - Fork 0
/
script_pdf.py
158 lines (122 loc) · 5.45 KB
/
script_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
Author : wumaomao
Time :
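Description: Read a transcript PDF, group course grades by semester, and print the GPA on the 100-point and 4.0 scales.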
"""
import tabula
import pandas as pd
import tkinter as tk
from tkinter import filedialog
def grade_analyze(grade=None, weight=None, semester=None, genre=None):
    """Group course grades, weights and genres by semester.

    grade and weight are parallel sequences with one entry per course.
    semester and genre are optional parallel sequences; when omitted, every
    course is placed in semester 1 with genre 'C'. Courses whose semester
    is the empty string are left out of the result.

    Returns a tuple (grade_dic, genre_dic):
      grade_dic maps 'grade_<semester>' and 'weight_<semester>' to lists,
      genre_dic maps 'genre_<semester>' to a list.
    When grade or weight is missing, a message string is returned instead
    of the tuple.
    """
    # Required inputs: report a missing one with a message string.
    if grade is None:
        return "Parameter of grade not found"
    if weight is None:
        return "Lost weight parameter"

    course_count = len(grade)
    if genre is None:
        genre = ['C'] * course_count
    if semester is None:
        semester = [1] * course_count

    # Distinct semester ids; the empty string marks "no semester".
    semester_ids = set(semester)
    semester_ids.discard('')

    grade_dic = {}
    genre_dic = {}
    for sem in semester_ids:
        sem_grades, sem_weights, sem_genres = [], [], []
        grade_dic['grade_{}'.format(sem)] = sem_grades
        grade_dic['weight_{}'.format(sem)] = sem_weights
        genre_dic['genre_{}'.format(sem)] = sem_genres
        # Collect every course that belongs to this semester, in input order.
        for idx in range(course_count):
            if sem == semester[idx]:
                sem_grades.append(grade[idx])
                sem_weights.append(weight[idx])
                sem_genres.append(genre[idx])
    return grade_dic, genre_dic
def grade_calculate(grade_dictionary, genre_dictionary):
    """Compute per-semester and overall GPA on the 100-point and 4.0 scales.

    grade_dictionary maps 'grade_<id>' and 'weight_<id>' to parallel lists
    of course grades and course weights; genre_dictionary maps
    'genre_<id>' to the matching list of course genres. This is the shape
    returned by grade_analyze. Any semester id is accepted, not only 1..n.

    A course whose genre equals 0 is left out of the 100-point average:
    its weight is removed from the divisor and its grade is not summed.

    The three result tables are printed, then (gpa_4_dic, gpa_100_dic) is
    returned. In both dictionaries 'GPA' is the overall value and
    'GPA_<id>' the value of one semester, formatted as strings with two
    decimals. A semester without usable weight gets the integer 0, and the
    overall value is '0.00' when the total weight is zero.
    """
    prefix = 'grade_'
    semester_ids = [
        key[len(prefix):]
        for key in grade_dictionary.keys()
        if key.startswith(prefix)
    ]
    # Numeric ids first in numeric order (matches the former 1..n walk),
    # any other ids afterwards in alphabetical order.
    semester_ids.sort(
        key=lambda sid: (0, int(sid), '') if sid.isdecimal() else (1, 0, sid)
    )

    gpa_100_dic = {'GPA': 0}
    gpa_4_dic = {'GPA': 0}
    gpa_weight = {'weight': 0, 'whole_weight': 0}

    for sid in semester_ids:
        grades = grade_dictionary['grade_{}'.format(sid)]
        weights = grade_dictionary['weight_{}'.format(sid)]
        genres = genre_dictionary['genre_{}'.format(sid)]
        gpa_key = 'GPA_{}'.format(sid)

        sum_100 = 0
        sum_4 = 0
        con_weight = 0  # weight of the courses excluded by genre 0
        for j in range(len(grades)):
            # 100-point scale
            if genres[j] == 0:
                con_weight += weights[j]
            else:
                sum_100 += grades[j] * weights[j]
            # 4.0 scale: 85 and above is 4.0, below 60 is 1.5, otherwise
            # 0.1 point is lost per grade point under 85.
            if grades[j] >= 85:
                sum_4 = sum_4 + 4 * weights[j]
            elif grades[j] < 60:
                sum_4 += 1.5 * weights[j]
            else:
                sum_4 += (4 - 0.1 * (85 - grades[j])) * weights[j]

        whole_weight = sum(weights)
        effective_weight = whole_weight - con_weight

        # Guard on the real divisor: a semester made only of excluded
        # courses has a non-zero whole weight but a zero effective weight.
        if whole_weight != 0 and effective_weight != 0:
            gpa_100_dic[gpa_key] = format(sum_100 / effective_weight, '.2f')
        else:
            gpa_100_dic[gpa_key] = 0
        if whole_weight != 0:
            gpa_4_dic[gpa_key] = format(sum_4 / whole_weight, '.2f')
        else:
            gpa_4_dic[gpa_key] = 0

        gpa_weight['whole_weight_{}'.format(sid)] = whole_weight
        gpa_weight['weight_{}'.format(sid)] = effective_weight

        # The overall values are built from the rounded semester values,
        # which keeps the output identical to earlier versions.
        # If you want the 4.0 GPA including the optional curriculum, weight
        # it with whole_weight instead of effective_weight.
        gpa_100_dic['GPA'] += float(gpa_100_dic[gpa_key]) * effective_weight
        gpa_4_dic['GPA'] += float(gpa_4_dic[gpa_key]) * effective_weight
        gpa_weight['weight'] += float(effective_weight)
        gpa_weight['whole_weight'] += float(whole_weight)

    total_weight = gpa_weight['weight']
    if total_weight != 0:
        gpa_100_dic['GPA'] = format(gpa_100_dic['GPA'] / total_weight, '.2f')
        gpa_4_dic['GPA'] = format(gpa_4_dic['GPA'] / total_weight, '.2f')
    else:
        # No semester or no counted weight: report zero instead of dividing.
        gpa_100_dic['GPA'] = format(0, '.2f')
        gpa_4_dic['GPA'] = format(0, '.2f')

    print(pd.Series(gpa_4_dic))
    print(pd.Series(gpa_100_dic))
    print(pd.Series(gpa_weight))
    return gpa_4_dic, gpa_100_dic
def _extract_grades_and_credits(table):
    """Turn one parsed transcript table into (grade_list, weight_list).

    The table holds two side-by-side course lists. The left one uses the
    columns '课程名称', '学分' and '成绩'; the right one has the course name
    and the credit merged in '课程名称 学分' and the grade in '成绩.1'.
    Both halves are stacked, rows with a missing value are dropped, and the
    grades are returned as ints and the credits as floats.
    """
    right_names = []
    right_credits = []
    for cell in table['课程名称 学分']:
        parts = str(cell).split(' ')
        right_names.append(parts[0])
        # NOTE(review): only the integer part of the credit is kept here
        # ('2.5' becomes '2') while the left-hand column keeps fractions;
        # confirm this is intended.
        right_credits.append(parts[-1].split('.')[0])

    # Series.append was removed in pandas 2.0; pd.concat is the replacement.
    names = pd.concat(
        [pd.Series(table['课程名称']), pd.Series(right_names, dtype='object')],
        ignore_index=True,
    )
    credits = pd.concat(
        [table['学分'], pd.Series(right_credits, dtype='object')],
        ignore_index=True,
    )
    grades = pd.concat(
        [table['成绩'], pd.Series(table['成绩.1']).astype('object')],
        ignore_index=True,
    )

    frame = pd.DataFrame({'name': names, 'grade': grades, 'credit': credits})
    frame = frame.dropna().reset_index(drop=True)
    weight_list = frame['credit'].astype('float').tolist()

    grade_list = []
    for raw in frame['grade'].astype('str').tolist():
        # The textual grade '良' is counted as 0. Every occurrence is
        # replaced, and a transcript without it no longer raises.
        # NOTE(review): other non-numeric grades still fail in int().
        if raw == '良':
            raw = '0'
        grade_list.append(int(raw.split('.')[0]))
    return grade_list, weight_list


if __name__ == '__main__':
    root = tk.Tk()
    root.withdraw()  # only the file dialog is wanted, not the empty window
    Filepath = filedialog.askopenfilename()
    if not Filepath:
        # The dialog was cancelled.
        print('No file selected')
    else:
        df = tabula.read_pdf(Filepath, pages='all', silent=True)
        if not df:
            print('No table found in the selected file')
        else:
            grade_list, weight_list = _extract_grades_and_credits(df[0])
            grade_Dic, genre_Dic = grade_analyze(grade_list, weight_list)
            grade_calculate(grade_Dic, genre_Dic)
    # for packaging
    print('press ENTER to quit')
    input()