-
Notifications
You must be signed in to change notification settings - Fork 3
/
wiking.py
55 lines (43 loc) · 1.27 KB
/
wiking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import wikipedia
import os
# Text file that lists the movie titles to look up, one title per line.
list_file = '/home/ullas/movie_list.txt'
# Read the titles inside a context manager so the handle is closed as soon
# as the list is built (it used to stay open for the rest of the script).
with open(list_file, 'r') as fh:
    # Only the trailing newline is stripped from each title; lines that are
    # blank are skipped because an empty title cannot be searched for or
    # turned into a file name.
    movies = [m.rstrip('\n') for m in fh if m.strip()]
def fileify(s):
    """Turn a movie title into a lowercase, underscore-separated file name.

    Newline characters are removed, the title is split on single spaces,
    each piece is lowercased and the pieces are joined with '_'.  Runs of
    several spaces therefore produce several underscores.

    Args:
        s: Movie title, possibly ending with a newline.

    Returns:
        The file name derived from the title ('' for an empty title).
    """
    words = s.replace('\n', '').split(' ')
    name = '_'.join(word.lower() for word in words)
    # A '/' in a title (e.g. "Face/Off") would be read as a directory
    # separator once the name is joined onto a directory path.
    return name.replace('/', '_')
def get_movie_page(query, search_result):
    """Return the Wikipedia page that best matches a movie title.

    The first search result whose title contains both the literal text
    '(film)' and the query is preferred; when no result qualifies, the
    first search result is used instead.

    Args:
        query: Movie title that was searched for.  Trailing whitespace is
            ignored when matching against result titles.
        search_result: Sequence of page titles (the script passes the
            return value of wikipedia.search()).

    Returns:
        Whatever wikipedia.page() returns for the chosen title.

    Raises:
        IndexError: If search_result is empty.
    """
    if not search_result:
        # Same exception type the bare [0] lookup used to raise, but with a
        # message that says which title had no results.
        raise IndexError('no Wikipedia search results for: ' + repr(query))
    # Loop-invariant, so strip the query once instead of per result.
    title = query.rstrip()
    for result in search_result:
        if '(film)' in result and title in result:
            return wikipedia.page(result)
    return wikipedia.page(search_result[0])
def get_movie_plot(page_content):
    """Extract the text of the 'Plot' section from a page's plain text.

    The section starts one character after the '== Plot ==' heading and
    runs up to the next '==' marker, or to the end of the text when no
    further marker follows.

    Args:
        page_content: Full page text in which the plot section is
            introduced by the heading '== Plot =='.

    Returns:
        The plot text with its surrounding whitespace left untouched, or
        None when the heading is missing or nothing follows it.
    """
    header = '== Plot =='
    index = page_content.find(header)
    if index == -1:
        return None
    # Skip the heading plus the one character after it (presumably the
    # newline that ends the heading line).
    start = index + len(header) + 1
    if start >= len(page_content):
        return None
    end = page_content.find('==', start)
    if end == -1:
        # Plot is the last section: there is no following heading to stop
        # at, so take everything up to the end of the text.
        return page_content[start:]
    return page_content[start:end]
# Directory that receives one plot file per movie.
d = '/home/ullas/movie_plots'
for m in movies:
    # This progress line is printed before the lookup, so it appears even
    # when the steps below fail for this title.
    print('Done: ' + m)
    page = get_movie_page(m, wikipedia.search(m))
    fname = os.path.join(d, fileify(m))
    # Extract the plot once and decide what to do before touching the file
    # system, instead of creating the file first and deleting it afterwards.
    plot = get_movie_plot(page.content)
    if plot is None:
        print('\r--> Unable to get plot for : ' + m)
        # Keep the previous end state: no file exists for a movie without
        # a plot, even if an earlier run left one behind.
        if os.path.exists(fname):
            os.remove(fname)
        continue
    # The context manager closes the file even if the write fails.
    with open(fname, 'w') as fh:
        fh.write(plot)