-
Notifications
You must be signed in to change notification settings - Fork 0
/
book.py
123 lines (114 loc) · 4.6 KB
/
book.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import csv
import sys
import requests
import json
import time
from SPARQLWrapper import SPARQLWrapper, JSON
def search_book_info_from_google(author_name, book_title, max_retries=5, timeout=10):
    """Look up a book through the Google Books API and return its earliest edition.

    Args:
        author_name: Author name used for the ``inauthor:`` search term.
        book_title: Title used for the ``intitle:`` search term.
        max_retries: Maximum number of requests sent before giving up.
            At least one request is always sent.
        timeout: Per-request timeout in seconds, forwarded to ``requests.get``.

    Returns:
        The ``volumeInfo`` dict of the returned volume whose ``publishedDate``
        string is the smallest, or ``None`` when the response has no items.
        If no returned volume carries a ``publishedDate``, the first volume
        is returned.

    Raises:
        requests.RequestException: On a network failure or timeout, or
            (as ``HTTPError``) when the last attempt ended with a 4xx/5xx.
        RuntimeError: When the last attempt returned a non-200 status that
            is not a 4xx/5xx error.
    """
    url = "https://www.googleapis.com/books/v1/volumes"
    # Passing the query through ``params`` lets requests URL-encode it, so
    # titles containing spaces, '&' or '#' no longer corrupt the URL. The
    # space between the two search terms is encoded as '+'.
    params = {"q": f"intitle:{book_title} inauthor:{author_name}"}

    # Bounded retry: a permanent error (400, 403, quota exceeded...) must not
    # keep the script looping forever.
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            break
        if attempt < attempts - 1:
            time.sleep(1)
    else:
        # Every attempt failed: surface the HTTP error to the caller.
        response.raise_for_status()
        raise RuntimeError(
            f"Google Books returned status {response.status_code} for {response.url}"
        )

    items = response.json().get("items") or []
    if not items:
        return None

    volumes = [item.get("volumeInfo", {}) for item in items]
    # Some volumes have no publishedDate; ignore them when looking for the
    # earliest one instead of raising KeyError.
    dated = [volume for volume in volumes if volume.get("publishedDate")]
    # Dates are compared as strings, as returned by the API. min() keeps the
    # first volume in case of a tie.
    result = min(dated, key=lambda volume: volume["publishedDate"]) if dated else volumes[0]

    print(f"Le livre {book_title} de {author_name} a été trouvé via google.")
    return result
def _escape_sparql_literal(text):
    """Escape *text* so it can sit inside a double-quoted SPARQL string literal."""
    return (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def search_book_info_from_data(author_name, book_title):
    """Look up a book on the data.bnf.fr SPARQL endpoint and return its earliest entry.

    Args:
        author_name: Regular expression matched case-insensitively against
            the creator's ``foaf:name``.
        book_title: Regular expression matched (case-sensitively) against
            the work's ``dcterms:title``.

    Returns:
        The result binding (a dict with ``title``, ``name``, ``date`` and
        ``publisher`` entries, each holding a ``"value"`` key) whose date
        string is the smallest, or ``None`` when the query returns no rows.

    Raises:
        Whatever ``SPARQLWrapper`` raises when the request or the response
        conversion fails; nothing is caught here.
    """
    # dcterms is declared explicitly so the query does not depend on the
    # endpoint's predefined namespaces. The duplicated
    # "?person foaf:name ?name" triple of the earlier version is dropped.
    query = """
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX rdarelationships: <http://rdvocab.info/RDARelationshipsWEMI/>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        SELECT ?title ?name ?date ?publisher
        WHERE {
            ?person foaf:name ?name .
            ?oeuvre dcterms:creator ?person .
            ?oeuvre dcterms:title ?title .
            ?oeuvre dcterms:date ?date .
            ?edition rdarelationships:workManifested ?oeuvre .
            ?edition dcterms:publisher ?publisher
            FILTER regex(?name, "%s", "i")
            FILTER regex(?title, "%s")
        }
    """ % (
        # Escaping keeps a quote or backslash in the CSV data from breaking
        # out of the string literal. The values are still used as regexes.
        _escape_sparql_literal(author_name),
        _escape_sparql_literal(book_title),
    )

    # Send the query and get the results
    sparql = SPARQLWrapper("https://data.bnf.fr/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    data = sparql.query().convert()

    bindings = data["results"]["bindings"]
    # Return None if no results
    if not bindings:
        return None

    # Keep the row with the smallest date string (first one on ties).
    result = min(bindings, key=lambda binding: binding["date"]["value"])
    print(f"Le livre {book_title} de {author_name} a été trouvé via bnf.")
    return result
if __name__ == "__main__":
    # Script entry point.
    # Input:  a ';'-separated CSV (quote char "'") whose first column is the
    #         author and second column the book title.
    # Output: 'livres.csv' with one row per book found, plus a summary line
    #         on stdout.
    if len(sys.argv) < 2:
        sys.exit("Usage: book.py <csv_file>")
    csv_file = sys.argv[1]

    livres = []
    results = []
    with open(csv_file, newline='', mode='r') as csvfile:
        reader = csv.reader(csvfile, delimiter=';', quotechar="'")
        for row in reader:
            # Blank or incomplete lines carry no (author, title) pair.
            if len(row) < 2:
                continue

            # Only the last word of the author field (dots removed) is used
            # as the search term.
            author_name = row[0].replace(".", "").strip().split(" ")[-1]
            book_title = row[1].strip()
            livres.append((author_name, book_title))

            # First source: data.bnf.fr.
            try:
                result = search_book_info_from_data(author_name, book_title)
            except Exception:
                print(f"Une erreur est survenue lors de la recherche du livre {book_title} de {author_name} via bnf.")
                # Reset explicitly: otherwise `result` is unbound on the first
                # row or still holds the previous book's data.
                result = None

            if result is not None:
                info = {
                    "title": result["title"]["value"],
                    "authors": result["name"]["value"],
                    "publishedDate": result["date"]["value"],
                    "edition": result["publisher"]["value"]
                }
            else:
                # Fallback source: Google Books.
                try:
                    info = search_book_info_from_google(author_name, book_title)
                except Exception:
                    print(f"Une erreur est survenue lors de la recherche du livre {book_title} de {author_name} via google.")
                    continue
                if info is None:
                    print(f"Le livre {book_title} de {author_name} n'a pas été trouvé.")
                    continue

            results.append(info)

    # newline='' is what the csv module requires for files it writes to.
    with open('livres.csv', 'w', newline='') as outfile:
        writer = csv.writer(outfile, delimiter=';', quotechar="'")
        # Write the header row. The comma after "edition" matters: without
        # it the two adjacent literals merge into one "editionpageCount" cell.
        writer.writerow([
            "authors",
            "title",
            "subtitle",
            "publishedDate",
            "edition",
            "pageCount",
        ])
        for result in results:
            # We may not have all the information, so get() is used to
            # avoid an exception on missing fields.
            authors = result.get("authors", "")
            # Google results carry a list of authors; flatten it so the cell
            # does not contain a Python list repr.
            if isinstance(authors, (list, tuple)):
                authors = ", ".join(authors)
            writer.writerow([
                authors,
                result.get("title", ""),
                result.get("subtitle", ""),
                result.get("publishedDate", ""),
                # BnF results are stored under "edition"; Google results
                # expose the same information under "publisher".
                result.get("edition") or result.get("publisher", ""),
                result.get("pageCount", ""),
            ])

    # Print percentage of books found (guarding against an empty input file).
    if livres:
        print("{:.2f}% des livres ont été trouvés.".format(len(results) / len(livres) * 100))
    else:
        print("Aucun livre à rechercher.")