-
Notifications
You must be signed in to change notification settings - Fork 0
/
api.py
142 lines (120 loc) · 5.06 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from flask import Flask, abort, request, jsonify
from flask_cors import CORS
from http import HTTPStatus
from io import BufferedReader
from playhouse.shortcuts import model_to_dict
from algorithms import (
CaseSearch,
CaseClustering,
CaseRecommendation,
CaseSimilarity,
case_oyez_brief,
)
from db.peewee.models import Opinion, Cluster, DEFAULT_SERIALIZATION_ARGS
from db.peewee.helpers import model_list_to_json, model_list_to_dicts
from extraction.pdf_engine import PdfEngine
from extraction.citation_extractor import CitationExtractor
from graph import CitationNetwork
from utils.logger import Logger
# The Flask application object for this API. Wrapping it with flask-cors'
# CORS() applies that extension's cross-origin handling to the app.
app = Flask(__name__)
CORS(app)

# Process-wide service objects shared by the route handlers below. They are
# declared as None here and assigned by initialize_app(), so they must not be
# used before that hook has run.
citation_network: CitationNetwork = None
similarity: CaseSimilarity = None
clustering: CaseClustering = None
recommendation: CaseRecommendation = None
# NOTE(review): `before_first_request` is deprecated in recent Flask releases
# (see the Flask changelog) -- confirm the pinned Flask version before upgrading.
@app.before_first_request
def initialize_app():
    """Populate the module-level service singletons.

    Loads the citation network (with caching enabled) and then builds the
    similarity, clustering and recommendation services on top of it.
    """
    global citation_network, similarity, clustering, recommendation

    network = CitationNetwork.get_citation_network(enable_caching=True)
    citation_network = network
    Logger.info("Loaded citation network.")

    # All three services share the same network instance.
    similarity = CaseSimilarity(network)
    clustering = CaseClustering(network)
    recommendation = CaseRecommendation(network)
@app.after_request
def configure_caching(response: Flask.response_class):
    """Set ``Cache-Control: max-age`` to 300 seconds on every response."""
    cache_policy = response.cache_control
    cache_policy.max_age = 300
    return response
# TODO: If necessary (because extraction and parsing are slow), we can implement this as a stateful background job.
@app.route("/pdf/upload", methods=["POST"])
def upload_pdf():
    """Extract citations from an uploaded PDF.

    Expects the PDF in the multipart form field named ``file``. Its text is
    pulled out with ``PdfEngine`` and handed to ``CitationExtractor``; the
    extracted citations are serialized together with their
    ``parentheticals`` attribute.

    Returns:
        The serialized citations, or a 422 response when no file was sent.
    """
    uploaded = request.files.get("file")
    if uploaded is None:
        return "No file provided.", HTTPStatus.UNPROCESSABLE_ENTITY

    engine = PdfEngine(BufferedReader(uploaded))
    extractor = CitationExtractor(engine.get_text())
    extracted = list(extractor.get_extracted_citations())
    return model_list_to_json(extracted, extra_attrs=["parentheticals"])
# TODO: All of these /cases/ routes can be refactored into their own Flask blueprint
@app.route("/cases/<int:resource_id>")
def get_case(resource_id: int):
    """Return the opinion with the given resource ID as a dict.

    Aborts with 404 when no matching opinion exists.
    """
    try:
        record = Opinion.get(resource_id=resource_id)
        # Serialization stays inside the try so a DoesNotExist raised while
        # building the dict is treated the same way as a missing opinion.
        payload = model_to_dict(record, **DEFAULT_SERIALIZATION_ARGS)
    except Opinion.DoesNotExist:
        abort(HTTPStatus.NOT_FOUND)
    return payload
@app.route("/cases/<int:resource_id>/html")
def get_case_html(resource_id: int):
    """Return the stored HTML text of an opinion.

    Aborts with 404 when the opinion does not exist or has no HTML text.
    """
    try:
        html = Opinion.get(resource_id=resource_id).html_text
    except (Opinion.DoesNotExist, FileNotFoundError):
        abort(HTTPStatus.NOT_FOUND)
    if not html:
        # An opinion without HTML is reported the same way as a missing one.
        abort(HTTPStatus.NOT_FOUND)
    return html
@app.route("/cases/similar")
def get_similar_cases():
    """Return cases similar to the ones named in the query string.

    Query parameters:
        cases: One or more opinion resource IDs (repeatable, required).
        max_cases: Maximum number of results; defaults to 25.

    Returns:
        The serialized ``opinion_b`` side of every similarity record yielded
        by ``similarity.db_case_similarity``, or a 422 response when no case
        ID is given or a parameter is not an integer.
    """
    try:
        # Normalise to ints, as the recommendation and clustering routes do,
        # so the lookup never receives raw query strings or a str/int mix
        # for the limit.
        case_resource_ids = frozenset(map(int, request.args.getlist("cases")))
        max_cases = int(request.args.get("max_cases") or 25)
    except ValueError:
        return (
            "Case IDs and max_cases must be integers.",
            HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    if not case_resource_ids:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY

    similar_case_query = similarity.db_case_similarity(case_resource_ids, max_cases)
    similar_cases = [record.opinion_b for record in similar_case_query]
    return model_list_to_json(similar_cases)
@app.route("/cases/recommendations")
def get_recommended_cases():
    """Return recommended opinions for a set of cases.

    Query parameters:
        cases: One or more opinion resource IDs (repeatable, required).
        courts: Zero or more court IDs passed to the recommender as
            ``courts`` (repeatable).
        max_cases: Maximum number of recommendations; defaults to 10.

    Returns:
        The serialized opinions whose resource IDs appear in the
        recommender's result, ordered by their value in that result from
        highest to lowest; or a 422 response when no case ID is given or a
        numeric parameter is not an integer.
    """
    try:
        case_resource_ids = frozenset(map(int, request.args.getlist("cases")))
        max_cases = int(request.args.get("max_cases") or 10)
    except ValueError:
        # Malformed numbers are a client error, not a server fault.
        return (
            "Case IDs and max_cases must be integers.",
            HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    court_ids = frozenset(map(str, request.args.getlist("courts")))
    if not case_resource_ids:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY

    recommendations = recommendation.recommendations(
        case_resource_ids, max_cases, courts=court_ids
    )
    matching_opinions = (
        Opinion.select()
        .join(Cluster)
        .where(Opinion.resource_id << list(recommendations.keys()))
    )
    # The query does not return rows in recommendation order, so sort here.
    recommended_opinions = sorted(
        matching_opinions,
        key=lambda op: recommendations[op.resource_id],
        reverse=True,
    )
    return model_list_to_json(recommended_opinions)
@app.route("/cases/search")
def search():
    """Search cases by free text.

    Query parameters:
        query: The search text. A missing or empty value yields an empty
            JSON list.
        max_cases: Forwarded to ``CaseSearch.search_cases`` exactly as
            received (a string, or None when absent).

    Returns:
        The serialized search results including their ``headline`` attribute.
    """
    query_text = request.args.get("query")
    # NOTE(review): max_cases is not converted to int here -- confirm that
    # CaseSearch.search_cases accepts a str/None limit.
    result_limit = request.args.get("max_cases")
    if not query_text:
        return jsonify([])

    matches = CaseSearch.search_cases(query_text, max_cases=result_limit)
    return model_list_to_json(matches, extra_attrs=["headline"])
@app.route("/cases/<int:resource_id>/oyez_brief")
def get_oyez_brief(resource_id: int):
    """Return the Oyez brief for a case as a dict.

    Aborts with 404 when ``case_oyez_brief.from_resource_id`` yields a falsy
    result for the given resource ID.
    """
    brief = case_oyez_brief.from_resource_id(resource_id)
    if not brief:
        abort(HTTPStatus.NOT_FOUND)
    return brief._asdict()
@app.route("/cases/cluster")
def get_case_clusters():
    """Group the given cases into clusters.

    Query parameters:
        cases: One or more opinion resource IDs (repeatable, required).
        num_clusters: Desired number of clusters. When missing, empty or 0,
            None is passed to ``clustering.spectral_cluster``.

    Returns:
        A dict mapping each cluster name (as a string) to the serialized
        opinions in that cluster, or a 422 response when no case ID is given
        or a parameter is not an integer.
    """
    try:
        case_resource_ids = {int(c) for c in request.args.getlist("cases")}
        # `or 0` covers a missing/empty value; the trailing `or None` turns
        # an explicit or implied 0 into None.
        num_clusters = int(request.args.get("num_clusters") or 0) or None
    except ValueError:
        # Malformed numbers are a client error, not a server fault.
        return (
            "Case IDs and num_clusters must be integers.",
            HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    if not case_resource_ids:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY

    clusters = clustering.spectral_cluster(
        case_resource_ids, num_clusters=num_clusters
    )
    return {
        str(cluster_name): model_list_to_dicts(
            Opinion.select().where(Opinion.resource_id << opinion_ids)
        )
        for cluster_name, opinion_ids in clusters.items()
    }