-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
42 lines (35 loc) · 984 Bytes
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import subprocess
import requests
import time
from potassium import Potassium, Request, Response
# Launch the text-generation-inference (TGI) server as a child process as soon
# as this module is loaded.
cmd = "text-generation-launcher --model-id google/flan-t5-small"
# Pass an argument list instead of a shell string: no intermediate shell is
# spawned, so `process` refers to the launcher itself rather than to a shell
# wrapping it. The command contains no quoting, so a plain split is sufficient.
process = subprocess.Popen(cmd.split())

# Potassium application object; the init and handler functions register on it.
app = Potassium("server", experimental_num_workers=10)
@app.init
def init():
    """Block until the TGI server answers HTTP requests.

    Repeatedly issues a GET against the server's root URL, pausing one second
    after each failed attempt, and returns once any request completes.

    Returns:
        dict: An empty dictionary.
    """
    while True:
        try:
            # Any HTTP response, whatever its status code, means the server is
            # up. The timeout stops a stalled connection from blocking
            # start-up forever; a timed-out attempt is simply retried.
            requests.get("http://127.0.0.1:80", timeout=5)
            break
        except requests.exceptions.RequestException:
            # Server not reachable yet; wait briefly and try again. Only
            # request-level failures are retried so that genuine programming
            # errors are not silently swallowed by the loop.
            time.sleep(1)
    return {}
@app.handler(route="/generate")
def handler(context: dict, request: Request) -> Response:
    """Forward a ``/generate`` request to the local TGI server.

    The incoming JSON payload is posted unchanged to the TGI ``/generate``
    endpoint and the upstream reply is relayed back to the caller.

    Args:
        context: Value returned by ``init`` (unused here).
        request: Incoming request; its ``json`` attribute is forwarded as-is.

    Returns:
        Response: The upstream JSON body together with the upstream HTTP
        status code. If the upstream body is not valid JSON, its raw text is
        returned under an ``"error"`` key instead.
    """
    upstream = requests.post(
        "http://127.0.0.1:80/generate",
        headers={"Content-Type": "application/json"},
        json=request.json,
    )

    try:
        payload = upstream.json()
    except ValueError:
        # The upstream reply was not JSON; surface its text rather than
        # raising from inside the handler.
        payload = {"error": upstream.text}

    # Relay the real upstream status so that upstream failures are not
    # reported to the client as a 200 success.
    return Response(json=payload, status=upstream.status_code)
if __name__ == "__main__":
    # Start serving only when this file is executed directly as a script.
    app.serve()