speech models deployment to GCP

BasedHardware · Sep 4, 2024 · 7a4b4ae · 7a4b4ae
1 parent 1f55723
commit 7a4b4ae
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 12 deletions.
diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml
@@ -0,0 +1,47 @@
+# Builds the image described by backend/modal/Dockerfile, pushes it to gcr.io
+# and deploys it to Cloud Run on pushes to main/development that touch
+# backend/modal/**.
+name: Deploy Speech Models to Cloud RUN
+
+# TODO: determine if changes to backend folder before pushing
+# NOTE(review): the `paths` filter below only watches backend/modal/**, while
+# backend/modal/Dockerfile copies backend/requirements.txt and all of backend/
+# into the image. Changes elsewhere under backend/ will not trigger a redeploy;
+# confirm whether that is what the TODO above is about.
+
+on:
+  push:
+    branches: [ "main", "development" ]
+    paths:
+      - 'backend/modal/**'
+
+# SERVICE is used both as the image name under gcr.io/<project>/ and as the
+# Cloud Run service name.
+# NOTE(review): "backend" is a generic name for a speech-models deployment;
+# verify it does not collide with another service/image in the same project.
+env:
+  SERVICE: backend
+  REGION: us-central1
+
+jobs:
+  deploy:
+    # Selects the GitHub environment from the pushed branch:
+    # development -> 'development', main -> 'prod'.
+    environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }}
+    # NOTE(review): the auth step below uses a JSON key (credentials_json), so
+    # `id-token: write` may not be needed; confirm before removing.
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      # Authenticates to Google Cloud with the GCP_CREDENTIALS secret.
+      - name: Google Auth
+        id: auth
+        uses: 'google-github-actions/auth@v0'
+        with:
+          credentials_json: ${{ secrets.GCP_CREDENTIALS }}
+      # Lets the docker CLI use the gcloud credentials when pushing to gcr.io.
+      - run: gcloud auth configure-docker
+      # The build context is the repository root ('.') because the Dockerfile
+      # COPYs paths that start with backend/.
+      - name: Build and Push Docker image
+        run: |
+          docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile .
+          docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
+      # Deploys the image pushed above to the Cloud Run service named SERVICE.
+      - name: Deploy to Cloud Run
+        id: deploy
+        uses: google-github-actions/deploy-cloudrun@v0
+        with:
+          service: ${{ env.SERVICE }}
+          region: ${{ env.REGION }}
+          image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
+
+      # If required, use the Cloud Run url output in later steps
+      - name: Show Output
+        run: echo ${{ steps.deploy.outputs.url }}
diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile
@@ -0,0 +1,16 @@
+# Image for the speech-model HTTP service; built from the repository root
+# (all COPY sources below are relative to it).
+FROM tiangolo/uvicorn-gunicorn:python3.11
+
+# Install system packages in a single layer so `apt-get update` is never
+# served from a stale cached layer, pass -y to every install so the build
+# cannot stop at a confirmation prompt, and drop the apt lists afterwards
+# to keep the layer small. curl keeps its original no-recommends flags; the
+# remaining packages are installed with default options, as before.
+RUN apt-get update \
+    && apt-get install --no-install-recommends --no-install-suggests -y curl \
+    && apt-get install -y unzip python3 python3-pip git ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the requirements first so the dependency layer is reused when only
+# application code changes.
+COPY backend/requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+
+COPY backend/ /app
+
+EXPOSE 8080
+# NOTE(review): backend/ is copied to /app, so `main:app` refers to
+# /app/main.py when uvicorn starts from /app. The endpoints in
+# backend/modal/main.py end up at /app/modal/main.py; confirm which module is
+# the intended entrypoint.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
diff --git a/backend/modal/main.py b/backend/modal/main.py
@@ -0,0 +1,20 @@
+from typing import List
+
+from fastapi import FastAPI, UploadFile, File, Form
+
+from speech_profile_modal import ResponseItem, endpoint as speaker_identification_endpoint
+from vad_modal import endpoint as vad_endpoint
+
+app = FastAPI()
+
+
+@app.post('/v1/speaker-identification')
+def speaker_identification(
+        uid: str, audio_file: UploadFile = File(...), segments: str = Form(...)
+) -> List[ResponseItem]:
+    """Expose the speaker-identification endpoint over HTTP.
+
+    Args:
+        uid: Plain ``str`` parameter, so FastAPI reads it from the query string.
+        audio_file: Required multipart file upload.
+        segments: Required form field, forwarded unchanged as a string.
+
+    Returns:
+        Whatever ``speaker_identification_endpoint`` returns, declared as a
+        list of ``ResponseItem``.
+    """
+    # File(...) must be called: the bare `File` function would itself become
+    # the parameter's default value instead of marking the upload as required.
+    return speaker_identification_endpoint(uid, audio_file, segments)
+
+
+@app.post('/v1/vad')
+def vad(audio_file: UploadFile = File(...)):
+    """Expose the VAD endpoint over HTTP.
+
+    Args:
+        audio_file: Required multipart file upload.
+
+    Returns:
+        Whatever ``vad_endpoint`` returns for the uploaded file.
+    """
+    # File(...) must be called: the bare `File` function would itself become
+    # the parameter's default value instead of marking the upload as required.
+    return vad_endpoint(audio_file)
diff --git a/backend/modal/speech_profile_modal.py b/backend/modal/speech_profile_modal.py
@@ -95,6 +95,7 @@ def classify_segments(
     .pip_install("pydub")
     .pip_install("requests")
     .pip_install("google-cloud-storage")
+    .pip_install("redis")
 )
 
 os.makedirs('_temp', exist_ok=True)

diff --git a/backend/utils/llm.py b/backend/utils/llm.py
@@ -148,18 +148,6 @@ def summarize_open_glass(photos: List[MemoryPhoto]) -> Structured:
 # ************* EXTERNAL INTEGRATIONS **************
 # **************************************************
 
-def summarize_screen_pipe(description: str) -> Structured:
-    prompt = f'''The user took a series of screenshots from his laptop, and used OCR to obtain the text from the screen.
-
-      For the title, use the main topic of the scenes.
-      For the overview, condense the descriptions into a brief summary with the main topics discussed, make sure to capture the key points and important details.
-      For the category, classify the scenes into one of the available categories.
-    
-      Screenshots: ```{description}```
-      '''.replace('    ', '').strip()
-    # return groq_llm_with_parser.invoke(prompt)
-    return llm_with_parser.invoke(prompt)
-
 
 def summarize_experience_text(text: str) -> Structured:
     prompt = f'''The user sent a text of their own experiences or thoughts, and wants to create a memory from it.