From 3335120ea9cb9cf8e5e4926576de5c4b20640d6d Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 12:14:48 +0100
Subject: [PATCH 1/9] BOT-1987 added "voices" query. BOT-1986 Added Google STT

---
 Makefile                    | 37 +++++++++++++++
 README.md                   |  5 --
 build_and_push.sh           | 19 --------
 docker-compose-dev.yml      | 50 +++++++++----------
 docker-compose.yml          |  4 +-
 frontend/package.json       | 45 +++++++++---------
 frontend/resources/.env     | 17 ++++---
 frontend/src/convert/sox.js |  2 +-
 frontend/src/routes.js      | 95 +++++++++++++++++++++++++++++++------
 frontend/src/stt/google.js  | 43 +++++++----------
 frontend/src/stt/kaldi.js   |  4 +-
 frontend/src/swagger.json   | 69 ++++++++++++++++++++++++++-
 frontend/src/tts/google.js  | 66 ++++++++++++++++++++++++++
 frontend/src/tts/marytts.js | 50 +++++++++++++++----
 frontend/src/tts/picotts.js | 56 +++++++++++++++++++---
 frontend/src/utils.js       | 22 ++++++++-
 16 files changed, 440 insertions(+), 144 deletions(-)
 create mode 100644 Makefile
 delete mode 100755 build_and_push.sh
 create mode 100644 frontend/src/tts/google.js

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4e426e6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,37 @@
+# Most recent commit reachable from any tag (abbreviated hash).
+TAG_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
+# Nearest tag name for that commit; empty if git describe fails (errors are suppressed).
+VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || true)
+
+# These targets are commands, not files; recipe lines below must start with a TAB.
+.PHONY: docker_build docker_publish docker_latest
+
+docker_build:
+	docker build -t botium/botium-speech-frontend:$(VERSION) frontend
+	docker build -t botium/botium-speech-watcher:$(VERSION) watcher
+	docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:$(VERSION) stt
+	docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:$(VERSION) stt
+	docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:$(VERSION) tts
+	docker build -t botium/botium-speech-dictate:$(VERSION) dictate
+
+docker_publish:
+	docker push botium/botium-speech-frontend:$(VERSION)
+	docker push botium/botium-speech-watcher:$(VERSION)
+	docker push botium/botium-speech-kaldi-en:$(VERSION)
+	docker push botium/botium-speech-kaldi-de:$(VERSION)
+	docker push botium/botium-speech-marytts:$(VERSION)
+	docker push botium/botium-speech-dictate:$(VERSION)
+
+docker_latest:
+	docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest
+	docker push botium/botium-speech-frontend:latest
+	docker tag botium/botium-speech-watcher:$(VERSION) botium/botium-speech-watcher:latest
+	docker push botium/botium-speech-watcher:latest
+	docker tag botium/botium-speech-kaldi-en:$(VERSION) botium/botium-speech-kaldi-en:latest
+	docker push botium/botium-speech-kaldi-en:latest
+	docker tag botium/botium-speech-kaldi-de:$(VERSION) botium/botium-speech-kaldi-de:latest
+	docker push botium/botium-speech-kaldi-de:latest
+	docker tag botium/botium-speech-marytts:$(VERSION) botium/botium-speech-marytts:latest
+	docker push botium/botium-speech-marytts:latest
+	docker tag botium/botium-speech-dictate:$(VERSION) botium/botium-speech-dictate:latest
+	docker push botium/botium-speech-dictate:latest
diff --git a/README.md b/README.md
index ca84c9d..b259ea2 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,6 @@
 
 Botium Speech Processing is a unified, developer-friendly API to the best available free and Open-Source Speech-To-Text and Text-To-Speech services.
 
-**UPDATE 2020/06/15:** As Chatbots grow in importance, automated testing solutions will remain critical for ensuring that Chatbots actually do what their designers intend. We've been busy working on a product that allows testers to have visual insights and deeper understanding in their Chatbot's performance, offering several solutions to boost their interaction!
-[Botium Coach will be introduced to the market as part of our online event on the 24th of June.](https://www.botium.ai/coach/)
-
-[![](http://img.youtube.com/vi/WsNaDfZ7WHk/0.jpg)](http://www.youtube.com/watch?v=WsNaDfZ7WHk "Botium Coach is coming on 24th of June")
-
 ## What is it ?
 
 Botium Speech Processing is a *get-shit-done*-style Open-Source software stack, the configuration options are rudimentary: it is highly opinionated about the included tools, just get the shit done.
diff --git a/build_and_push.sh b/build_and_push.sh
deleted file mode 100755
index 833266d..0000000
--- a/build_and_push.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-echo "building docker images"
-docker build -t botium/botium-speech-frontend frontend
-docker build -t botium/botium-speech-watcher watcher
-docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en stt
-docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de stt
-docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts tts
-docker build -t botium/botium-speech-dictate dictate
-
-if [ "$1" == "--push" ]; then
-  echo "pushing docker images"
-  docker push botium/botium-speech-frontend
-  docker push botium/botium-speech-watcher
-  docker push botium/botium-speech-kaldi-en
-  docker push botium/botium-speech-kaldi-de
-  docker push botium/botium-speech-marytts
-  docker push botium/botium-speech-dictate
-fi
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 27a8813..671750e 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -13,33 +13,33 @@ services:
     restart: always
     volumes:
       - "./frontend/resources:/app/resources"
-  watcher:
-    build:
-      context: watcher
-    restart: always
-    volumes:
-      - "./watcher:/app/watch"
-      - "./logs/watcher:/app/logs"
-  stt_en:
-    build:
-      context: stt
-      dockerfile: Dockerfile.kaldi.en
-    restart: always
-    volumes:
-      - "./logs/stt_en:/opt/logs"
-  stt_de:
-    build:
-      context: stt
-      dockerfile: Dockerfile.kaldi.de
-    restart: always
-    volumes:
-      - "./logs/stt_de:/opt/logs"
+#  watcher:
+#    build:
+#      context: watcher
+#    restart: always
+#    volumes:
+#      - "./watcher:/app/watch"
+#      - "./logs/watcher:/app/logs"
+#  stt_en:
+#    build:
+#      context: stt
+#      dockerfile: Dockerfile.kaldi.en
+#    restart: always
+#    volumes:
+#      - "./logs/stt_en:/opt/logs"
+#  stt_de:
+#    build:
+#      context: stt
+#      dockerfile: Dockerfile.kaldi.de
+#    restart: always
+#    volumes:
+#      - "./logs/stt_de:/opt/logs"
   tts:
     build:
       context: tts
       dockerfile: Dockerfile.marytts
     restart: always
-  dictate:
-    build:
-      context: dictate
-    restart: always
+#  dictate:
+#    build:
+#      context: dictate
+#    restart: always
diff --git a/docker-compose.yml b/docker-compose.yml
index de9a527..b187ea9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -29,8 +29,8 @@ services:
     volumes:
       - "./logs/stt_de:/opt/logs"
   tts:
-    image: botium/botium-speech-marytts
+    image: botium/botium-speech-marytts:latest
     restart: always
   dictate:
-    image: botium/botium-speech-dictate
+    image: botium/botium-speech-dictate:latest
     restart: always
diff --git a/frontend/package.json b/frontend/package.json
index 2a4940d..4c870cd 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -11,33 +11,34 @@
   "author": "Botium GmbH",
   "license": "MIT",
   "dependencies": {
-    "@google-cloud/speech": "^3.6.0",
-    "@google-cloud/storage": "^4.3.0",
+    "@google-cloud/speech": "^4.1.5",
+    "@google-cloud/storage": "^5.7.0",
+    "@google-cloud/text-to-speech": "^3.1.3",
     "body-parser": "^1.19.0",
-    "cross-env": "^7.0.0",
-    "debug": "^4.1.1",
-    "dotenv-flow": "^3.1.0",
+    "cross-env": "^7.0.3",
+    "debug": "^4.3.1",
+    "dotenv-flow": "^3.2.0",
     "express": "^4.17.1",
-    "express-winston": "^4.0.1",
-    "lodash": "^4.17.15",
-    "mkdirp": "^0.5.1",
-    "mustache": "^3.1.0",
-    "nodemon": "^2.0.1",
-    "request": "^2.88.0",
-    "request-promise-native": "^1.0.8",
+    "express-winston": "^4.0.5",
+    "lodash": "^4.17.20",
+    "mkdirp": "^1.0.4",
+    "mustache": "^4.1.0",
+    "nodemon": "^2.0.6",
+    "request": "^2.88.2",
+    "request-promise-native": "^1.0.9",
     "sanitize-filename": "^1.6.3",
-    "swagger-jsdoc": "^3.5.0",
-    "swagger-ui-express": "^4.1.2",
-    "uuid": "^3.3.3",
-    "winston": "^3.2.1",
-    "word-error-rate": "0.0.7"
+    "swagger-jsdoc": "^6.0.0-rc.5",
+    "swagger-ui-express": "^4.1.5",
+    "uuid": "^8.3.2",
+    "winston": "^3.3.3",
+    "word-error-rate": "^0.0.7"
   },
   "devDependencies": {
-    "eslint": "^6.7.2",
-    "eslint-config-standard": "^14.1.0",
-    "eslint-plugin-import": "^2.19.1",
-    "eslint-plugin-node": "^10.0.0",
+    "eslint": "^7.15.0",
+    "eslint-config-standard": "^16.0.2",
+    "eslint-plugin-import": "^2.22.1",
+    "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.2.1",
-    "eslint-plugin-standard": "^4.0.1"
+    "eslint-plugin-standard": "^5.0.0"
   }
 }
diff --git a/frontend/resources/.env b/frontend/resources/.env
index f0b78c5..d5c6f4e 100644
--- a/frontend/resources/.env
+++ b/frontend/resources/.env
@@ -10,28 +10,27 @@ BOTIUM_SPEECH_UPLOAD_LIMIT=50mb
 # Cache Path
 BOTIUM_SPEECH_CACHE_DIR=./resources/.cache
 
-# Provider for TTS (marytts or picotts)
+# Provider for TTS (google or marytts or picotts)
 BOTIUM_SPEECH_PROVIDER_TTS=marytts
 
 # Provider for STT (kaldi or google)
 BOTIUM_SPEECH_PROVIDER_STT=kaldi
 
-# TTS Provider MaryTTS URLs 
-BOTIUM_SPEECH_MARYTTS_URL_EN=http://tts:59125/process?INPUT_TEXT={{text}}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=dfki-spike&LOCALE=en_GB
-BOTIUM_SPEECH_MARYTTS_URL_DE=http://tts:59125/process?INPUT_TEXT={{text}}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=bits3-hsmm&LOCALE=de
+# TTS Provider MaryTTS URLs
+BOTIUM_SPEECH_MARYTTS_URL=http://tts:59125
 
 # TTS Provider Pico Command Line
-BOTIUM_SPEECH_PICO_CMDPREFIX_EN=pico2wave --lang=en-US --wave={{{output}}}
-BOTIUM_SPEECH_PICO_CMDPREFIX_DE=pico2wave --lang=de-DE --wave={{{output}}}
+BOTIUM_SPEECH_PICO_CMDPREFIX=pico2wave
 
 # STT Provider Kaldi URLs
 BOTIUM_SPEECH_KALDI_URL_EN=http://stt_en:80/client/dynamic/recognize
 BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
 
 # STT Provider Google
-BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json
-BOTIUM_SPEECH_GOOGLE_CONFIG_EN={ "languageCode": "en-US" }
-BOTIUM_SPEECH_GOOGLE_CONFIG_DE={ "languageCode": "de-DE" }
+#BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json
+#BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL=
+#BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY=
+BOTIUM_SPEECH_GOOGLE_CONFIG={}
 # For files longer than 1 minute, you have to create a Google Cloud Storage Bucket as temporary storage (give read/write access to service user)
 #BOTIUM_SPEECH_GOOGLE_BUCKET_NAME=
 #BOTIUM_SPEECH_GOOGLE_API_VERSION=
diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js
index 942bab9..bf9a00d 100644
--- a/frontend/src/convert/sox.js
+++ b/frontend/src/convert/sox.js
@@ -1,7 +1,7 @@
 const fs = require('fs')
 const Mustache = require('mustache')
 const { spawn } = require('child_process')
-const uuidv1 = require('uuid/v1')
+const { v1: uuidv1 } = require('uuid')
 const debug = require('debug')('botium-speech-processing-convert-sox')
 
 const runsox = (cmdLine, { inputBuffer, start, end }) => {
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
index e364628..ab60a31 100644
--- a/frontend/src/routes.js
+++ b/frontend/src/routes.js
@@ -10,11 +10,22 @@ const debug = require('debug')('botium-speech-processing-routes')
 
 const cachePathStt = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'stt')
 const cachePathTts = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'tts')
-const cacheKey = (data, language, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}`
+const cacheKeyStt = (data, language, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}`
+const cacheKeyTts = (data, language, voice, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}_${voice || 'default'}${ext}`
 
 if (cachePathStt) mkdirp.sync(cachePathStt)
 if (cachePathTts) mkdirp.sync(cachePathTts)
 
+const ttsEngines = {
+  google: new (require('./tts/google'))(),
+  marytts: new (require('./tts/marytts'))(),
+  picotts: new (require('./tts/picotts'))()
+}
+const sttEngines = {
+  google: new (require('./stt/google'))(),
+  kaldi: new (require('./stt/kaldi'))()
+}
+
 const router = express.Router()
 
 /**
@@ -60,7 +71,7 @@ router.get('/api/status', (req, res) => {
  *       - application/json
  *     parameters:
  *       - name: language
- *         description: ISO-639-1 language code
+ *         description: ISO-639-1 language code (2 letters)
  *         in: path
  *         required: true
  *         schema:
@@ -97,7 +108,7 @@ router.post('/api/stt/:language', async (req, res, next) => {
   if (Buffer.isBuffer(req.body)) {
     let cacheFile = null
     if (cachePathStt) {
-      cacheFile = path.join(cachePathStt, cacheKey(req.body, req.params.language, '.json'))
+      cacheFile = path.join(cachePathStt, cacheKeyStt(req.body, req.params.language, '.json'))
       if (fs.existsSync(cacheFile)) {
         try {
           const result = JSON.parse(fs.readFileSync(cacheFile).toString())
@@ -109,7 +120,7 @@ router.post('/api/stt/:language', async (req, res, next) => {
       }
     }
     try {
-      const stt = new (require(`./stt/${(req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT}`))()
+      const stt = sttEngines[(req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT]
 
       const result = await stt.stt({
         language: req.params.language,
@@ -121,8 +132,12 @@ router.post('/api/stt/:language', async (req, res, next) => {
       res.json(result).end()
 
       if (cachePathStt) {
-        fs.writeFileSync(cacheFile, JSON.stringify(result))
-        debug(`Writing stt result ${cacheFile} to cache: ${result.text}`)
+        try {
+          fs.writeFileSync(cacheFile, JSON.stringify(result))
+          debug(`Writing stt result ${cacheFile} to cache: ${result.text}`)
+        } catch (err) {
+          debug(`Writing stt result ${cacheFile} to cache: ${result.text} - failed: ${err.message}`)
+        }
       }
     } catch (err) {
       return next(err)
@@ -132,6 +147,47 @@ router.post('/api/stt/:language', async (req, res, next) => {
   }
 })
 
+/**
+ * @swagger
+ * /api/ttsvoices:
+ *   get:
+ *     description: Get list of voices
+ *     security:
+ *       - ApiKeyAuth: []
+ *     produces:
+ *       - application/json
+ *     parameters:
+ *       - name: tts
+ *         description: Text-to-speech backend
+ *         in: query
+ *         required: false
+ *         schema:
+ *           type: string
+ *           enum: [google, marytts, picotts]
+ *     responses:
+ *       200:
+ *         description: List of supported voices
+ *         schema:
+ *           type: array
+ *           items:
+ *             type: object
+ *             properties:
+ *               name:
+ *                 type: string
+ *               language:
+ *                 type: string
+ *               gender:
+ *                 type: [male, female, neutral]
+ */
+router.get('/api/ttsvoices', async (req, res, next) => {
+  try {
+    const tts = ttsEngines[(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS]
+    res.json(await tts.voices())
+  } catch (err) {
+    return next(err)
+  }
+})
+
 /**
  * @swagger
  * /api/tts/{language}:
@@ -143,7 +199,7 @@ router.post('/api/stt/:language', async (req, res, next) => {
  *       - audio/wav
  *     parameters:
  *       - name: language
- *         description: ISO-639-1 language code
+ *         description: ISO-639-1 language code (2 letters)
  *         in: path
  *         required: true
  *         schema:
@@ -154,13 +210,19 @@ router.post('/api/stt/:language', async (req, res, next) => {
  *         required: true
  *         schema:
  *           type: string
+ *       - name: voice
+ *         description: Voice name
+ *         in: query
+ *         required: false
+ *         schema:
+ *           type: string
  *       - name: tts
  *         description: Text-to-speech backend
  *         in: query
  *         required: false
  *         schema:
  *           type: string
- *           enum: [marytts, picotts]
+ *           enum: [google, marytts, picotts]
  *     responses:
  *       200:
  *         description: Audio file
@@ -175,8 +237,8 @@ router.get('/api/tts/:language', async (req, res, next) => {
     let cacheFileName = null
     let cacheFileBuffer = null
     if (cachePathTts) {
-      cacheFileName = path.join(cachePathTts, cacheKey(req.query.text, req.params.language, '.txt'))
-      cacheFileBuffer = path.join(cachePathTts, cacheKey(req.query.text, req.params.language, '.bin'))
+      cacheFileName = path.join(cachePathTts, cacheKeyTts(req.query.text, req.params.language, req.query.voice, '.txt'))
+      cacheFileBuffer = path.join(cachePathTts, cacheKeyTts(req.query.text, req.params.language, req.query.voice, '.bin'))
       if (fs.existsSync(cacheFileName) && fs.existsSync(cacheFileBuffer)) {
         try {
           const name = fs.readFileSync(cacheFileName).toString()
@@ -193,10 +255,11 @@ router.get('/api/tts/:language', async (req, res, next) => {
       }
     }
     try {
-      const tts = new (require(`./tts/${(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS}`))()
+      const tts = ttsEngines[(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS]
 
       const { buffer, name } = await tts.tts({
         language: req.params.language,
+        voice: req.query.voice,
         text: req.query.text
       })
       res.writeHead(200, {
@@ -206,9 +269,13 @@ router.get('/api/tts/:language', async (req, res, next) => {
       res.end(buffer)
 
       if (cachePathTts) {
-        fs.writeFileSync(cacheFileName, name)
-        fs.writeFileSync(cacheFileBuffer, buffer)
-        debug(`Writing tts result ${cacheFileName} to cache: ${name}`)
+        try {
+          fs.writeFileSync(cacheFileName, name)
+          fs.writeFileSync(cacheFileBuffer, buffer)
+          debug(`Writing tts result ${cacheFileName} to cache: ${name}`)
+        } catch (err) {
+          debug(`Writing tts result ${cacheFileName} to cache: ${name} - failed: ${err.message}`)
+        }
       }
     } catch (err) {
       return next(err)
diff --git a/frontend/src/stt/google.js b/frontend/src/stt/google.js
index 6769c49..a82e571 100644
--- a/frontend/src/stt/google.js
+++ b/frontend/src/stt/google.js
@@ -1,37 +1,30 @@
-const fs = require('fs')
-const uuidv1 = require('uuid/v1')
+const { v1: uuidv1 } = require('uuid')
 const speech = process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION ? require('@google-cloud/speech')[process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION] : require('@google-cloud/speech')
 const storage = require('@google-cloud/storage')
-const debug = require('debug')('botium-speech-processing-google')
+const debug = require('debug')('botium-speech-processing-google-stt')
 
-const credentialsPath = process.env.BOTIUM_SPEECH_GOOGLE_KEYFILE || './resources/google.json'
+const { googleOptions } = require('../utils')
 
-class Google {
+class GoogleSTT {
   async stt ({ language, buffer }) {
-    if (!fs.existsSync(credentialsPath)) throw new Error(`Google Cloud credentials file "${credentialsPath}" not found`)
-
-    const envVarConfig = `BOTIUM_SPEECH_GOOGLE_CONFIG_${language.toUpperCase()}`
-    if (!process.env[envVarConfig]) throw new Error(`Environment variable ${envVarConfig} empty`)
-
-    let config = null
-    try {
-      config = JSON.parse(process.env[envVarConfig])
-    } catch (err) {
-      throw new Error(`Google Cloud credentials config in ${envVarConfig} invalid: ${err.message}`)
-    }
-
-    const speechClient = new speech.SpeechClient({
-      keyFilename: credentialsPath
-    })
-    const storageClient = new storage.Storage({
-      keyFilename: credentialsPath
-    })
+    const speechClient = new speech.SpeechClient(googleOptions())
+    const storageClient = new storage.Storage(googleOptions())
 
     const request = {
-      config,
+      config: {
+        languageCode: language
+      },
       audio: {
       }
     }
+    if (process.env.BOTIUM_SPEECH_GOOGLE_CONFIG) {
+      try {
+        const defaultConfig = JSON.parse(process.env.BOTIUM_SPEECH_GOOGLE_CONFIG)
+        Object.assign(request.config, defaultConfig)
+      } catch (err) {
+        throw new Error(`Google Speech config in BOTIUM_SPEECH_GOOGLE_CONFIG invalid: ${err.message}`)
+      }
+    }
 
     const gcsFileName = `${uuidv1()}.wav`
     if (process.env.BOTIUM_SPEECH_GOOGLE_BUCKET_NAME) {
@@ -93,4 +86,4 @@ class Google {
   }
 }
 
-module.exports = Google
+module.exports = GoogleSTT
diff --git a/frontend/src/stt/kaldi.js b/frontend/src/stt/kaldi.js
index 5fd31a5..69a0484 100644
--- a/frontend/src/stt/kaldi.js
+++ b/frontend/src/stt/kaldi.js
@@ -3,7 +3,7 @@ const Mustache = require('mustache')
 const request = require('request-promise-native')
 const debug = require('debug')('botium-speech-processing-kaldi')
 
-class Kaldi {
+class KaldiSTT {
   async stt ({ language, buffer }) {
     const envVarUrl = `BOTIUM_SPEECH_KALDI_URL_${language.toUpperCase()}`
     if (!process.env[envVarUrl]) throw new Error(`Environment variable ${envVarUrl} empty`)
@@ -47,4 +47,4 @@ class Kaldi {
   }
 }
 
-module.exports = Kaldi
+module.exports = KaldiSTT
diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json
index aa4295c..f90ec54 100644
--- a/frontend/src/swagger.json
+++ b/frontend/src/swagger.json
@@ -50,7 +50,7 @@
         "parameters": [
           {
             "name": "language",
-            "description": "ISO-639-1 language code",
+            "description": "ISO-639-1 language code (2 letters)",
             "in": "path",
             "required": true,
             "schema": {
@@ -105,6 +105,61 @@
         }
       }
     },
+    "/api/ttsvoices": {
+      "get": {
+        "description": "Get list of voices",
+        "security": [
+          {
+            "ApiKeyAuth": []
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "parameters": [
+          {
+            "name": "tts",
+            "description": "Text-to-speech backend",
+            "in": "query",
+            "required": false,
+            "schema": {
+              "type": "string",
+              "enum": [
+                "google",
+                "marytts",
+                "picotts"
+              ]
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "List of supported voices",
+            "schema": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string"
+                  },
+                  "language": {
+                    "type": "string"
+                  },
+                  "gender": {
+                    "type": [
+                      "male",
+                      "female",
+                      "neutral"
+                    ]
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    },
     "/api/tts/{language}": {
       "get": {
         "description": "Convert text file to audio",
@@ -119,7 +174,7 @@
         "parameters": [
           {
             "name": "language",
-            "description": "ISO-639-1 language code",
+            "description": "ISO-639-1 language code (2 letters)",
             "in": "path",
             "required": true,
             "schema": {
@@ -135,6 +190,15 @@
               "type": "string"
             }
           },
+          {
+            "name": "voice",
+            "description": "Voice name",
+            "in": "query",
+            "required": false,
+            "schema": {
+              "type": "string"
+            }
+          },
           {
             "name": "tts",
             "description": "Text-to-speech backend",
@@ -143,6 +207,7 @@
             "schema": {
               "type": "string",
               "enum": [
+                "google",
                 "marytts",
                 "picotts"
               ]
diff --git a/frontend/src/tts/google.js b/frontend/src/tts/google.js
new file mode 100644
index 0000000..69a8666
--- /dev/null
+++ b/frontend/src/tts/google.js
@@ -0,0 +1,66 @@
+const textToSpeech = require('@google-cloud/text-to-speech')
+const debug = require('debug')('botium-speech-processing-google-tts')
+
+const { googleOptions } = require('../utils')
+
+let googleVoices = null
+
+const genderMap = {
+  MALE: 'male',
+  FEMALE: 'female',
+  NEUTRAL: 'neutral'
+}
+
+class GoogleTTS {
+  // Lists the Google Cloud voices as { name, gender, language } entries,
+  // one entry per language code of each voice. The list is kept in the
+  // module-level googleVoices after the first successful call.
+  async voices () {
+    if (googleVoices) return googleVoices
+
+    const client = new textToSpeech.TextToSpeechClient(googleOptions())
+    const [result] = await client.listVoices({})
+
+    googleVoices = []
+    result.voices.forEach(voice => {
+      voice.languageCodes.forEach(languageCode => {
+        googleVoices.push({
+          name: voice.name,
+          gender: genderMap[voice.ssmlGender],
+          // keep only the part before the first '-' of the language code
+          language: languageCode.split('-')[0]
+        })
+      })
+    })
+    return googleVoices
+  }
+
+  // Synthesizes text for the given language (and optional voice name) and
+  // resolves to { buffer, name } with the returned audio content as buffer.
+  // Throws an Error when the synthesizeSpeech call fails.
+  async tts ({ language, voice, text }) {
+    const voiceSelector = { languageCode: language }
+    if (voice) voiceSelector.name = voice
+
+    const client = new textToSpeech.TextToSpeechClient(googleOptions())
+    const request = {
+      input: { text },
+      voice: voiceSelector,
+      audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 16000 }
+    }
+
+    try {
+      const [response] = await client.synthesizeSpeech(request)
+      return {
+        buffer: response.audioContent,
+        name: 'tts.wav'
+      }
+    } catch (err) {
+      debug(err)
+      // this module is the text-to-speech engine, so the failure is a TTS one
+      throw new Error(`Google Cloud TTS failed: ${err.message}`)
+    }
+  }
+}
+
+module.exports = GoogleTTS
diff --git a/frontend/src/tts/marytts.js b/frontend/src/tts/marytts.js
index 9e3b006..1d09cd5 100644
--- a/frontend/src/tts/marytts.js
+++ b/frontend/src/tts/marytts.js
@@ -1,27 +1,57 @@
-const Mustache = require('mustache')
+const _ = require('lodash')
 const request = require('request-promise-native')
 const debug = require('debug')('botium-speech-processing-marytts')
 
+let maryVoices = null
+
 class MaryTTS {
-  build () {
+  async voices () {
+    if (maryVoices) return maryVoices
 
+    const requestOptions = {
+      method: 'GET',
+      uri: `${process.env.BOTIUM_SPEECH_MARYTTS_URL}/voices`
+    }
+    let response
+    try {
+      response = await request(requestOptions)
+    } catch (err) {
+      throw new Error(`Calling url ${requestOptions.uri} failed: ${err.message}`)
+    }
+    if (_.isString(response)) {
+      maryVoices = []
+      const lines = response.split('\n').map(l => l.trim()).filter(l => l)
+      for (const line of lines) {
+        const parts = line.split(' ')
+        maryVoices.push({
+          name: parts[0],
+          language: parts[1],
+          gender: parts[2]
+        })
+      }
+    }
+    return maryVoices
   }
 
-  async tts ({ language, text }) {
-    const envVarUrl = `BOTIUM_SPEECH_MARYTTS_URL_${language.toUpperCase()}`
-    if (!process.env[envVarUrl]) throw new Error(`Environment variable ${envVarUrl} empty`)
+  async tts ({ language, voice, text }) {
+    const voicesList = await this.voices()
+
+    const maryVoice = voicesList.find(v => {
+      if (language && v.language !== language) return false
+      if (voice && v.name !== voice) return false
+      return true
+    })
+    if (!maryVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`)
+
+    const maryUrl = `${process.env.BOTIUM_SPEECH_MARYTTS_URL}/process?INPUT_TEXT=${encodeURIComponent(text)}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=${encodeURIComponent(maryVoice.name)}&LOCALE=${encodeURIComponent(maryVoice.language)}`
 
     const requestOptions = {
       method: 'GET',
-      uri: Mustache.render(process.env[envVarUrl], {
-        language: encodeURIComponent(language),
-        text: encodeURIComponent(text)
-      }),
+      uri: maryUrl,
       encoding: null,
       resolveWithFullResponse: true,
       simple: false
     }
-
     let response
     try {
       response = await request(requestOptions)
diff --git a/frontend/src/tts/picotts.js b/frontend/src/tts/picotts.js
index 6fbaf29..83804a3 100644
--- a/frontend/src/tts/picotts.js
+++ b/frontend/src/tts/picotts.js
@@ -1,21 +1,63 @@
 const fs = require('fs')
-const Mustache = require('mustache')
 const { spawn } = require('child_process')
-const uuidv1 = require('uuid/v1')
+const { v1: uuidv1 } = require('uuid')
 const debug = require('debug')('botium-speech-processing-picotts')
 
+// Voices offered by PicoTTS. The name is handed to the command line as
+// the --lang value and the language is the 2-letter code used for lookup.
+// Lookup takes the first entry matching the requested language, so the
+// order decides the default voice (en-US for English). pico2wave supports
+// exactly these six languages.
+const voicesList = [
+  {
+    name: 'en-US',
+    language: 'en',
+    gender: 'neutral'
+  },
+  {
+    name: 'en-GB',
+    language: 'en',
+    gender: 'neutral'
+  },
+  {
+    name: 'es-ES',
+    language: 'es',
+    gender: 'neutral'
+  },
+  {
+    name: 'de-DE',
+    language: 'de',
+    gender: 'neutral'
+  },
+  {
+    name: 'fr-FR',
+    language: 'fr',
+    gender: 'neutral'
+  },
+  {
+    name: 'it-IT',
+    language: 'it',
+    gender: 'neutral'
+  }
+]
+
 class PicoTTS {
-  build () {
+  async voices () {
+    return voicesList
   }
 
-  async tts ({ language, text }) {
-    const envVarCmd = `BOTIUM_SPEECH_PICO_CMDPREFIX_${language.toUpperCase()}`
-    if (!process.env[envVarCmd]) throw new Error(`Environment variable ${envVarCmd} empty`)
+  async tts ({ language, voice, text }) {
+    const picoVoice = voicesList.find(v => {
+      if (language && v.language !== language) return false
+      if (voice && v.name !== voice) return false
+      return true
+    })
+    if (!picoVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`)
 
     return new Promise((resolve, reject) => {
       const output = `/tmp/${uuidv1()}.wav`
 
-      const cmdLinePico = Mustache.render(process.env[envVarCmd], { output })
+      const cmdLinePico = `${process.env.BOTIUM_SPEECH_PICO_CMDPREFIX || 'pico2wave'} --lang=${picoVoice.name} --wave=${output}`
       debug(`cmdLinePico: ${cmdLinePico}`)
       const cmdLinePicoParts = cmdLinePico.split(' ')
       const pico = spawn(cmdLinePicoParts[0], cmdLinePicoParts.slice(1).concat([text]))
diff --git a/frontend/src/utils.js b/frontend/src/utils.js
index a29ec4e..949a31c 100644
--- a/frontend/src/utils.js
+++ b/frontend/src/utils.js
@@ -1,3 +1,4 @@
+const fs = require('fs')
 const speechScorer = require('word-error-rate')
 
 const wer = async (text1, text2) => {
@@ -7,6 +8,25 @@ const wer = async (text1, text2) => {
   }
 }
 
+const cleanEnv = (envName) => {
+  return process.env[envName] && process.env[envName].replace(/\\n/g, '\n')
+}
+
+const googleOptions = () => {
+  const keyFilename = process.env.BOTIUM_SPEECH_GOOGLE_KEYFILE
+  if (keyFilename) {
+    if (!fs.existsSync(keyFilename)) throw new Error(`Google Cloud credentials file "${keyFilename}" not found`)
+    return { keyFilename }
+  }
+  const privateKey = cleanEnv('BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY')
+  const clientEmail = process.env.BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL
+  if (privateKey && clientEmail) {
+    return { credentials: { private_key: privateKey, client_email: clientEmail } }
+  }
+  throw new Error('Google Cloud credentials not found')
+}
+
 module.exports = {
-  wer
+  wer,
+  googleOptions
 }

From 5d5592694f27d5460420fa6260685f1a985457e5 Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 13:32:22 +0100
Subject: [PATCH 2/9] Using reasonable filenames for TTS result. Added some
 default configurations.

---
 Makefile                    | 22 +++++++++++++++++++---
 README.md                   |  6 ++++++
 dictate/Dockerfile          |  1 +
 docker-compose-dev.yml      |  6 ++++++
 docker-compose-google.yml   | 26 ++++++++++++++++++++++++++
 docker-compose-picotts.yml  | 37 +++++++++++++++++++++++++++++++++++++
 docker-compose.yml          |  6 ++++++
 frontend/Dockerfile         |  9 ++++++---
 frontend/package.json       |  1 +
 frontend/resources/.env     |  3 +++
 frontend/src/convert/sox.js |  2 +-
 frontend/src/routes.js      |  4 ++++
 frontend/src/tts/google.js  |  4 ++--
 frontend/src/tts/marytts.js |  4 +++-
 frontend/src/tts/picotts.js |  6 ++++--
 frontend/src/utils.js       |  8 ++++++++
 16 files changed, 133 insertions(+), 12 deletions(-)
 create mode 100644 docker-compose-google.yml
 create mode 100644 docker-compose-picotts.yml

diff --git a/Makefile b/Makefile
index 4e426e6..4f751c5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,23 @@
 TAG_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
 VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || true)
 
-docker_build:
+docker_build_develop: # build every service image locally, tagged :develop
+	docker build -t botium/botium-speech-frontend:develop frontend
+	docker build -t botium/botium-speech-watcher:develop watcher
+	docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop stt
+	docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:develop stt
+	docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:develop tts
+	docker build -t botium/botium-speech-dictate:develop dictate
+
+docker_publish_develop:
+  docker push botium/botium-speech-frontend:develop
+  docker push botium/botium-speech-watcher:develop
+  docker push botium/botium-speech-kaldi-en:develop
+  docker push botium/botium-speech-kaldi-de:develop
+  docker push botium/botium-speech-marytts:develop
+  docker push botium/botium-speech-dictate:develop
+
+docker_build_release:
 	docker build -t botium/botium-speech-frontend:$(VERSION) frontend
 	docker build -t botium/botium-speech-watcher:$(VERSION) watcher
 	docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:$(VERSION) stt
@@ -9,7 +25,7 @@ docker_build:
 	docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:$(VERSION) tts
 	docker build -t botium/botium-speech-dictate:$(VERSION) dictate
 
-docker_publish:
+docker_publish_release:
   docker push botium/botium-speech-frontend:$(VERSION)
   docker push botium/botium-speech-watcher:$(VERSION)
   docker push botium/botium-speech-kaldi-en:$(VERSION)
@@ -17,7 +33,7 @@ docker_publish:
   docker push botium/botium-speech-marytts:$(VERSION)
   docker push botium/botium-speech-dictate:$(VERSION)
 
-docker_latest:
+docker_latest_release:
 	docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest
   docker push botium/botium-speech-frontend:latest
 
diff --git a/README.md b/README.md
index b259ea2..df53170 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,12 @@ This project is standing on the shoulders of giants.
 
 ## Changelog
 
+### 2020-12-18
+
+* Adding support for Google Text-To-Speech
+* Adding support for listing and using available TTS voices
+* Added sample docker-compose configurations for PicoTTS and Google
+
 ### 2020-03-05
 
 * Optional _start_/_end_ parameters for audio file conversion to trim an audio file by time codes formatted as mm:ss (_01:32_)
diff --git a/dictate/Dockerfile b/dictate/Dockerfile
index 3574082..ac9b726 100644
--- a/dictate/Dockerfile
+++ b/dictate/Dockerfile
@@ -11,4 +11,5 @@ RUN curl -L -o dicatejs.zip "https://github.com/Kaljurand/dictate.js/archive/mas
 
 EXPOSE 56100
 
+USER node
 CMD DICTATEDIR=/app/dictate.js-master npm start
\ No newline at end of file
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 671750e..8e9e5a8 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -10,12 +10,14 @@ services:
   frontend:
     build:
       context: frontend
+    image: botium/botium-speech-frontend:develop
     restart: always
     volumes:
       - "./frontend/resources:/app/resources"
 #  watcher:
 #    build:
 #      context: watcher
+#    image: botium/botium-speech-watcher:develop
 #    restart: always
 #    volumes:
 #      - "./watcher:/app/watch"
@@ -24,6 +26,7 @@ services:
 #    build:
 #      context: stt
 #      dockerfile: Dockerfile.kaldi.en
+#    image: botium/botium-speech-kaldi-en:develop
 #    restart: always
 #    volumes:
 #      - "./logs/stt_en:/opt/logs"
@@ -31,6 +34,7 @@ services:
 #    build:
 #      context: stt
 #      dockerfile: Dockerfile.kaldi.de
+#    image: botium/botium-speech-kaldi-de:develop
 #    restart: always
 #    volumes:
 #      - "./logs/stt_de:/opt/logs"
@@ -38,8 +42,10 @@ services:
     build:
       context: tts
       dockerfile: Dockerfile.marytts
+    image: botium/botium-speech-marytts:develop
     restart: always
 #  dictate:
 #    build:
 #      context: dictate
+#    image: botium/botium-speech-dictate:develop
 #    restart: always
diff --git a/docker-compose-google.yml b/docker-compose-google.yml
new file mode 100644
index 0000000..e2c00c3
--- /dev/null
+++ b/docker-compose-google.yml
@@ -0,0 +1,26 @@
+version: '3'
+services:
+  nginx:
+    image: nginx
+    restart: always
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf
+    ports:
+      - 80:80
+  frontend:
+    image: botium/botium-speech-frontend:latest
+    restart: always
+    environment:
+      BOTIUM_API_TOKENS: 
+      BOTIUM_SPEECH_PROVIDER_TTS: google
+      BOTIUM_SPEECH_PROVIDER_STT: google
+      BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL:
+      BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY:
+    volumes:
+      - "./frontend/resources:/app/resources"
+  watcher:
+    image: botium/botium-speech-watcher:latest
+    restart: always
+    volumes:
+      - "./watcher:/app/watch"
+      - "./logs/watcher:/app/logs"
diff --git a/docker-compose-picotts.yml b/docker-compose-picotts.yml
new file mode 100644
index 0000000..ee88a4e
--- /dev/null
+++ b/docker-compose-picotts.yml
@@ -0,0 +1,37 @@
+version: '3'
+services:
+  nginx:
+    image: nginx
+    restart: always
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf
+    ports:
+      - 80:80
+  frontend:
+    image: botium/botium-speech-frontend:latest
+    restart: always
+    environment:
+      BOTIUM_API_TOKENS: 
+      BOTIUM_SPEECH_PROVIDER_TTS: picotts
+      BOTIUM_SPEECH_PROVIDER_STT: kaldi
+    volumes:
+      - "./frontend/resources:/app/resources"
+  watcher:
+    image: botium/botium-speech-watcher:latest
+    restart: always
+    volumes:
+      - "./watcher:/app/watch"
+      - "./logs/watcher:/app/logs"
+  stt_en:
+    image: botium/botium-speech-kaldi-en:latest
+    restart: always
+    volumes:
+      - "./logs/stt_en:/opt/logs"
+  stt_de:
+    image: botium/botium-speech-kaldi-de:latest
+    restart: always
+    volumes:
+      - "./logs/stt_de:/opt/logs"
+  dictate:
+    image: botium/botium-speech-dictate:latest
+    restart: always
diff --git a/docker-compose.yml b/docker-compose.yml
index b187ea9..176147c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,6 +10,12 @@ services:
   frontend:
     image: botium/botium-speech-frontend:latest
     restart: always
+    environment:
+      BOTIUM_API_TOKENS: 
+      BOTIUM_SPEECH_PROVIDER_TTS: marytts
+      BOTIUM_SPEECH_PROVIDER_STT: kaldi
+      BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL:
+      BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY:
     volumes:
       - "./frontend/resources:/app/resources"
   watcher:
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index be7d89d..9f3176c 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -4,12 +4,15 @@ RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.node
 RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils
 
 WORKDIR /app
-COPY . /app
-RUN find . -type f -print0 | xargs -0 dos2unix
+COPY ./package.json /app/package.json
 RUN npm install --no-optional --production
+COPY . /app
+RUN find . -type f ! -path '*/node_modules/*' -print0 | xargs -0 dos2unix
 
 VOLUME /app/resources
 
 EXPOSE 56000
 
-CMD npm start
\ No newline at end of file
+RUN groupadd --gid 1000 node && useradd --uid 1000 --gid node --shell /bin/bash --create-home node
+USER node
+CMD npm run start-dist
\ No newline at end of file
diff --git a/frontend/package.json b/frontend/package.json
index 4c870cd..4ec94be 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -6,6 +6,7 @@
     "eslint": "eslint src",
     "eslint-fix": "eslint --fix src",
     "start": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js",
+    "start-dist": "cross-env DOTENV_FLOW_PATH=./resources node -r dotenv-flow/config ./src/server.js",
     "jsdoc": "swagger-jsdoc -d ./src/swaggerDef.json -o ./src/swagger.json src/routes.js"
   },
   "author": "Botium GmbH",
diff --git a/frontend/resources/.env b/frontend/resources/.env
index d5c6f4e..afc07e5 100644
--- a/frontend/resources/.env
+++ b/frontend/resources/.env
@@ -10,6 +10,9 @@ BOTIUM_SPEECH_UPLOAD_LIMIT=50mb
 # Cache Path
 BOTIUM_SPEECH_CACHE_DIR=./resources/.cache
 
+# Temp Path
+BOTIUM_SPEECH_TMP_DIR=./resources/.tmp
+
 # Provider for TTS (google or marytts or picotts)
 BOTIUM_SPEECH_PROVIDER_TTS=marytts
 
diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js
index bf9a00d..59ef6c3 100644
--- a/frontend/src/convert/sox.js
+++ b/frontend/src/convert/sox.js
@@ -6,7 +6,7 @@ const debug = require('debug')('botium-speech-processing-convert-sox')
 
 const runsox = (cmdLine, { inputBuffer, start, end }) => {
   return new Promise((resolve, reject) => {
-    const output = `/tmp/${uuidv1()}.wav`
+    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav`
 
     let cmdLineSox = Mustache.render(cmdLine, { output })
     if (start && end) {
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
index ab60a31..93f67c4 100644
--- a/frontend/src/routes.js
+++ b/frontend/src/routes.js
@@ -16,6 +16,10 @@ const cacheKeyTts = (data, language, voice, ext) => `${crypto.createHash('md5').
 if (cachePathStt) mkdirp.sync(cachePathStt)
 if (cachePathTts) mkdirp.sync(cachePathTts)
 
+if (process.env.BOTIUM_SPEECH_TMP_DIR) {
+  mkdirp.sync(process.env.BOTIUM_SPEECH_TMP_DIR)
+}
+
 const ttsEngines = {
   google: new (require('./tts/google'))(),
   marytts: new (require('./tts/marytts'))(),
diff --git a/frontend/src/tts/google.js b/frontend/src/tts/google.js
index 69a8666..568d6a9 100644
--- a/frontend/src/tts/google.js
+++ b/frontend/src/tts/google.js
@@ -1,7 +1,7 @@
 const textToSpeech = require('@google-cloud/text-to-speech')
 const debug = require('debug')('botium-speech-processing-google-tts')
 
-const { googleOptions } = require('../utils')
+const { googleOptions, ttsFilename } = require('../utils')
 
 let googleVoices = null
 
@@ -54,7 +54,7 @@ class GoogleTTS {
       const [response] = await client.synthesizeSpeech(request)
       return {
         buffer: response.audioContent,
-        name: 'tts.wav'
+        name: `${ttsFilename(text)}.wav`
       }
     } catch (err) {
       debug(err)
diff --git a/frontend/src/tts/marytts.js b/frontend/src/tts/marytts.js
index 1d09cd5..ed95ed7 100644
--- a/frontend/src/tts/marytts.js
+++ b/frontend/src/tts/marytts.js
@@ -2,6 +2,8 @@ const _ = require('lodash')
 const request = require('request-promise-native')
 const debug = require('debug')('botium-speech-processing-marytts')
 
+const { ttsFilename } = require('../utils')
+
 let maryVoices = null
 
 class MaryTTS {
@@ -62,7 +64,7 @@ class MaryTTS {
       debug(`Called url ${requestOptions.uri} success`)
       return {
         buffer: response.body,
-        name: 'tts.wav'
+        name: `${ttsFilename(text)}.wav`
       }
     } else {
       throw new Error(`Calling url ${requestOptions.uri} failed with code ${response.statusCode}: ${response.statusMessage}`)
diff --git a/frontend/src/tts/picotts.js b/frontend/src/tts/picotts.js
index 83804a3..7b6e431 100644
--- a/frontend/src/tts/picotts.js
+++ b/frontend/src/tts/picotts.js
@@ -3,6 +3,8 @@ const { spawn } = require('child_process')
 const { v1: uuidv1 } = require('uuid')
 const debug = require('debug')('botium-speech-processing-picotts')
 
+const { ttsFilename } = require('../utils')
+
 const voicesList = [
   {
     name: 'en-EN',
@@ -55,7 +57,7 @@ class PicoTTS {
     if (!picoVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`)
 
     return new Promise((resolve, reject) => {
-      const output = `/tmp/${uuidv1()}.wav`
+      const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav`
 
       const cmdLinePico = `${process.env.BOTIUM_SPEECH_PICO_CMDPREFIX || 'pico2wave'} --lang=${picoVoice.name} --wave=${output}`
       debug(`cmdLinePico: ${cmdLinePico}`)
@@ -70,7 +72,7 @@ class PicoTTS {
             fs.unlinkSync(output)
             resolve({
               buffer: outputBuffer,
-              name: 'tts.wav'
+              name: `${ttsFilename(text)}.wav`
             })
           } catch (err) {
             reject(new Error(`pico2wave process output file ${output} not readable: ${err.message}`))
diff --git a/frontend/src/utils.js b/frontend/src/utils.js
index 949a31c..306544c 100644
--- a/frontend/src/utils.js
+++ b/frontend/src/utils.js
@@ -1,4 +1,6 @@
 const fs = require('fs')
+const _ = require('lodash')
+const sanitize = require('sanitize-filename')
 const speechScorer = require('word-error-rate')
 
 const wer = async (text1, text2) => {
@@ -8,6 +10,11 @@ const wer = async (text1, text2) => {
   }
 }
 
+const ttsFilename = (text) => { // derives a download file name (without extension) from the spoken text
+  const shortenedText = _.truncate(text, { length: 500 })
+  return sanitize(shortenedText) || 'tts' // sanitize() can strip every character (e.g. "???") - fall back to the former fixed name
+}
+
 const cleanEnv = (envName) => {
   return process.env[envName] && process.env[envName].replace(/\\n/g, '\n')
 }
@@ -28,5 +35,6 @@ const googleOptions = () => {
 
 module.exports = {
   wer,
+  ttsFilename,
   googleOptions
 }

From 570097c36a83d6296a3551c7aa61b51c815673e0 Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 15:44:24 +0100
Subject: [PATCH 3/9] BOT-1862 added support for webm

---
 docker-compose-dev.yml          | 46 ++++++++++++-------------
 frontend/Dockerfile             |  2 +-
 frontend/resources/.env         |  8 +++--
 frontend/src/convert/convert.js | 60 ++++++++++++++++++++++++++++++++
 frontend/src/convert/sox.js     | 61 ---------------------------------
 frontend/src/routes.js          | 34 ++++++++++++++----
 frontend/src/swagger.json       | 26 +++++++++++++-
 7 files changed, 142 insertions(+), 95 deletions(-)
 create mode 100644 frontend/src/convert/convert.js
 delete mode 100644 frontend/src/convert/sox.js

diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 8e9e5a8..7c99e19 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -22,30 +22,30 @@ services:
 #    volumes:
 #      - "./watcher:/app/watch"
 #      - "./logs/watcher:/app/logs"
-#  stt_en:
-#    build:
-#      context: stt
-#      dockerfile: Dockerfile.kaldi.en
-#    image: botium/botium-speech-kaldi-en:develop
-#    restart: always
-#    volumes:
-#      - "./logs/stt_en:/opt/logs"
-#  stt_de:
-#    build:
-#      context: stt
-#      dockerfile: Dockerfile.kaldi.de
-#    image: botium/botium-speech-kaldi-de:develop
-#    restart: always
-#    volumes:
-#      - "./logs/stt_de:/opt/logs"
-  tts:
+  stt_en:
     build:
-      context: tts
-      dockerfile: Dockerfile.marytts
-    image: botium/botium-speech-marytts:develop
+      context: stt
+      dockerfile: Dockerfile.kaldi.en
+    image: botium/botium-speech-kaldi-en:develop
     restart: always
-#  dictate:
+    volumes:
+      - "./logs/stt_en:/opt/logs"
+  stt_de:
+    build:
+      context: stt
+      dockerfile: Dockerfile.kaldi.de
+    image: botium/botium-speech-kaldi-de:develop
+    restart: always
+    volumes:
+      - "./logs/stt_de:/opt/logs"
+#  tts:
 #    build:
-#      context: dictate
-#    image: botium/botium-speech-dictate:develop
+#      context: tts
+#      dockerfile: Dockerfile.marytts
+#    image: botium/botium-speech-marytts:develop
 #    restart: always
+  dictate:
+    build:
+      context: dictate
+    image: botium/botium-speech-dictate:develop
+    restart: always
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index 9f3176c..d84bf5f 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x  | bash - && apt-get -y install nodejs
-RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils
+RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg
 
 WORKDIR /app
 COPY ./package.json /app/package.json
diff --git a/frontend/resources/.env b/frontend/resources/.env
index afc07e5..c1c7dc8 100644
--- a/frontend/resources/.env
+++ b/frontend/resources/.env
@@ -39,7 +39,9 @@ BOTIUM_SPEECH_GOOGLE_CONFIG={}
 #BOTIUM_SPEECH_GOOGLE_API_VERSION=
 
 # WAV Conversation Command Line
-BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_SOX=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
 BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
-BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_SOX=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
-BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
\ No newline at end of file
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
diff --git a/frontend/src/convert/convert.js b/frontend/src/convert/convert.js
new file mode 100644
index 0000000..f70df2a
--- /dev/null
+++ b/frontend/src/convert/convert.js
@@ -0,0 +1,60 @@
+const fs = require('fs')
+const Mustache = require('mustache')
+const { spawn } = require('child_process')
+const { v1: uuidv1 } = require('uuid')
+const debug = require('debug')('botium-speech-processing-convert')
+
+const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => { // pipes inputBuffer through the shell command cmdLine and resolves with the content of the rendered output file
+  return new Promise((resolve, reject) => {
+    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`
+
+    let cmdLineFull = Mustache.render(cmdLine, { output })
+    // start/end are caller-supplied and are appended to a /bin/sh command line - accept sox time codes only
+    const badTimeCode = [start, end].find(t => t && !/^[=+-]?\d+(:\d+){0,2}(\.\d+)?[st]?$/.test(t))
+    if (badTimeCode) return reject(new Error(`Invalid time code "${badTimeCode}"`))
+    if (start || end) {
+      // sox "trim" effect: the start position falls back to 0 when only an end is given
+      cmdLineFull = `${cmdLineFull} trim ${start || 0}${end ? ` ${end}` : ''}`
+    }
+    debug(`cmdLineFull: ${cmdLineFull}`)
+    const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])
+
+    childProcess.once('exit', (code, signal) => {
+      debug(`conversion process exited with code ${code}, signal ${signal}`)
+      if (code === 0) {
+        try {
+          const outputBuffer = fs.readFileSync(output)
+          fs.unlinkSync(output)
+          resolve(outputBuffer)
+        } catch (err) {
+          reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
+        }
+      } else {
+        reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
+      }
+    })
+    childProcess.once('error', (err) => {
+      debug(`conversion process failed: ${err.message}`)
+      reject(new Error(`conversion process failed: ${err.message}`))
+    })
+    childProcess.stdout.on('error', (err) => {
+      debug('stdout err ' + err)
+    })
+    childProcess.stderr.on('error', (err) => {
+      debug('stderr err ' + err)
+    })
+    childProcess.stdin.on('error', (err) => {
+      debug('stdin err ' + err)
+    })
+    childProcess.stderr.on('data', (data) => {
+      debug('stderr ' + data)
+    })
+
+    childProcess.stdin.write(inputBuffer)
+    childProcess.stdin.end()
+  })
+}
+
+module.exports = {
+  runconvert
+}
diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js
deleted file mode 100644
index 59ef6c3..0000000
--- a/frontend/src/convert/sox.js
+++ /dev/null
@@ -1,61 +0,0 @@
-const fs = require('fs')
-const Mustache = require('mustache')
-const { spawn } = require('child_process')
-const { v1: uuidv1 } = require('uuid')
-const debug = require('debug')('botium-speech-processing-convert-sox')
-
-const runsox = (cmdLine, { inputBuffer, start, end }) => {
-  return new Promise((resolve, reject) => {
-    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav`
-
-    let cmdLineSox = Mustache.render(cmdLine, { output })
-    if (start && end) {
-      cmdLineSox = `${cmdLineSox} trim ${start} ${end}`
-    } else if (start && !end) {
-      cmdLineSox = `${cmdLineSox} trim ${start}`
-    } else if (!start && end) {
-      cmdLineSox = `${cmdLineSox} trim 0 ${end}`
-    }
-    debug(`cmdLineSox: ${cmdLineSox}`)
-    const cmdLineSoxParts = cmdLineSox.split(' ')
-    const sox = spawn(cmdLineSoxParts[0], cmdLineSoxParts.slice(1))
-
-    sox.once('exit', (code, signal) => {
-      debug(`sox process exited with code ${code}, signal ${signal}`)
-      if (code === 0) {
-        try {
-          const outputBuffer = fs.readFileSync(output)
-          fs.unlinkSync(output)
-          resolve(outputBuffer)
-        } catch (err) {
-          reject(new Error(`sox process output file ${output} not readable: ${err.message}`))
-        }
-      } else {
-        reject(new Error(`sox process exited with code ${code}, signal ${signal}`))
-      }
-    })
-    sox.once('error', (err) => {
-      debug(`sox process failed: ${err.message}`)
-      reject(new Error(`sox process failed: ${err.message}`))
-    })
-    sox.stdout.on('error', (err) => {
-      debug('stdout err ' + err)
-    })
-    sox.stderr.on('error', (err) => {
-      debug('stderr err ' + err)
-    })
-    sox.stdin.on('error', (err) => {
-      debug('stdin err ' + err)
-    })
-    sox.stderr.on('data', (data) => {
-      debug('stderr ' + data)
-    })
-
-    sox.stdin.write(inputBuffer)
-    sox.stdin.end()
-  })
-}
-
-module.exports = {
-  runsox
-}
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
index 93f67c4..0385427 100644
--- a/frontend/src/routes.js
+++ b/frontend/src/routes.js
@@ -4,7 +4,7 @@ const mkdirp = require('mkdirp')
 const crypto = require('crypto')
 const express = require('express')
 const sanitize = require('sanitize-filename')
-const { runsox } = require('./convert/sox')
+const { runconvert } = require('./convert/convert')
 const { wer } = require('./utils')
 const debug = require('debug')('botium-speech-processing-routes')
 
@@ -289,6 +289,27 @@ router.get('/api/tts/:language', async (req, res, next) => {
   }
 })
 
+/**
+ * @swagger
+ * /api/convertprofiles:
+ *   get:
+ *     description: Get list of audio conversion profile
+ *     security:
+ *       - ApiKeyAuth: []
+ *     produces:
+ *       - application/json
+ *     responses:
+ *       200:
+ *         description: List of supported audio conversion profiles
+ *         schema:
+ *           type: array
+ *           items:
+ *             type: string
+ */
+router.get('/api/convertprofiles', async (req, res, next) => { // lists the profile names configured through BOTIUM_SPEECH_CONVERT_PROFILE_<NAME>_CMD
+  res.json(Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.slice('BOTIUM_SPEECH_CONVERT_PROFILE_'.length, -'_CMD'.length)).filter(Boolean)) // strip prefix/suffix so names containing "_" stay intact
+})
+
 /**
  * @swagger
  * /api/convert/{profile}:
@@ -320,7 +341,7 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *     requestBody:
  *       description: Audio file
  *       content:
- *         audio/*:
+ *         audio/wav:
  *           schema:
  *             type: string
  *             format: binary
@@ -334,12 +355,13 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *               format: binary
  */
 router.post('/api/convert/:profile', async (req, res, next) => {
+  console.log(req.body)
   if (!Buffer.isBuffer(req.body)) {
     return next(new Error('req.body is not a buffer'))
   }
-  const envVarSox = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_SOX`
-  if (!process.env[envVarSox]) {
-    return next(new Error(`Environment variable ${envVarSox} empty`))
+  const envVarCmd = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_CMD`
+  if (!process.env[envVarCmd]) {
+    return next(new Error(`Environment variable ${envVarCmd} empty`))
   }
   const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
   if (!process.env[envVarOutput]) {
@@ -347,7 +369,7 @@ router.post('/api/convert/:profile', async (req, res, next) => {
   }
 
   try {
-    const outputBuffer = await runsox(process.env[envVarSox], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
+    const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
     res.writeHead(200, {
       'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
       'Content-Length': outputBuffer.length
diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json
index f90ec54..33865a4 100644
--- a/frontend/src/swagger.json
+++ b/frontend/src/swagger.json
@@ -229,6 +229,30 @@
         }
       }
     },
+    "/api/convertprofiles": {
+      "get": {
+        "description": "Get list of audio conversion profile",
+        "security": [
+          {
+            "ApiKeyAuth": []
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of supported audio conversion profiles",
+            "schema": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            }
+          }
+        }
+      }
+    },
     "/api/convert/{profile}": {
       "post": {
         "description": "Convert audio file",
@@ -272,7 +296,7 @@
         "requestBody": {
           "description": "Audio file",
           "content": {
-            "audio/*": {
+            "audio/wav": {
               "schema": {
                 "type": "string",
                 "format": "binary"

From fe594f8abc9e8d2ef2b8b4cfa7751f468b0c8a2f Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 16:06:57 +0100
Subject: [PATCH 4/9] docker-compose

---
 docker-compose-dev.yml | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 7c99e19..82ad67e 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -14,14 +14,14 @@ services:
     restart: always
     volumes:
       - "./frontend/resources:/app/resources"
-#  watcher:
-#    build:
-#      context: watcher
-#    image: botium/botium-speech-watcher:develop
-#    restart: always
-#    volumes:
-#      - "./watcher:/app/watch"
-#      - "./logs/watcher:/app/logs"
+  watcher:
+    build:
+      context: watcher
+    image: botium/botium-speech-watcher:develop
+    restart: always
+    volumes:
+      - "./watcher:/app/watch"
+      - "./logs/watcher:/app/logs"
   stt_en:
     build:
       context: stt
@@ -38,12 +38,12 @@ services:
     restart: always
     volumes:
       - "./logs/stt_de:/opt/logs"
-#  tts:
-#    build:
-#      context: tts
-#      dockerfile: Dockerfile.marytts
-#    image: botium/botium-speech-marytts:develop
-#    restart: always
+  tts:
+    build:
+      context: tts
+      dockerfile: Dockerfile.marytts
+    image: botium/botium-speech-marytts:develop
+    restart: always
   dictate:
     build:
       context: dictate

From 281c7dd1cc203bbdae2c64e2d3412f67d973a480 Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 16:29:59 +0100
Subject: [PATCH 5/9] .env file for docker-compose

---
 .env                                 |  1 +
 .env.develop                         |  2 ++
 .gitignore                           |  3 ++-
 docker-compose.override.template.yml |  7 +++++++
 docker-compose.yml                   | 12 ++++++------
 5 files changed, 18 insertions(+), 7 deletions(-)
 create mode 100644 .env.develop
 create mode 100644 docker-compose.override.template.yml

diff --git a/.env b/.env
index c920216..b9cf953 100644
--- a/.env
+++ b/.env
@@ -1 +1,2 @@
 COMPOSE_PROJECT_NAME=botiumspeechprocessing
+TAG=latest
diff --git a/.env.develop b/.env.develop
new file mode 100644
index 0000000..29a9aac
--- /dev/null
+++ b/.env.develop
@@ -0,0 +1,2 @@
+COMPOSE_PROJECT_NAME=botiumspeechprocessing
+TAG=develop
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cd3d225..46f714c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-logs
\ No newline at end of file
+logs
+docker-compose.override.yml
\ No newline at end of file
diff --git a/docker-compose.override.template.yml b/docker-compose.override.template.yml
new file mode 100644
index 0000000..c787ca6
--- /dev/null
+++ b/docker-compose.override.template.yml
@@ -0,0 +1,7 @@
+version: '3'
+services:
+  dictate:
+    image: botium/botium-speech-dictate:develop
+    environment:
+      STT_URL_DE: wss://speech.botiumbox.com/stt_de
+      STT_URL_EN: wss://speech.botiumbox.com/stt_en
diff --git a/docker-compose.yml b/docker-compose.yml
index 176147c..4ed7ec4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,7 +8,7 @@ services:
     ports:
       - 80:80
   frontend:
-    image: botium/botium-speech-frontend:latest
+    image: botium/botium-speech-frontend:${TAG}
     restart: always
     environment:
       BOTIUM_API_TOKENS: 
@@ -19,24 +19,24 @@ services:
     volumes:
       - "./frontend/resources:/app/resources"
   watcher:
-    image: botium/botium-speech-watcher:latest
+    image: botium/botium-speech-watcher:${TAG}
     restart: always
     volumes:
       - "./watcher:/app/watch"
       - "./logs/watcher:/app/logs"
   stt_en:
-    image: botium/botium-speech-kaldi-en:latest
+    image: botium/botium-speech-kaldi-en:${TAG}
     restart: always
     volumes:
       - "./logs/stt_en:/opt/logs"
   stt_de:
-    image: botium/botium-speech-kaldi-de:latest
+    image: botium/botium-speech-kaldi-de:${TAG}
     restart: always
     volumes:
       - "./logs/stt_de:/opt/logs"
   tts:
-    image: botium/botium-speech-marytts:latest
+    image: botium/botium-speech-marytts:${TAG}
     restart: always
   dictate:
-    image: botium/botium-speech-dictate:latest
+    image: botium/botium-speech-dictate:${TAG}
     restart: always

From f8b0048005d09a5c9f12c9825d27de09f7ba1c7d Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 16:37:58 +0100
Subject: [PATCH 6/9] tabs vs spaces

---
 Makefile | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/Makefile b/Makefile
index 4f751c5..90c065c 100644
--- a/Makefile
+++ b/Makefile
@@ -10,12 +10,12 @@ docker_build_develop:
 	docker build -t botium/botium-speech-dictate:develop dictate
 
 docker_publish_develop:
-  docker push botium/botium-speech-frontend:develop
-  docker push botium/botium-speech-watcher:develop
-  docker push botium/botium-speech-kaldi-en:develop
-  docker push botium/botium-speech-kaldi-de:develop
-  docker push botium/botium-speech-marytts:develop
-  docker push botium/botium-speech-dictate:develop
+	docker push botium/botium-speech-frontend:develop
+	docker push botium/botium-speech-watcher:develop
+	docker push botium/botium-speech-kaldi-en:develop
+	docker push botium/botium-speech-kaldi-de:develop
+	docker push botium/botium-speech-marytts:develop
+	docker push botium/botium-speech-dictate:develop
 
 docker_build_release:
 	docker build -t botium/botium-speech-frontend:$(VERSION) frontend
@@ -26,28 +26,28 @@ docker_build_release:
 	docker build -t botium/botium-speech-dictate:$(VERSION) dictate
 
 docker_publish_release:
-  docker push botium/botium-speech-frontend:$(VERSION)
-  docker push botium/botium-speech-watcher:$(VERSION)
-  docker push botium/botium-speech-kaldi-en:$(VERSION)
-  docker push botium/botium-speech-kaldi-de:$(VERSION)
-  docker push botium/botium-speech-marytts:$(VERSION)
-  docker push botium/botium-speech-dictate:$(VERSION)
+	docker push botium/botium-speech-frontend:$(VERSION)
+	docker push botium/botium-speech-watcher:$(VERSION)
+	docker push botium/botium-speech-kaldi-en:$(VERSION)
+	docker push botium/botium-speech-kaldi-de:$(VERSION)
+	docker push botium/botium-speech-marytts:$(VERSION)
+	docker push botium/botium-speech-dictate:$(VERSION)
 
 docker_latest_release:
 	docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest
-  docker push botium/botium-speech-frontend:latest
+	docker push botium/botium-speech-frontend:latest
 
 	docker tag botium/botium-speech-watcher:$(VERSION) botium/botium-speech-watcher:latest
-  docker push botium/botium-speech-watcher:latest
+	docker push botium/botium-speech-watcher:latest
 
 	docker tag botium/botium-speech-kaldi-en:$(VERSION) botium/botium-speech-kaldi-en:latest
-  docker push botium/botium-speech-kaldi-en:latest
+	docker push botium/botium-speech-kaldi-en:latest
 
 	docker tag botium/botium-speech-kaldi-de:$(VERSION) botium/botium-speech-kaldi-de:latest
-  docker push botium/botium-speech-kaldi-de:latest
+	docker push botium/botium-speech-kaldi-de:latest
 
 	docker tag botium/botium-speech-marytts:$(VERSION) botium/botium-speech-marytts:latest
-  docker push botium/botium-speech-marytts:latest
+	docker push botium/botium-speech-marytts:latest
 
 	docker tag botium/botium-speech-dictate:$(VERSION) botium/botium-speech-dictate:latest
-  docker push botium/botium-speech-dictate:latest
+	docker push botium/botium-speech-dictate:latest

From 94ac2955b76aa51a3d2f4ccae84f6656ee14a2d9 Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 16:44:18 +0100
Subject: [PATCH 7/9] adding apt-get update

---
 frontend/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index d84bf5f..325d9a5 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x  | bash - && apt-get -y install nodejs
-RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg
+RUN apt-get update && apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg
 
 WORKDIR /app
 COPY ./package.json /app/package.json

From 6fa4343143608ee284e1ac6bcb80734ef001d7ec Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Fri, 18 Dec 2020 16:45:38 +0100
Subject: [PATCH 8/9] fix Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 90c065c..13d8ec2 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || t
 docker_build_develop:
 	docker build -t botium/botium-speech-frontend:develop frontend
 	docker build -t botium/botium-speech-watcher:develop watcher
-	docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop) stt
+	docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop stt
 	docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:develop stt
 	docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:develop tts
 	docker build -t botium/botium-speech-dictate:develop dictate

From ea0659d6edd8e3ba7b2bab690f2691f0c58054a3 Mon Sep 17 00:00:00 2001
From: Florian Treml <florian.treml@botium.at>
Date: Mon, 21 Dec 2020 15:49:32 +0100
Subject: [PATCH 9/9] Load .env file from internal file

---
 README.md               | 5 +++--
 frontend/Dockerfile     | 1 +
 frontend/package.json   | 4 ++--
 frontend/resources/.env | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index df53170..506b093 100644
--- a/README.md
+++ b/README.md
@@ -44,10 +44,11 @@ Clone or download this repository and start with docker-compose:
 
     > docker-compose up -d
 
-This will download the prebuilt images from Dockerhub.
+This will download the latest released prebuilt images from Dockerhub. To use the latest development images instead, start with the develop environment file:
 
-Point your browser to http://127.0.0.1 to open the [Swagger UI](https://swagger.io/tools/swagger-ui/) and browse/use the API definition.
+    > docker-compose --env-file .env.develop up
 
+Point your browser to http://127.0.0.1 to open the [Swagger UI](https://swagger.io/tools/swagger-ui/) and browse/use the API definition.
 
 ### Optional: Build Docker Images
 
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index 325d9a5..776acb3 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -7,6 +7,7 @@ WORKDIR /app
 COPY ./package.json /app/package.json
 RUN npm install --no-optional --production
 COPY . /app
+COPY ./resources/.env /app/.env
 RUN find . -type f ! -path '*/node_modules/*' -print0 | xargs -0 dos2unix
 
 VOLUME /app/resources
diff --git a/frontend/package.json b/frontend/package.json
index 4ec94be..b4831cc 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -5,8 +5,8 @@
   "scripts": {
     "eslint": "eslint src",
     "eslint-fix": "eslint --fix src",
-    "start": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js",
-    "start-dist": "cross-env DOTENV_FLOW_PATH=./resources node -r dotenv-flow/config ./src/server.js",
+    "start-dev": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js",
+    "start-dist": "node -r dotenv-flow/config ./src/server.js",
     "jsdoc": "swagger-jsdoc -d ./src/swaggerDef.json -o ./src/swagger.json src/routes.js"
   },
   "author": "Botium GmbH",
diff --git a/frontend/resources/.env b/frontend/resources/.env
index c1c7dc8..4b38ffb 100644
--- a/frontend/resources/.env
+++ b/frontend/resources/.env
@@ -33,7 +33,7 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
 #BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json
 #BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL=
 #BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY=
-BOTIUM_SPEECH_GOOGLE_CONFIG={}
+#BOTIUM_SPEECH_GOOGLE_CONFIG={}
 # For files longer than 1 minute, you have to create a Google Cloud Storage Bucket as temporary storage (give read/write access to service user)
 #BOTIUM_SPEECH_GOOGLE_BUCKET_NAME=
 #BOTIUM_SPEECH_GOOGLE_API_VERSION=