From 3335120ea9cb9cf8e5e4926576de5c4b20640d6d Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 12:14:48 +0100 Subject: [PATCH 1/9] BOT-1987 added "voices" query. BOT-1986 Added Google STT --- Makefile | 37 +++++++++++++++ README.md | 5 -- build_and_push.sh | 19 -------- docker-compose-dev.yml | 50 +++++++++---------- docker-compose.yml | 4 +- frontend/package.json | 45 +++++++++--------- frontend/resources/.env | 17 ++++--- frontend/src/convert/sox.js | 2 +- frontend/src/routes.js | 95 +++++++++++++++++++++++++++++++------ frontend/src/stt/google.js | 43 +++++++---------- frontend/src/stt/kaldi.js | 4 +- frontend/src/swagger.json | 69 ++++++++++++++++++++++++++- frontend/src/tts/google.js | 66 ++++++++++++++++++++++++++ frontend/src/tts/marytts.js | 50 +++++++++++++++---- frontend/src/tts/picotts.js | 56 +++++++++++++++++++--- frontend/src/utils.js | 22 ++++++++- 16 files changed, 440 insertions(+), 144 deletions(-) create mode 100644 Makefile delete mode 100755 build_and_push.sh create mode 100644 frontend/src/tts/google.js diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4e426e6 --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +TAG_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1) +VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || true) + +docker_build: + docker build -t botium/botium-speech-frontend:$(VERSION) frontend + docker build -t botium/botium-speech-watcher:$(VERSION) watcher + docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:$(VERSION) stt + docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:$(VERSION) stt + docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:$(VERSION) tts + docker build -t botium/botium-speech-dictate:$(VERSION) dictate + +docker_publish: + docker push botium/botium-speech-frontend:$(VERSION) + docker push botium/botium-speech-watcher:$(VERSION) + docker push botium/botium-speech-kaldi-en:$(VERSION) + docker push botium/botium-speech-kaldi-de:$(VERSION) + docker push botium/botium-speech-marytts:$(VERSION) + docker push botium/botium-speech-dictate:$(VERSION) + +docker_latest: + docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest + docker push botium/botium-speech-frontend:latest + + docker tag botium/botium-speech-watcher:$(VERSION) botium/botium-speech-watcher:latest + docker push botium/botium-speech-watcher:latest + + docker tag botium/botium-speech-kaldi-en:$(VERSION) botium/botium-speech-kaldi-en:latest + docker push botium/botium-speech-kaldi-en:latest + + docker tag botium/botium-speech-kaldi-de:$(VERSION) botium/botium-speech-kaldi-de:latest + docker push botium/botium-speech-kaldi-de:latest + + docker tag botium/botium-speech-marytts:$(VERSION) botium/botium-speech-marytts:latest + docker push botium/botium-speech-marytts:latest + + docker tag botium/botium-speech-dictate:$(VERSION) botium/botium-speech-dictate:latest + docker push botium/botium-speech-dictate:latest diff --git a/README.md b/README.md index ca84c9d..b259ea2 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,6 @@ Botium Speech Processing is a unified, developer-friendly API to the best available free and Open-Source Speech-To-Text and Text-To-Speech services. -**UPDATE 2020/06/15:** As Chatbots grow in importance, automated testing solutions will remain critical for ensuring that Chatbots actually do what their designers intend. We've been busy working on a product that allows testers to have visual insights and deeper understanding in their Chatbot's performance, offering several solutions to boost their interaction! -[Botium Coach will be introduced to the market as part of our online event on the 24th of June.](https://www.botium.ai/coach/) - -[![](http://img.youtube.com/vi/WsNaDfZ7WHk/0.jpg)](http://www.youtube.com/watch?v=WsNaDfZ7WHk "Botium Coach is coming on 24th of June") - ## What is it ? Botium Speech Processing is a *get-shit-done*-style Open-Source software stack, the configuration options are rudimentary: it is highly opinionated about the included tools, just get the shit done. diff --git a/build_and_push.sh b/build_and_push.sh deleted file mode 100755 index 833266d..0000000 --- a/build_and_push.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -echo "building docker images" -docker build -t botium/botium-speech-frontend frontend -docker build -t botium/botium-speech-watcher watcher -docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en stt -docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de stt -docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts tts -docker build -t botium/botium-speech-dictate dictate - -if [ "$1" == "--push" ]; then - echo "pushing docker images" - docker push botium/botium-speech-frontend - docker push botium/botium-speech-watcher - docker push botium/botium-speech-kaldi-en - docker push botium/botium-speech-kaldi-de - docker push botium/botium-speech-marytts - docker push botium/botium-speech-dictate -fi diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 27a8813..671750e 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -13,33 +13,33 @@ services: restart: always volumes: - "./frontend/resources:/app/resources" - watcher: - build: - context: watcher - restart: always - volumes: - - "./watcher:/app/watch" - - "./logs/watcher:/app/logs" - stt_en: - build: - context: stt - dockerfile: Dockerfile.kaldi.en - restart: always - volumes: - - "./logs/stt_en:/opt/logs" - stt_de: - build: - context: stt - dockerfile: Dockerfile.kaldi.de - restart: always - volumes: - - "./logs/stt_de:/opt/logs" +# watcher: +# build: +# context: watcher +# restart: always +# volumes: +# - "./watcher:/app/watch" +# - "./logs/watcher:/app/logs" +# stt_en: +# build: +# context: stt +# dockerfile: Dockerfile.kaldi.en +# restart: always +# volumes: +# - "./logs/stt_en:/opt/logs" +# stt_de: +# build: +# context: stt +# dockerfile: Dockerfile.kaldi.de +# restart: always +# volumes: +# - "./logs/stt_de:/opt/logs" tts: build: context: tts dockerfile: Dockerfile.marytts restart: always - dictate: - build: - context: dictate - restart: always +# dictate: +# build: +# context: dictate +# restart: always diff --git a/docker-compose.yml b/docker-compose.yml index de9a527..b187ea9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,8 +29,8 @@ services: volumes: - "./logs/stt_de:/opt/logs" tts: - image: botium/botium-speech-marytts + image: botium/botium-speech-marytts:latest restart: always dictate: - image: botium/botium-speech-dictate + image: botium/botium-speech-dictate:latest restart: always diff --git a/frontend/package.json b/frontend/package.json index 2a4940d..4c870cd 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -11,33 +11,34 @@ "author": "Botium GmbH", "license": "MIT", "dependencies": { - "@google-cloud/speech": "^3.6.0", - "@google-cloud/storage": "^4.3.0", + "@google-cloud/speech": "^4.1.5", + "@google-cloud/storage": "^5.7.0", + "@google-cloud/text-to-speech": "^3.1.3", "body-parser": "^1.19.0", - "cross-env": "^7.0.0", - "debug": "^4.1.1", - "dotenv-flow": "^3.1.0", + "cross-env": "^7.0.3", + "debug": "^4.3.1", + "dotenv-flow": "^3.2.0", "express": "^4.17.1", - "express-winston": "^4.0.1", - "lodash": "^4.17.15", - "mkdirp": "^0.5.1", - "mustache": "^3.1.0", - "nodemon": "^2.0.1", - "request": "^2.88.0", - "request-promise-native": "^1.0.8", + "express-winston": "^4.0.5", + "lodash": "^4.17.20", + "mkdirp": "^1.0.4", + "mustache": "^4.1.0", + "nodemon": "^2.0.6", + "request": "^2.88.2", + "request-promise-native": "^1.0.9", "sanitize-filename": "^1.6.3", - "swagger-jsdoc": "^3.5.0", - "swagger-ui-express": "^4.1.2", - "uuid": "^3.3.3", - "winston": "^3.2.1", - "word-error-rate": "0.0.7" + "swagger-jsdoc": "^6.0.0-rc.5", + "swagger-ui-express": "^4.1.5", + "uuid": "^8.3.2", + "winston": "^3.3.3", + "word-error-rate": "^0.0.7" }, "devDependencies": { - "eslint": "^6.7.2", - "eslint-config-standard": "^14.1.0", - "eslint-plugin-import": "^2.19.1", - "eslint-plugin-node": "^10.0.0", + "eslint": "^7.15.0", + "eslint-config-standard": "^16.0.2", + "eslint-plugin-import": "^2.22.1", + "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.2.1", - "eslint-plugin-standard": "^4.0.1" + "eslint-plugin-standard": "^5.0.0" } } diff --git a/frontend/resources/.env b/frontend/resources/.env index f0b78c5..d5c6f4e 100644 --- a/frontend/resources/.env +++ b/frontend/resources/.env @@ -10,28 +10,27 @@ BOTIUM_SPEECH_UPLOAD_LIMIT=50mb # Cache Path BOTIUM_SPEECH_CACHE_DIR=./resources/.cache -# Provider for TTS (marytts or picotts) +# Provider for TTS (google or marytts or picotts) BOTIUM_SPEECH_PROVIDER_TTS=marytts # Provider for STT (kaldi or google) BOTIUM_SPEECH_PROVIDER_STT=kaldi -# TTS Provider MaryTTS URLs -BOTIUM_SPEECH_MARYTTS_URL_EN=http://tts:59125/process?INPUT_TEXT={{text}}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=dfki-spike&LOCALE=en_GB -BOTIUM_SPEECH_MARYTTS_URL_DE=http://tts:59125/process?INPUT_TEXT={{text}}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=bits3-hsmm&LOCALE=de +# TTS Provider MaryTTS URLs +BOTIUM_SPEECH_MARYTTS_URL=http://tts:59125 # TTS Provider Pico Command Line -BOTIUM_SPEECH_PICO_CMDPREFIX_EN=pico2wave --lang=en-US --wave={{{output}}} -BOTIUM_SPEECH_PICO_CMDPREFIX_DE=pico2wave --lang=de-DE --wave={{{output}}} +BOTIUM_SPEECH_PICO_CMDPREFIX=pico2wave # STT Provider Kaldi URLs BOTIUM_SPEECH_KALDI_URL_EN=http://stt_en:80/client/dynamic/recognize BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize # STT Provider Google -BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json -BOTIUM_SPEECH_GOOGLE_CONFIG_EN={ "languageCode": "en-US" } -BOTIUM_SPEECH_GOOGLE_CONFIG_DE={ "languageCode": "de-DE" } +#BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json +#BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL= +#BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY= +BOTIUM_SPEECH_GOOGLE_CONFIG={} # For files longer than 1 minute, you have to create a Google Cloud Storage Bucket as temporary storage (give read/write access to service user) #BOTIUM_SPEECH_GOOGLE_BUCKET_NAME= #BOTIUM_SPEECH_GOOGLE_API_VERSION= diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js index 942bab9..bf9a00d 100644 --- a/frontend/src/convert/sox.js +++ b/frontend/src/convert/sox.js @@ -1,7 +1,7 @@ const fs = require('fs') const Mustache = require('mustache') const { spawn } = require('child_process') -const uuidv1 = require('uuid/v1') +const { v1: uuidv1 } = require('uuid') const debug = require('debug')('botium-speech-processing-convert-sox') const runsox = (cmdLine, { inputBuffer, start, end }) => { diff --git a/frontend/src/routes.js b/frontend/src/routes.js index e364628..ab60a31 100644 --- a/frontend/src/routes.js +++ b/frontend/src/routes.js @@ -10,11 +10,22 @@ const debug = require('debug')('botium-speech-processing-routes') const cachePathStt = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'stt') const cachePathTts = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'tts') -const cacheKey = (data, language, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}` +const cacheKeyStt = (data, language, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}` +const cacheKeyTts = (data, language, voice, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}_${voice || 'default'}${ext}` if (cachePathStt) mkdirp.sync(cachePathStt) if (cachePathTts) mkdirp.sync(cachePathTts) +const ttsEngines = { + google: new (require('./tts/google'))(), + marytts: new (require('./tts/marytts'))(), + picotts: new (require('./tts/picotts'))() +} +const sttEngines = { + google: new (require('./stt/google'))(), + kaldi: new (require('./stt/kaldi'))() +} + const router = express.Router() /** @@ -60,7 +71,7 @@ router.get('/api/status', (req, res) => { * - application/json * parameters: * - name: language - * description: ISO-639-1 language code + * description: ISO-639-1 language code (2 letters) * in: path * required: true * schema: @@ -97,7 +108,7 @@ router.post('/api/stt/:language', async (req, res, next) => { if (Buffer.isBuffer(req.body)) { let cacheFile = null if (cachePathStt) { - cacheFile = path.join(cachePathStt, cacheKey(req.body, req.params.language, '.json')) + cacheFile = path.join(cachePathStt, cacheKeyStt(req.body, req.params.language, '.json')) if (fs.existsSync(cacheFile)) { try { const result = JSON.parse(fs.readFileSync(cacheFile).toString()) @@ -109,7 +120,7 @@ router.post('/api/stt/:language', async (req, res, next) => { } } try { - const stt = new (require(`./stt/${(req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT}`))() + const stt = sttEngines[(req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT] const result = await stt.stt({ language: req.params.language, @@ -121,8 +132,12 @@ router.post('/api/stt/:language', async (req, res, next) => { res.json(result).end() if (cachePathStt) { - fs.writeFileSync(cacheFile, JSON.stringify(result)) - debug(`Writing stt result ${cacheFile} to cache: ${result.text}`) + try { + fs.writeFileSync(cacheFile, JSON.stringify(result)) + debug(`Writing stt result ${cacheFile} to cache: ${result.text}`) + } catch (err) { + debug(`Writing stt result ${cacheFile} to cache: ${result.text} - failed: ${err.message}`) + } } } catch (err) { return next(err) @@ -132,6 +147,47 @@ router.post('/api/stt/:language', async (req, res, next) => { } }) +/** + * @swagger + * /api/ttsvoices: + * get: + * description: Get list of voices + * security: + * - ApiKeyAuth: [] + * produces: + * - application/json + * parameters: + * - name: tts + * description: Text-to-speech backend + * in: query + * required: false + * schema: + * type: string + * enum: [google, marytts, picotts] + * responses: + * 200: + * description: List of supported voices + * schema: + * type: array + * items: + * type: object + * properties: + * name: + * type: string + * language: + * type: string + * gender: + * type: [male, female, neutral] + */ +router.get('/api/ttsvoices', async (req, res, next) => { + try { + const tts = ttsEngines[(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS] + res.json(await tts.voices()) + } catch (err) { + return next(err) + } +}) + /** * @swagger * /api/tts/{language}: @@ -143,7 +199,7 @@ router.post('/api/stt/:language', async (req, res, next) => { * - audio/wav * parameters: * - name: language - * description: ISO-639-1 language code + * description: ISO-639-1 language code (2 letters) * in: path * required: true * schema: @@ -154,13 +210,19 @@ router.post('/api/stt/:language', async (req, res, next) => { * required: true * schema: * type: string + * - name: voice + * description: Voice name + * in: query + * required: false + * schema: + * type: string * - name: tts * description: Text-to-speech backend * in: query * required: false * schema: * type: string - * enum: [marytts, picotts] + * enum: [google, marytts, picotts] * responses: * 200: * description: Audio file @@ -175,8 +237,8 @@ router.get('/api/tts/:language', async (req, res, next) => { let cacheFileName = null let cacheFileBuffer = null if (cachePathTts) { - cacheFileName = path.join(cachePathTts, cacheKey(req.query.text, req.params.language, '.txt')) - cacheFileBuffer = path.join(cachePathTts, cacheKey(req.query.text, req.params.language, '.bin')) + cacheFileName = path.join(cachePathTts, cacheKeyTts(req.query.text, req.params.language, req.query.voice, '.txt')) + cacheFileBuffer = path.join(cachePathTts, cacheKeyTts(req.query.text, req.params.language, req.query.voice, '.bin')) if (fs.existsSync(cacheFileName) && fs.existsSync(cacheFileBuffer)) { try { const name = fs.readFileSync(cacheFileName).toString() @@ -193,10 +255,11 @@ router.get('/api/tts/:language', async (req, res, next) => { } } try { - const tts = new (require(`./tts/${(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS}`))() + const tts = ttsEngines[(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS] const { buffer, name } = await tts.tts({ language: req.params.language, + voice: req.query.voice, text: req.query.text }) res.writeHead(200, { @@ -206,9 +269,13 @@ router.get('/api/tts/:language', async (req, res, next) => { res.end(buffer) if (cachePathTts) { - fs.writeFileSync(cacheFileName, name) - fs.writeFileSync(cacheFileBuffer, buffer) - debug(`Writing tts result ${cacheFileName} to cache: ${name}`) + try { + fs.writeFileSync(cacheFileName, name) + fs.writeFileSync(cacheFileBuffer, buffer) + debug(`Writing tts result ${cacheFileName} to cache: ${name}`) + } catch (err) { + debug(`Writing tts result ${cacheFileName} to cache: ${name} - failed: ${err.message}`) + } } } catch (err) { return next(err) diff --git a/frontend/src/stt/google.js b/frontend/src/stt/google.js index 6769c49..a82e571 100644 --- a/frontend/src/stt/google.js +++ b/frontend/src/stt/google.js @@ -1,37 +1,30 @@ -const fs = require('fs') -const uuidv1 = require('uuid/v1') +const { v1: uuidv1 } = require('uuid') const speech = process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION ? require('@google-cloud/speech')[process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION] : require('@google-cloud/speech') const storage = require('@google-cloud/storage') -const debug = require('debug')('botium-speech-processing-google') +const debug = require('debug')('botium-speech-processing-google-stt') -const credentialsPath = process.env.BOTIUM_SPEECH_GOOGLE_KEYFILE || './resources/google.json' +const { googleOptions } = require('../utils') -class Google { +class GoogleSTT { async stt ({ language, buffer }) { - if (!fs.existsSync(credentialsPath)) throw new Error(`Google Cloud credentials file "${credentialsPath}" not found`) - - const envVarConfig = `BOTIUM_SPEECH_GOOGLE_CONFIG_${language.toUpperCase()}` - if (!process.env[envVarConfig]) throw new Error(`Environment variable ${envVarConfig} empty`) - - let config = null - try { - config = JSON.parse(process.env[envVarConfig]) - } catch (err) { - throw new Error(`Google Cloud credentials config in ${envVarConfig} invalid: ${err.message}`) - } - - const speechClient = new speech.SpeechClient({ - keyFilename: credentialsPath - }) - const storageClient = new storage.Storage({ - keyFilename: credentialsPath - }) + const speechClient = new speech.SpeechClient(googleOptions()) + const storageClient = new storage.Storage(googleOptions()) const request = { - config, + config: { + languageCode: language + }, audio: { } } + if (process.env.BOTIUM_SPEECH_GOOGLE_CONFIG) { + try { + const defaultConfig = JSON.parse(process.env.BOTIUM_SPEECH_GOOGLE_CONFIG) + Object.assign(request.config, defaultConfig) + } catch (err) { + throw new Error(`Google Speech config in BOTIUM_SPEECH_GOOGLE_CONFIG invalid: ${err.message}`) + } + } const gcsFileName = `${uuidv1()}.wav` if (process.env.BOTIUM_SPEECH_GOOGLE_BUCKET_NAME) { @@ -93,4 +86,4 @@ class Google { } } -module.exports = Google +module.exports = GoogleSTT diff --git a/frontend/src/stt/kaldi.js b/frontend/src/stt/kaldi.js index 5fd31a5..69a0484 100644 --- a/frontend/src/stt/kaldi.js +++ b/frontend/src/stt/kaldi.js @@ -3,7 +3,7 @@ const Mustache = require('mustache') const request = require('request-promise-native') const debug = require('debug')('botium-speech-processing-kaldi') -class Kaldi { +class KaldiSTT { async stt ({ language, buffer }) { const envVarUrl = `BOTIUM_SPEECH_KALDI_URL_${language.toUpperCase()}` if (!process.env[envVarUrl]) throw new Error(`Environment variable ${envVarUrl} empty`) @@ -47,4 +47,4 @@ class Kaldi { } } -module.exports = Kaldi +module.exports = KaldiSTT diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json index aa4295c..f90ec54 100644 --- a/frontend/src/swagger.json +++ b/frontend/src/swagger.json @@ -50,7 +50,7 @@ "parameters": [ { "name": "language", - "description": "ISO-639-1 language code", + "description": "ISO-639-1 language code (2 letters)", "in": "path", "required": true, "schema": { @@ -105,6 +105,61 @@ } } }, + "/api/ttsvoices": { + "get": { + "description": "Get list of voices", + "security": [ + { + "ApiKeyAuth": [] + } + ], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "tts", + "description": "Text-to-speech backend", + "in": "query", + "required": false, + "schema": { + "type": "string", + "enum": [ + "google", + "marytts", + "picotts" + ] + } + } + ], + "responses": { + "200": { + "description": "List of supported voices", + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "language": { + "type": "string" + }, + "gender": { + "type": [ + "male", + "female", + "neutral" + ] + } + } + } + } + } + } + } + }, "/api/tts/{language}": { "get": { "description": "Convert text file to audio", @@ -119,7 +174,7 @@ "parameters": [ { "name": "language", - "description": "ISO-639-1 language code", + "description": "ISO-639-1 language code (2 letters)", "in": "path", "required": true, "schema": { @@ -135,6 +190,15 @@ "type": "string" } }, + { + "name": "voice", + "description": "Voice name", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, { "name": "tts", "description": "Text-to-speech backend", @@ -143,6 +207,7 @@ "schema": { "type": "string", "enum": [ + "google", "marytts", "picotts" ] diff --git a/frontend/src/tts/google.js b/frontend/src/tts/google.js new file mode 100644 index 0000000..69a8666 --- /dev/null +++ b/frontend/src/tts/google.js @@ -0,0 +1,66 @@ +const textToSpeech = require('@google-cloud/text-to-speech') +const debug = require('debug')('botium-speech-processing-google-tts') + +const { googleOptions } = require('../utils') + +let googleVoices = null + +const genderMap = { + MALE: 'male', + FEMALE: 'female', + NEUTRAL: 'neutral' +} + +class GoogleTTS { + async voices () { + if (googleVoices) return googleVoices + + const client = new textToSpeech.TextToSpeechClient(googleOptions()) + + const [result] = await client.listVoices({}) + const voices = result.voices + + googleVoices = [] + voices.forEach(voice => { + voice.languageCodes.forEach(languageCode => { + googleVoices.push({ + name: voice.name, + gender: genderMap[voice.ssmlGender], + language: languageCode.split('-')[0] + }) + }) + }) + return googleVoices + } + + async tts ({ language, voice, text }) { + const voiceSelector = { + languageCode: language + } + if (voice) { + voiceSelector.name = voice + } + + const client = new textToSpeech.TextToSpeechClient(googleOptions()) + const request = { + input: { + text + }, + voice: voiceSelector, + audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 16000 } + } + + try { + const [response] = await client.synthesizeSpeech(request) + return { + buffer: response.audioContent, + name: 'tts.wav' + } + } catch (err) { + debug(err) + throw new Error(`Google Cloud STT failed: ${err.message}`) + } + } +} + +module.exports = GoogleTTS diff --git a/frontend/src/tts/marytts.js b/frontend/src/tts/marytts.js index 9e3b006..1d09cd5 100644 --- a/frontend/src/tts/marytts.js +++ b/frontend/src/tts/marytts.js @@ -1,27 +1,57 @@ -const Mustache = require('mustache') +const _ = require('lodash') const request = require('request-promise-native') const debug = require('debug')('botium-speech-processing-marytts') +let maryVoices = null + class MaryTTS { - build () { + async voices () { + if (maryVoices) return maryVoices + const requestOptions = { + method: 'GET', + uri: `${process.env.BOTIUM_SPEECH_MARYTTS_URL}/voices` + } + let response + try { + response = await request(requestOptions) + } catch (err) { + throw new Error(`Calling url ${requestOptions.uri} failed: ${err.message}`) + } + if (_.isString(response)) { + maryVoices = [] + const lines = response.split('\n').map(l => l.trim()).filter(l => l) + for (const line of lines) { + const parts = line.split(' ') + maryVoices.push({ + name: parts[0], + language: parts[1], + gender: parts[2] + }) + } + } + return maryVoices } - async tts ({ language, text }) { - const envVarUrl = `BOTIUM_SPEECH_MARYTTS_URL_${language.toUpperCase()}` - if (!process.env[envVarUrl]) throw new Error(`Environment variable ${envVarUrl} empty`) + async tts ({ language, voice, text }) { + const voicesList = await this.voices() + + const maryVoice = voicesList.find(v => { + if (language && v.language !== language) return false + if (voice && v.name !== voice) return false + return true + }) + if (!maryVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`) + + const maryUrl = `${process.env.BOTIUM_SPEECH_MARYTTS_URL}/process?INPUT_TEXT=${encodeURIComponent(text)}&INPUT_TYPE=TEXT&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&VOICE=${encodeURIComponent(maryVoice.name)}&LOCALE=${encodeURIComponent(maryVoice.language)}` const requestOptions = { method: 'GET', - uri: Mustache.render(process.env[envVarUrl], { - language: encodeURIComponent(language), - text: encodeURIComponent(text) - }), + uri: maryUrl, encoding: null, resolveWithFullResponse: true, simple: false } - let response try { response = await request(requestOptions) diff --git a/frontend/src/tts/picotts.js b/frontend/src/tts/picotts.js index 6fbaf29..83804a3 100644 --- a/frontend/src/tts/picotts.js +++ b/frontend/src/tts/picotts.js @@ -1,21 +1,63 @@ const fs = require('fs') -const Mustache = require('mustache') const { spawn } = require('child_process') -const uuidv1 = require('uuid/v1') +const { v1: uuidv1 } = require('uuid') const debug = require('debug')('botium-speech-processing-picotts') +const voicesList = [ + { + name: 'en-EN', + language: 'en', + gender: 'neutral' + }, + { + name: 'en-GB', + language: 'en', + gender: 'neutral' + }, + { + name: 'es-ES', + language: 'es', + gender: 'neutral' + }, + { + name: 'de-DE', + language: 'de', + gender: 'neutral' + }, + { + name: 'en-GB', + language: 'en', + gender: 'neutral' + }, + { + name: 'fr-FR', + language: 'fr', + gender: 'neutral' + }, + { + name: 'it-IT', + language: 'it', + gender: 'neutral' + } +] + class PicoTTS { - build () { + async voices () { + return voicesList } - async tts ({ language, text }) { - const envVarCmd = `BOTIUM_SPEECH_PICO_CMDPREFIX_${language.toUpperCase()}` - if (!process.env[envVarCmd]) throw new Error(`Environment variable ${envVarCmd} empty`) + async tts ({ language, voice, text }) { + const picoVoice = voicesList.find(v => { + if (language && v.language !== language) return false + if (voice && v.name !== voice) return false + return true + }) + if (!picoVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`) return new Promise((resolve, reject) => { const output = `/tmp/${uuidv1()}.wav` - const cmdLinePico = Mustache.render(process.env[envVarCmd], { output }) + const cmdLinePico = `${process.env.BOTIUM_SPEECH_PICO_CMDPREFIX || 'pico2wave'} --lang=${picoVoice.name} --wave=${output}` debug(`cmdLinePico: ${cmdLinePico}`) const cmdLinePicoParts = cmdLinePico.split(' ') const pico = spawn(cmdLinePicoParts[0], cmdLinePicoParts.slice(1).concat([text])) diff --git a/frontend/src/utils.js b/frontend/src/utils.js index a29ec4e..949a31c 100644 --- a/frontend/src/utils.js +++ b/frontend/src/utils.js @@ -1,3 +1,4 @@ +const fs = require('fs') const speechScorer = require('word-error-rate') const wer = async (text1, text2) => { @@ -7,6 +8,25 @@ const wer = async (text1, text2) => { } } +const cleanEnv = (envName) => { + return process.env[envName] && process.env[envName].replace(/\\n/g, '\n') +} + +const googleOptions = () => { + const keyFilename = process.env.BOTIUM_SPEECH_GOOGLE_KEYFILE + if (keyFilename) { + if (!fs.existsSync(keyFilename)) throw new Error(`Google Cloud credentials file "${keyFilename}" not found`) + return { keyFilename } + } + const privateKey = cleanEnv('BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY') + const clientEmail = process.env.BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL + if (privateKey && clientEmail) { + return { credentials: { private_key: privateKey, client_email: clientEmail } } + } + throw new Error('Google Cloud credentials not found') +} + module.exports = { - wer + wer, + googleOptions } From 5d5592694f27d5460420fa6260685f1a985457e5 Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 13:32:22 +0100 Subject: [PATCH 2/9] Using reasonable filenames for TTS result. Added some default configurations. --- Makefile | 22 +++++++++++++++++++--- README.md | 6 ++++++ dictate/Dockerfile | 1 + docker-compose-dev.yml | 6 ++++++ docker-compose-google.yml | 26 ++++++++++++++++++++++++++ docker-compose-picotts.yml | 37 +++++++++++++++++++++++++++++++++++++ docker-compose.yml | 6 ++++++ frontend/Dockerfile | 9 ++++++--- frontend/package.json | 1 + frontend/resources/.env | 3 +++ frontend/src/convert/sox.js | 2 +- frontend/src/routes.js | 4 ++++ frontend/src/tts/google.js | 4 ++-- frontend/src/tts/marytts.js | 4 +++- frontend/src/tts/picotts.js | 6 ++++-- frontend/src/utils.js | 8 ++++++++ 16 files changed, 133 insertions(+), 12 deletions(-) create mode 100644 docker-compose-google.yml create mode 100644 docker-compose-picotts.yml diff --git a/Makefile b/Makefile index 4e426e6..4f751c5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,23 @@ TAG_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1) VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || true) -docker_build: +docker_build_develop: + docker build -t botium/botium-speech-frontend:develop frontend + docker build -t botium/botium-speech-watcher:develop watcher + docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop) stt + docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:develop stt + docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:develop tts + docker build -t botium/botium-speech-dictate:develop dictate + +docker_publish_develop: + docker push botium/botium-speech-frontend:develop + docker push botium/botium-speech-watcher:develop + docker push botium/botium-speech-kaldi-en:develop + docker push botium/botium-speech-kaldi-de:develop + docker push botium/botium-speech-marytts:develop + docker push botium/botium-speech-dictate:develop + +docker_build_release: docker build -t botium/botium-speech-frontend:$(VERSION) frontend docker build -t botium/botium-speech-watcher:$(VERSION) watcher docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:$(VERSION) stt @@ -9,7 +25,7 @@ docker_build: docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:$(VERSION) tts docker build -t botium/botium-speech-dictate:$(VERSION) dictate -docker_publish: +docker_publish_release: docker push botium/botium-speech-frontend:$(VERSION) docker push botium/botium-speech-watcher:$(VERSION) docker push botium/botium-speech-kaldi-en:$(VERSION) @@ -17,7 +33,7 @@ docker_publish: docker push botium/botium-speech-marytts:$(VERSION) docker push botium/botium-speech-dictate:$(VERSION) -docker_latest: +docker_latest_release: docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest docker push botium/botium-speech-frontend:latest diff --git a/README.md b/README.md index b259ea2..df53170 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,12 @@ This project is standing on the shoulders of giants. ## Changelog +### 2020-12-18 + +* Adding support for Google Text-To-Speech +* Adding support for listing and using available TTS voices +* Added sample docker-compose configurations for PicoTTS and Google + ### 2020-03-05 * Optional _start_/_end_ parameters for audio file conversion to trim an audio file by time codes formatted as mm:ss (_01:32_) diff --git a/dictate/Dockerfile b/dictate/Dockerfile index 3574082..ac9b726 100644 --- a/dictate/Dockerfile +++ b/dictate/Dockerfile @@ -11,4 +11,5 @@ RUN curl -L -o dicatejs.zip "https://github.com/Kaljurand/dictate.js/archive/mas EXPOSE 56100 +USER node CMD DICTATEDIR=/app/dictate.js-master npm start \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 671750e..8e9e5a8 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -10,12 +10,14 @@ services: frontend: build: context: frontend + image: botium/botium-speech-frontend:develop restart: always volumes: - "./frontend/resources:/app/resources" # watcher: # build: # context: watcher +# image: botium/botium-speech-watcher:develop # restart: always # volumes: # - "./watcher:/app/watch" @@ -24,6 +26,7 @@ services: # build: # context: stt # dockerfile: Dockerfile.kaldi.en +# image: botium/botium-speech-kaldi-en:develop # restart: always # volumes: # - "./logs/stt_en:/opt/logs" @@ -31,6 +34,7 @@ services: # build: # context: stt # dockerfile: Dockerfile.kaldi.de +# image: botium/botium-speech-kaldi-de:develop # restart: always # volumes: # - "./logs/stt_de:/opt/logs" @@ -38,8 +42,10 @@ services: build: context: tts dockerfile: Dockerfile.marytts + image: botium/botium-speech-marytts:develop restart: always # dictate: # build: # context: dictate +# image: botium/botium-speech-dictate:develop # restart: always diff --git a/docker-compose-google.yml b/docker-compose-google.yml new file mode 100644 index 0000000..e2c00c3 --- /dev/null +++ b/docker-compose-google.yml @@ -0,0 +1,26 @@ +version: '3' +services: + nginx: + image: nginx + restart: always + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf + ports: + - 80:80 + frontend: + image: botium/botium-speech-frontend:latest + restart: always + environment: + BOTIUM_API_TOKENS: + BOTIUM_SPEECH_PROVIDER_TTS: google + BOTIUM_SPEECH_PROVIDER_STT: google + BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL: + BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY: + volumes: + - "./frontend/resources:/app/resources" + watcher: + image: botium/botium-speech-watcher:latest + restart: always + volumes: + - "./watcher:/app/watch" + - "./logs/watcher:/app/logs" diff --git a/docker-compose-picotts.yml b/docker-compose-picotts.yml new file mode 100644 index 0000000..ee88a4e --- /dev/null +++ b/docker-compose-picotts.yml @@ -0,0 +1,37 @@ +version: '3' +services: + nginx: + image: nginx + restart: always + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf + ports: + - 80:80 + frontend: + image: botium/botium-speech-frontend:latest + restart: always + environment: + BOTIUM_API_TOKENS: + BOTIUM_SPEECH_PROVIDER_TTS: picotts + BOTIUM_SPEECH_PROVIDER_STT: kaldi + volumes: + - "./frontend/resources:/app/resources" + watcher: + image: botium/botium-speech-watcher:latest + restart: always + volumes: + - "./watcher:/app/watch" + - "./logs/watcher:/app/logs" + stt_en: + image: botium/botium-speech-kaldi-en:latest + restart: always + volumes: + - "./logs/stt_en:/opt/logs" + stt_de: + image: botium/botium-speech-kaldi-de:latest + restart: always + volumes: + - "./logs/stt_de:/opt/logs" + dictate: + image: botium/botium-speech-dictate:latest + restart: always diff --git a/docker-compose.yml b/docker-compose.yml index b187ea9..176147c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,12 @@ services: frontend: image: botium/botium-speech-frontend:latest restart: always + environment: + BOTIUM_API_TOKENS: + BOTIUM_SPEECH_PROVIDER_TTS: marytts + BOTIUM_SPEECH_PROVIDER_STT: kaldi + BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL: + BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY: volumes: - "./frontend/resources:/app/resources" watcher: diff --git a/frontend/Dockerfile b/frontend/Dockerfile index be7d89d..9f3176c 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -4,12 +4,15 @@ RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.node RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils WORKDIR /app -COPY . /app -RUN find . -type f -print0 | xargs -0 dos2unix +COPY ./package.json /app/package.json RUN npm install --no-optional --production +COPY . /app +RUN find . -type f ! -path '*/node_modules/*' -print0 | xargs -0 dos2unix VOLUME /app/resources EXPOSE 56000 -CMD npm start \ No newline at end of file +RUN groupadd --gid 1000 node && useradd --uid 1000 --gid node --shell /bin/bash --create-home node +USER node +CMD npm run start-dist \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json index 4c870cd..4ec94be 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -6,6 +6,7 @@ "eslint": "eslint src", "eslint-fix": "eslint --fix src", "start": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js", + "start-dist": "cross-env DOTENV_FLOW_PATH=./resources node -r dotenv-flow/config ./src/server.js", "jsdoc": "swagger-jsdoc -d ./src/swaggerDef.json -o ./src/swagger.json src/routes.js" }, "author": "Botium GmbH", diff --git a/frontend/resources/.env b/frontend/resources/.env index d5c6f4e..afc07e5 100644 --- a/frontend/resources/.env +++ b/frontend/resources/.env @@ -10,6 +10,9 @@ BOTIUM_SPEECH_UPLOAD_LIMIT=50mb # Cache Path BOTIUM_SPEECH_CACHE_DIR=./resources/.cache +# Temp Path +BOTIUM_SPEECH_TMP_DIR=./resources/.tmp + # Provider for TTS (google or marytts or picotts) BOTIUM_SPEECH_PROVIDER_TTS=marytts diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js index bf9a00d..59ef6c3 100644 --- a/frontend/src/convert/sox.js +++ b/frontend/src/convert/sox.js @@ -6,7 +6,7 @@ const debug = require('debug')('botium-speech-processing-convert-sox') const runsox = (cmdLine, { inputBuffer, start, end }) => { return new Promise((resolve, reject) => { - const output = `/tmp/${uuidv1()}.wav` + const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav` let cmdLineSox = Mustache.render(cmdLine, { output }) if (start && end) { diff --git a/frontend/src/routes.js b/frontend/src/routes.js index ab60a31..93f67c4 100644 --- a/frontend/src/routes.js +++ b/frontend/src/routes.js @@ -16,6 +16,10 @@ const cacheKeyTts = (data, language, voice, ext) => `${crypto.createHash('md5'). if (cachePathStt) mkdirp.sync(cachePathStt) if (cachePathTts) mkdirp.sync(cachePathTts) +if (process.env.BOTIUM_SPEECH_TMP_DIR) { + mkdirp.sync(process.env.BOTIUM_SPEECH_TMP_DIR) +} + const ttsEngines = { google: new (require('./tts/google'))(), marytts: new (require('./tts/marytts'))(), diff --git a/frontend/src/tts/google.js b/frontend/src/tts/google.js index 69a8666..568d6a9 100644 --- a/frontend/src/tts/google.js +++ b/frontend/src/tts/google.js @@ -1,7 +1,7 @@ const textToSpeech = require('@google-cloud/text-to-speech') const debug = require('debug')('botium-speech-processing-google-tts') -const { googleOptions } = require('../utils') +const { googleOptions, ttsFilename } = require('../utils') let googleVoices = null @@ -54,7 +54,7 @@ class GoogleTTS { const [response] = await client.synthesizeSpeech(request) return { buffer: response.audioContent, - name: 'tts.wav' + name: `${ttsFilename(text)}.wav` } } catch (err) { debug(err) diff --git a/frontend/src/tts/marytts.js b/frontend/src/tts/marytts.js index 1d09cd5..ed95ed7 100644 --- a/frontend/src/tts/marytts.js +++ b/frontend/src/tts/marytts.js @@ -2,6 +2,8 @@ const _ = require('lodash') const request = require('request-promise-native') const debug = require('debug')('botium-speech-processing-marytts') +const { ttsFilename } = require('../utils') + let maryVoices = null class MaryTTS { @@ -62,7 +64,7 @@ class MaryTTS { debug(`Called url ${requestOptions.uri} success`) return { buffer: response.body, - name: 'tts.wav' + name: `${ttsFilename(text)}.wav` } } else { throw new Error(`Calling url ${requestOptions.uri} failed with code ${response.statusCode}: ${response.statusMessage}`) diff --git a/frontend/src/tts/picotts.js b/frontend/src/tts/picotts.js index 83804a3..7b6e431 100644 --- a/frontend/src/tts/picotts.js +++ b/frontend/src/tts/picotts.js @@ -3,6 +3,8 @@ const { spawn } = require('child_process') const { v1: uuidv1 } = require('uuid') const debug = require('debug')('botium-speech-processing-picotts') +const { ttsFilename } = require('../utils') + const voicesList = [ { name: 'en-EN', @@ -55,7 +57,7 @@ class PicoTTS { if (!picoVoice) throw new Error(`Voice <${voice || 'default'}> for language <${language}> not available`) return new Promise((resolve, reject) => { - const output = `/tmp/${uuidv1()}.wav` + const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav` const cmdLinePico = `${process.env.BOTIUM_SPEECH_PICO_CMDPREFIX || 'pico2wave'} --lang=${picoVoice.name} --wave=${output}` debug(`cmdLinePico: ${cmdLinePico}`) @@ -70,7 +72,7 @@ class PicoTTS { fs.unlinkSync(output) resolve({ buffer: outputBuffer, - name: 'tts.wav' + name: `${ttsFilename(text)}.wav` }) } catch (err) { reject(new Error(`pico2wave process output file ${output} not readable: ${err.message}`)) diff --git a/frontend/src/utils.js b/frontend/src/utils.js index 949a31c..306544c 100644 --- a/frontend/src/utils.js +++ b/frontend/src/utils.js @@ -1,4 +1,6 @@ const fs = require('fs') +const _ = require('lodash') +const sanitize = require('sanitize-filename') const speechScorer = require('word-error-rate') const wer = async (text1, text2) => { @@ -8,6 +10,11 @@ const wer = async (text1, text2) => { } } +const ttsFilename = (text) => { + const shortenedText = _.truncate(text, { length: 500 }) + return sanitize(shortenedText) +} + const cleanEnv = (envName) => { return process.env[envName] && process.env[envName].replace(/\\n/g, '\n') } @@ -28,5 +35,6 @@ const googleOptions = () => { module.exports = { wer, + ttsFilename, googleOptions } From 570097c36a83d6296a3551c7aa61b51c815673e0 Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 15:44:24 +0100 Subject: [PATCH 3/9] BOT-1862 added support for webm --- docker-compose-dev.yml | 46 ++++++++++++------------- frontend/Dockerfile | 2 +- frontend/resources/.env | 8 +++-- frontend/src/convert/convert.js | 60 ++++++++++++++++++++++++++++++++ frontend/src/convert/sox.js | 61 --------------------------------- frontend/src/routes.js | 34 ++++++++++++++---- frontend/src/swagger.json | 26 +++++++++++++- 7 files changed, 142 insertions(+), 95 deletions(-) create mode 100644 frontend/src/convert/convert.js delete mode 100644 frontend/src/convert/sox.js diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 8e9e5a8..7c99e19 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -22,30 +22,30 @@ services: # volumes: # - "./watcher:/app/watch" # - "./logs/watcher:/app/logs" -# stt_en: -# build: -# context: stt -# dockerfile: Dockerfile.kaldi.en -# image: botium/botium-speech-kaldi-en:develop -# restart: always -# volumes: -# - "./logs/stt_en:/opt/logs" -# stt_de: -# build: -# context: stt -# dockerfile: Dockerfile.kaldi.de -# image: botium/botium-speech-kaldi-de:develop -# restart: always -# volumes: -# - "./logs/stt_de:/opt/logs" - tts: + stt_en: build: - context: tts - dockerfile: Dockerfile.marytts - image: botium/botium-speech-marytts:develop + context: stt + dockerfile: Dockerfile.kaldi.en + image: botium/botium-speech-kaldi-en:develop restart: always -# dictate: + volumes: + - "./logs/stt_en:/opt/logs" + stt_de: + build: + context: stt + dockerfile: Dockerfile.kaldi.de + image: botium/botium-speech-kaldi-de:develop + restart: always + volumes: + - "./logs/stt_de:/opt/logs" +# tts: # build: -# context: dictate -# image: botium/botium-speech-dictate:develop +# context: tts +# dockerfile: Dockerfile.marytts +# image: botium/botium-speech-marytts:develop # restart: always + dictate: + build: + context: dictate + image: botium/botium-speech-dictate:develop + restart: always diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 9f3176c..d84bf5f 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x | bash - && apt-get -y install nodejs -RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils +RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg WORKDIR /app COPY ./package.json /app/package.json diff --git a/frontend/resources/.env b/frontend/resources/.env index afc07e5..c1c7dc8 100644 --- a/frontend/resources/.env +++ b/frontend/resources/.env @@ -39,7 +39,9 @@ BOTIUM_SPEECH_GOOGLE_CONFIG={} #BOTIUM_SPEECH_GOOGLE_API_VERSION= # WAV Conversation Command Line -BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_SOX=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}} +BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}} BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav -BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_SOX=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}} -BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav \ No newline at end of file +BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}} +BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav +BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}} +BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav diff --git a/frontend/src/convert/convert.js b/frontend/src/convert/convert.js new file mode 100644 index 0000000..f70df2a --- /dev/null +++ b/frontend/src/convert/convert.js @@ -0,0 +1,60 @@ +const fs = require('fs') +const Mustache = require('mustache') +const { spawn } = require('child_process') +const { v1: uuidv1 } = require('uuid') +const debug = require('debug')('botium-speech-processing-convert') + +const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => { + return new Promise((resolve, reject) => { + const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}` + + let cmdLineFull = Mustache.render(cmdLine, { output }) + if (start && end) { + cmdLineFull = `${cmdLineFull} trim ${start} ${end}` + } else if (start && !end) { + cmdLineFull = `${cmdLineFull} trim ${start}` + } else if (!start && end) { + cmdLineFull = `${cmdLineFull} trim 0 ${end}` + } + debug(`cmdLineFull: ${cmdLineFull}`) + const childProcess = spawn('/bin/sh', ['-c', cmdLineFull]) + + childProcess.once('exit', (code, signal) => { + debug(`conversion process exited with code ${code}, signal ${signal}`) + if (code === 0) { + try { + const outputBuffer = fs.readFileSync(output) + fs.unlinkSync(output) + resolve(outputBuffer) + } catch (err) { + reject(new Error(`conversion process output file ${output} not readable: ${err.message}`)) + } + } else { + reject(new Error(`conversion process exited with code ${code}, signal ${signal}`)) + } + }) + childProcess.once('error', (err) => { + debug(`conversion process failed: ${err.message}`) + reject(new Error(`conversion process failed: ${err.message}`)) + }) + childProcess.stdout.on('error', (err) => { + debug('stdout err ' + err) + }) + childProcess.stderr.on('error', (err) => { + debug('stderr err ' + err) + }) + childProcess.stdin.on('error', (err) => { + debug('stdin err ' + err) + }) + childProcess.stderr.on('data', (data) => { + debug('stderr ' + data) + }) + + childProcess.stdin.write(inputBuffer) + childProcess.stdin.end() + }) +} + +module.exports = { + runconvert +} diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js deleted file mode 100644 index 59ef6c3..0000000 --- a/frontend/src/convert/sox.js +++ /dev/null @@ -1,61 +0,0 @@ -const fs = require('fs') -const Mustache = require('mustache') -const { spawn } = require('child_process') -const { v1: uuidv1 } = require('uuid') -const debug = require('debug')('botium-speech-processing-convert-sox') - -const runsox = (cmdLine, { inputBuffer, start, end }) => { - return new Promise((resolve, reject) => { - const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}.wav` - - let cmdLineSox = Mustache.render(cmdLine, { output }) - if (start && end) { - cmdLineSox = `${cmdLineSox} trim ${start} ${end}` - } else if (start && !end) { - cmdLineSox = `${cmdLineSox} trim ${start}` - } else if (!start && end) { - cmdLineSox = `${cmdLineSox} trim 0 ${end}` - } - debug(`cmdLineSox: ${cmdLineSox}`) - const cmdLineSoxParts = cmdLineSox.split(' ') - const sox = spawn(cmdLineSoxParts[0], cmdLineSoxParts.slice(1)) - - sox.once('exit', (code, signal) => { - debug(`sox process exited with code ${code}, signal ${signal}`) - if (code === 0) { - try { - const outputBuffer = fs.readFileSync(output) - fs.unlinkSync(output) - resolve(outputBuffer) - } catch (err) { - reject(new Error(`sox process output file ${output} not readable: ${err.message}`)) - } - } else { - reject(new Error(`sox process exited with code ${code}, signal ${signal}`)) - } - }) - sox.once('error', (err) => { - debug(`sox process failed: ${err.message}`) - reject(new Error(`sox process failed: ${err.message}`)) - }) - sox.stdout.on('error', (err) => { - debug('stdout err ' + err) - }) - sox.stderr.on('error', (err) => { - debug('stderr err ' + err) - }) - sox.stdin.on('error', (err) => { - debug('stdin err ' + err) - }) - sox.stderr.on('data', (data) => { - debug('stderr ' + data) - }) - - sox.stdin.write(inputBuffer) - sox.stdin.end() - }) -} - -module.exports = { - runsox -} diff --git a/frontend/src/routes.js b/frontend/src/routes.js index 93f67c4..0385427 100644 --- a/frontend/src/routes.js +++ b/frontend/src/routes.js @@ -4,7 +4,7 @@ const mkdirp = require('mkdirp') const crypto = require('crypto') const express = require('express') const sanitize = require('sanitize-filename') -const { runsox } = require('./convert/sox') +const { runconvert } = require('./convert/convert') const { wer } = require('./utils') const debug = require('debug')('botium-speech-processing-routes') @@ -289,6 +289,27 @@ router.get('/api/tts/:language', async (req, res, next) => { } }) +/** + * @swagger + * /api/convertprofiles: + * get: + * description: Get list of audio conversion profile + * security: + * - ApiKeyAuth: [] + * produces: + * - application/json + * responses: + * 200: + * description: List of supported audio conversion profiles + * schema: + * type: array + * items: + * type: string + */ +router.get('/api/convertprofiles', async (req, res, next) => { + res.json(Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4])) +}) + /** * @swagger * /api/convert/{profile}: @@ -320,7 +341,7 @@ router.get('/api/tts/:language', async (req, res, next) => { * requestBody: * description: Audio file * content: - * audio/*: + * audio/wav: * schema: * type: string * format: binary @@ -334,12 +355,13 @@ router.get('/api/tts/:language', async (req, res, next) => { * format: binary */ router.post('/api/convert/:profile', async (req, res, next) => { + console.log(req.body) if (!Buffer.isBuffer(req.body)) { return next(new Error('req.body is not a buffer')) } - const envVarSox = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_SOX` - if (!process.env[envVarSox]) { - return next(new Error(`Environment variable ${envVarSox} empty`)) + const envVarCmd = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_CMD` + if (!process.env[envVarCmd]) { + return next(new Error(`Environment variable ${envVarCmd} empty`)) } const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT` if (!process.env[envVarOutput]) { @@ -347,7 +369,7 @@ router.post('/api/convert/:profile', async (req, res, next) => { } try { - const outputBuffer = await runsox(process.env[envVarSox], { inputBuffer: req.body, start: req.query.start, end: req.query.end }) + const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end }) res.writeHead(200, { 'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`, 'Content-Length': outputBuffer.length diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json index f90ec54..33865a4 100644 --- a/frontend/src/swagger.json +++ b/frontend/src/swagger.json @@ -229,6 +229,30 @@ } } }, + "/api/convertprofiles": { + "get": { + "description": "Get list of audio conversion profile", + "security": [ + { + "ApiKeyAuth": [] + } + ], + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "List of supported audio conversion profiles", + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + }, "/api/convert/{profile}": { "post": { "description": "Convert audio file", @@ -272,7 +296,7 @@ "requestBody": { "description": "Audio file", "content": { - "audio/*": { + "audio/wav": { "schema": { "type": "string", "format": "binary" From fe594f8abc9e8d2ef2b8b4cfa7751f468b0c8a2f Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 16:06:57 +0100 Subject: [PATCH 4/9] docker-compose --- docker-compose-dev.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 7c99e19..82ad67e 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -14,14 +14,14 @@ services: restart: always volumes: - "./frontend/resources:/app/resources" -# watcher: -# build: -# context: watcher -# image: botium/botium-speech-watcher:develop -# restart: always -# volumes: -# - "./watcher:/app/watch" -# - "./logs/watcher:/app/logs" + watcher: + build: + context: watcher + image: botium/botium-speech-watcher:develop + restart: always + volumes: + - "./watcher:/app/watch" + - "./logs/watcher:/app/logs" stt_en: build: context: stt @@ -38,12 +38,12 @@ services: restart: always volumes: - "./logs/stt_de:/opt/logs" -# tts: -# build: -# context: tts -# dockerfile: Dockerfile.marytts -# image: botium/botium-speech-marytts:develop -# restart: always + tts: + build: + context: tts + dockerfile: Dockerfile.marytts + image: botium/botium-speech-marytts:develop + restart: always dictate: build: context: dictate From 281c7dd1cc203bbdae2c64e2d3412f67d973a480 Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 16:29:59 +0100 Subject: [PATCH 5/9] .env file for docker-compose --- .env | 1 + .env.develop | 2 ++ .gitignore | 3 ++- docker-compose.override.template.yml | 7 +++++++ docker-compose.yml | 12 ++++++------ 5 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 .env.develop create mode 100644 docker-compose.override.template.yml diff --git a/.env b/.env index c920216..b9cf953 100644 --- a/.env +++ b/.env @@ -1 +1,2 @@ COMPOSE_PROJECT_NAME=botiumspeechprocessing +TAG=latest diff --git a/.env.develop b/.env.develop new file mode 100644 index 0000000..29a9aac --- /dev/null +++ b/.env.develop @@ -0,0 +1,2 @@ +COMPOSE_PROJECT_NAME=botiumspeechprocessing +TAG=develop \ No newline at end of file diff --git a/.gitignore b/.gitignore index cd3d225..46f714c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -logs \ No newline at end of file +logs +docker-compose.override.yml \ No newline at end of file diff --git a/docker-compose.override.template.yml b/docker-compose.override.template.yml new file mode 100644 index 0000000..c787ca6 --- /dev/null +++ b/docker-compose.override.template.yml @@ -0,0 +1,7 @@ +version: '3' +services: + dictate: + image: botium/botium-speech-dictate:develop + environment: + STT_URL_DE: wss://speech.botiumbox.com/stt_de + STT_URL_EN: wss://speech.botiumbox.com/stt_en diff --git a/docker-compose.yml b/docker-compose.yml index 176147c..4ed7ec4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,7 +8,7 @@ services: ports: - 80:80 frontend: - image: botium/botium-speech-frontend:latest + image: botium/botium-speech-frontend:${TAG} restart: always environment: BOTIUM_API_TOKENS: @@ -19,24 +19,24 @@ services: volumes: - "./frontend/resources:/app/resources" watcher: - image: botium/botium-speech-watcher:latest + image: botium/botium-speech-watcher:${TAG} restart: always volumes: - "./watcher:/app/watch" - "./logs/watcher:/app/logs" stt_en: - image: botium/botium-speech-kaldi-en:latest + image: botium/botium-speech-kaldi-en:${TAG} restart: always volumes: - "./logs/stt_en:/opt/logs" stt_de: - image: botium/botium-speech-kaldi-de:latest + image: botium/botium-speech-kaldi-de:${TAG} restart: always volumes: - "./logs/stt_de:/opt/logs" tts: - image: botium/botium-speech-marytts:latest + image: botium/botium-speech-marytts:${TAG} restart: always dictate: - image: botium/botium-speech-dictate:latest + image: botium/botium-speech-dictate:${TAG} restart: always From f8b0048005d09a5c9f12c9825d27de09f7ba1c7d Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 16:37:58 +0100 Subject: [PATCH 6/9] tabs vs spaces --- Makefile | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 4f751c5..90c065c 100644 --- a/Makefile +++ b/Makefile @@ -10,12 +10,12 @@ docker_build_develop: docker build -t botium/botium-speech-dictate:develop dictate docker_publish_develop: - docker push botium/botium-speech-frontend:develop - docker push botium/botium-speech-watcher:develop - docker push botium/botium-speech-kaldi-en:develop - docker push botium/botium-speech-kaldi-de:develop - docker push botium/botium-speech-marytts:develop - docker push botium/botium-speech-dictate:develop + docker push botium/botium-speech-frontend:develop + docker push botium/botium-speech-watcher:develop + docker push botium/botium-speech-kaldi-en:develop + docker push botium/botium-speech-kaldi-de:develop + docker push botium/botium-speech-marytts:develop + docker push botium/botium-speech-dictate:develop docker_build_release: docker build -t botium/botium-speech-frontend:$(VERSION) frontend @@ -26,28 +26,28 @@ docker_build_release: docker build -t botium/botium-speech-dictate:$(VERSION) dictate docker_publish_release: - docker push botium/botium-speech-frontend:$(VERSION) - docker push botium/botium-speech-watcher:$(VERSION) - docker push botium/botium-speech-kaldi-en:$(VERSION) - docker push botium/botium-speech-kaldi-de:$(VERSION) - docker push botium/botium-speech-marytts:$(VERSION) - docker push botium/botium-speech-dictate:$(VERSION) + docker push botium/botium-speech-frontend:$(VERSION) + docker push botium/botium-speech-watcher:$(VERSION) + docker push botium/botium-speech-kaldi-en:$(VERSION) + docker push botium/botium-speech-kaldi-de:$(VERSION) + docker push botium/botium-speech-marytts:$(VERSION) + docker push botium/botium-speech-dictate:$(VERSION) docker_latest_release: docker tag botium/botium-speech-frontend:$(VERSION) botium/botium-speech-frontend:latest - docker push botium/botium-speech-frontend:latest + docker push botium/botium-speech-frontend:latest docker tag botium/botium-speech-watcher:$(VERSION) botium/botium-speech-watcher:latest - docker push botium/botium-speech-watcher:latest + docker push botium/botium-speech-watcher:latest docker tag botium/botium-speech-kaldi-en:$(VERSION) botium/botium-speech-kaldi-en:latest - docker push botium/botium-speech-kaldi-en:latest + docker push botium/botium-speech-kaldi-en:latest docker tag botium/botium-speech-kaldi-de:$(VERSION) botium/botium-speech-kaldi-de:latest - docker push botium/botium-speech-kaldi-de:latest + docker push botium/botium-speech-kaldi-de:latest docker tag botium/botium-speech-marytts:$(VERSION) botium/botium-speech-marytts:latest - docker push botium/botium-speech-marytts:latest + docker push botium/botium-speech-marytts:latest docker tag botium/botium-speech-dictate:$(VERSION) botium/botium-speech-dictate:latest - docker push botium/botium-speech-dictate:latest + docker push botium/botium-speech-dictate:latest From 94ac2955b76aa51a3d2f4ccae84f6656ee14a2d9 Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 16:44:18 +0100 Subject: [PATCH 7/9] adding apt-get update --- frontend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/Dockerfile b/frontend/Dockerfile index d84bf5f..325d9a5 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x | bash - && apt-get -y install nodejs -RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg +RUN apt-get update && apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg WORKDIR /app COPY ./package.json /app/package.json From 6fa4343143608ee284e1ac6bcb80734ef001d7ec Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Fri, 18 Dec 2020 16:45:38 +0100 Subject: [PATCH 8/9] fix Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 90c065c..13d8ec2 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ VERSION := $(shell git describe --abbrev=0 --tags ${TAG_COMMIT} 2>/dev/null || t docker_build_develop: docker build -t botium/botium-speech-frontend:develop frontend docker build -t botium/botium-speech-watcher:develop watcher - docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop) stt + docker build -f stt/Dockerfile.kaldi.en -t botium/botium-speech-kaldi-en:develop stt docker build -f stt/Dockerfile.kaldi.de -t botium/botium-speech-kaldi-de:develop stt docker build -f tts/Dockerfile.marytts -t botium/botium-speech-marytts:develop tts docker build -t botium/botium-speech-dictate:develop dictate From ea0659d6edd8e3ba7b2bab690f2691f0c58054a3 Mon Sep 17 00:00:00 2001 From: Florian Treml Date: Mon, 21 Dec 2020 15:49:32 +0100 Subject: [PATCH 9/9] Load .env file from internal file --- README.md | 5 +++-- frontend/Dockerfile | 1 + frontend/package.json | 4 ++-- frontend/resources/.env | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index df53170..506b093 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,11 @@ Clone or download this repository and start with docker-compose: > docker-compose up -d -This will download the prebuilt images from Dockerhub. +This will download the latest released prebuilt images from Dockerhub. To download the latest developer images from Dockerhub: -Point your browser to http://127.0.0.1 to open the [Swagger UI](https://swagger.io/tools/swagger-ui/) and browse/use the API definition. + > docker-compose --env-file .env.develop up +Point your browser to http://127.0.0.1 to open the [Swagger UI](https://swagger.io/tools/swagger-ui/) and browse/use the API definition. ### Optional: Build Docker Images diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 325d9a5..776acb3 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -7,6 +7,7 @@ WORKDIR /app COPY ./package.json /app/package.json RUN npm install --no-optional --production COPY . /app +COPY ./resources/.env /app/.env RUN find . -type f ! -path '*/node_modules/*' -print0 | xargs -0 dos2unix VOLUME /app/resources diff --git a/frontend/package.json b/frontend/package.json index 4ec94be..b4831cc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -5,8 +5,8 @@ "scripts": { "eslint": "eslint src", "eslint-fix": "eslint --fix src", - "start": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js", - "start-dist": "cross-env DOTENV_FLOW_PATH=./resources node -r dotenv-flow/config ./src/server.js", + "start-dev": "cross-env DOTENV_FLOW_PATH=./resources nodemon -w ./resources/.env -w ./resources/.env.local -w ./src/ -x \"node -r dotenv-flow/config\" ./src/server.js", + "start-dist": "node -r dotenv-flow/config ./src/server.js", "jsdoc": "swagger-jsdoc -d ./src/swaggerDef.json -o ./src/swagger.json src/routes.js" }, "author": "Botium GmbH", diff --git a/frontend/resources/.env b/frontend/resources/.env index c1c7dc8..4b38ffb 100644 --- a/frontend/resources/.env +++ b/frontend/resources/.env @@ -33,7 +33,7 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize #BOTIUM_SPEECH_GOOGLE_KEYFILE=./resources/google.json #BOTIUM_SPEECH_GOOGLE_CLIENT_EMAIL= #BOTIUM_SPEECH_GOOGLE_PRIVATE_KEY= -BOTIUM_SPEECH_GOOGLE_CONFIG={} +#BOTIUM_SPEECH_GOOGLE_CONFIG={} # For files longer than 1 minute, you have to create a Google Cloud Storage Bucket as temporary storage (give read/write access to service user) #BOTIUM_SPEECH_GOOGLE_BUCKET_NAME= #BOTIUM_SPEECH_GOOGLE_API_VERSION=