From 6c79d93e8f4e57422238a4076eb90fe49c9f14bf Mon Sep 17 00:00:00 2001
From: Angelo Paparazzi <angelo.paparazzi@ibm.com>
Date: Wed, 15 May 2024 13:35:46 -0500
Subject: [PATCH] feat(stt): remove interimResults and lowLatency wss params

---
 lib/recognize-stream.ts             |  8 ++-----
 package-lock.json                   | 34 ++++++++++++++---------------
 speech-to-text/v1-generated.ts      |  4 ++--
 speech-to-text/v1.ts                |  2 --
 test/unit/speech-to-text.v1.test.js |  2 +-
 5 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/lib/recognize-stream.ts b/lib/recognize-stream.ts
index 11eee76d96..e8a71e5517 100644
--- a/lib/recognize-stream.ts
+++ b/lib/recognize-stream.ts
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright IBM Corp. 2014, 2020.
+ * (C) Copyright IBM Corp. 2014, 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -62,7 +62,7 @@ class RecognizeStream extends Duplex {
    *
    * Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted.
    *
-   * By default, only finalized text is emitted in the data events, however when `objectMode`/`readableObjectMode` and `interim_results` are enabled, both interim and final results objects are emitted.
+   * By default, only finalized text is emitted in the data events; however, when `objectMode`/`readableObjectMode` is enabled, both interim and final results objects are emitted.
    * WriteableElementStream uses this, for example, to live-update the DOM with word-by-word transcriptions.
    *
    * Note that the WebSocket connection is not established until the first chunk of data is recieved. This allows for auto-detection of content type (for wav/flac/opus audio).
@@ -86,7 +86,6 @@ class RecognizeStream extends Duplex {
    * @param {string} [options.contentType] - The format (MIME type) of the audio
    * @param {number} [options.customizationWeight] - Tell the service how much weight to give to words from the custom language model compared to those from the base model for the current request
    * @param {number} [options.inactivityTimeout] - The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is closed (default=30)
-   * @param {boolean} [options.interimResults] - If true, the service returns interim results as a stream of JSON SpeechRecognitionResults objects (default=false)
    * @param {string[]} [options.keywords] - An array of keyword strings to spot in the audio
    * @param {number} [options.keywordsThreshold] - A confidence value that is the lower bound for spotting a keyword
    * @param {number} [options.maxAlternatives] - The maximum number of alternative transcripts that the service is to return (default=1)
@@ -105,7 +104,6 @@ class RecognizeStream extends Duplex {
    * @param {boolean} [options.splitTranscriptAtPhraseEnd] - If `true`, directs the service to split the transcript into multiple final results based on semantic features of the input
    * @param {number} [options.speechDetectorSensitivity] - The sensitivity of speech activity detection that the service is to perform
    * @param {number} [options.backgroundAudioSuppression] - The level to which the service is to suppress background audio based on its volume to prevent it from being transcribed as speech
-   * @param {boolean} [params.lowLatency] - If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, directs the service to produce results even more quickly than it usually does
    * @constructor
    */
   constructor(options: RecognizeStream.Options) {
@@ -168,7 +166,6 @@ class RecognizeStream extends Duplex {
       'timestamps',
       'word_confidence',
       'content-type',
-      'interim_results',
       'keywords',
       'keywords_threshold',
       'max_alternatives',
@@ -182,7 +179,6 @@ class RecognizeStream extends Duplex {
       'split_transcript_at_phrase_end',
       'speech_detector_sensitivity',
       'background_audio_suppression',
-      'low_latency',
     ];
     const openingMessage = processUserParameters(options, openingMessageParamsAllowed);
     openingMessage.action = 'start';
diff --git a/package-lock.json b/package-lock.json
index ddb735f930..fa027eb529 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -7952,6 +7952,18 @@
         "node": ">=8"
       }
     },
+    "node_modules/jsdoc/node_modules/marked": {
+      "version": "4.3.0",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
+      "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==",
+      "dev": true,
+      "bin": {
+        "marked": "bin/marked.js"
+      },
+      "engines": {
+        "node": ">= 12"
+      }
+    },
     "node_modules/jsdoc/node_modules/mkdirp": {
       "version": "1.0.4",
       "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
@@ -8615,15 +8627,15 @@
       "dev": true
     },
     "node_modules/marked": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
-      "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==",
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-2.1.3.tgz",
+      "integrity": "sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA==",
       "dev": true,
       "bin": {
-        "marked": "bin/marked.js"
+        "marked": "bin/marked"
       },
       "engines": {
-        "node": ">= 12"
+        "node": ">= 10"
       }
     },
     "node_modules/marked-terminal": {
@@ -13069,18 +13081,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/semantic-release/node_modules/marked": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/marked/-/marked-2.1.3.tgz",
-      "integrity": "sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA==",
-      "dev": true,
-      "bin": {
-        "marked": "bin/marked"
-      },
-      "engines": {
-        "node": ">= 10"
-      }
-    },
     "node_modules/semantic-release/node_modules/yargs": {
       "version": "16.2.0",
       "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz",
diff --git a/speech-to-text/v1-generated.ts b/speech-to-text/v1-generated.ts
index 5752f3a281..b8d6cf9474 100644
--- a/speech-to-text/v1-generated.ts
+++ b/speech-to-text/v1-generated.ts
@@ -6602,13 +6602,13 @@ namespace SpeechToTextV1 {
      *  elements: the word followed by its start and end time in seconds, for example:
      *  `[["hello",0.0,1.2],["world",1.2,2.5]]`. Timestamps are returned only for the best alternative.
      */
-    timestamps?: string[];
+    timestamps?: [string, number, number][];
     /** A confidence score for each word of the transcript as a list of lists. Each inner list consists of two
      *  elements: the word and its confidence score in the range of 0.0 to 1.0, for example:
      *  `[["hello",0.95],["world",0.86]]`. Confidence scores are returned only for the best alternative and only with
      *  results marked as final.
      */
-    word_confidence?: string[];
+    word_confidence?: [string, number][];
   }
 
   /** Component results for a speech recognition request. */
diff --git a/speech-to-text/v1.ts b/speech-to-text/v1.ts
index 380726a585..6fbc167a38 100644
--- a/speech-to-text/v1.ts
+++ b/speech-to-text/v1.ts
@@ -266,7 +266,6 @@ namespace SpeechToTextV1 {
     contentType?: string;
     customizationWeight?: number;
     inactivityTimeout?: number;
-    interimResults?: boolean;
     keywords?: string[];
     keywordsThreshold?: number;
     maxAlternatives?: number;
@@ -286,7 +285,6 @@ namespace SpeechToTextV1 {
     splitTranscriptAtPhraseEnd?: boolean;
     speechDetectorSensitivity?: number;
     backgroundAudioSuppression?: number;
-    lowLatency?: boolean;
     characterInsertionBias?: number;
   }
 }
diff --git a/test/unit/speech-to-text.v1.test.js b/test/unit/speech-to-text.v1.test.js
index 8870bb5fc2..bacfa2cbd3 100644
--- a/test/unit/speech-to-text.v1.test.js
+++ b/test/unit/speech-to-text.v1.test.js
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright IBM Corp. 2024.
+ * (C) Copyright IBM Corp. 2018, 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.