Commit 72fd6a5

Merge branch 'ggerganov:master' into master

z-zawhtet-a authored Jan 7, 2024
2 parents 4e1c856 + 022756a
Showing 30 changed files with 1,554 additions and 611 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
```diff
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
 
-project(whisper.cpp VERSION 1.5.2)
+project(whisper.cpp VERSION 1.5.4)
 
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
```
10 changes: 4 additions & 6 deletions Package.swift
```diff
@@ -13,9 +13,13 @@ let package = Package(
     products: [
         .library(name: "whisper", targets: ["whisper"]),
     ],
+    dependencies: [
+        .package(url: "https://github.com/ggerganov/ggml.git", .branch("master"))
+    ],
     targets: [
         .target(
             name: "whisper",
+            dependencies: ["ggml"],
             path: ".",
             exclude: [
                 "bindings",
@@ -32,14 +36,8 @@ let package = Package(
                 "Makefile"
             ],
             sources: [
-                "ggml.c",
                 "whisper.cpp",
-                "ggml-alloc.c",
-                "ggml-backend.c",
-                "ggml-quants.c",
-                "ggml-metal.m"
             ],
-            resources: [.process("ggml-metal.metal")],
             publicHeadersPath: "spm-headers",
             cSettings: [
                 .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
```
2 changes: 1 addition & 1 deletion README.md
```diff
@@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
 
-Stable: [v1.5.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
```
2 changes: 1 addition & 1 deletion bindings/javascript/package.json
```diff
@@ -1,6 +1,6 @@
 {
   "name": "whisper.cpp",
-  "version": "1.5.2",
+  "version": "1.5.4",
   "description": "Whisper speech recognition",
   "main": "whisper.js",
   "scripts": {
```
2 changes: 1 addition & 1 deletion bindings/ruby/ext/ggml-backend-impl.h
```diff
@@ -70,7 +70,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
```
4 changes: 2 additions & 2 deletions bindings/ruby/ext/ggml-backend.c
```diff
@@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     backend->iface.graph_plan_compute(backend, plan);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    return backend->iface.graph_compute(backend, cgraph);
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
```
2 changes: 1 addition & 1 deletion bindings/ruby/ext/ggml-backend.h
```diff
@@ -52,7 +52,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
    GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
```
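Note: the `void` → `bool` change in these `ggml-backend` headers means graph computation can now report failure to the caller. A minimal sketch of how a caller might act on the new return value; only `ggml_backend_graph_compute` comes from the API above, while the `run_graph` helper and its error message are illustrative:

```cpp
// Illustrative caller sketch; not part of this commit.
#include "ggml-backend.h"
#include <cstdio>

static bool run_graph(ggml_backend_t backend, struct ggml_cgraph * graph) {
    // graph_compute now returns false on failure instead of returning void
    if (!ggml_backend_graph_compute(backend, graph)) {
        fprintf(stderr, "ggml: graph compute failed\n");
        return false;
    }
    return true;
}
```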
4 changes: 2 additions & 2 deletions coreml/whisper-encoder.mm
```diff
@@ -24,9 +24,9 @@
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    // config.computeUnits = MLComputeUnitsCPUAndGPU;
     //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-    //config.computeUnits = MLComputeUnitsAll;
+    config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
 
```
7 changes: 4 additions & 3 deletions examples/server/README.md
````diff
@@ -46,7 +46,7 @@ options:
   --convert,     [false  ] Convert audio to WAV, requires ffmpeg on the server
 ```
 
-> [!WARNING]
+> [!WARNING]
 > **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
 
 ## request examples
@@ -56,8 +56,9 @@ options:
 curl 127.0.0.1:8080/inference \
 -H "Content-Type: multipart/form-data" \
 -F file="@<file-path>" \
--F temperature="0.2" \
--F response-format="json"
+-F temperature="0.0" \
+-F temperature_inc="0.2" \
+-F response_format="json"
 ```
 
 **/load**
````
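Note: the server example is built on cpp-httplib (the `req.has_file`/`req.get_file_value` calls in `server.cpp` below are its multipart API), so the renamed snake_case fields can also be exercised from a C++ client. A sketch, assuming a server on 127.0.0.1:8080 and a local `sample.wav` (both placeholders):

```cpp
// Hypothetical client sketch using cpp-httplib's multipart form API;
// the field names match the renamed snake_case parameters above.
#include "httplib.h"
#include <fstream>
#include <sstream>

int main() {
    std::ifstream wav("sample.wav", std::ios::binary);  // placeholder input
    std::stringstream buf;
    buf << wav.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);
    httplib::MultipartFormDataItems items = {
        // {name, content, filename, content_type}
        {"file",            buf.str(), "sample.wav", "audio/wav"},
        {"temperature",     "0.0",     "",           ""},
        {"temperature_inc", "0.2",     "",           ""},
        {"response_format", "json",    "",           ""},
    };
    auto res = cli.Post("/inference", items);
    return (res && res->status == 200) ? 0 : 1;
}
```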
61 changes: 33 additions & 28 deletions examples/server/server.cpp
```diff
@@ -44,26 +44,27 @@ struct server_params
     int32_t port          = 8080;
     int32_t read_timeout  = 600;
     int32_t write_timeout = 600;
 
     bool ffmpeg_converter = false;
 };
 
 struct whisper_params {
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_processors = 1;
-    int32_t offset_t_ms = 0;
-    int32_t offset_n = 0;
-    int32_t duration_ms = 0;
-    int32_t progress_step = 5;
-    int32_t max_context = -1;
-    int32_t max_len = 0;
-    int32_t best_of = 2;
-    int32_t beam_size = -1;
-
-    float word_thold = 0.01f;
-    float entropy_thold = 2.40f;
-    float logprob_thold = -1.00f;
-    float userdef_temp = 0.20f;
+    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_processors  = 1;
+    int32_t offset_t_ms   = 0;
+    int32_t offset_n      = 0;
+    int32_t duration_ms   = 0;
+    int32_t progress_step = 5;
+    int32_t max_context   = -1;
+    int32_t max_len       = 0;
+    int32_t best_of       = 2;
+    int32_t beam_size     = -1;
+
+    float word_thold      = 0.01f;
+    float entropy_thold   = 2.40f;
+    float logprob_thold   = -1.00f;
+    float temperature     = 0.00f;
+    float temperature_inc = 0.20f;
 
     bool speed_up   = false;
     bool debug_mode = false;
@@ -395,34 +396,37 @@ std::string output_str(struct whisper_context * ctx, const whisper_params & params
 
 void get_req_parameters(const Request & req, whisper_params & params)
 {
-    // user model configu.has_fileion
-    if (req.has_file("offset-t"))
+    if (req.has_file("offset_t"))
     {
-        params.offset_t_ms = std::stoi(req.get_file_value("offset-t").content);
+        params.offset_t_ms = std::stoi(req.get_file_value("offset_t").content);
     }
-    if (req.has_file("offset-n"))
+    if (req.has_file("offset_n"))
     {
-        params.offset_n = std::stoi(req.get_file_value("offset-n").content);
+        params.offset_n = std::stoi(req.get_file_value("offset_n").content);
     }
     if (req.has_file("duration"))
     {
         params.duration_ms = std::stoi(req.get_file_value("duration").content);
     }
-    if (req.has_file("max-context"))
+    if (req.has_file("max_context"))
     {
-        params.max_context = std::stoi(req.get_file_value("max-context").content);
+        params.max_context = std::stoi(req.get_file_value("max_context").content);
     }
     if (req.has_file("prompt"))
     {
         params.prompt = req.get_file_value("prompt").content;
     }
-    if (req.has_file("response-format"))
+    if (req.has_file("response_format"))
     {
-        params.response_format = req.get_file_value("response-format").content;
+        params.response_format = req.get_file_value("response_format").content;
     }
     if (req.has_file("temperature"))
     {
-        params.userdef_temp = std::stof(req.get_file_value("temperature").content);
+        params.temperature = std::stof(req.get_file_value("temperature").content);
     }
+    if (req.has_file("temperature_inc"))
+    {
+        params.temperature_inc = std::stof(req.get_file_value("temperature_inc").content);
+    }
 }
@@ -513,7 +517,7 @@ int main(int argc, char ** argv) {
         temp_file.close();
 
         // if file is not wav, convert to wav
 
         if (sparams.ffmpeg_converter) {
             std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
             const bool is_converted = convert_to_wav(temp_filename, error_resp);
@@ -602,7 +606,8 @@ int main(int argc, char ** argv) {
         wparams.greedy.best_of        = params.best_of;
         wparams.beam_search.beam_size = params.beam_size;
 
-        wparams.temperature_inc = params.userdef_temp;
+        wparams.temperature     = params.temperature;
+        wparams.temperature_inc = params.temperature_inc;
         wparams.entropy_thold   = params.entropy_thold;
         wparams.logprob_thold   = params.logprob_thold;
 
```
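Note on the last hunk: whisper.cpp's decoder starts at `temperature` and, when a decode attempt fails the entropy/log-probability checks, retries with the temperature raised by `temperature_inc`; previously the single user-supplied temperature was assigned to `temperature_inc`. A hedged sketch of the mapping, where the helper function is hypothetical and only the two fields shown are what the commit changes:

```cpp
// Hypothetical helper mirroring the hunk above.
#include "whisper.h"

static void apply_temperature(struct whisper_full_params & wparams,
                              float temperature, float temperature_inc) {
    wparams.temperature     = temperature;     // first decode attempt
    wparams.temperature_inc = temperature_inc; // step added per fallback retry
}
```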