Commit 72fd6a5

Merge branch 'ggerganov:master' into master

z-zawhtet-a authored Jan 7, 2024
2 parents 4e1c856 + 022756a
Showing 30 changed files with 1,554 additions and 611 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
```diff
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
 
-project(whisper.cpp VERSION 1.5.2)
+project(whisper.cpp VERSION 1.5.4)
 
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
```
10 changes: 4 additions & 6 deletions Package.swift
```diff
@@ -13,9 +13,13 @@ let package = Package(
     products: [
         .library(name: "whisper", targets: ["whisper"]),
     ],
+    dependencies: [
+        .package(url: "https://github.com/ggerganov/ggml.git", .branch("master"))
+    ],
     targets: [
         .target(
             name: "whisper",
+            dependencies: ["ggml"],
             path: ".",
             exclude: [
                 "bindings",
@@ -32,14 +36,8 @@ let package = Package(
                 "Makefile"
             ],
             sources: [
-                "ggml.c",
                 "whisper.cpp",
-                "ggml-alloc.c",
-                "ggml-backend.c",
-                "ggml-quants.c",
-                "ggml-metal.m"
             ],
-            resources: [.process("ggml-metal.metal")],
             publicHeadersPath: "spm-headers",
             cSettings: [
                 .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
```
2 changes: 1 addition & 1 deletion README.md
```diff
@@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
 
-Stable: [v1.5.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
```
2 changes: 1 addition & 1 deletion bindings/javascript/package.json
```diff
@@ -1,6 +1,6 @@
 {
   "name": "whisper.cpp",
-  "version": "1.5.2",
+  "version": "1.5.4",
   "description": "Whisper speech recognition",
   "main": "whisper.js",
   "scripts": {
```
2 changes: 1 addition & 1 deletion bindings/ruby/ext/ggml-backend-impl.h
```diff
@@ -70,7 +70,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
```
4 changes: 2 additions & 2 deletions bindings/ruby/ext/ggml-backend.c
```diff
@@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     backend->iface.graph_plan_compute(backend, plan);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    return backend->iface.graph_compute(backend, cgraph);
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
```
2 changes: 1 addition & 1 deletion bindings/ruby/ext/ggml-backend.h
```diff
@@ -52,7 +52,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
    GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
```
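Note: the `void` → `bool` change in these `ggml-backend` headers means graph computation can now report failure to the caller. A minimal sketch of how a caller might act on the new return value; only `ggml_backend_graph_compute` comes from the API above, while the `run_graph` helper and its error message are illustrative:

```cpp
// Illustrative caller sketch; not part of this commit.
#include "ggml-backend.h"
#include <cstdio>

static bool run_graph(ggml_backend_t backend, struct ggml_cgraph * graph) {
    // graph_compute now returns false on failure instead of returning void
    if (!ggml_backend_graph_compute(backend, graph)) {
        fprintf(stderr, "ggml: graph compute failed\n");
        return false;
    }
    return true;
}
```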
4 changes: 2 additions & 2 deletions coreml/whisper-encoder.mm
```diff
@@ -24,9 +24,9 @@
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    // config.computeUnits = MLComputeUnitsCPUAndGPU;
     //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-    //config.computeUnits = MLComputeUnitsAll;
+    config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
 
```
7 changes: 4 additions & 3 deletions examples/server/README.md
````diff
@@ -46,7 +46,7 @@ options:
   --convert,     [false  ] Convert audio to WAV, requires ffmpeg on the server
 ```
 
-> [!WARNING]
+> [!WARNING]
 > **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
 
 ## request examples
@@ -56,8 +56,9 @@ options:
 curl 127.0.0.1:8080/inference \
 -H "Content-Type: multipart/form-data" \
 -F file="@<file-path>" \
--F temperature="0.2" \
--F response-format="json"
+-F temperature="0.0" \
+-F temperature_inc="0.2" \
+-F response_format="json"
 ```
 
 **/load**
````
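Note: the server example is built on cpp-httplib (the `req.has_file`/`req.get_file_value` calls in `server.cpp` below are its multipart API), so the renamed snake_case fields can also be exercised from a C++ client. A sketch, assuming a server on 127.0.0.1:8080 and a local `sample.wav` (both placeholders):

```cpp
// Hypothetical client sketch using cpp-httplib's multipart form API;
// the field names match the renamed snake_case parameters above.
#include "httplib.h"
#include <fstream>
#include <sstream>

int main() {
    std::ifstream wav("sample.wav", std::ios::binary);  // placeholder input
    std::stringstream buf;
    buf << wav.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);
    httplib::MultipartFormDataItems items = {
        // {name, content, filename, content_type}
        {"file",            buf.str(), "sample.wav", "audio/wav"},
        {"temperature",     "0.0",     "",           ""},
        {"temperature_inc", "0.2",     "",           ""},
        {"response_format", "json",    "",           ""},
    };
    auto res = cli.Post("/inference", items);
    return (res && res->status == 200) ? 0 : 1;
}
```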
61 changes: 33 additions & 28 deletions examples/server/server.cpp
```diff
@@ -44,26 +44,27 @@ struct server_params
     int32_t port          = 8080;
     int32_t read_timeout  = 600;
     int32_t write_timeout = 600;
 
     bool ffmpeg_converter = false;
 };
 
 struct whisper_params {
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_processors = 1;
-    int32_t offset_t_ms = 0;
-    int32_t offset_n = 0;
-    int32_t duration_ms = 0;
-    int32_t progress_step = 5;
-    int32_t max_context = -1;
-    int32_t max_len = 0;
-    int32_t best_of = 2;
-    int32_t beam_size = -1;
-
-    float word_thold = 0.01f;
-    float entropy_thold = 2.40f;
-    float logprob_thold = -1.00f;
-    float userdef_temp = 0.20f;
+    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_processors  = 1;
+    int32_t offset_t_ms   = 0;
+    int32_t offset_n      = 0;
+    int32_t duration_ms   = 0;
+    int32_t progress_step = 5;
+    int32_t max_context   = -1;
+    int32_t max_len       = 0;
+    int32_t best_of       = 2;
+    int32_t beam_size     = -1;
+
+    float word_thold      = 0.01f;
+    float entropy_thold   = 2.40f;
+    float logprob_thold   = -1.00f;
+    float temperature     = 0.00f;
+    float temperature_inc = 0.20f;
 
     bool speed_up   = false;
     bool debug_mode = false;
@@ -395,34 +396,37 @@ std::string output_str(struct whisper_context * ctx, const whisper_params & params
 
 void get_req_parameters(const Request & req, whisper_params & params)
 {
-    // user model configu.has_fileion
-    if (req.has_file("offset-t"))
+    if (req.has_file("offset_t"))
     {
-        params.offset_t_ms = std::stoi(req.get_file_value("offset-t").content);
+        params.offset_t_ms = std::stoi(req.get_file_value("offset_t").content);
     }
-    if (req.has_file("offset-n"))
+    if (req.has_file("offset_n"))
     {
-        params.offset_n = std::stoi(req.get_file_value("offset-n").content);
+        params.offset_n = std::stoi(req.get_file_value("offset_n").content);
     }
     if (req.has_file("duration"))
     {
         params.duration_ms = std::stoi(req.get_file_value("duration").content);
     }
-    if (req.has_file("max-context"))
+    if (req.has_file("max_context"))
     {
-        params.max_context = std::stoi(req.get_file_value("max-context").content);
+        params.max_context = std::stoi(req.get_file_value("max_context").content);
     }
     if (req.has_file("prompt"))
     {
         params.prompt = req.get_file_value("prompt").content;
     }
-    if (req.has_file("response-format"))
+    if (req.has_file("response_format"))
     {
-        params.response_format = req.get_file_value("response-format").content;
+        params.response_format = req.get_file_value("response_format").content;
     }
     if (req.has_file("temperature"))
     {
-        params.userdef_temp = std::stof(req.get_file_value("temperature").content);
+        params.temperature = std::stof(req.get_file_value("temperature").content);
     }
+    if (req.has_file("temperature_inc"))
+    {
+        params.temperature_inc = std::stof(req.get_file_value("temperature_inc").content);
+    }
 }
@@ -513,7 +517,7 @@ int main(int argc, char ** argv) {
         temp_file.close();
 
         // if file is not wav, convert to wav
 
         if (sparams.ffmpeg_converter) {
             std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
             const bool is_converted = convert_to_wav(temp_filename, error_resp);
@@ -602,7 +606,8 @@ int main(int argc, char ** argv) {
         wparams.greedy.best_of        = params.best_of;
         wparams.beam_search.beam_size = params.beam_size;
 
-        wparams.temperature_inc = params.userdef_temp;
+        wparams.temperature     = params.temperature;
+        wparams.temperature_inc = params.temperature_inc;
         wparams.entropy_thold   = params.entropy_thold;
         wparams.logprob_thold   = params.logprob_thold;
 
```
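Note on the last hunk: whisper.cpp's decoder starts at `temperature` and, when a decode attempt fails the entropy/log-probability checks, retries with the temperature raised by `temperature_inc`; previously the single user-supplied temperature was assigned to `temperature_inc`. A hedged sketch of the mapping, where the helper function is hypothetical and only the two fields shown are what the commit changes:

```cpp
// Hypothetical helper mirroring the hunk above.
#include "whisper.h"

static void apply_temperature(struct whisper_full_params & wparams,
                              float temperature, float temperature_inc) {
    wparams.temperature     = temperature;     // first decode attempt
    wparams.temperature_inc = temperature_inc; // step added per fallback retry
}
```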