Skip to content

Commit

Permalink
Add Piper as TTS provider
Browse files Browse the repository at this point in the history
  • Loading branch information
rkusa committed May 24, 2024
1 parent a075e32 commit 91e5d06
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 3 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ tts.provider.gcloud.defaultVoice = "en-GB-Neural2-A"
-- Requires at least Windows Server 2019 to work properly.
tts.provider.win.defaultVoice = "David"

-- The default Piper language model to use (must be installed manually).
tts.provider.piper.defaultVoice = "..."

-- The default Piper speech speed (1.0 is the default; lower is quicker, higher is slower).
tts.provider.piper.defaultSpeed = 1.0

-- Your SRS server's address.
srs.addr = "127.0.0.1:5002"
```
Expand Down Expand Up @@ -167,6 +173,17 @@ You can also check for the presence of a `\Logs\grpc.log` file.

The server will be running on port 50051 by default.

## Install Piper TTS

This is only necessary if you plan to use Piper as your TTS provider.

1. Download `piper_windows_amd64.zip` from the latest [Piper release](https://github.com/rhasspy/piper/releases).
2. Extract the `piper` directory from this zip file and place it at `DCS.openbeta\Mods\tech\DCS-gRPC\piper`.
3. Download at least one voice from [Piper Voices](https://github.com/rhasspy/piper/blob/master/VOICES.md). You need both the `model` and the `config` file. Given the audio quality of SRS, a `low` quality model is sufficient.
4. Place the model and config into your `DCS.openbeta\Mods\tech\DCS-gRPC\piper\` directory (e.g. `DCS.openbeta\Mods\tech\DCS-gRPC\piper\en_US-amy-low.onnx` and `DCS.openbeta\Mods\tech\DCS-gRPC\piper\en_US-amy-low.onnx.json`).
5. Set one of your installed voices as the default voice in your config (`tts.provider.piper.defaultVoice = "..."`, e.g. `tts.provider.piper.defaultVoice = "en_US-amy-low.onnx"`).
6. If you want to use Piper, don't forget to set it as your default provider, or enable it on a per-transmission basis.

## Lua API

`DCS-gRPC` provides the following Lua APIs to interact with the server from within Lua.
Expand Down Expand Up @@ -202,6 +219,7 @@ The server will be running on port 50051 by default.
-- `= { azure = {} }` / `= { azure = { voice = "..." } }` enable Azure TTS
-- `= { gcloud = {} }` / `= { gcloud = { voice = "..." } }` enable Google Cloud TTS
-- `= { win = {} }` / `= { win = { voice = "..." } }` enable Windows TTS
-- `= { piper = {} }` / `= { piper = { voice = "...", speed = 1.0 } }` enable Piper TTS
provider = null,
}
```
Expand Down
2 changes: 1 addition & 1 deletion lua/DCS-gRPC/grpc-mission.lua
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
if not GRPC then
GRPC = {
-- scaffold nested tables to allow direct assignment in config file
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {} } },
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {}, piper = {} } },
srs = {},
}
end
Expand Down
2 changes: 1 addition & 1 deletion lua/Hooks/DCS-gRPC.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ local function init()
if not GRPC then
_G.GRPC = {
-- scaffold nested tables to allow direct assignment in config file
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {} } },
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {}, piper = {} } },
srs = {},
}
end
Expand Down
12 changes: 12 additions & 0 deletions protos/dcs/srs/v0/srs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,25 @@ message TransmitRequest {
optional string voice = 1;
}

// Options for synthesizing the transmission with the Piper TTS provider.
message Piper {
// The voice model the text is synthesized with. Corresponds to a model file
// placed in your `DCS.openbeta\Mods\tech\DCS-gRPC\piper\` directory (e.g.
// `en_US-amy-low.onnx`). Falls back to `tts.provider.piper.defaultVoice`
// from your config when not set.
optional string voice = 1;

// The speed of the generated speech. Lower values are quicker, higher values
// are slower. Falls back to `tts.provider.piper.defaultSpeed` from your
// config, or to 1.0 if that is not set either.
optional float speed = 2;
}


// Optional TTS provider to be used. Defaults to the one configured in your
// config or to Windows' built-in TTS.
oneof provider {
Aws aws = 8;
Azure azure = 9;
GCloud gcloud = 10;
Windows win = 11;
// Piper does not support SSML; only use it with plain text.
Piper piper = 12;
}
}

Expand Down
9 changes: 9 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub struct TtsProviderConfig {
pub azure: Option<AzureConfig>,
pub gcloud: Option<GCloudConfig>,
pub win: Option<WinConfig>,
pub piper: Option<PiperConfig>,
}

#[derive(Debug, Clone, Default, Deserialize, Serialize)]
Expand All @@ -48,6 +49,7 @@ pub enum TtsProvider {
GCloud,
#[default]
Win,
Piper,
}

#[derive(Clone, Deserialize, Serialize)]
Expand Down Expand Up @@ -80,6 +82,13 @@ pub struct WinConfig {
pub default_voice: Option<String>,
}

/// Defaults for the Piper TTS provider.
///
/// Field names are (de)serialized in camelCase, i.e. as `defaultVoice` and `defaultSpeed`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PiperConfig {
/// Voice model to use when a transmit request does not specify a voice itself.
pub default_voice: Option<String>,
/// Speech speed to use when a transmit request does not specify a speed itself.
pub default_speed: Option<f32>,
}

#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SrsConfig {
Expand Down
36 changes: 35 additions & 1 deletion src/rpc/srs.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use std::error;
use std::future::Future;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::path::PathBuf;
use std::str::FromStr;
use std::time::{Duration, Instant};

use ::srs::Sender;
#[cfg(target_os = "windows")]
use ::tts::WinConfig;
use ::tts::{AwsConfig, AwsRegion, AzureConfig, GCloudConfig, TtsConfig};
use ::tts::{AwsConfig, AwsRegion, AzureConfig, GCloudConfig, PiperConfig, TtsConfig};
use futures_util::FutureExt;
use stubs::common::v0::{Coalition, Unit};
use stubs::mission::v0::stream_events_response::{Event, TtsEvent};
Expand All @@ -27,6 +28,7 @@ use crate::srs::SrsClients;
pub struct Srs {
tts_config: crate::config::TtsConfig,
srs_config: crate::config::SrsConfig,
write_dir: PathBuf,
rpc: MissionRpc,
srs_clients: SrsClients,
shutdown_signal: ShutdownHandle,
Expand All @@ -36,13 +38,15 @@ impl Srs {
pub fn new(
tts_config: crate::config::TtsConfig,
srs_config: crate::config::SrsConfig,
write_dir: PathBuf,
rpc: MissionRpc,
srs_clients: SrsClients,
shutdown_signal: ShutdownHandle,
) -> Self {
Self {
tts_config,
srs_config,
write_dir,
rpc,
srs_clients,
shutdown_signal,
Expand Down Expand Up @@ -105,6 +109,10 @@ impl SrsService for Srs {
TtsProvider::Win => {
transmit_request::Provider::Win(transmit_request::Windows { voice: None })
}
TtsProvider::Piper => transmit_request::Provider::Piper(transmit_request::Piper {
voice: None,
speed: None,
}),
}) {
transmit_request::Provider::Aws(transmit_request::Aws { voice }) => {
TtsConfig::Aws(AwsConfig {
Expand Down Expand Up @@ -215,6 +223,32 @@ impl SrsService for Srs {
"Windows TTS is only available on Windows",
));
}
transmit_request::Provider::Piper(transmit_request::Piper { voice, speed }) => {
    // Defaults from the `tts.provider.piper` section of the config (if there is one).
    let piper_defaults = self
        .tts_config
        .provider
        .as_ref()
        .and_then(|p| p.piper.as_ref());

    TtsConfig::Piper(PiperConfig {
        voice: voice
            // Treat an empty voice in the request like an unset one, so that it falls
            // back to the configured default instead of failing right away.
            .filter(|v| !v.is_empty())
            .or_else(|| piper_defaults.and_then(|p| p.default_voice.clone()))
            .filter(|v| !v.is_empty())
            .ok_or_else(|| {
                // Name the option the way it is spelled in the config file (camelCase).
                Status::failed_precondition("tts.provider.piper.defaultVoice not set")
            })?,
        // Request value first, then the configured default, then 1.0.
        speed: speed
            .or_else(|| piper_defaults.and_then(|p| p.default_speed))
            .unwrap_or(1.0),
        // Piper is expected to be installed inside of DCS' write directory.
        piper_path: self.write_dir.join("Mods/tech/DCS-gRPC/piper"),
    })
}
};

let frames = ::tts::synthesize(&request.ssml, &config)
Expand Down
6 changes: 6 additions & 0 deletions src/server.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::future::Future;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

Expand Down Expand Up @@ -49,6 +50,7 @@ struct ServerState {
stats: Stats,
tts_config: TtsConfig,
srs_config: SrsConfig,
write_dir: PathBuf,
srs_transmit: Arc<Mutex<mpsc::Receiver<TransmitRequest>>>,
}

Expand All @@ -70,6 +72,7 @@ impl Server {
stats: Stats::new(shutdown.handle()),
tts_config: config.tts.clone().unwrap_or_default(),
srs_config: config.srs.clone().unwrap_or_default(),
write_dir: PathBuf::from(&config.write_dir),
srs_transmit: Arc::new(Mutex::new(rx)),
},
srs_transmit: tx,
Expand Down Expand Up @@ -202,6 +205,7 @@ async fn try_run(
stats,
tts_config,
srs_config,
write_dir,
srs_transmit,
} = state;

Expand All @@ -225,6 +229,7 @@ async fn try_run(
let srs = Srs::new(
tts_config.clone(),
srs_config.clone(),
write_dir.clone(),
mission_rpc.clone(),
srs_clients.clone(),
shutdown_signal.clone(),
Expand Down Expand Up @@ -256,6 +261,7 @@ async fn try_run(
.add_service(SrsServiceServer::new(Srs::new(
tts_config,
srs_config,
write_dir,
mission_rpc.clone(),
srs_clients,
shutdown_signal.clone(),
Expand Down
4 changes: 4 additions & 0 deletions tts/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use std::error;
pub use aws::{AwsConfig, Region as AwsRegion};
pub use azure::AzureConfig;
pub use gcloud::GCloudConfig;
pub use piper::PiperConfig;
#[cfg(target_os = "windows")]
pub use win::WinConfig;

mod aws;
mod azure;
mod gcloud;
mod piper;
#[cfg(target_os = "windows")]
mod win;

Expand All @@ -19,6 +21,7 @@ pub enum TtsConfig {
GCloud(gcloud::GCloudConfig),
#[cfg(target_os = "windows")]
Win(win::WinConfig),
Piper(piper::PiperConfig),
}

/// Synthesize the `text` to speech. Returns a vec of opus frames.
Expand All @@ -32,6 +35,7 @@ pub async fn synthesize(
TtsConfig::GCloud(config) => gcloud::synthesize(text, config).await?,
#[cfg(target_os = "windows")]
TtsConfig::Win(config) => win::synthesize(text, config).await?,
TtsConfig::Piper(config) => piper::synthesize(text, config).await?,
})
}

Expand Down
57 changes: 57 additions & 0 deletions tts/src/piper.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use std::path::PathBuf;
use std::process::Stdio;

use tokio::io::AsyncWriteExt;
use tokio::process::Command;

/// Settings for synthesizing speech with a local Piper installation.
#[derive(Debug)]
pub struct PiperConfig {
/// The voice model; handed to piper as its `--model` argument.
pub voice: String,
/// The speech speed; handed to piper as its `--length_scale` argument.
pub speed: f32,
/// The directory `piper.exe` is located in; also used as piper's working directory.
pub piper_path: PathBuf,
}

/// Synthesizes `text` to speech by running the `piper.exe` located in `config.piper_path`.
///
/// The text is written to piper's stdin; the raw audio piper writes to its stdout is then
/// converted by `crate::wav_to_opus`.
///
/// # Errors
///
/// Returns an error if
/// - `config.speed` is not a positive, finite number,
/// - piper could not be spawned, or writing to / reading from it failed,
/// - piper exited unsuccessfully (the error then carries piper's stderr output, if any),
/// - the conversion of piper's output failed.
pub async fn synthesize(text: &str, config: &PiperConfig) -> Result<Vec<Vec<u8>>, std::io::Error> {
    // Reject values that make no sense as a length scale up front instead of handing them to
    // piper and getting back a hard to interpret failure.
    if !config.speed.is_finite() || config.speed <= 0.0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "invalid piper speed `{}` (must be a positive, finite number)",
                config.speed
            ),
        ));
    }

    let mut command = Command::new(config.piper_path.join("piper.exe"));
    command
        .arg("--model")
        .arg(&config.voice)
        .arg("--length_scale")
        .arg(config.speed.to_string())
        .arg("--output-raw")
        // The voice is given relative to the piper directory, so run piper from there.
        .current_dir(&config.piper_path)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    #[cfg(target_os = "windows")]
    {
        // Windows process creation flag `CREATE_NO_WINDOW`.
        const CREATE_NO_WINDOW: u32 = 0x08000000;
        command.creation_flags(CREATE_NO_WINDOW);
    }

    let mut child = command.spawn()?;

    // Do not bail out on a failed write right away: if piper already exited (e.g. because the
    // voice model could not be loaded), its stderr output is the more helpful error, and the
    // child should still be waited on.
    let mut stdin = child
        .stdin
        .take()
        .expect("stdin is configured as piped above");
    let write_result = stdin.write_all(text.as_bytes()).await;
    // Close stdin to signal the end of the input to piper.
    drop(stdin);

    let output = child.wait_with_output().await?;

    if !output.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            if output.stderr.is_empty() {
                "failed to execute piper (maybe voice model not found)".into()
            } else {
                String::from_utf8_lossy(&output.stderr)
            },
        ));
    }

    // Piper exited successfully; still report if not all of the text could be handed to it.
    write_result?;

    crate::wav_to_opus(output.stdout.into())
        .await
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
}

0 comments on commit 91e5d06

Please sign in to comment.