Skip to content

Commit

Permalink
feat:Add pitch Option
Browse files Browse the repository at this point in the history
  • Loading branch information
czyt committed Jan 26, 2024
1 parent 2b7e081 commit 947e08a
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 12 deletions.
21 changes: 12 additions & 9 deletions internal/communicate/communicate.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (

const (
ssmlHeaderTemplate = "X-RequestId:%s\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:%sZ\r\nPath:ssml\r\n\r\n"
ssmlTemplate = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='%s'><prosody pitch='+0Hz' rate='%s' volume='%s'>%s</prosody></voice></speak>"
ssmlTemplate = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='%s'><prosody pitch='%s' rate='%s' volume='%s'>%s</prosody></voice></speak>"
)

var (
Expand All @@ -46,26 +46,27 @@ func init() {
type Communicate struct {
text string
voice string
voiceLanguageRegion string
pitch string
rate string
volume string
voiceLanguageRegion string

httpProxy string
socket5Proxy string
socket5ProxyUser string
socket5ProxyPass string
op chan map[string]interface{}

audioDataIndex int
prevIdx int
shiftTime int
finalUtterance map[int]int
op chan map[string]interface{}
}

type textEntry struct {
Text string `json:"text"`
Length int64 `json:"Length"`
BoundaryType string `json:"BoundaryType"`
Length int64 `json:"Length"`
}
type dataEntry struct {
Offset int `json:"Offset"`
Expand Down Expand Up @@ -112,6 +113,7 @@ func NewCommunicate(text string, opt *communicateOption.CommunicateOption) (*Com
}
return &Communicate{
text: text,
pitch: opt.Pitch,
voice: opt.Voice,
voiceLanguageRegion: opt.VoiceLangRegion,
rate: opt.Rate,
Expand Down Expand Up @@ -171,7 +173,7 @@ func makeHeaders() http.Header {
func (c *Communicate) stream() (<-chan map[string]interface{}, error) {
texts := splitTextByByteLength(
escape(removeIncompatibleCharacters(c.text)),
calculateMaxMessageSize(c.voice, c.rate, c.volume),
calculateMaxMessageSize(c.pitch, c.voice, c.rate, c.volume),
)
c.audioDataIndex = len(texts)

Expand Down Expand Up @@ -234,7 +236,7 @@ func (c *Communicate) sendSSML(conn *websocket.Conn, currentTime string, text []
ssmlHeadersAppendExtraData(
generateConnectID(),
currentTime,
makeSsml(string(text), c.voice, c.rate, c.volume),
makeSsml(string(text), c.pitch, c.voice, c.rate, c.volume),
),
))
}
Expand Down Expand Up @@ -474,10 +476,11 @@ func splitTextByByteLength(text string, byteLength int) [][]byte {
return result
}

func makeSsml(text string, voice string, rate string, volume string) string {
func makeSsml(text string, pitch, voice string, rate string, volume string) string {
ssml := fmt.Sprintf(
ssmlTemplate,
voice,
pitch,
rate,
volume,
text)
Expand All @@ -500,9 +503,9 @@ func ssmlHeadersAppendExtraData(requestID string, timestamp string, ssml string)
return headers + ssml
}

func calculateMaxMessageSize(voice string, rate string, volume string) int {
func calculateMaxMessageSize(pitch, voice string, rate string, volume string) int {
websocketMaxSize := 1 << 16
overheadPerMessage := len(ssmlHeadersAppendExtraData(generateConnectID(), currentTimeInMST(), makeSsml("", voice, rate, volume))) + 50
overheadPerMessage := len(ssmlHeadersAppendExtraData(generateConnectID(), currentTimeInMST(), makeSsml("", pitch, voice, rate, volume))) + 50
return websocketMaxSize - overheadPerMessage
}

Expand Down
7 changes: 6 additions & 1 deletion internal/communicateOption/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package communicateOption

import (
"fmt"
"github.com/lib-x/edgetts/internal/businessConsts"
"strings"

"github.com/lib-x/edgetts/internal/businessConsts"
)

type CommunicateOption struct {
Voice string
VoiceLangRegion string
Pitch string
Rate string
Volume string
HttpProxy string
Expand All @@ -31,6 +33,9 @@ func (c *CommunicateOption) CheckAndApplyDefaultOption() {
name := voiceParsed[2]
c.VoiceLangRegion = fmt.Sprintf(businessConsts.VoiceNameTemplate, lang, region, name)
}
if c.Pitch == "" {
c.Pitch = "+0Hz"
}
if c.Rate == "" {
c.Rate = "+0%"
}
Expand Down
11 changes: 9 additions & 2 deletions internal/validate/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ package validate

import (
"errors"
"github.com/lib-x/edgetts/internal/communicateOption"
"regexp"

"github.com/lib-x/edgetts/internal/communicateOption"
)

// Patterns used to validate the user-supplied synthesis options.
var (
// validPitchPattern matches a signed integer followed by "Hz",
// for example "+50Hz" or "-50Hz".
validPitchPattern = regexp.MustCompile(`^[+-]\d+Hz$`)
// validVoicePattern matches "<lang>-<REGION>-<name>": two or more lowercase
// letters, two or more uppercase letters, and a name that ends in "Neural".
// The three parts are exposed as capture groups.
validVoicePattern = regexp.MustCompile(`^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$`)
// validRateVolumePattern matches a signed integer percentage, for example
// "+50%" or "-50%". It is used for both the rate and the volume checks.
validRateVolumePattern = regexp.MustCompile(`^[+-]\d+%$`)
)

// Sentinel errors returned by WithCommunicateOption when an option field
// fails validation.
var (
// InvalidVoiceError is returned when the voice check fails.
InvalidVoiceError = errors.New("invalid voice")
// InvalidPitchError is returned when Pitch does not match validPitchPattern.
InvalidPitchError = errors.New("invalid pitch")
// InvalidRateError is returned when Rate does not match validRateVolumePattern.
InvalidRateError = errors.New("invalid rate")
// InvalidVolumeError is returned when Volume does not match validRateVolumePattern.
InvalidVolumeError = errors.New("invalid volume")
)
Expand All @@ -24,8 +27,11 @@ func WithCommunicateOption(c *communicateOption.CommunicateOption) error {
return InvalidVoiceError
}

// Validate the pitch format (signed integer followed by "Hz").
if !validPitchPattern.MatchString(c.Pitch) {
return InvalidPitchError
}
// Validate the rate format (signed integer percentage).

if !validRateVolumePattern.MatchString(c.Rate) {
return InvalidRateError
}
Expand All @@ -34,5 +40,6 @@ func WithCommunicateOption(c *communicateOption.CommunicateOption) error {
if !validRateVolumePattern.MatchString(c.Volume) {
return InvalidVolumeError
}

return nil
}
11 changes: 11 additions & 0 deletions option.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import "github.com/lib-x/edgetts/internal/communicateOption"
type option struct {
Voice string
VoiceLangRegion string
Pitch string
Rate string
Volume string
HttpProxy string
Expand All @@ -17,6 +18,7 @@ func (o *option) toInternalOption() *communicateOption.CommunicateOption {
return &communicateOption.CommunicateOption{
Voice: o.Voice,
VoiceLangRegion: o.VoiceLangRegion,
Pitch: o.Pitch,
Rate: o.Rate,
Volume: o.Volume,
HttpProxy: o.HttpProxy,
Expand All @@ -41,12 +43,21 @@ func WithVoiceLangRegion(voiceLangRegion string) Option {

}

// WithPitch returns an Option that records pitch as the pitch adjustment of
// the TTS output, for example "+50Hz" to raise it or "-50Hz" to lower it.
func WithPitch(pitch string) Option {
	return func(o *option) { o.Pitch = pitch }
}

// WithRate returns an Option that records rate as the speaking-rate adjustment
// of the TTS output: "-50%" slows speech down by 50%, "+50%" speeds it up by 50%.
func WithRate(rate string) Option {
	return func(o *option) { o.Rate = rate }
}

// WithVolume sets the volume of the TTS output: "-50%" lowers the volume by 50%, "+50%" raises it by 50%.
func WithVolume(volume string) Option {
return func(option *option) {
option.Volume = volume
Expand Down

0 comments on commit 947e08a

Please sign in to comment.