first commit
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
|
||||
'''
|
||||
Utility functions to help with Python 2/3 compatibility
|
||||
'''
|
||||
from .. import six
|
||||
|
||||
def toUtf8(value):
    '''
    Converts an arbitrary value into UTF-8 encoded bytes.

    The value is first turned into the text (unicode) type through
    six.text_type, and that text is then encoded as UTF-8. In 2.X the result
    is a str and in 3.X it is bytes. Any object may be passed in: when it is
    not already text, its __str__ (or __repr__ if __str__ is not defined)
    supplies the text.
    '''
    text = six.text_type(value)
    return text.encode('utf-8')
|
||||
|
||||
def fromUtf8(value):
    '''
    Decodes a UTF-8 encoded byte string into a text (unicode) value.

    In 2.X a str is expected and a unicode object is returned. In 3.X a bytes
    object is expected and a str is returned.
    '''
    text = value.decode('utf-8')
    return text
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
500
venv/lib/python3.11/site-packages/pyttsx3/drivers/_espeak.py
Normal file
500
venv/lib/python3.11/site-packages/pyttsx3/drivers/_espeak.py
Normal file
@@ -0,0 +1,500 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import time
|
||||
from ctypes import (CFUNCTYPE, POINTER, Structure, Union, c_char_p, c_int,
|
||||
c_long, c_short, c_ubyte, c_uint, c_ulong, c_void_p,
|
||||
c_wchar, cdll)
|
||||
|
||||
|
||||
def cfunc(name, dll, result, *args):
    """Build a ctypes function prototype and bind it to ``name`` in ``dll``.

    Each entry of ``args`` is a tuple ``(param_name, ctype, flag[, default])``.
    The ctypes become the prototype's argument types; the remaining items are
    rearranged into the ``(flag, param_name[, default])`` paramflags tuples
    that ctypes expects.
    """
    argtypes = [spec[1] for spec in args]
    paramflags = tuple((spec[2], spec[0]) + spec[3:] for spec in args)
    prototype = CFUNCTYPE(result, *argtypes)
    return prototype((name, dll), paramflags)
|
||||
|
||||
|
||||
dll = None
|
||||
|
||||
def load_library():
    """Try each known eSpeak / eSpeak NG library location in order.

    The first library that loads is stored in the module-level ``dll``.

    Returns True when a library was loaded, False when every candidate failed.
    """
    global dll
    candidates = (
        # macOS paths
        '/usr/local/lib/libespeak-ng.1.dylib',
        '/usr/local/lib/libespeak.dylib',
        # Linux paths
        'libespeak-ng.so.1',
        '/usr/local/lib/libespeak-ng.so.1',
        'libespeak.so.1',
        # Windows paths
        r'C:\Program Files\eSpeak NG\libespeak-ng.dll',
        r'C:\Program Files (x86)\eSpeak NG\libespeak-ng.dll',
    )

    for candidate in candidates:
        try:
            dll = cdll.LoadLibrary(candidate)
        except Exception:
            # This candidate is unavailable; fall through to the next one.
            continue
        return True
    return False
|
||||
|
||||
# Fail at import time when no eSpeak library could be loaded: every prototype
# declared below needs a valid ``dll`` handle.
if not load_library():
    raise RuntimeError("This means you probably do not have eSpeak or eSpeak-ng installed!")
|
||||
|
||||
# constants and such from speak_lib.h
|
||||
|
||||
EVENT_LIST_TERMINATED = 0
|
||||
EVENT_WORD = 1
|
||||
EVENT_SENTENCE = 2
|
||||
EVENT_MARK = 3
|
||||
EVENT_PLAY = 4
|
||||
EVENT_END = 5
|
||||
EVENT_MSG_TERMINATED = 6
|
||||
|
||||
|
||||
class numberORname(Union):
    """ctypes union whose storage is read either as an int or as a C string."""

    _fields_ = [('number', c_int), ('name', c_char_p)]
|
||||
|
||||
|
||||
class EVENT(Structure):
    """ctypes layout of the event records handed to the synth callback.

    ``type`` holds one of the EVENT_* constants; an entry whose type is
    EVENT_LIST_TERMINATED marks the end of an event array.
    """

    _fields_ = [
        ('type', c_int), ('unique_identifier', c_uint),
        ('text_position', c_int), ('length', c_int),
        ('audio_position', c_int), ('sample', c_int),
        ('user_data', c_void_p), ('id', numberORname),
    ]
|
||||
|
||||
|
||||
AUDIO_OUTPUT_PLAYBACK = 0
|
||||
AUDIO_OUTPUT_RETRIEVAL = 1
|
||||
AUDIO_OUTPUT_SYNCHRONOUS = 2
|
||||
AUDIO_OUTPUT_SYNCH_PLAYBACK = 3
|
||||
|
||||
EE_OK = 0
|
||||
EE_INTERNAL_ERROR = -1
|
||||
EE_BUFFER_FULL = 1
|
||||
EE_NOT_FOUND = 2
|
||||
|
||||
Initialize = cfunc('espeak_Initialize', dll, c_int,
|
||||
('output', c_int, 1, AUDIO_OUTPUT_PLAYBACK),
|
||||
('bufflength', c_int, 1, 100),
|
||||
('path', c_char_p, 1, None),
|
||||
('option', c_int, 1, 0))
|
||||
Initialize.__doc__ = """Must be called before any synthesis functions are called.
|
||||
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
|
||||
buflength: The length in mS of sound buffers passed to the SynthCallback function.
|
||||
path: The directory which contains the espeak-data directory, or NULL for the default location.
|
||||
options: bit 0: 1=allow espeakEVENT_PHONEME events.
|
||||
|
||||
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR)."""
|
||||
|
||||
t_espeak_callback = CFUNCTYPE(c_int, POINTER(c_short), c_int, POINTER(EVENT))
|
||||
|
||||
cSetSynthCallback = cfunc('espeak_SetSynthCallback', dll, None,
|
||||
('SynthCallback', t_espeak_callback, 1))
|
||||
SynthCallback = None
|
||||
|
||||
|
||||
def SetSynthCallback(cb):
    # Registers ``cb`` as the synthesis callback, or clears it when ``cb`` is
    # None. The long description is assigned to __doc__ right after this def.
    global SynthCallback
    if cb is None:
        # A ctypes prototype cannot wrap None (it raises TypeError); calling
        # the prototype with no arguments yields a NULL function pointer.
        SynthCallback = t_espeak_callback()
    else:
        # Kept in a module global so the ctypes thunk is not garbage
        # collected while the C library still holds the pointer.
        SynthCallback = t_espeak_callback(cb)
    cSetSynthCallback(SynthCallback)
|
||||
|
||||
|
||||
SetSynthCallback.__doc__ = """Must be called before any synthesis functions are called.
|
||||
This specifies a function in the calling program which is called when a buffer of
|
||||
speech sound data has been produced.
|
||||
|
||||
|
||||
The callback function is of the form:
|
||||
|
||||
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
|
||||
|
||||
wav: is the speech sound data which has been produced.
|
||||
NULL indicates that the synthesis has been completed.
|
||||
|
||||
numsamples: is the number of entries in wav. This number may vary, may be less than
|
||||
the value implied by the buflength parameter given in espeak_Initialize, and may
|
||||
sometimes be zero (which does NOT indicate end of synthesis).
|
||||
|
||||
events: an array of espeak_EVENT items which indicate word and sentence events, and
|
||||
also the occurance if <mark> and <audio> elements within the text.
|
||||
|
||||
|
||||
Callback returns: 0=continue synthesis, 1=abort synthesis."""
|
||||
|
||||
t_UriCallback = CFUNCTYPE(c_int, c_int, c_char_p, c_char_p)
|
||||
|
||||
cSetUriCallback = cfunc('espeak_SetUriCallback', dll, None,
|
||||
('UriCallback', t_UriCallback, 1))
|
||||
UriCallback = None
|
||||
|
||||
|
||||
def SetUriCallback(cb):
    # Registers ``cb`` as the <audio> URI callback, or clears it when ``cb``
    # is None. The long description is assigned to __doc__ right after this def.
    global UriCallback
    if cb is None:
        # Calling the prototype with no arguments yields a NULL function
        # pointer; wrapping None directly would raise TypeError.
        UriCallback = t_UriCallback()
    else:
        # Wrap the caller's function (the original wrapped the global itself,
        # which is None on first use). Kept global so the thunk stays alive.
        UriCallback = t_UriCallback(cb)
    cSetUriCallback(UriCallback)
|
||||
|
||||
|
||||
SetUriCallback.__doc__ = """This function must be called before synthesis functions are used, in order to deal with
|
||||
<audio> tags. It specifies a callback function which is called when an <audio> element is
|
||||
encountered and allows the calling program to indicate whether the sound file which
|
||||
is specified in the <audio> element is available and is to be played.
|
||||
|
||||
The callback function is of the form:
|
||||
|
||||
int UriCallback(int type, const char *uri, const char *base);
|
||||
|
||||
type: type of callback event. Currently only 1= <audio> element
|
||||
|
||||
uri: the "src" attribute from the <audio> element
|
||||
|
||||
base: the "xml:base" attribute (if any) from the <speak> element
|
||||
|
||||
Return: 1=don't play the sound, but speak the text alternative.
|
||||
0=place a PLAY event in the event list at the point where the <audio> element
|
||||
occurs. The calling program can then play the sound at that point."""
|
||||
|
||||
# a few manifest constants
|
||||
CHARS_AUTO = 0
|
||||
CHARS_UTF8 = 1
|
||||
CHARS_8BIT = 2
|
||||
CHARS_WCHAR = 3
|
||||
|
||||
SSML = 0x10
|
||||
PHONEMES = 0x100
|
||||
ENDPAUSE = 0x1000
|
||||
KEEP_NAMEDATA = 0x2000
|
||||
|
||||
POS_CHARACTER = 1
|
||||
POS_WORD = 2
|
||||
POS_SENTENCE = 3
|
||||
|
||||
|
||||
def Synth(text, position=0, position_type=POS_CHARACTER, end_position=0, flags=0, user_data=None):
    # The size passed to espeak is ten times the text length; no
    # unique_identifier pointer is supplied.
    buffer_size = len(text) * 10
    unique_identifier = None
    return cSynth(text, buffer_size, position, position_type, end_position,
                  flags, unique_identifier, user_data)
|
||||
|
||||
|
||||
cSynth = cfunc('espeak_Synth', dll, c_int,
|
||||
('text', c_char_p, 1),
|
||||
('size', c_long, 1),
|
||||
('position', c_uint, 1, 0),
|
||||
('position_type', c_int, 1, POS_CHARACTER),
|
||||
('end_position', c_uint, 1, 0),
|
||||
('flags', c_uint, 1, CHARS_AUTO),
|
||||
('unique_identifier', POINTER(c_uint), 1, None),
|
||||
('user_data', c_void_p, 1, None))
|
||||
Synth.__doc__ = """Synthesize speech for the specified text. The speech sound data is passed to the calling
|
||||
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
|
||||
|
||||
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
|
||||
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
|
||||
parameter.
|
||||
|
||||
size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
|
||||
to allocate internal storage space for the text. This value is not used for
|
||||
AUDIO_OUTPUT_SYNCHRONOUS mode.
|
||||
|
||||
position: The position in the text where speaking starts. Zero indicates speak from the
|
||||
start of the text.
|
||||
|
||||
position_type: Determines whether "position" is a number of characters, words, or sentences.
|
||||
Values:
|
||||
|
||||
end_position: If set, this gives a character position at which speaking will stop. A value
|
||||
of zero indicates no end position.
|
||||
|
||||
flags: These may be OR'd together:
|
||||
Type of character codes, one of:
|
||||
espeak.CHARS_UTF8 UTF8 encoding
|
||||
espeak.CHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
|
||||
espeak.CHARS_AUTO 8 bit or UTF8 (this is the default)
|
||||
espeak.CHARS_WCHAR Wide characters (wchar_t)
|
||||
|
||||
espeak.SSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
|
||||
|
||||
espeak.PHONEMES Text within [[ ]] is treated as phonemes codes (in espeak's Hirschenbaum encoding).
|
||||
|
||||
espeak.ENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
|
||||
this pause is suppressed.
|
||||
|
||||
unique_identifier: message identifier; helpful for identifying later
|
||||
data supplied to the callback.
|
||||
|
||||
user_data: pointer which will be passed to the callback function.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
|
||||
def Synth_Mark(text, index_mark, end_position=0, flags=CHARS_AUTO):
    # size is the text length plus one; the remaining prototype parameters
    # keep their declared defaults. The status code is returned so callers
    # can see the EE_* result described in __doc__ (it used to be dropped).
    return cSynth_Mark(text, len(text) + 1, index_mark, end_position, flags)
|
||||
|
||||
|
||||
cSynth_Mark = cfunc('espeak_Synth_Mark', dll, c_int,
|
||||
('text', c_char_p, 1),
|
||||
('size', c_ulong, 1),
|
||||
('index_mark', c_char_p, 1),
|
||||
('end_position', c_uint, 1, 0),
|
||||
('flags', c_uint, 1, CHARS_AUTO),
|
||||
('unique_identifier', POINTER(c_uint), 1, None),
|
||||
('user_data', c_void_p, 1, None))
|
||||
Synth_Mark.__doc__ = """Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
|
||||
specified by the name of a <mark> element in the text.
|
||||
|
||||
index_mark: The "name" attribute of a <mark> element within the text which specified the
|
||||
point at which synthesis starts. UTF8 string.
|
||||
|
||||
For the other parameters, see espeak_Synth()
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
Key = cfunc('espeak_Key', dll, c_int,
|
||||
('key_name', c_char_p, 1))
|
||||
Key.__doc__ = """Speak the name of a keyboard key.
|
||||
Currently this just speaks the "key_name" as given
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
Char = cfunc('espeak_Char', dll, c_int,
|
||||
('character', c_wchar, 1))
|
||||
Char.__doc__ = """Speak the name of the given character
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
# Speech Parameters
|
||||
SILENCE = 0 # internal use
|
||||
RATE = 1
|
||||
VOLUME = 2
|
||||
PITCH = 3
|
||||
RANGE = 4
|
||||
PUNCTUATION = 5
|
||||
CAPITALS = 6
|
||||
EMPHASIS = 7 # internal use
|
||||
LINELENGTH = 8 # internal use
|
||||
|
||||
PUNCT_NONE = 0
|
||||
PUNCT_ALL = 1
|
||||
PUNCT_SOME = 2
|
||||
|
||||
SetParameter = cfunc('espeak_SetParameter', dll, c_int,
|
||||
('parameter', c_int, 1),
|
||||
('value', c_int, 1),
|
||||
('relative', c_int, 1, 0))
|
||||
SetParameter.__doc__ = """Sets the value of the specified parameter.
|
||||
relative=0 Sets the absolute value of the parameter.
|
||||
relative=1 Sets a relative value of the parameter.
|
||||
|
||||
parameter:
|
||||
espeak.RATE: speaking speed in word per minute.
|
||||
|
||||
espeak.VOLUME: volume in range 0-100 0=silence
|
||||
|
||||
espeak.PITCH: base pitch, range 0-100. 50=normal
|
||||
|
||||
espeak.RANGE: pitch range, range 0-100. 0-monotone, 50=normal
|
||||
|
||||
espeak.PUNCTUATION: which punctuation characters to announce:
|
||||
value in espeak_PUNCT_TYPE (none, all, some),
|
||||
see espeak_GetParameter() to specify which characters are announced.
|
||||
|
||||
espeak.CAPITALS: announce capital letters by:
|
||||
0=none,
|
||||
1=sound icon,
|
||||
2=spelling,
|
||||
3 or higher, by raising pitch. This values gives the amount in Hz by which the pitch
|
||||
of a word raised to indicate it has a capital letter.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
# espeak_GetParameter takes two arguments in speak_lib.h (parameter, current).
# Declaring only the first leaves `current` undefined at the C level, so it is
# declared here with a default of 1 (current value); existing one-argument
# callers keep working.
GetParameter = cfunc('espeak_GetParameter', dll, c_int,
                     ('parameter', c_int, 1),
                     ('current', c_int, 1, 1))
GetParameter.__doc__ = """Returns the value of the specified parameter.

current=0 Returns the default value of the specified parameter.
current=1 Returns the current value of the specified parameter, as set by SetParameter()
          (this is the default)"""
|
||||
|
||||
SetPunctuationList = cfunc('espeak_SetPunctuationList', dll, c_int,
|
||||
('punctlist', c_wchar, 1))
|
||||
SetPunctuationList.__doc__ = """Specified a list of punctuation characters whose names are
|
||||
to be spoken when the value of the Punctuation parameter is set to "some".
|
||||
|
||||
punctlist: A list of character codes, terminated by a zero character.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
SetPhonemeTrace = cfunc('espeak_SetPhonemeTrace', dll, None,
|
||||
('value', c_int, 1),
|
||||
('stream', c_void_p, 1))
|
||||
SetPhonemeTrace.__doc__ = """Controls the output of phoneme symbols for the text
|
||||
value=0 No phoneme output (default)
|
||||
value=1 Output the translated phoneme symbols for the text
|
||||
value=2 as (1), but also output a trace of how the translation was done (matching rules and list entries)
|
||||
|
||||
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout."""
|
||||
|
||||
CompileDictionary = cfunc('espeak_CompileDictionary', dll, None,
|
||||
('path', c_char_p, 1),
|
||||
('log', c_void_p, 1))
|
||||
CompileDictionary.__doc__ = """Compile pronunciation dictionary for a language which corresponds to the currently
|
||||
selected voice. The required voice should be selected before calling this function.
|
||||
|
||||
path: The directory which contains the language's '_rules' and '_list' files.
|
||||
'path' should end with a path separator character ('/').
|
||||
log: Stream for error reports and statistics information. If log=NULL then stderr will be used."""
|
||||
|
||||
|
||||
class VOICE(Structure):
    """ctypes layout of a voice record as used by the voice functions below."""

    _fields_ = [
        ('name', c_char_p), ('languages', c_char_p), ('identifier', c_char_p),
        ('gender', c_ubyte), ('age', c_ubyte), ('variant', c_ubyte),
        ('xx1', c_ubyte), ('score', c_int), ('spare', c_void_p),
    ]

    def __repr__(self):
        """Render as ``VOICE(field=value,...)`` in declaration order."""
        rendered = ','.join(
            '{}={!r}'.format(spec[0], getattr(self, spec[0]))
            for spec in self._fields_
        )
        return '{}({})'.format(self.__class__.__name__, rendered)
|
||||
|
||||
|
||||
cListVoices = cfunc('espeak_ListVoices', dll, POINTER(POINTER(VOICE)),
|
||||
('voice_spec', POINTER(VOICE), 1))
|
||||
cListVoices.__doc__ = """Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
|
||||
The list is terminated by a NULL pointer
|
||||
|
||||
If voice_spec is NULL then all voices are listed.
|
||||
If voice spec is given, then only the voices which are compatible with the voice_spec
|
||||
are listed, and they are listed in preference order."""
|
||||
|
||||
|
||||
def ListVoices(voice_spec=None):
    """Return the available voices as a list of VOICE objects.

    Wraps cListVoices, which yields an array of VOICE pointers terminated by
    a NULL pointer.

    If voice_spec is None then all voices are listed.
    If voice_spec is given, then only the voices which are compatible with it
    are listed, and they are listed in preference order."""
    voice_ptrs = cListVoices(voice_spec)
    voices = []
    index = 0
    entry = voice_ptrs[index]
    # A NULL pointer is falsy and marks the end of the array.
    while entry:
        voices.append(entry[0])
        index += 1
        entry = voice_ptrs[index]
    return voices
|
||||
|
||||
|
||||
SetVoiceByName = cfunc('espeak_SetVoiceByName', dll, c_int,
|
||||
('name', c_char_p, 1))
|
||||
SetVoiceByName.__doc__ = """Searches for a voice with a matching "name" field. Language is not considered.
|
||||
"name" is a UTF8 string.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_BUFFER_FULL: the command can not be buffered;
|
||||
you may try after a while to call the function again.
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
SetVoiceByProperties = cfunc('espeak_SetVoiceByProperties', dll, c_int,
|
||||
('voice_spec', POINTER(VOICE), 1))
|
||||
SetVoiceByProperties.__doc__ = """An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
|
||||
fields may be set:
|
||||
|
||||
name NULL, or a voice name
|
||||
|
||||
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
|
||||
|
||||
gender 0=not specified, 1=male, 2=female
|
||||
|
||||
age 0=not specified, or an age in years
|
||||
|
||||
variant After a list of candidates is produced, scored and sorted, "variant" is used to index
|
||||
that list and choose a voice.
|
||||
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc"""
|
||||
|
||||
GetCurrentVoice = cfunc('espeak_GetCurrentVoice', dll, POINTER(VOICE),
|
||||
)
|
||||
GetCurrentVoice.__doc__ = """Returns the espeak_VOICE data for the currently selected voice.
|
||||
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>"""
|
||||
|
||||
Cancel = cfunc('espeak_Cancel', dll, c_int)
|
||||
Cancel.__doc__ = """Stop immediately synthesis and audio output of the current text. When this
|
||||
function returns, the audio output is fully stopped and the synthesizer is ready to
|
||||
synthesize a new message.
|
||||
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
IsPlaying = cfunc('espeak_IsPlaying', dll, c_int)
|
||||
IsPlaying.__doc__ = """Returns 1 if audio is played, 0 otherwise."""
|
||||
|
||||
Synchronize = cfunc('espeak_Synchronize', dll, c_int)
|
||||
Synchronize.__doc__ = """This function returns when all data have been spoken.
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
Terminate = cfunc('espeak_Terminate', dll, c_int)
|
||||
Terminate.__doc__ = """last function to be called.
|
||||
Return: EE_OK: operation achieved
|
||||
EE_INTERNAL_ERROR."""
|
||||
|
||||
Info = cfunc('espeak_Info', dll, c_char_p, ('ptr', c_void_p, 1, 0))
|
||||
Info.__doc__ = """Returns the version number string.
|
||||
The parameter is for future use, and should be set to NULL"""
|
||||
|
||||
if __name__ == '__main__':
    def synth_cb(wav, numsample, events):
        """Demo callback: print the sample count and every event type."""
        print(numsample, end="")
        i = 0
        # The event array ends with an EVENT_LIST_TERMINATED entry.
        while events[i].type != EVENT_LIST_TERMINATED:
            print(events[i].type, end="")
            i += 1
        return 0  # 0 = continue synthesis

    samplerate = Initialize(output=AUDIO_OUTPUT_PLAYBACK)
    SetSynthCallback(synth_cb)
    # cSynth declares `text` as c_char_p, which only accepts bytes on
    # Python 3; passing a str raises ctypes.ArgumentError.
    s = 'This is a test, only a test. '.encode('utf-8')
    print(Synth(s))
    while IsPlaying():
        time.sleep(0.1)
|
175
venv/lib/python3.11/site-packages/pyttsx3/drivers/dummy.py
Normal file
175
venv/lib/python3.11/site-packages/pyttsx3/drivers/dummy.py
Normal file
@@ -0,0 +1,175 @@
|
||||
|
||||
from ..voice import Voice
|
||||
import time
|
||||
|
||||
def buildDriver(proxy):
    '''
    Factory used by the driver proxy: creates and returns a new dummy
    driver instance.

    @param proxy: Proxy creating the driver
    @type proxy: L{driver.DriverProxy}
    '''
    driver = DummyDriver(proxy)
    return driver
|
||||
|
||||
class DummyDriver(object):
    '''
    Dummy speech engine implementation. Documents the interface, notifications,
    properties, and sequencing responsibilities of a driver implementation.

    @ivar _proxy: Driver proxy that manages this instance
    @type _proxy: L{driver.DriverProxy}
    @ivar _config: Dummy configuration
    @type _config: dict
    @ivar _looping: True when in the dummy event loop, False when not
    @type _looping: bool
    '''
    def __init__(self, proxy):
        '''
        Constructs the driver.

        @param proxy: Proxy creating the driver
        @type proxy: L{driver.DriverProxy}
        '''
        self._proxy = proxy
        self._looping = False
        # hold config values as if we had a real tts implementation that
        # supported them
        voices = [
            Voice('dummy.voice1', 'John Doe', ['en-US', 'en-GB'], 'male', 'adult'),
            Voice('dummy.voice2', 'Jane Doe', ['en-US', 'en-GB'], 'female', 'adult'),
            Voice('dummy.voice3', 'Jimmy Doe', ['en-US', 'en-GB'], 'male', 10)
        ]
        self._config = {
            'rate': 200,
            'volume': 1.0,
            'voice': voices[0],
            'voices': voices
        }

    def destroy(self):
        '''
        Optional method that will be called when the driver proxy is being
        destroyed. Can cleanup any resources to make sure the engine terminates
        properly.
        '''
        pass

    def startLoop(self):
        '''
        Starts a blocking run loop in which driver callbacks are properly
        invoked.

        @precondition: There was no previous successful call to L{startLoop}
            without an intervening call to L{endLoop}.
        '''
        first = True
        self._looping = True
        while self._looping:
            if first:
                # report idle once, on the first pass through the loop
                self._proxy.setBusy(False)
                first = False
            time.sleep(0.5)

    def endLoop(self):
        '''
        Stops a previously started run loop.

        @precondition: A previous call to L{startLoop} succeeded and there was
            no intervening call to L{endLoop}.
        '''
        self._looping = False

    def iterate(self):
        '''
        Iterates from within an external run loop.
        '''
        self._proxy.setBusy(False)
        yield

    def say(self, text):
        '''
        Speaks the given text. Generates the following notifications during
        output:

        started-utterance: When speech output has started
        started-word: When a word is about to be spoken. Includes the character
            "location" of the start of the word in the original utterance text
            and the "length" of the word in characters.
        finished-utterance: When speech output has finished. Includes a flag
            indicating if the entire utterance was "completed" or not.

        The proxy automatically adds any "name" associated with the utterance
        to the notifications on behalf of the driver.

        When starting to output an utterance, the driver must inform its proxy
        that it is busy by invoking L{driver.DriverProxy.setBusy} with a flag
        of True. When the utterance completes or is interrupted, the driver
        informs the proxy that it is no longer busy by invoking
        L{driver.DriverProxy.setBusy} with a flag of False.

        @param text: Unicode text to speak
        @type text: unicode
        '''
        self._proxy.setBusy(True)
        self._proxy.notify('started-utterance')
        location = 0
        for word in text.split(' '):
            self._proxy.notify('started-word', location=location,
                               length=len(word))
            # Each piece from split(' ') is followed by exactly one separator,
            # so this stays correct with leading or repeated spaces too.
            location += len(word) + 1
        self._proxy.notify('finished-utterance', completed=True)
        self._proxy.setBusy(False)

    def stop(self):
        '''
        Stops any current output. If an utterance was being spoken, the driver
        is still responsible for sending the closing finished-utterance
        notification documented above and resetting the busy state of the
        proxy.
        '''
        pass

    def getProperty(self, name):
        '''
        Gets a property value of the speech engine. The supported properties
        and their values are:

        voices: List of L{voice.Voice} objects supported by the driver
        voice: The currently selected voice (this dummy driver stores and
            returns the L{voice.Voice} object itself)
        rate: Integer speech rate in words per minute
        volume: Floating point volume of speech in the range [0.0, 1.0]

        @param name: Property name
        @type name: str
        @raise KeyError: When the property name is unknown
        '''
        try:
            return self._config[name]
        except KeyError:
            raise KeyError('unknown property %s' % name)

    def setProperty(self, name, value):
        '''
        Sets one of the supported property values of the speech engine listed
        above. For 'voice' the value is a voice ID that is looked up among the
        configured voices; 'rate' and 'volume' are stored as given.

        @param name: Property name
        @type name: str
        @param value: Property value
        @type value: object
        @raise KeyError: When the property name is unknown
        @raise ValueError: When no configured voice has the given ID
        '''
        if name == 'voice':
            # filter() returns a non-subscriptable iterator on Python 3, so
            # search the list directly.
            for voice in self._config['voices']:
                if voice.id == value:
                    self._config['voice'] = voice
                    return
            raise ValueError('unknown voice id %s' % value)
        elif name == 'rate':
            self._config['rate'] = value
        elif name == 'volume':
            self._config['volume'] = value
        else:
            raise KeyError('unknown property %s' % name)
|
233
venv/lib/python3.11/site-packages/pyttsx3/drivers/espeak.py
Normal file
233
venv/lib/python3.11/site-packages/pyttsx3/drivers/espeak.py
Normal file
@@ -0,0 +1,233 @@
|
||||
import os
|
||||
import wave
|
||||
import platform
|
||||
import ctypes
|
||||
import time
|
||||
import subprocess
|
||||
from tempfile import NamedTemporaryFile
|
||||
if platform.system() == 'Windows':
|
||||
import winsound
|
||||
|
||||
from ..voice import Voice
|
||||
from . import _espeak, fromUtf8, toUtf8
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def buildDriver(proxy):
    """Factory entry point: create and return an EspeakDriver for *proxy*."""
    driver = EspeakDriver(proxy)
    return driver
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
class EspeakDriver(object):
|
||||
_moduleInitialized = False
|
||||
_defaultVoice = ''
|
||||
|
||||
def __init__(self, proxy):
    """Initialise espeak once per process, then set up per-driver state.

    Raises RuntimeError when espeak reports an initialisation failure.
    """
    if not EspeakDriver._moduleInitialized:
        # espeak cannot initialize more than once per process and has
        # issues when terminating from python (assert error on close),
        # so it is initialised a single time and kept alive.
        sample_rate = _espeak.Initialize(_espeak.AUDIO_OUTPUT_RETRIEVAL, 1000)
        if sample_rate == -1:
            raise RuntimeError('could not initialize espeak')
        EspeakDriver._defaultVoice = 'default'
        EspeakDriver._moduleInitialized = True

    self._proxy = proxy
    self._looping = False
    self._stopping = False
    self._speaking = False
    self._text_to_say = None
    self._data_buffer = b''
    self._numerise_buffer = []

    _espeak.SetSynthCallback(self._onSynth)

    # Apply the default engine properties.
    defaults = (
        ('voice', EspeakDriver._defaultVoice),
        ('rate', 200),
        ('volume', 1.0),
    )
    for prop_name, prop_value in defaults:
        self.setProperty(prop_name, prop_value)
|
||||
|
||||
def numerise(self, data):
|
||||
self._numerise_buffer.append(data)
|
||||
return ctypes.c_void_p(len(self._numerise_buffer))
|
||||
|
||||
def decode_numeric(self, data):
|
||||
return self._numerise_buffer[int(data) - 1]
|
||||
|
||||
@staticmethod
def destroy():
    """Ask the espeak binding to drop the synth callback."""
    no_callback = None
    _espeak.SetSynthCallback(no_callback)
|
||||
|
||||
def stop(self):
    """Cancel output if espeak is currently playing; otherwise do nothing."""
    if not _espeak.IsPlaying():
        return
    self._stopping = True
    _espeak.Cancel()
|
||||
|
||||
@staticmethod
|
||||
def getProperty(name: str):
|
||||
if name == 'voices':
|
||||
voices = []
|
||||
for v in _espeak.ListVoices(None):
|
||||
kwargs = {'id': fromUtf8(v.name), 'name': fromUtf8(v.name)}
|
||||
if v.languages:
|
||||
try:
|
||||
language_code_bytes = v.languages[1:]
|
||||
language_code = language_code_bytes.decode('utf-8', errors='ignore')
|
||||
kwargs['languages'] = [language_code]
|
||||
except UnicodeDecodeError as e:
|
||||
kwargs['languages'] = ["Unknown"]
|
||||
genders = [None, 'male', 'female']
|
||||
kwargs['gender'] = genders[v.gender]
|
||||
kwargs['age'] = v.age or None
|
||||
voices.append(Voice(**kwargs))
|
||||
return voices
|
||||
elif name == 'voice':
|
||||
voice = _espeak.GetCurrentVoice()
|
||||
return fromUtf8(voice.contents.name)
|
||||
elif name == 'rate':
|
||||
return _espeak.GetParameter(_espeak.RATE)
|
||||
elif name == 'volume':
|
||||
return _espeak.GetParameter(_espeak.VOLUME) / 100.0
|
||||
elif name == 'pitch':
|
||||
return _espeak.GetParameter(_espeak.PITCH)
|
||||
else:
|
||||
raise KeyError('unknown property %s' % name)
|
||||
|
||||
@staticmethod
|
||||
def setProperty(name: str, value):
|
||||
if name == 'voice':
|
||||
if value is None:
|
||||
return
|
||||
try:
|
||||
utf8Value = toUtf8(value)
|
||||
_espeak.SetVoiceByName(utf8Value)
|
||||
except ctypes.ArgumentError as e:
|
||||
raise ValueError(str(e))
|
||||
elif name == 'rate':
|
||||
try:
|
||||
_espeak.SetParameter(_espeak.RATE, value, 0)
|
||||
except ctypes.ArgumentError as e:
|
||||
raise ValueError(str(e))
|
||||
elif name == 'volume':
|
||||
try:
|
||||
_espeak.SetParameter(
|
||||
_espeak.VOLUME, int(round(value * 100, 2)), 0)
|
||||
except TypeError as e:
|
||||
raise ValueError(str(e))
|
||||
elif name == 'pitch':
|
||||
try:
|
||||
_espeak.SetParameter(
|
||||
_espeak.PITCH, int(value), 0
|
||||
)
|
||||
except TypeError as e:
|
||||
raise ValueError(str(e))
|
||||
else:
|
||||
raise KeyError('unknown property %s' % name)
|
||||
|
||||
def save_to_file(self, text, filename):
    """Queue *text* for synthesis, tagging the request with *filename*.

    The filename is registered through numerise and its handle is passed to
    espeak as user_data.
    """
    handle = self.numerise(filename)
    payload = toUtf8(text)
    synth_flags = _espeak.ENDPAUSE | _espeak.CHARS_UTF8
    _espeak.Synth(payload, flags=synth_flags, user_data=handle)
|
||||
|
||||
def _start_synthesis(self, text):
    """Mark the driver busy and hand *text* to espeak for synthesis.

    If the synth call fails, the proxy is set back to not busy, an 'error'
    notification carrying the exception is sent, and the exception is
    re-raised.
    """
    proxy = self._proxy
    proxy.setBusy(True)
    proxy.notify('started-utterance')
    self._speaking = True
    # Start from an empty audio buffer.
    self._data_buffer = b''
    try:
        _espeak.Synth(toUtf8(text), flags=_espeak.ENDPAUSE | _espeak.CHARS_UTF8)
    except Exception as e:
        self._proxy.setBusy(False)
        self._proxy.notify('error', exception=e)
        raise
|
||||
|
||||
|
||||
def _onSynth(self, wav, numsamples, events):
    """Handle one batch of synthesis output: events plus audio samples.

    Presumably registered as eSpeak's synthesis callback -- confirm
    against the driver's setup code.

    Walks ``events`` until the list terminator:

    * ``EVENT_WORD``: notifies ``'started-word'`` with the word sliced
      out of ``self._text_to_say`` (or ``"Unknown"`` when no text is
      stored).
    * ``EVENT_END``: writes the buffered samples to a temporary WAV
      file, then either converts it with ``ffmpeg`` to the file encoded
      in ``event.user_data`` or plays it with the platform's player. The
      temporary file is always removed. Afterwards it notifies
      ``'finished-utterance'``, clears the busy flag and ends the loop.

    After the events, any samples in ``wav`` are appended to
    ``self._data_buffer``.

    External tools are started with argument lists rather than shell
    strings, so file names containing spaces or shell metacharacters are
    passed through verbatim. A tool that cannot be launched now raises
    and is reported as ``RuntimeError``, as the ``afplay`` branch
    already did.

    :param wav: Pointer to ``numsamples`` 16-bit samples.
    :param numsamples: Number of samples available at ``wav``.
    :param events: Indexable sequence of events, ended by an entry of
        type ``EVENT_LIST_TERMINATED``.
    :returns: Always ``0``.
    :raises RuntimeError: If writing, converting or playing the audio
        fails, or if the temporary file cannot be removed.
    """
    i = 0
    while True:
        event = events[i]
        if event.type == _espeak.EVENT_LIST_TERMINATED:
            break
        if event.type == _espeak.EVENT_WORD:
            if self._text_to_say:
                # The slice treats text_position as 1-based.
                start_index = event.text_position - 1
                end_index = start_index + event.length
                word = self._text_to_say[start_index:end_index]
            else:
                word = "Unknown"

            self._proxy.notify('started-word', name=word, location=event.text_position, length=event.length)

        elif event.type == _espeak.EVENT_END:
            stream = NamedTemporaryFile(delete=False, suffix='.wav')

            try:
                # Mono, 2 bytes per sample, 22050 Hz.
                with wave.open(stream, 'wb') as f:
                    f.setnchannels(1)
                    f.setsampwidth(2)
                    f.setframerate(22050.0)
                    f.writeframes(self._data_buffer)
                self._data_buffer = b''

                if event.user_data:
                    # Save-to-file request: user_data encodes the target name.
                    target = str(self.decode_numeric(event.user_data))
                    subprocess.run(
                        ['ffmpeg', '-y', '-i', stream.name, target,
                         '-loglevel', 'quiet'],
                        check=False,
                    )
                else:
                    system = platform.system()
                    if system == 'Darwin':  # macOS
                        try:
                            subprocess.run(['afplay', stream.name], check=True, capture_output=True, text=True)
                        except subprocess.CalledProcessError as e:
                            raise RuntimeError(f"[EspeakDriver._onSynth] Mac afplay failed with error: {e}") from e
                    elif system == 'Linux':
                        subprocess.run(['aplay', stream.name, '-q'], check=False)
                    elif system == 'Windows':
                        winsound.PlaySound(stream.name, winsound.SND_FILENAME)  # Blocking playback

            except Exception as e:
                raise RuntimeError(f"Error during playback: {e}") from e

            finally:
                try:
                    stream.close()  # Ensure the file is closed
                    os.remove(stream.name)
                except Exception as e:
                    raise RuntimeError(f"Error deleting temporary WAV file: {e}") from e

            self._proxy.notify('finished-utterance', completed=True)
            self._proxy.setBusy(False)
            self.endLoop()  # End the loop here
            break  # Exit the loop after handling the termination event

        i += 1

    if numsamples > 0:
        # Each sample is one C short.
        self._data_buffer += ctypes.string_at(wav, numsamples * ctypes.sizeof(ctypes.c_short))
    return 0
|
||||
|
||||
|
||||
def endLoop(self):
    """Ask ``startLoop`` to exit by clearing the ``_looping`` flag."""
    self._looping = False
|
||||
|
||||
def startLoop(self):
    """Run the driver loop until ``_looping`` is cleared.

    The first pass clears the proxy's busy flag and, if text has been
    queued by ``say``, starts synthesizing it. Every pass then calls
    ``iterate`` and sleeps for 10 ms.
    """
    self._looping = True
    needs_setup = True
    while self._looping:
        if needs_setup:
            needs_setup = False
            self._proxy.setBusy(False)
            # NOTE(review): the synthesis kick-off is taken to be part of
            # the first-pass setup; confirm the nesting against upstream.
            if self._text_to_say:
                self._start_synthesis(self._text_to_say)
        self.iterate()
        time.sleep(0.01)
|
||||
|
||||
def iterate(self):
    """Process a pending stop request, if the loop is running.

    When both ``_looping`` and ``_stopping`` are set, cancels eSpeak,
    clears ``_stopping``, reports the utterance as not completed,
    clears the busy flag and ends the loop. Otherwise does nothing.
    """
    if self._looping and self._stopping:
        _espeak.Cancel()
        self._stopping = False
        self._proxy.notify('finished-utterance', completed=False)
        self._proxy.setBusy(False)
        self.endLoop()
|
||||
|
||||
def say(self, text):
    """Store *text* in ``_text_to_say`` for the driver loop to pick up."""
    self._text_to_say = text
|
165
venv/lib/python3.11/site-packages/pyttsx3/drivers/nsss.py
Normal file
165
venv/lib/python3.11/site-packages/pyttsx3/drivers/nsss.py
Normal file
@@ -0,0 +1,165 @@
|
||||
# noinspection PyUnresolvedReferences
|
||||
import objc
|
||||
from AppKit import NSSpeechSynthesizer
|
||||
from Foundation import *
|
||||
from PyObjCTools import AppHelper
|
||||
# noinspection PyProtectedMember
|
||||
from PyObjCTools.AppHelper import PyObjCAppHelperRunLoopStopper
|
||||
|
||||
from ..voice import Voice
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
class RunLoopStopper(PyObjCAppHelperRunLoopStopper):
    """
    Overrides PyObjCAppHelperRunLoopStopper to terminate after endLoop.

    Tracks the stop request in a plain ``shouldStop`` attribute.
    """

    def __init__(self):
        # Python-side flag; starts out as "keep running".
        self.shouldStop = False

    def init(self):
        # Objective-C initializer: delegate to the superclass.
        return objc.super(RunLoopStopper, self).init()

    def stop(self):
        # Record the stop request.
        self.shouldStop = True
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def buildDriver(proxy):
    """Allocate an ``NSSpeechDriver`` and initialize it with *proxy*."""
    driver = NSSpeechDriver.alloc()
    return driver.initWithProxy(proxy)
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences,PyPep8Naming,PyUnusedLocal
|
||||
class NSSpeechDriver(NSObject):
    """pyttsx3 driver backed by an ``NSSpeechSynthesizer``.

    The driver is its synthesizer's delegate: the
    ``speechSynthesizer_...`` methods forward word and completion
    callbacks to the proxy as ``'started-word'`` and
    ``'finished-utterance'`` notifications.
    """

    def __init__(self):
        self._proxy = None
        self._tts = None
        self._completed = False
        self._current_text = ''

    @objc.python_method
    def initWithProxy(self, proxy):
        """Initialize the driver and create its synthesizer.

        :param proxy: Object that receives ``setBusy`` / ``notify`` calls.
        :returns: ``self``.
        """
        try:
            proxy_attr = objc.super(NSSpeechDriver, self).init()
        except AttributeError:
            proxy_attr = self
        if proxy_attr:
            self._proxy = proxy
            self._tts = NSSpeechSynthesizer.alloc().initWithVoice_(None)
            self._tts.setDelegate_(self)
            # default rate
            self._tts.setRate_(200)
            self._completed = True
        return self

    def destroy(self):
        """Detach from the synthesizer and drop the reference to it."""
        self._tts.setDelegate_(None)
        del self._tts

    def onPumpFirst_(self, timer):
        """Timer callback scheduled by ``startLoop``: clear the busy flag."""
        self._proxy.setBusy(False)

    def startLoop(self):
        """Run the current run loop until the stopper says to stop."""
        # https://github.com/ronaldoussoren/pyobjc/blob/master/pyobjc-framework-Cocoa/Lib/PyObjCTools/AppHelper.py#L243C25-L243C25  # noqa
        NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
            0.0, self, 'onPumpFirst:', None, False
        )
        runLoop = NSRunLoop.currentRunLoop()
        stopper = RunLoopStopper.alloc().init()
        PyObjCAppHelperRunLoopStopper.addRunLoopStopper_toRunLoop_(stopper, runLoop)
        while stopper.shouldRun():
            nextfire = runLoop.limitDateForMode_(NSDefaultRunLoopMode)
            soon = NSDate.dateWithTimeIntervalSinceNow_(0)  # maxTimeout in runConsoleEventLoop
            if nextfire is not None:
                nextfire = soon.earlierDate_(nextfire)
            if not runLoop.runMode_beforeDate_(NSDefaultRunLoopMode, nextfire):
                stopper.stop()
                break
        PyObjCAppHelperRunLoopStopper.removeRunLoopStopperFromRunLoop_(runLoop)

    @staticmethod
    def endLoop():
        """Stop the event loop through ``AppHelper.stopEventLoop``."""
        AppHelper.stopEventLoop()

    def iterate(self):
        """Generator that clears the busy flag, then yields once."""
        self._proxy.setBusy(False)
        yield

    @objc.python_method
    def say(self, text):
        """Start speaking *text* and notify ``'started-utterance'``."""
        self._proxy.setBusy(True)
        self._completed = True
        self._proxy.notify('started-utterance')
        self._current_text = text
        self._tts.startSpeakingString_(text)

    def stop(self):
        """Stop speaking; a busy driver is marked as not completed."""
        if self._proxy.isBusy():
            self._completed = False
        self._tts.stopSpeaking()

    @objc.python_method
    def _toVoice(self, attr):
        """Build a ``Voice`` from a voice-attributes mapping."""
        return Voice(attr.get('VoiceIdentifier'), attr.get('VoiceName'),
                     [attr.get('VoiceLocaleIdentifier', attr.get('VoiceLanguage'))], attr.get('VoiceGender'),
                     attr.get('VoiceAge'))

    @objc.python_method
    def getProperty(self, name):
        """Return the value of property *name*.

        ``'pitch'`` only prints a notice and returns ``None``.

        :raises KeyError: If ``name`` is not a known property.
        """
        if name == 'voices':
            return [self._toVoice(NSSpeechSynthesizer.attributesForVoice_(v))
                    for v in list(NSSpeechSynthesizer.availableVoices())]
        elif name == 'voice':
            return self._tts.voice()
        elif name == 'rate':
            return self._tts.rate()
        elif name == 'volume':
            return self._tts.volume()
        elif name == "pitch":
            print("Pitch adjustment not supported when using NSSS")
        else:
            raise KeyError('unknown property %s' % name)

    @objc.python_method
    def setProperty(self, name, value):
        """Set property *name* to *value*.

        ``'pitch'`` only prints a notice.

        :raises KeyError: If ``name`` is not a known property.
        """
        if name == 'voice':
            # vol/rate gets reset, so store and restore it
            vol = self._tts.volume()
            rate = self._tts.rate()
            self._tts.setVoice_(value)
            self._tts.setRate_(rate)
            self._tts.setVolume_(vol)
        elif name == 'rate':
            self._tts.setRate_(value)
        elif name == 'volume':
            self._tts.setVolume_(value)
        elif name == 'pitch':
            print("Pitch adjustment not supported when using NSSS")
        else:
            raise KeyError('unknown property %s' % name)

    @objc.python_method
    def save_to_file(self, text, filename):
        """Speak *text* into the file at path *filename*."""
        self._proxy.setBusy(True)
        self._completed = True
        # ``from Foundation import *`` binds NSURL but not the module name
        # ``Foundation``, so refer to the class directly.
        url = NSURL.fileURLWithPath_(filename)
        self._tts.startSpeakingString_toURL_(text, url)

    def speechSynthesizer_didFinishSpeaking_(self, tts, success):
        """Delegate callback: report completion and clear the busy flag.

        The reported status comes from ``_completed`` (cleared by
        ``stop``), not from the ``success`` argument.
        """
        if not self._completed:
            success = False
        else:
            success = True
        self._proxy.notify('finished-utterance', completed=success)
        self._proxy.setBusy(False)

    def speechSynthesizer_willSpeakWord_ofString_(self, tts, rng, text):
        """Delegate callback: notify ``'started-word'`` for range *rng*."""
        if self._current_text:
            current_word = self._current_text[rng.location:rng.location + rng.length]
        else:
            current_word = "Unknown"

        self._proxy.notify('started-word', name=current_word, location=rng.location,
                           length=rng.length)
|
187
venv/lib/python3.11/site-packages/pyttsx3/drivers/sapi5.py
Normal file
187
venv/lib/python3.11/site-packages/pyttsx3/drivers/sapi5.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# noinspection PyUnresolvedReferences
|
||||
import comtypes.client # Importing comtypes.client will make the gen subpackage
|
||||
|
||||
try:
|
||||
from comtypes.gen import SpeechLib # comtypes
|
||||
except ImportError:
|
||||
# Generate the SpeechLib lib and any associated files
|
||||
engine = comtypes.client.CreateObject("SAPI.SpVoice")
|
||||
stream = comtypes.client.CreateObject("SAPI.SpFileStream")
|
||||
# noinspection PyUnresolvedReferences
|
||||
from comtypes.gen import SpeechLib
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
import math
|
||||
import os
|
||||
import time
|
||||
import weakref
|
||||
|
||||
import pythoncom
|
||||
|
||||
from ..voice import Voice
|
||||
from . import fromUtf8, toUtf8
|
||||
|
||||
# common voices
# Registry token paths of three SAPI voices; used as the keys of E_REG.
MSSAM = 'HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\MSSam'
MSMARY = 'HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\MSMary'
MSMIKE = 'HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\MSMike'

# coeffs for wpm conversion
# Each value is an (a, b) pair; the driver computes the SAPI rate as
# int(math.log(wpm / a, b)).
E_REG = {MSSAM: (137.89, 1.11),
         MSMARY: (156.63, 1.11),
         MSMIKE: (154.37, 1.11)}
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def buildDriver(proxy):
    """Create the SAPI5 driver bound to *proxy*."""
    driver = SAPI5Driver(proxy)
    return driver
|
||||
|
||||
|
||||
# noinspection PyPep8Naming,PyShadowingNames
|
||||
class SAPI5Driver(object):
    """pyttsx3 driver that speaks through a SAPI ``SpVoice`` COM object.

    Speech events are delivered to a ``SAPI5DriverEventSink``, which
    holds a weak proxy back to this driver.
    """

    def __init__(self, proxy):
        """Create the voice object, hook up events and apply the default voice.

        :param proxy: Object that receives ``setBusy`` / ``notify`` calls.
        """
        self._tts = comtypes.client.CreateObject('SAPI.SPVoice')
        # all events
        self._tts.EventInterests = 33790
        self._event_sink = SAPI5DriverEventSink()
        self._event_sink.setDriver(weakref.proxy(self))
        self._advise = comtypes.client.GetEvents(self._tts, self._event_sink)
        self._proxy = proxy
        self._looping = False
        self._speaking = False
        self._stopping = False
        self._current_text = ''
        # initial rate
        self._rateWpm = 200
        # Re-applying the current voice also derives the SAPI rate from
        # the initial words-per-minute value.
        self.setProperty('voice', self.getProperty('voice'))

    def destroy(self):
        """Stop listening to every speech event."""
        self._tts.EventInterests = 0

    def say(self, text):
        """Start speaking *text* asynchronously."""
        self._proxy.setBusy(True)
        self._proxy.notify('started-utterance')
        self._speaking = True
        self._current_text = text
        # call this async otherwise this blocks the callbacks
        # see SpeechVoiceSpeakFlags: https://docs.microsoft.com/en-us/previous-versions/windows/desktop/ms720892%28v%3dvs.85%29
        # and Speak : https://docs.microsoft.com/en-us/previous-versions/windows/desktop/ms723609(v=vs.85)
        self._tts.Speak(fromUtf8(toUtf8(text)), 1)  # -> stream_number as described in the remarks of the documentation

    def stop(self):
        """Interrupt the current utterance; no-op when nothing is spoken."""
        if not self._speaking:
            return
        self._proxy.setBusy(True)
        self._stopping = True
        # 3 = SVSFlagsAsync (1) | SVSFPurgeBeforeSpeak (2), see the
        # SpeechVoiceSpeakFlags documentation linked in say().
        self._tts.Speak('', 3)

    def save_to_file(self, text, filename):
        """Speak *text* into *filename* instead of the audio device.

        The previous output stream and the working directory are
        restored, and the file stream closed, even when speaking fails.
        """
        cwd = os.getcwd()
        stream = comtypes.client.CreateObject('SAPI.SPFileStream')
        stream.Open(filename, SpeechLib.SSFMCreateForWrite)
        temp_stream = self._tts.AudioOutputStream
        self._tts.AudioOutputStream = stream
        try:
            self._tts.Speak(fromUtf8(toUtf8(text)))
        finally:
            self._tts.AudioOutputStream = temp_stream
            stream.close()
            os.chdir(cwd)

    @staticmethod
    def _toVoice(attr):
        """Build a ``Voice`` from a SAPI voice token."""
        return Voice(attr.Id, attr.GetDescription())

    def _tokenFromId(self, id_):
        """Return the voice token whose ``Id`` equals *id_*.

        :raises ValueError: If no installed voice has that id.
        """
        tokens = self._tts.GetVoices()
        for token in tokens:
            if token.Id == id_:
                return token
        raise ValueError('unknown voice id %s' % id_)

    def getProperty(self, name):
        """Return the value of property *name*.

        ``'rate'`` is the last words-per-minute value that was set;
        ``'volume'`` is the SAPI volume divided by 100. ``'pitch'`` only
        prints a notice and returns ``None``.

        :raises KeyError: If ``name`` is not a known property.
        """
        if name == 'voices':
            return [self._toVoice(attr) for attr in self._tts.GetVoices()]
        elif name == 'voice':
            return self._tts.Voice.Id
        elif name == 'rate':
            return self._rateWpm
        elif name == 'volume':
            return self._tts.Volume / 100.0
        elif name == 'pitch':
            print("Pitch adjustment not supported when using SAPI5")
        else:
            raise KeyError('unknown property %s' % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Rates are words per minute and are converted to a SAPI rate with
        the per-voice coefficients in ``E_REG`` (falling back to the
        ``MSMARY`` entry). ``'pitch'`` only prints a notice.

        :raises ValueError: For an unknown voice id, or when a rate or
            volume value has an unusable type (original error chained).
        :raises KeyError: If ``name`` is not a known property.
        """
        if name == 'voice':
            token = self._tokenFromId(value)
            self._tts.Voice = token
            a, b = E_REG.get(value, E_REG[MSMARY])
            self._tts.Rate = int(math.log(self._rateWpm / a, b))
        elif name == 'rate':
            id_ = self._tts.Voice.Id
            a, b = E_REG.get(id_, E_REG[MSMARY])
            try:
                self._tts.Rate = int(math.log(value / a, b))
            except TypeError as e:
                raise ValueError(str(e)) from e
            self._rateWpm = value
        elif name == 'volume':
            try:
                self._tts.Volume = int(round(value * 100, 2))
            except TypeError as e:
                raise ValueError(str(e)) from e
        elif name == 'pitch':
            print("Pitch adjustment not supported when using SAPI5")
        else:
            raise KeyError('unknown property %s' % name)

    def startLoop(self):
        """Pump COM messages every 50 ms until ``endLoop`` is called."""
        first = True
        self._looping = True
        while self._looping:
            if first:
                self._proxy.setBusy(False)
                first = False
            pythoncom.PumpWaitingMessages()
            time.sleep(0.05)

    def endLoop(self):
        """Ask ``startLoop`` to exit."""
        self._looping = False

    def iterate(self):
        """Generator that pumps waiting COM messages once per step."""
        self._proxy.setBusy(False)
        while 1:
            pythoncom.PumpWaitingMessages()
            yield
|
||||
|
||||
|
||||
# noinspection PyPep8Naming,PyProtectedMember,PyUnusedLocal,PyShadowingNames
|
||||
class SAPI5DriverEventSink(object):
    """Receives SAPI speech events and relays them to the driver's proxy."""

    def __init__(self):
        self._driver = None

    def setDriver(self, driver):
        """Remember the driver whose proxy and state the handlers use."""
        self._driver = driver

    def _ISpeechVoiceEvents_StartStream(self, stream_number, stream_position):
        """Forward a stream start as a ``'started-word'`` notification."""
        proxy = self._driver._proxy
        proxy.notify('started-word', location=stream_number, length=stream_position)

    def _ISpeechVoiceEvents_EndStream(self, stream_number, stream_position):
        """Finish the utterance, reset driver flags and end its loop."""
        driver = self._driver
        if driver._speaking:
            # A stop request in progress means the utterance was cut short.
            finished_normally = not driver._stopping
            driver._proxy.notify('finished-utterance', completed=finished_normally)
        driver._speaking = False
        driver._stopping = False
        driver._proxy.setBusy(False)
        driver.endLoop()  # hangs if you dont have this

    def _ISpeechVoiceEvents_Word(self, stream_number, stream_position, char, length):
        """Notify ``'started-word'`` with the word at ``char`` / ``length``."""
        spoken_text = self._driver._current_text
        word = spoken_text[char:char + length] if spoken_text else "Unknown"
        self._driver._proxy.notify(
            'started-word', name=word, location=char, length=length)
|
Reference in New Issue
Block a user